157 files changed, 57213 insertions, 11 deletions
diff --git a/Makefile.am b/Makefile.am
index 15ea2b3..14edaa3 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,3 +1,3 @@
 AUTOMAKE_OPTIONS = foreign
 
-SUBDIRS = src
+SUBDIRS = i915 i965
diff --git a/configure.ac b/configure.ac
index 2a78cfd..70d46ac 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,7 +1,7 @@
 # Process this file with autoconf to produce a configure script
 
 AC_PREREQ(2.57)
-AC_INIT([mesa-dri-xxx], 7.0.3, [], mesa-dri-xxx)
+AC_INIT([mesa-dri-i9xx], 7.0.3, [], mesa-dri-i9xx)
 
 AM_INIT_AUTOMAKE([dist-bzip2])
 
@@ -16,9 +16,11 @@ AC_PROG_CC
 AC_HEADER_STDC
 
 PKG_CHECK_MODULES([DRM], [libdrm >= 2.3.0])
-PKG_CHECK_MODULES([DRI], [libmesadri = 7.0.3 libmesadricommon = 7.0.3])
+PKG_CHECK_MODULES([DRI], [libmesadri >= 7.0.3 libmesadri < 7.1.0
+			  libmesadricommon >= 7.0.3 libmesadricommon < 7.1.0])
 
 AC_OUTPUT([
 	Makefile
-	src/Makefile
+	i915/Makefile
+	i965/Makefile
 ])
diff --git a/i915/Makefile.am b/i915/Makefile.am
new file mode 100644
index 0000000..c921354
--- /dev/null
+++ b/i915/Makefile.am
@@ -0,0 +1,37 @@
+AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING
+
+i915_dri_la_LTLIBRARIES = i915_dri.la
+i915_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) -Iserver
+i915_dri_la_LDFLAGS = -module -noprefix -avoid-version -lm -ldl \
+			 $(DRM_LIBS) $(DRI_LIBS)
+i915_dri_ladir = @libdir@/dri
+i915_dri_la_SOURCES = \
+	i915_context.c \
+	i915_debug.c \
+	i915_fragprog.c \
+	i915_metaops.c \
+	i915_program.c \
+	i915_state.c \
+	i915_tex.c \
+	i915_texprog.c \
+	i915_texstate.c \
+	i915_vtbl.c \
+	i830_context.c \
+	i830_metaops.c \
+	i830_state.c \
+	i830_texblend.c \
+	i830_tex.c \
+	i830_texstate.c \
+	i830_vtbl.c \
+	intel_batchbuffer.c \
+	intel_context.c \
+	intel_ioctl.c \
+	intel_pixel.c \
+	intel_render.c \
+	intel_rotate.c \
+	intel_screen.c \
+	intel_span.c \
+	intel_state.c \
+	intel_tex.c \
+	intel_texmem.c \
+	intel_tris.c 
diff --git a/i915/i830_context.c b/i915/i830_context.c
new file mode 100644
index 0000000..7ca601e
--- /dev/null
+++ b/i915/i830_context.c
@@ -0,0 +1,124 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "i830_context.h"
+#include "imports.h"
+#include "texmem.h"
+#include "intel_tex.h"
+#include "tnl/tnl.h"
+#include "tnl/t_vertex.h"
+#include "tnl/t_context.h"
+#include "utils.h"
+
+/***************************************
+ * Mesa's Driver Functions
+ ***************************************/
+
+static const struct dri_extension i830_extensions[] =   /* passed to driInitExtensions() in i830CreateContext() */
+{
+    { "GL_ARB_texture_env_crossbar",       NULL },
+    { NULL,                                NULL }   /* presumably the end-of-list marker - confirm in utils.h */
+};
+
+/* Fill *functions by calling the shared Intel initialiser, then the i830 state and texture initialisers. */
+static void i830InitDriverFunctions( struct dd_function_table *functions )
+{
+   intelInitDriverFunctions( functions );
+   i830InitStateFuncs( functions );
+   i830InitTextureFuncs( functions );
+}
+
+
+/* Allocate and initialise an i830 context.  Returns GL_FALSE if the
+ * allocation or intelInitContext() fails (the context is freed in the
+ * latter case), GL_TRUE otherwise. */
+GLboolean i830CreateContext( const __GLcontextModes *mesaVis,
+			    __DRIcontextPrivate *driContextPriv,
+			    void *sharedContextPrivate)
+{
+   struct dd_function_table functions;
+   i830ContextPtr i830 = (i830ContextPtr) CALLOC_STRUCT(i830_context);
+   intelContextPtr intel;
+   GLcontext *ctx;
+   GLuint i;
+
+   /* Check the allocation before deriving member pointers from it. */
+   if (!i830) return GL_FALSE;
+   intel = &i830->intel;
+   ctx = &intel->ctx;
+
+   i830InitVtbl( i830 );
+   i830InitDriverFunctions( &functions );
+
+   if (!intelInitContext( intel, mesaVis, driContextPriv,
+			  sharedContextPrivate, &functions )) {
+      FREE(i830);
+      return GL_FALSE;
+   }
+
+   intel->ctx.Const.MaxTextureUnits = I830_TEX_UNITS;
+   intel->ctx.Const.MaxTextureImageUnits = I830_TEX_UNITS;
+   intel->ctx.Const.MaxTextureCoordUnits = I830_TEX_UNITS;
+
+   intel->nr_heaps = 1;
+   intel->texture_heaps[0] =
+      driCreateTextureHeap( 0, intel,
+			    intel->intelScreen->tex.size,
+			    12,
+			    I830_NR_TEX_REGIONS,
+			    intel->sarea->texList,
+			    (unsigned *) & intel->sarea->texAge,
+			    & intel->swapped,
+			    sizeof( struct i830_texture_object ),
+			    (destroy_texture_object_t *)intelDestroyTexObj );
+
+   /* FIXME: driCalculateMaxTextureLevels assumes that mipmaps are tightly
+    * FIXME: packed, but they're not in Intel graphics hardware.
+    */
+   i = driQueryOptioni( &intel->optionCache, "allow_large_textures");
+   driCalculateMaxTextureLevels( intel->texture_heaps,
+				 intel->nr_heaps,
+				 &intel->ctx.Const,
+				 4,
+				 11, /* max 2D texture size is 2048x2048 */
+				 8,  /* max 3D texture size is 256^3 */
+				 10, /* max CUBE texture size is 1024x1024 */
+				 11, /* max RECT. supported */
+				 12,
+				 GL_FALSE, i );
+
+   _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 18 * sizeof(GLfloat) );
+   intel->verts = TNL_CONTEXT(ctx)->clipspace.vertex_buf;
+
+   driInitExtensions( ctx, i830_extensions, GL_FALSE );
+   i830InitState( i830 );
+   _tnl_allow_vertex_fog( ctx, 1 );
+   _tnl_allow_pixel_fog( ctx, 0 );
+
+   return GL_TRUE;
+}
+
diff --git a/i915/i830_context.h b/i915/i830_context.h
new file mode 100644
index 0000000..bae777d
--- /dev/null
+++ b/i915/i830_context.h
@@ -0,0 +1,218 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef I830CONTEXT_INC
+#define I830CONTEXT_INC
+
+#include "intel_context.h"
+
+#define I830_FALLBACK_TEXTURE		 0x1000
+#define I830_FALLBACK_COLORMASK		 0x2000
+#define I830_FALLBACK_STENCIL		 0x4000
+#define I830_FALLBACK_STIPPLE		 0x8000
+#define I830_FALLBACK_LOGICOP		 0x10000
+
+#define I830_UPLOAD_CTX              0x1
+#define I830_UPLOAD_BUFFERS          0x2
+#define I830_UPLOAD_STIPPLE          0x4
+#define I830_UPLOAD_INVARIENT        0x8
+#define I830_UPLOAD_TEX(i)           (0x10<<(i))	/* one bit per texture unit */
+#define I830_UPLOAD_TEXBLEND(i)      (0x100<<(i))	/* one bit per blend unit */
+#define I830_UPLOAD_TEX_ALL          (0x0f0)	/* I830_UPLOAD_TEX(0..3) combined */
+#define I830_UPLOAD_TEXBLEND_ALL     (0xf00)	/* I830_UPLOAD_TEXBLEND(0..3) combined */
+
+/* State structure offsets - these will probably disappear.
+ */
+#define I830_DESTREG_CBUFADDR0 0
+#define I830_DESTREG_CBUFADDR1 1
+#define I830_DESTREG_CBUFADDR2 2
+#define I830_DESTREG_DBUFADDR0 3
+#define I830_DESTREG_DBUFADDR1 4
+#define I830_DESTREG_DBUFADDR2 5
+#define I830_DESTREG_DV0 6
+#define I830_DESTREG_DV1 7
+#define I830_DESTREG_SENABLE 8
+#define I830_DESTREG_SR0 9
+#define I830_DESTREG_SR1 10
+#define I830_DESTREG_SR2 11
+#define I830_DEST_SETUP_SIZE 12
+
+#define I830_CTXREG_STATE1		0
+#define I830_CTXREG_STATE2		1
+#define I830_CTXREG_STATE3		2
+#define I830_CTXREG_STATE4		3
+#define I830_CTXREG_STATE5		4
+#define I830_CTXREG_IALPHAB		5
+#define I830_CTXREG_STENCILTST		6
+#define I830_CTXREG_ENABLES_1		7
+#define I830_CTXREG_ENABLES_2		8
+#define I830_CTXREG_AA			9
+#define I830_CTXREG_FOGCOLOR		10
+#define I830_CTXREG_BLENDCOLOR0		11
+#define I830_CTXREG_BLENDCOLOR1		12 
+#define I830_CTXREG_VF			13
+#define I830_CTXREG_VF2			14
+#define I830_CTXREG_MCSB0		15
+#define I830_CTXREG_MCSB1		16
+#define I830_CTX_SETUP_SIZE		17
+
+#define I830_STPREG_ST0        0
+#define I830_STPREG_ST1        1
+#define I830_STP_SETUP_SIZE    2
+
+#define I830_TEXREG_TM0LI      0 /* load immediate 2 texture map n */
+#define I830_TEXREG_TM0S0      1
+#define I830_TEXREG_TM0S1      2
+#define I830_TEXREG_TM0S2      3
+#define I830_TEXREG_TM0S3      4
+#define I830_TEXREG_TM0S4      5
+#define I830_TEXREG_MCS	       6	/* _3DSTATE_MAP_COORD_SETS */
+#define I830_TEXREG_CUBE       7	/* _3DSTATE_MAP_CUBE */
+#define I830_TEX_SETUP_SIZE    8	/* length of each per-texture register array */
+
+#define I830_TEXBLEND_SIZE	12	/* (4 args + op) * 2 + COLOR_FACTOR */
+
+struct i830_texture_object
+{
+   struct intel_texture_object intel;   /* shared Intel texture object; kept as the first member */
+   GLuint Setup[I830_TEX_SETUP_SIZE];   /* presumably indexed by I830_TEXREG_* - confirm in i830_texstate.c */
+};
+
+#define I830_TEX_UNITS 4
+
+struct i830_hw_state {
+   GLuint Ctx[I830_CTX_SETUP_SIZE];	/* indexed by I830_CTXREG_* */
+   GLuint Buffer[I830_DEST_SETUP_SIZE];	/* indexed by I830_DESTREG_* */
+   GLuint Stipple[I830_STP_SETUP_SIZE];	/* presumably indexed by I830_STPREG_* */
+   GLuint Tex[I830_TEX_UNITS][I830_TEX_SETUP_SIZE];	/* per unit, indexed by I830_TEXREG_* */
+   GLuint TexBlend[I830_TEX_UNITS][I830_TEXBLEND_SIZE];	/* per unit blend words */
+   GLuint TexBlendWordsUsed[I830_TEX_UNITS];	/* value returned by i830SetTexEnvCombine() */
+   GLuint emitted;		/* I830_UPLOAD_* */
+   GLuint active;		/* I830_UPLOAD_* */
+};
+
+struct i830_context 
+{
+   struct intel_context intel;   /* base context; first member, so I830_CONTEXT() can cast to it */
+   
+   DECLARE_RENDERINPUTS(last_index_bitset);
+
+   struct i830_hw_state meta, initial, state, *current;   /* current is pointed at meta or state */
+};
+
+typedef struct i830_context *i830ContextPtr;
+typedef struct i830_texture_object *i830TextureObjectPtr;
+
+#define I830_CONTEXT(ctx)	((i830ContextPtr)(ctx))	/* plain cast; relies on the base context being the first member */
+
+/* Fire pending vertices, then clear the given I830_UPLOAD_* bits from state.emitted. */
+#define I830_STATECHANGE(i830, flag)				\
+do {								\
+   INTEL_FIREVERTICES( &(i830)->intel );			\
+   (i830)->state.emitted &= ~(flag);				\
+} while (0)
+
+/* Fire pending vertices, then set (mode non-zero) or clear 'flag' in state.active. */
+#define I830_ACTIVESTATE(i830, flag, mode)	\
+do {						\
+   INTEL_FIREVERTICES( &(i830)->intel );	\
+   if (mode)					\
+      (i830)->state.active |= (flag);		\
+   else						\
+      (i830)->state.active &= ~(flag);		\
+} while (0)
+
+/* i830_vtbl.c
+ */
+extern void 
+i830InitVtbl( i830ContextPtr i830 );
+
+/* i830_context.c
+ */
+extern GLboolean 
+i830CreateContext( const __GLcontextModes *mesaVis,
+		   __DRIcontextPrivate *driContextPriv,
+		   void *sharedContextPrivate);
+
+/* i830_tex.c, i830_texstate.c
+ */
+extern void 
+i830UpdateTextureState( intelContextPtr intel );
+
+extern void 
+i830InitTextureFuncs( struct dd_function_table *functions );
+
+extern intelTextureObjectPtr
+i830AllocTexObj( struct gl_texture_object *tObj );
+
+/* i830_texblend.c
+ */
+extern GLuint i830SetTexEnvCombine(i830ContextPtr i830,
+    const struct gl_tex_env_combine_state * combine, GLint blendUnit,
+     GLuint texel_op, GLuint *state, const GLfloat *factor );
+
+extern void 
+i830EmitTextureBlend( i830ContextPtr i830 );
+
+
+/* i830_state.c
+ */
+extern void 
+i830InitStateFuncs( struct dd_function_table *functions );
+
+extern void 
+i830EmitState( i830ContextPtr i830 );
+
+extern void 
+i830InitState( i830ContextPtr i830 );
+
+/* i830_metaops.c
+ */
+extern GLboolean
+i830TryTextureReadPixels( GLcontext *ctx,
+			  GLint x, GLint y, GLsizei width, GLsizei height,
+			  GLenum format, GLenum type,
+			  const struct gl_pixelstore_attrib *pack,
+			  GLvoid *pixels );
+
+extern GLboolean
+i830TryTextureDrawPixels( GLcontext *ctx,
+			  GLint x, GLint y, GLsizei width, GLsizei height,
+			  GLenum format, GLenum type,
+			  const struct gl_pixelstore_attrib *unpack,
+			  const GLvoid *pixels );
+
+extern void 
+i830ClearWithTris( intelContextPtr intel, GLbitfield mask,
+		   GLboolean all, GLint cx, GLint cy, GLint cw, GLint ch);
+
+extern void
+i830RotateWindow(intelContextPtr intel, __DRIdrawablePrivate *dPriv,
+                 GLuint srcBuf);
+
+#endif
+
diff --git a/i915/i830_metaops.c b/i915/i830_metaops.c
new file mode 100644
index 0000000..c1d7fe3
--- /dev/null
+++ b/i915/i830_metaops.c
@@ -0,0 +1,922 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "enums.h"
+#include "mtypes.h"
+#include "macros.h"
+#include "utils.h"
+
+#include "intel_screen.h"
+#include "intel_batchbuffer.h"
+#include "intel_ioctl.h"
+
+#include "i830_context.h"
+#include "i830_reg.h"
+
+/* A large amount of state doesn't need to be uploaded for meta operations:
+ * this is the subset stored in meta.active (texture/blend unit 0 only). */
+#define ACTIVE (I830_UPLOAD_INVARIENT |         \
+		I830_UPLOAD_TEXBLEND(0) |	\
+		I830_UPLOAD_STIPPLE |		\
+		I830_UPLOAD_CTX |		\
+		I830_UPLOAD_BUFFERS |		\
+		I830_UPLOAD_TEX(0))		
+
+/* Point i830->current at the named state block, zeroing 'emitted' on both the old and the new block. */
+#define SET_STATE( i830, STATE )		\
+do {						\
+   (i830)->current->emitted = 0;		\
+   (i830)->current = &(i830)->STATE;		\
+   (i830)->current->emitted = 0;		\
+} while (0)
+
+/* Operations where the 3D engine is decoupled temporarily from the
+ * current GL state and used for other purposes than simply rendering
+ * incoming triangles.
+ */
+/* Reset the meta state to a copy of the initial state with only the ACTIVE uploads enabled. */
+static void set_initial_state( i830ContextPtr i830 )
+{
+   i830->meta = i830->initial;   /* whole-struct copy of the hardware state */
+   i830->meta.emitted = 0;
+   i830->meta.active = ACTIVE;
+}
+
+/* Disable stencil and depth test/write in the meta context registers. */
+static void set_no_depth_stencil_write( i830ContextPtr i830 )
+{
+   GLuint *regs = i830->meta.Ctx;
+
+   /* As ctx->Driver.Enable( ctx, GL_STENCIL_TEST, GL_FALSE ). */
+   regs[I830_CTXREG_ENABLES_1] &= ~ENABLE_STENCIL_TEST;
+   regs[I830_CTXREG_ENABLES_1] |= DISABLE_STENCIL_TEST;
+   regs[I830_CTXREG_ENABLES_2] &= ~ENABLE_STENCIL_WRITE;
+   regs[I830_CTXREG_ENABLES_2] |= DISABLE_STENCIL_WRITE;
+
+   /* As ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_FALSE ). */
+   regs[I830_CTXREG_ENABLES_1] &= ~ENABLE_DIS_DEPTH_TEST_MASK;
+   regs[I830_CTXREG_ENABLES_1] |= DISABLE_DEPTH_TEST;
+   regs[I830_CTXREG_ENABLES_2] &= ~ENABLE_DIS_DEPTH_WRITE_MASK;
+   regs[I830_CTXREG_ENABLES_2] |= DISABLE_DEPTH_WRITE;
+
+   i830->meta.emitted &= ~I830_UPLOAD_CTX;
+}
+
+/* Set the stencil unit of the meta state to replace always with the
+ * reference value: test function ALWAYS, all three stencil ops REPLACE,
+ * depth test and depth write disabled.  s_mask is the stencil write mask
+ * and s_clear the reference value; only the low 8 bits of each are used.
+ */
+static void set_stencil_replace( i830ContextPtr i830,
+				 GLuint s_mask,
+				 GLuint s_clear)
+{
+   GLuint *regs = i830->meta.Ctx;
+   const GLuint write_mask = s_mask & 0xff;
+   const GLuint ref_value = s_clear & 0xff;
+
+   /* As ctx->Driver.Enable( ctx, GL_STENCIL_TEST, GL_TRUE ). */
+   regs[I830_CTXREG_ENABLES_1] |= ENABLE_STENCIL_TEST;
+   regs[I830_CTXREG_ENABLES_2] |= ENABLE_STENCIL_WRITE;
+
+   /* As ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_FALSE ). */
+   regs[I830_CTXREG_ENABLES_1] &= ~ENABLE_DIS_DEPTH_TEST_MASK;
+   regs[I830_CTXREG_ENABLES_1] |= DISABLE_DEPTH_TEST;
+   regs[I830_CTXREG_ENABLES_2] &= ~ENABLE_DIS_DEPTH_WRITE_MASK;
+   regs[I830_CTXREG_ENABLES_2] |= DISABLE_DEPTH_WRITE;
+
+   /* As ctx->Driver.StencilMask( ctx, s_mask ). */
+   regs[I830_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_WRITE_MASK;
+   regs[I830_CTXREG_STATE4] |= (ENABLE_STENCIL_WRITE_MASK |
+                                STENCIL_WRITE_MASK(write_mask));
+
+   /* As ctx->Driver.StencilOp( ctx, GL_REPLACE, GL_REPLACE, GL_REPLACE ). */
+   regs[I830_CTXREG_STENCILTST] &= ~(STENCIL_OPS_MASK);
+   regs[I830_CTXREG_STENCILTST] |=
+      (ENABLE_STENCIL_PARMS |
+       STENCIL_FAIL_OP(STENCILOP_REPLACE) |
+       STENCIL_PASS_DEPTH_FAIL_OP(STENCILOP_REPLACE) |
+       STENCIL_PASS_DEPTH_PASS_OP(STENCILOP_REPLACE));
+
+   /* As ctx->Driver.StencilFunc( ctx, GL_ALWAYS, s_clear, ~0 ). */
+   regs[I830_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_TEST_MASK;
+   regs[I830_CTXREG_STATE4] |= (ENABLE_STENCIL_TEST_MASK |
+                                STENCIL_TEST_MASK(0xff));
+
+   regs[I830_CTXREG_STENCILTST] &= ~(STENCIL_REF_VALUE_MASK |
+                                     ENABLE_STENCIL_TEST_FUNC_MASK);
+   regs[I830_CTXREG_STENCILTST] |=
+      (ENABLE_STENCIL_REF_VALUE |
+       ENABLE_STENCIL_TEST_FUNC |
+       STENCIL_REF_VALUE(ref_value) |
+       STENCIL_TEST_FUNC(COMPAREFUNC_ALWAYS));
+
+   /* The context registers changed: drop the CTX bit from 'emitted'. */
+   i830->meta.emitted &= ~I830_UPLOAD_CTX;
+}
+
+
+/* Set the meta colour write-mask bits: copy them from the GL state when
+ * 'state' is true, otherwise leave all four cleared.
+ */
+static void set_color_mask( i830ContextPtr i830, GLboolean state )
+{
+   const GLuint rgba_bits = ((1 << WRITEMASK_RED_SHIFT) |
+                             (1 << WRITEMASK_GREEN_SHIFT) |
+                             (1 << WRITEMASK_BLUE_SHIFT) |
+                             (1 << WRITEMASK_ALPHA_SHIFT));
+   GLuint *enables2 = &i830->meta.Ctx[I830_CTXREG_ENABLES_2];
+
+   *enables2 &= ~rgba_bits;
+   if (state)
+      *enables2 |= (i830->state.Ctx[I830_CTXREG_ENABLES_2] & rgba_bits);
+   i830->meta.emitted &= ~I830_UPLOAD_CTX;
+}
+
+/* Installs a one-stage passthrough texture blend pipeline on blend unit 0
+ * of the meta state.  Is there more that can be done to turn off
+ * texturing? */
+static void set_no_texture( i830ContextPtr i830 )
+{
+   static const struct gl_tex_env_combine_state comb = {
+      GL_NONE, GL_NONE,
+      { GL_TEXTURE, 0, 0, }, { GL_TEXTURE, 0, 0, },
+      { GL_SRC_COLOR, 0, 0 }, { GL_SRC_ALPHA, 0, 0 },
+      0, 0, 0, 0
+   };
+
+   i830->meta.TexBlendWordsUsed[0] =
+     i830SetTexEnvCombine( i830, & comb, 0, TEXBLENDARG_TEXEL0,
+			   i830->meta.TexBlend[0], NULL);
+
+   i830->meta.TexBlend[0][0] |= TEXOP_LAST_STAGE;   /* the only stage is also the last one */
+   i830->meta.emitted &= ~I830_UPLOAD_TEXBLEND(0);
+}
+
+/* Set up a single element blend stage on blend unit 0 of the meta state
+ * for 'replace' texturing with no funny ops.
+ */
+static void enable_texture_blend_replace( i830ContextPtr i830 )
+{
+   static const struct gl_tex_env_combine_state comb = {
+      GL_REPLACE, GL_REPLACE,
+      { GL_TEXTURE, GL_TEXTURE, GL_TEXTURE }, { GL_TEXTURE, GL_TEXTURE, GL_TEXTURE, },
+      { GL_SRC_COLOR, GL_SRC_COLOR, GL_SRC_COLOR }, { GL_SRC_ALPHA, GL_SRC_ALPHA, GL_SRC_ALPHA },
+      0, 0, 1, 1
+   };
+
+   i830->meta.TexBlendWordsUsed[0] =
+     i830SetTexEnvCombine( i830, & comb, 0, TEXBLENDARG_TEXEL0,
+			   i830->meta.TexBlend[0], NULL);
+
+   i830->meta.TexBlend[0][0] |= TEXOP_LAST_STAGE;   /* the only stage is also the last one */
+   i830->meta.emitted &= ~I830_UPLOAD_TEXBLEND(0);
+
+/*    fprintf(stderr, "%s: TexBlendWordsUsed[0]: %d\n",  */
+/* 	   __FUNCTION__, i830->meta.TexBlendWordsUsed[0]); */
+}
+
+
+
+/* Set up an arbitrary piece of memory (including the front or back
+ * buffer) as the single-level rectangular texture on unit 0 of the meta
+ * state, with texture coordinates in texel units.
+ */
+static void set_tex_rect_source( i830ContextPtr i830,
+				 GLuint offset,
+				 GLuint width, 
+				 GLuint height,
+				 GLuint pitch, /* in bytes */
+				 GLuint textureFormat )
+{
+   const GLint numLevels = 1;
+   GLuint *regs = i830->meta.Tex[0];
+
+   regs[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
+                              (LOAD_TEXTURE_MAP0 << 0) | 4);
+   regs[I830_TEXREG_TM0S0] = (TM0S0_USE_FENCE | offset);
+   regs[I830_TEXREG_TM0S1] = (((height - 1) << TM0S1_HEIGHT_SHIFT) |
+                              ((width - 1) << TM0S1_WIDTH_SHIFT) |
+                              textureFormat);
+   regs[I830_TEXREG_TM0S2] = ((((pitch / 4) - 1) << TM0S2_PITCH_SHIFT));
+
+   /* Clear both mip fields, then program the level count (one level). */
+   regs[I830_TEXREG_TM0S3] &= ~TM0S3_MAX_MIP_MASK;
+   regs[I830_TEXREG_TM0S3] &= ~TM0S3_MIN_MIP_MASK;
+   regs[I830_TEXREG_TM0S3] |= ((numLevels - 1)*4) << TM0S3_MIN_MIP_SHIFT;
+
+   regs[I830_TEXREG_MCS] = (_3DSTATE_MAP_COORD_SET_CMD |
+                            MAP_UNIT(0) |
+                            ENABLE_TEXCOORD_PARAMS |
+                            TEXCOORDS_ARE_IN_TEXELUNITS |
+                            TEXCOORDTYPE_CARTESIAN |
+                            ENABLE_ADDR_V_CNTL |
+                            TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_WRAP) |
+                            ENABLE_ADDR_U_CNTL |
+                            TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_WRAP));
+
+   i830->meta.emitted &= ~I830_UPLOAD_TEX(0);
+}
+
+
+/* Point the colour buffer of the meta state at 'region' (the callers in
+ * this file pass the front or back buffer), taking offset and pitch from it.
+ */
+static void set_draw_region( i830ContextPtr i830, const intelRegion *region )
+{
+   i830->meta.Buffer[I830_DESTREG_CBUFADDR2] = region->offset;
+   i830->meta.Buffer[I830_DESTREG_CBUFADDR1] =
+      (BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(region->pitch) | BUF_3D_USE_FENCE);
+   i830->meta.emitted &= ~I830_UPLOAD_BUFFERS;
+}
+
+/* Set up an arbitrary draw format, useful for targeting
+ * texture or agp memory.  Currently compiled out (#if 0).
+ */
+#if 0
+static void set_draw_format( i830ContextPtr i830,
+			     GLuint format,
+			     GLuint depth_format)
+{
+   i830->meta.Buffer[I830_DESTREG_DV1] = (DSTORG_HORT_BIAS(0x8) | /* .5 */
+					  DSTORG_VERT_BIAS(0x8) | /* .5 */
+					  format |
+					  DEPTH_IS_Z |
+					  depth_format);
+}
+#endif
+
+
+/* Program the meta vertex format words: XYZW position, diffuse and
+ * specular colour, a texture count of one, all coordinate formats 2D.
+ */
+static void set_vertex_format( i830ContextPtr i830 )
+{
+   GLuint *regs = i830->meta.Ctx;
+
+   regs[I830_CTXREG_VF] = (_3DSTATE_VFT0_CMD | VFT0_TEX_COUNT(1) |
+                           VFT0_DIFFUSE | VFT0_SPEC | VFT0_XYZW);
+   regs[I830_CTXREG_VF2] = (_3DSTATE_VFT1_CMD |
+                            VFT1_TEX0_FMT(TEXCOORDFMT_2D) | VFT1_TEX1_FMT(TEXCOORDFMT_2D) |
+                            VFT1_TEX2_FMT(TEXCOORDFMT_2D) | VFT1_TEX3_FMT(TEXCOORDFMT_2D));
+   i830->meta.emitted &= ~I830_UPLOAD_CTX;
+}
+
+
+/* Emit one quad as a 4-vertex triangle fan (PRIM3D_TRIFAN) through
+ * intelEmitInlinePrimitiveLocked().
+ *
+ *   x0, x1, y0, y1  corner coordinates of the quad
+ *   red .. alpha    colour stored in every vertex
+ *   s0, s1, t0, t1  texture coordinates paired with x0, x1, y0, y1
+ *
+ * Vertices are written in the order (x0,y0), (x1,y0), (x1,y1), (x0,y1).
+ * Every vertex has z = w = 1.0 and a zero specular colour, and occupies
+ * eight dwords of the primitive buffer.
+ *
+ * This function only writes vertex data; the quad is drawn with
+ * whatever meta state the caller has set up.
+ *
+ * NOTE(review): the pointer returned by the emit helper is used without
+ * a check, and the helper's name suggests the hardware lock must be
+ * held by the caller - confirm both against its definition.
+ */
+static void draw_quad(i830ContextPtr i830, 
+		      GLfloat x0, GLfloat x1,
+		      GLfloat y0, GLfloat y1, 
+		      GLubyte red, GLubyte green,
+		      GLubyte blue, GLubyte alpha,
+		      GLfloat s0, GLfloat s1,
+		      GLfloat t0, GLfloat t1 )
+{
+   /* dwords per vertex: x, y, z, w, colour, specular, u0, v0 */
+   const GLuint vertex_size = 8;
+   GLuint *vb = intelEmitInlinePrimitiveLocked( &i830->intel,
+                                                PRIM3D_TRIFAN,
+                                                4*vertex_size,
+                                                vertex_size );
+   intelVertex tmp;
+   GLuint corner, dw;
+
+   /* Fields shared by all four corners. */
+   tmp.v.z = 1.0;
+   tmp.v.w = 1.0;
+   tmp.v.color.red = red;
+   tmp.v.color.green = green;
+   tmp.v.color.blue = blue;
+   tmp.v.color.alpha = alpha;
+   tmp.v.specular.red = 0;
+   tmp.v.specular.green = 0;
+   tmp.v.specular.blue = 0;
+   tmp.v.specular.alpha = 0;
+
+   for (corner = 0; corner < 4; corner++) {
+      /* Fan order: left bottom, right bottom, right top, left top. */
+      const GLboolean right = (corner == 1 || corner == 2);
+      const GLboolean top = (corner >= 2);
+
+      tmp.v.x = right ? x1 : x0;
+      tmp.v.y = top ? y1 : y0;
+      tmp.v.u0 = right ? s1 : s0;
+      tmp.v.v0 = top ? t1 : t0;
+
+      /* Copy the vertex into the buffer dword by dword through the
+       * union's ui[] view, then step to the next vertex slot.
+       */
+      for (dw = 0; dw < vertex_size; dw++)
+         vb[dw] = tmp.ui[dw];
+      vb += vertex_size;
+   }
+}
+
+/* Emit numVerts vertices as one triangle fan (PRIM3D_TRIFAN).  Vertex k
+ * takes its position from verts[k] and its texture coordinate from
+ * texcoords[k]; z = w = 1.0, colour as given, specular zero.
+ */
+static void draw_poly(i830ContextPtr i830, 
+		      GLubyte red, GLubyte green, GLubyte blue, GLubyte alpha,
+                      GLuint numVerts, GLfloat verts[][2], GLfloat texcoords[][2])
+{
+   const GLuint vertex_size = 8;
+   GLuint *vb = intelEmitInlinePrimitiveLocked( &i830->intel,
+                                                PRIM3D_TRIFAN,
+                                                numVerts * vertex_size,
+                                                vertex_size );
+   intelVertex tmp;
+   GLuint vert, dw;
+
+   /* Fields that are the same for every vertex. */
+   tmp.v.z = 1.0;
+   tmp.v.w = 1.0;
+   tmp.v.color.red = red;
+   tmp.v.color.green = green;
+   tmp.v.color.blue = blue;
+   tmp.v.color.alpha = alpha;
+   tmp.v.specular.red = 0;
+   tmp.v.specular.green = 0;
+   tmp.v.specular.blue = 0;
+   tmp.v.specular.alpha = 0;
+
+   for (vert = 0; vert < numVerts; vert++) {
+      tmp.v.x = verts[vert][0];
+      tmp.v.y = verts[vert][1];
+      tmp.v.u0 = texcoords[vert][0];
+      tmp.v.v0 = texcoords[vert][1];
+      for (dw = 0; dw < vertex_size; dw++)
+         vb[dw] = tmp.ui[dw];
+      vb += vertex_size;
+   }
+}
+
+/* Clear the buffers selected by 'mask' (front, back, stencil) by drawing
+ * quads with the meta state.  allFoo and c*Foo are unused: the clear
+ * bounds are read from the draw buffer once the hardware lock is held. */
+void 
+i830ClearWithTris(intelContextPtr intel, GLbitfield mask,
+		  GLboolean allFoo,
+		  GLint cxFoo, GLint cyFoo, GLint cwFoo, GLint chFoo)
+{
+   i830ContextPtr i830 = I830_CONTEXT( intel );
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;
+   intelScreenPrivate *screen = intel->intelScreen;
+   int x0, y0, x1, y1;
+   GLint cx, cy, cw, ch;
+   GLboolean all;
+
+   INTEL_FIREVERTICES(intel);
+   SET_STATE( i830, meta );   /* i830->current now points at the meta state */
+   set_initial_state( i830 );
+/*    set_no_texture( i830 ); */
+   set_vertex_format( i830 ); 
+
+   LOCK_HARDWARE(intel);
+
+   /* get clear bounds after locking */
+   cx = intel->ctx.DrawBuffer->_Xmin;
+   cy = intel->ctx.DrawBuffer->_Ymin;
+   cw = intel->ctx.DrawBuffer->_Xmax - cx;
+   ch = intel->ctx.DrawBuffer->_Ymax - cy;
+   all = (cw == intel->ctx.DrawBuffer->Width &&
+          ch == intel->ctx.DrawBuffer->Height);
+
+   if(!all) {
+      x0 = cx;
+      y0 = cy;
+      x1 = x0 + cw;
+      y1 = y0 + ch;
+   } else {
+      x0 = 0;
+      y0 = 0;
+      x1 = x0 + dPriv->w;
+      y1 = y0 + dPriv->h;
+   }
+
+   /* Don't do any clipping to screen - these are window coordinates.
+    * The active cliprects will be applied as for any other geometry.
+    */
+   if(mask & BUFFER_BIT_FRONT_LEFT) {
+      set_no_depth_stencil_write( i830 );
+      set_color_mask( i830, GL_TRUE );
+      set_draw_region( i830, &screen->front );
+      draw_quad(i830, x0, x1, y0, y1,
+		intel->clear_red, intel->clear_green,
+		intel->clear_blue, intel->clear_alpha,
+		0, 0, 0, 0);
+   }
+
+   if(mask & BUFFER_BIT_BACK_LEFT) {
+      set_no_depth_stencil_write( i830 );
+      set_color_mask( i830, GL_TRUE );
+      set_draw_region( i830, &screen->back );
+      draw_quad(i830, x0, x1, y0, y1,
+		intel->clear_red, intel->clear_green,
+		intel->clear_blue, intel->clear_alpha,
+		0, 0, 0, 0);
+   }
+
+   if(mask & BUFFER_BIT_STENCIL) {
+      set_stencil_replace( i830, 
+			   intel->ctx.Stencil.WriteMask[0], 
+			   intel->ctx.Stencil.Clear);
+      set_color_mask( i830, GL_FALSE );   /* colour writes masked off for the stencil pass */
+      set_draw_region( i830, &screen->front );
+      draw_quad( i830, x0, x1, y0, y1, 0, 0, 0, 0, 0, 0, 0, 0 );
+   }
+
+   UNLOCK_HARDWARE(intel);
+
+   INTEL_FIREVERTICES(intel);
+   SET_STATE( i830, state );   /* i830->current points at the GL state again */
+}
+
+
+#if 0
+
+GLboolean
+i830TryTextureReadPixels( GLcontext *ctx,
+			  GLint x, GLint y, GLsizei width, GLsizei height,
+			  GLenum format, GLenum type,
+			  const struct gl_pixelstore_attrib *pack,
+			  GLvoid *pixels )
+{
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   intelScreenPrivate *screen = i830->intel.intelScreen;
+   GLint pitch = pack->RowLength ? pack->RowLength : width;
+   __DRIdrawablePrivate *dPriv = i830->intel.driDrawable;
+   int textureFormat;
+   GLenum glTextureFormat;
+   int src_offset = i830->meta.Buffer[I830_DESTREG_CBUFADDR2];
+   int destOffset = intelAgpOffsetFromVirtual( &i830->intel, pixels);
+   int destFormat, depthFormat, destPitch;
+   drm_clip_rect_t tmp;
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+
+   if (	ctx->_ImageTransferState ||
+	pack->SwapBytes ||
+	pack->LsbFirst ||
+	!pack->Invert) {
+      fprintf(stderr, "%s: check_color failed\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   switch (screen->fbFormat) {
+   case DV_PF_565:
+      textureFormat = MAPSURF_16BIT | MT_16BIT_RGB565;
+      glTextureFormat = GL_RGB;
+      break;
+   case DV_PF_555:
+      textureFormat = MAPSURF_16BIT | MT_16BIT_ARGB1555;
+      glTextureFormat = GL_RGBA;
+      break;
+   case DV_PF_8888:
+      textureFormat = MAPSURF_32BIT | MT_32BIT_ARGB8888;
+      glTextureFormat = GL_RGBA;
+      break;
+   default:
+      fprintf(stderr, "%s: textureFormat failed %x\n", __FUNCTION__,
+	      screen->fbFormat);
+      return GL_FALSE;
+   }
+
+
+   switch (type) {
+   case GL_UNSIGNED_SHORT_5_6_5: 
+      if (format != GL_RGB) return GL_FALSE;
+      destFormat = COLR_BUF_RGB565; 
+      depthFormat = DEPTH_FRMT_16_FIXED;
+      destPitch = pitch * 2;
+      break;
+   case GL_UNSIGNED_INT_8_8_8_8_REV: 
+      if (format != GL_BGRA) return GL_FALSE;
+      destFormat = COLR_BUF_ARGB8888; 
+      depthFormat = DEPTH_FRMT_24_FIXED_8_OTHER;
+      destPitch = pitch * 4;
+      break;
+   default:
+      fprintf(stderr, "%s: destFormat failed %s\n", __FUNCTION__,
+	      _mesa_lookup_enum_by_nr(type));
+      return GL_FALSE;
+   }
+
+   destFormat |= (0x02<<24);
+
+/*    fprintf(stderr, "type: %s destFormat: %x\n", */
+/* 	   _mesa_lookup_enum_by_nr(type), */
+/* 	   destFormat); */
+
+   intelFlush( ctx );
+
+   SET_STATE( i830, meta );
+   set_initial_state( i830 );
+   set_no_depth_stencil_write( i830 );
+
+   LOCK_HARDWARE( intel );
+   {
+      intelWaitForIdle( intel ); /* required by GL */
+
+      if (!driClipRectToFramebuffer(ctx->ReadBuffer, &x, &y, &width, &height)) {
+	 UNLOCK_HARDWARE( intel );
+	 SET_STATE(i830, state);
+	 fprintf(stderr, "%s: cliprect failed\n", __FUNCTION__);
+	 return GL_TRUE;
+      }
+
+#if 0
+      /* FIXME -- Just emit the correct state
+       */
+      if (i830SetParam(i830->driFd, I830_SETPARAM_CBUFFER_PITCH, 
+		      destPitch) != 0) {
+	 UNLOCK_HARDWARE( intel );
+	 SET_STATE(i830, state);
+	 fprintf(stderr, "%s: setparam failed\n", __FUNCTION__);
+	 return GL_FALSE;
+      }
+#endif
+
+
+      y = dPriv->h - y - height;
+      x += dPriv->x;
+      y += dPriv->y;
+
+
+      /* Set the frontbuffer up as a large rectangular texture.
+       */
+      set_tex_rect_source( i830, 
+			   src_offset, 
+			   screen->width, 
+			   screen->height, 
+			   screen->front.pitch, 
+			   textureFormat ); 
+   
+   
+      enable_texture_blend_replace( i830 ); 
+
+
+      /* Set the 3d engine to draw into the agp memory
+       */
+
+      set_draw_region( i830, destOffset ); 
+      set_draw_format( i830, destFormat, depthFormat );  
+
+
+      /* Draw a single quad, no cliprects:
+       */
+      i830->intel.numClipRects = 1;
+      i830->intel.pClipRects = &tmp;
+      i830->intel.pClipRects[0].x1 = 0;
+      i830->intel.pClipRects[0].y1 = 0;
+      i830->intel.pClipRects[0].x2 = width;
+      i830->intel.pClipRects[0].y2 = height;
+
+      draw_quad( i830, 
+		 0, width, 0, height, 
+		 0, 255, 0, 0, 
+		 x, x+width, y, y+height );
+
+      intelWindowMoved( intel );
+   }
+   UNLOCK_HARDWARE( intel );
+   intelFinish( ctx ); /* required by GL */
+
+   SET_STATE( i830, state );
+   return GL_TRUE;
+}
+
+
+/**
+ * Try to accelerate glDrawPixels by texturing straight from the client's
+ * pixel data and drawing a single quad with the meta-ops state.
+ *
+ * The fast path is only taken when the image lies in AGP memory, the
+ * format/type pair is one handled by the switch below, the pixel zoom is
+ * exactly (1, -1) and none of the GL state tested below is active.
+ *
+ * Returns GL_FALSE when the request is not handled here, GL_TRUE once it
+ * has been handled (including when clipping leaves nothing to draw).
+ */
+GLboolean
+i830TryTextureDrawPixels( GLcontext *ctx,
+			  GLint x, GLint y, GLsizei width, GLsizei height,
+			  GLenum format, GLenum type,
+			  const struct gl_pixelstore_attrib *unpack,
+			  const GLvoid *pixels )
+{
+   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   GLint pitch = unpack->RowLength ? unpack->RowLength : width;
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;
+   int textureFormat;
+   int dst_offset = i830->meta.Buffer[I830_DESTREG_CBUFADDR2];
+   int src_offset = intelAgpOffsetFromVirtual( intel, pixels );
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   /* Todo -- upload images that aren't in agp space, then texture
+    * from them.  NOTE(review): pitch*height counts pixels, not bytes --
+    * confirm the unit intelIsAgpMemory() expects.
+    */
+   if ( !intelIsAgpMemory( intel, pixels, pitch*height ) ) {
+      fprintf(stderr, "%s: intelIsAgpMemory failed\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   /* Todo -- don't want to clobber all the drawing state like we do
+    * for readpixels -- most of this state can be handled just fine.
+    */
+   if (	ctx->_ImageTransferState ||
+	unpack->SwapBytes ||
+	unpack->LsbFirst ||
+	ctx->Color.AlphaEnabled ||
+	ctx->Depth.Test ||
+	ctx->Fog.Enabled ||
+	ctx->Scissor.Enabled ||
+	ctx->Stencil.Enabled ||
+	!ctx->Color.ColorMask[0] ||
+	!ctx->Color.ColorMask[1] ||
+	!ctx->Color.ColorMask[2] ||
+	!ctx->Color.ColorMask[3] ||
+	ctx->Color.ColorLogicOpEnabled ||
+	ctx->Texture._EnabledUnits ||
+	ctx->Depth.OcclusionTest) {
+      fprintf(stderr, "%s: other tests failed\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   /* Todo -- remove these restrictions: */
+   if (ctx->Pixel.ZoomX != 1.0F ||
+       ctx->Pixel.ZoomY != -1.0F)
+      return GL_FALSE;
+
+   /* Map the client format/type pair to a hardware texture format. */
+   switch (type) {
+   case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+      if (format != GL_BGRA) return GL_FALSE;
+      textureFormat = MAPSURF_16BIT | MT_16BIT_ARGB1555;
+      break;
+   case GL_UNSIGNED_SHORT_5_6_5:
+      if (format != GL_RGB) return GL_FALSE;
+      textureFormat = MAPSURF_16BIT | MT_16BIT_RGB565;
+      break;
+   case GL_UNSIGNED_SHORT_8_8_MESA:
+      if (format != GL_YCBCR_MESA) return GL_FALSE;
+      textureFormat = (MAPSURF_422 | MT_422_YCRCB_SWAPY
+/*  		       | TM0S1_COLORSPACE_CONVERSION */
+	 );
+      break;
+   case GL_UNSIGNED_SHORT_8_8_REV_MESA:
+      if (format != GL_YCBCR_MESA) return GL_FALSE;
+      textureFormat = (MAPSURF_422 | MT_422_YCRCB_NORMAL
+/* 		       | TM0S1_COLORSPACE_CONVERSION */
+	 );
+      break;
+   case GL_UNSIGNED_INT_8_8_8_8_REV:
+      if (format != GL_BGRA) return GL_FALSE;
+      textureFormat = MAPSURF_32BIT | MT_32BIT_ARGB8888;
+      break;
+   default:
+      fprintf(stderr, "%s: destFormat failed\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   intelFlush( ctx );
+
+   SET_STATE( i830, meta );
+
+   LOCK_HARDWARE( intel );
+   {
+      intelWaitForIdle( intel ); /* required by GL */
+
+      y -= height;			/* cope with pixel zoom */
+
+      /* The destination rectangle belongs to the draw framebuffer, so
+       * clip against ctx->DrawBuffer rather than the read framebuffer.
+       */
+      if (!driClipRectToFramebuffer(ctx->DrawBuffer, &x, &y, &width, &height)) {
+	 UNLOCK_HARDWARE( intel );
+	 SET_STATE(i830, state);
+	 fprintf(stderr, "%s: cliprect failed\n", __FUNCTION__);
+	 return GL_TRUE;
+      }
+
+      y = dPriv->h - y - height;	/* flip y within the drawable's height */
+
+      set_initial_state( i830 );
+
+      /* Set the pixel image up as a rectangular texture.
+       */
+      set_tex_rect_source( i830,
+			   src_offset,
+			   width,
+			   height,
+			   pitch, /* XXXX!!!! -- /2 sometimes */
+			   textureFormat );
+
+      enable_texture_blend_replace( i830 );
+
+      /* Draw to the current draw buffer:
+       */
+      set_draw_offset( i830, dst_offset );
+
+      /* Draw a quad, use regular cliprects */
+      draw_quad( i830,
+		 x, x+width, y, y+height,
+		 0, 255, 0, 0,
+		 0, width, 0, height );
+
+      intelWindowMoved( intel );
+   }
+   UNLOCK_HARDWARE( intel );
+   intelFinish( ctx ); /* required by GL */
+
+   SET_STATE(i830, state);
+
+   return GL_TRUE;
+}
+
+#endif
+
+/**
+ * Copy the window contents named by dPriv to the rotated (or reflected)
+ * color buffer, drawing one textured polygon per source cliprect.  srcBuf
+ * picks the source: BUFFER_BIT_FRONT_LEFT = front buffer, else back buffer.
+ */
+void
+i830RotateWindow(intelContextPtr intel, __DRIdrawablePrivate *dPriv,
+                 GLuint srcBuf)
+{
+   i830ContextPtr i830 = I830_CONTEXT( intel );
+   intelScreenPrivate *screen = intel->intelScreen;
+   const GLuint cpp = screen->cpp;
+   drm_clip_rect_t fullRect;
+   GLuint textureFormat, srcOffset, srcPitch;
+   const drm_clip_rect_t *clipRects;
+   int numClipRects;
+   int i;
+   /* context draw state saved before, and restored after, the copy */
+   int xOrig, yOrig;
+   int origNumClipRects;
+   drm_clip_rect_t *origRects;
+
+   /*
+    * set up hardware state (intelFlush() runs before switching to meta)
+    */
+   intelFlush( &intel->ctx );
+
+   SET_STATE( i830, meta ); 
+   set_initial_state( i830 ); 
+   set_no_texture( i830 ); 
+   set_vertex_format( i830 ); 
+   set_no_depth_stencil_write( i830 );
+   set_color_mask( i830, GL_FALSE );
+
+   LOCK_HARDWARE(intel);
+
+   /* save current drawing origin and cliprects (restored at end) */
+   xOrig = intel->drawX;
+   yOrig = intel->drawY;
+   origNumClipRects = intel->numClipRects;
+   origRects = intel->pClipRects;
+   /* no context cliprects: skip the copy, go straight to restore/unlock */
+   if (!intel->numClipRects)
+      goto done;
+
+   /*
+    * set drawing origin, cliprects for full-screen access to rotated screen
+    */
+   fullRect.x1 = 0;
+   fullRect.y1 = 0;
+   fullRect.x2 = screen->rotatedWidth;
+   fullRect.y2 = screen->rotatedHeight;
+   intel->drawX = 0;
+   intel->drawY = 0;
+   intel->numClipRects = 1;
+   intel->pClipRects = &fullRect;
+
+   set_draw_region( i830, &screen->rotated );
+
+   if (cpp == 4)
+      textureFormat = MAPSURF_32BIT | MT_32BIT_ARGB8888;
+   else
+      textureFormat = MAPSURF_16BIT | MT_16BIT_RGB565;
+
+   if (srcBuf == BUFFER_BIT_FRONT_LEFT) {
+      srcPitch = screen->front.pitch;   /* in bytes */
+      srcOffset = screen->front.offset; /* bytes */
+      clipRects = dPriv->pClipRects;
+      numClipRects = dPriv->numClipRects;
+   }
+   else {
+      srcPitch = screen->back.pitch;   /* in bytes */
+      srcOffset = screen->back.offset; /* bytes */
+      clipRects = dPriv->pBackClipRects;
+      numClipRects = dPriv->numBackClipRects;
+   }
+
+   /* set the whole screen up as a texture to avoid alignment issues */
+   set_tex_rect_source(i830,
+                       srcOffset,
+                       screen->width,
+                       screen->height,
+                       srcPitch,
+                       textureFormat);
+
+   enable_texture_blend_replace(i830);
+
+   /*
+    * loop over the source window's cliprects
+    */
+   for (i = 0; i < numClipRects; i++) {
+      int srcX0 = clipRects[i].x1;
+      int srcY0 = clipRects[i].y1;
+      int srcX1 = clipRects[i].x2;
+      int srcY1 = clipRects[i].y2;
+      GLfloat verts[4][2], tex[4][2];
+      int j;
+
+      /* build vertices for four corners of clip rect */
+      verts[0][0] = srcX0;  verts[0][1] = srcY0;
+      verts[1][0] = srcX1;  verts[1][1] = srcY0;
+      verts[2][0] = srcX1;  verts[2][1] = srcY1;
+      verts[3][0] = srcX0;  verts[3][1] = srcY1;
+
+      /* .. and texcoords */
+      tex[0][0] = srcX0;  tex[0][1] = srcY0;
+      tex[1][0] = srcX1;  tex[1][1] = srcY0;
+      tex[2][0] = srcX1;  tex[2][1] = srcY1;
+      tex[3][0] = srcX0;  tex[3][1] = srcY1;
+
+      /* transform the vertex positions to rotated screen coords; the
+       * texcoords keep the source cliprect coordinates built above */
+      for (j = 0; j < 4; j++) {
+         matrix23TransformCoordf(&screen->rotMatrix,
+                                 &verts[j][0], &verts[j][1]);
+      }
+
+      /* draw polygon to map source image to dest region */
+      draw_poly(i830, 255, 255, 255, 255, 4, verts, tex);
+
+   } /* cliprect loop */
+
+   intelFlushBatchLocked( intel, GL_FALSE, GL_FALSE, GL_FALSE );
+
+ done:
+   /* restore original drawing origin and cliprects */
+   intel->drawX = xOrig;
+   intel->drawY = yOrig;
+   intel->numClipRects = origNumClipRects;
+   intel->pClipRects = origRects;
+
+   UNLOCK_HARDWARE(intel);
+
+   SET_STATE( i830, state );
+}
+
diff --git a/i915/i830_reg.h b/i915/i830_reg.h
new file mode 100644
index 0000000..98cee2f
--- /dev/null
+++ b/i915/i830_reg.h
@@ -0,0 +1,641 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#ifndef _I830_REG_H_
+#define _I830_REG_H_
+
+
+#include "intel_reg.h"
+
+#define I830_SET_FIELD( var, mask, value ) ((var) &= ~(mask), (var) |= (value)) /* clears mask bits, ORs in value; evaluates var twice */
+
+#define _3DSTATE_AA_CMD			(CMD_3D | (0x06<<24))
+#define AA_LINE_ECAAR_WIDTH_ENABLE	(1<<16)
+#define AA_LINE_ECAAR_WIDTH_0_5 	0
+#define AA_LINE_ECAAR_WIDTH_1_0		(1<<14)
+#define AA_LINE_ECAAR_WIDTH_2_0 	(2<<14)
+#define AA_LINE_ECAAR_WIDTH_4_0 	(3<<14)
+#define AA_LINE_REGION_WIDTH_ENABLE	(1<<8)
+#define AA_LINE_REGION_WIDTH_0_5	0
+#define AA_LINE_REGION_WIDTH_1_0	(1<<6)
+#define AA_LINE_REGION_WIDTH_2_0	(2<<6)
+#define AA_LINE_REGION_WIDTH_4_0	(3<<6)
+#define AA_LINE_ENABLE			((1<<1) | 1)
+#define AA_LINE_DISABLE			(1<<1)
+
+#define _3DSTATE_BUF_INFO_CMD	(CMD_3D | (0x1d<<24) | (0x8e<<16) | 1)
+/* Dword 1 */
+#define BUF_3D_ID_COLOR_BACK	(0x3<<24)
+#define BUF_3D_ID_DEPTH 	(0x7<<24)
+#define BUF_3D_USE_FENCE	(1<<23)
+#define BUF_3D_TILED_SURFACE	(1<<22)
+#define BUF_3D_TILE_WALK_X	0
+#define BUF_3D_TILE_WALK_Y	(1<<21)
+#define BUF_3D_PITCH(x)         (((x)/4)<<2) /* x rounded down to a multiple of 4 (for non-negative x) */
+/* Dword 2 */
+#define BUF_3D_ADDR(x)		((x) & ~0x3) /* x with its low two bits cleared */
+
+
+#define _3DSTATE_COLOR_FACTOR_CMD	(CMD_3D | (0x1d<<24) | (0x1<<16))
+
+#define _3DSTATE_COLOR_FACTOR_N_CMD(stage)	(CMD_3D | (0x1d<<24) | \
+					         ((0x90+(stage))<<16))
+
+#define _3DSTATE_CONST_BLEND_COLOR_CMD	(CMD_3D | (0x1d<<24) | (0x88<<16))
+
+#define _3DSTATE_DFLT_DIFFUSE_CMD	(CMD_3D | (0x1d<<24) | (0x99<<16))
+
+#define _3DSTATE_DFLT_SPEC_CMD		(CMD_3D | (0x1d<<24) | (0x9a<<16))
+
+#define _3DSTATE_DFLT_Z_CMD		(CMD_3D | (0x1d<<24) | (0x98<<16))
+
+
+#define _3DSTATE_DST_BUF_VARS_CMD	(CMD_3D | (0x1d<<24) | (0x85<<16))
+/* Dword 1 */
+#define DSTORG_HORT_BIAS(x)		((x)<<20)
+#define DSTORG_VERT_BIAS(x)		((x)<<16)
+#define COLOR_4_2_2_CHNL_WRT_ALL	0
+#define COLOR_4_2_2_CHNL_WRT_Y		(1<<12)
+#define COLOR_4_2_2_CHNL_WRT_CR		(2<<12)
+#define COLOR_4_2_2_CHNL_WRT_CB		(3<<12)
+#define COLOR_4_2_2_CHNL_WRT_CRCB	(4<<12)
+#define COLR_BUF_8BIT			0
+#define COLR_BUF_RGB555 		(1<<8)
+#define COLR_BUF_RGB565 		(2<<8)
+#define COLR_BUF_ARGB8888		(3<<8)
+#define DEPTH_IS_Z			0
+#define DEPTH_IS_W			(1<<6)
+#define DEPTH_FRMT_16_FIXED		0
+#define DEPTH_FRMT_16_FLOAT		(1<<2)
+#define DEPTH_FRMT_24_FIXED_8_OTHER	(2<<2)
+#define DEPTH_FRMT_24_FLOAT_8_OTHER	(3<<2)
+#define VERT_LINE_STRIDE_1		(1<<1)
+#define VERT_LINE_STRIDE_0		0
+#define VERT_LINE_STRIDE_OFS_1		1
+#define VERT_LINE_STRIDE_OFS_0		0
+
+
+#define _3DSTATE_DRAW_RECT_CMD		(CMD_3D|(0x1d<<24)|(0x80<<16)|3)
+/* Dword 1 */
+#define DRAW_RECT_DIS_DEPTH_OFS 	(1<<30)
+#define DRAW_DITHER_OFS_X(x)		((x)<<26)
+#define DRAW_DITHER_OFS_Y(x)		((x)<<24)
+/* Dword 2 */
+#define DRAW_YMIN(x)			((x)<<16)
+#define DRAW_XMIN(x)			(x)
+/* Dword 3 */
+#define DRAW_YMAX(x)			((x)<<16)
+#define DRAW_XMAX(x)			(x)
+/* Dword 4 */
+#define DRAW_YORG(x)			((x)<<16)
+#define DRAW_XORG(x)			(x)
+
+
+#define _3DSTATE_ENABLES_1_CMD		(CMD_3D|(0x3<<24))
+#define ENABLE_LOGIC_OP_MASK		((1<<23)|(1<<22))
+#define ENABLE_LOGIC_OP 		((1<<23)|(1<<22))
+#define DISABLE_LOGIC_OP		(1<<23)
+#define ENABLE_STENCIL_TEST		((1<<21)|(1<<20))
+#define DISABLE_STENCIL_TEST		(1<<21)
+#define ENABLE_DEPTH_BIAS		((1<<11)|(1<<10))
+#define DISABLE_DEPTH_BIAS		(1<<11)
+#define ENABLE_SPEC_ADD_MASK		((1<<9)|(1<<8))
+#define ENABLE_SPEC_ADD 		((1<<9)|(1<<8))
+#define DISABLE_SPEC_ADD		(1<<9)
+#define ENABLE_DIS_FOG_MASK		((1<<7)|(1<<6))
+#define ENABLE_FOG			((1<<7)|(1<<6))
+#define DISABLE_FOG			(1<<7)
+#define ENABLE_DIS_ALPHA_TEST_MASK	((1<<5)|(1<<4))
+#define ENABLE_ALPHA_TEST		((1<<5)|(1<<4))
+#define DISABLE_ALPHA_TEST		(1<<5)
+#define ENABLE_DIS_CBLEND_MASK		((1<<3)|(1<<2))
+#define ENABLE_COLOR_BLEND		((1<<3)|(1<<2))
+#define DISABLE_COLOR_BLEND		(1<<3)
+#define ENABLE_DIS_DEPTH_TEST_MASK	((1<<1)|1)
+#define ENABLE_DEPTH_TEST		((1<<1)|1)
+#define DISABLE_DEPTH_TEST		(1<<1)
+
+/* _3DSTATE_ENABLES_2, p138 */
+#define _3DSTATE_ENABLES_2_CMD		(CMD_3D|(0x4<<24))
+#define ENABLE_STENCIL_WRITE		((1<<21)|(1<<20))
+#define DISABLE_STENCIL_WRITE		(1<<21)
+#define ENABLE_TEX_CACHE		((1<<17)|(1<<16))
+#define DISABLE_TEX_CACHE		(1<<17)
+#define ENABLE_DITHER			((1<<9)|(1<<8))
+#define DISABLE_DITHER			(1<<9)
+#define ENABLE_COLOR_MASK		(1<<10)
+#define WRITEMASK_ALPHA			(1<<7)
+#define WRITEMASK_ALPHA_SHIFT		7
+#define WRITEMASK_RED			(1<<6)
+#define WRITEMASK_RED_SHIFT		6
+#define WRITEMASK_GREEN 		(1<<5)
+#define WRITEMASK_GREEN_SHIFT		5
+#define WRITEMASK_BLUE			(1<<4)
+#define WRITEMASK_BLUE_SHIFT		4
+#define WRITEMASK_MASK			((1<<4)|(1<<5)|(1<<6)|(1<<7))
+#define ENABLE_COLOR_WRITE		((1<<3)|(1<<2))
+#define DISABLE_COLOR_WRITE		(1<<3)
+#define ENABLE_DIS_DEPTH_WRITE_MASK	0x3
+#define ENABLE_DEPTH_WRITE		((1<<1)|1)
+#define DISABLE_DEPTH_WRITE		(1<<1)
+
+/* _3DSTATE_FOG_COLOR, p139 */
+#define _3DSTATE_FOG_COLOR_CMD		(CMD_3D|(0x15<<24))
+#define FOG_COLOR_RED(x)		((x)<<16)
+#define FOG_COLOR_GREEN(x)		((x)<<8)
+#define FOG_COLOR_BLUE(x)		(x)
+
+/* _3DSTATE_FOG_MODE, p140 */
+#define _3DSTATE_FOG_MODE_CMD		(CMD_3D|(0x1d<<24)|(0x89<<16)|2)
+/* Dword 1 */
+#define FOGFUNC_ENABLE			(1<<31)
+#define FOGFUNC_VERTEX			0
+#define FOGFUNC_PIXEL_EXP		(1<<28)
+#define FOGFUNC_PIXEL_EXP2		(2<<28)
+#define FOGFUNC_PIXEL_LINEAR		(3<<28)
+#define FOGSRC_INDEX_Z			(1<<27)
+#define FOGSRC_INDEX_W			((1<<27)|(1<<25))
+#define FOG_LINEAR_CONST		(1<<24)
+#define FOG_CONST_1(x)			((x)<<4)
+#define ENABLE_FOG_DENSITY		(1<<23)
+/* Dword 2 */
+#define FOG_CONST_2(x)			(x)
+/* Dword 3 */
+#define FOG_DENSITY(x)			(x)
+
+/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p142 */
+#define _3DSTATE_INDPT_ALPHA_BLEND_CMD	(CMD_3D|(0x0b<<24))
+#define ENABLE_INDPT_ALPHA_BLEND	((1<<23)|(1<<22))
+#define DISABLE_INDPT_ALPHA_BLEND	(1<<23)
+#define ALPHA_BLENDFUNC_MASK		0x3f0000
+#define ENABLE_ALPHA_BLENDFUNC		(1<<21)
+#define ABLENDFUNC_ADD			0
+#define ABLENDFUNC_SUB			(1<<16)
+#define ABLENDFUNC_RVSE_SUB		(2<<16)
+#define ABLENDFUNC_MIN			(3<<16)
+#define ABLENDFUNC_MAX			(4<<16)
+#define SRC_DST_ABLEND_MASK		0xfff
+#define ENABLE_SRC_ABLEND_FACTOR	(1<<11)
+#define SRC_ABLEND_FACT(x)		((x)<<6)
+#define ENABLE_DST_ABLEND_FACTOR	(1<<5)
+#define DST_ABLEND_FACT(x)		(x)
+
+
+/* _3DSTATE_MAP_BLEND_ARG, p152 */
+#define _3DSTATE_MAP_BLEND_ARG_CMD(stage)	(CMD_3D|(0x0e<<24)|((stage)<<20))
+
+#define TEXPIPE_COLOR			0
+#define TEXPIPE_ALPHA			(1<<18)
+#define TEXPIPE_KILL			(2<<18)
+#define TEXBLEND_ARG0			0
+#define TEXBLEND_ARG1			(1<<15)
+#define TEXBLEND_ARG2			(2<<15)
+#define TEXBLEND_ARG3			(3<<15)
+#define TEXBLENDARG_MODIFY_PARMS	(1<<6)
+#define TEXBLENDARG_REPLICATE_ALPHA 	(1<<5)
+#define TEXBLENDARG_INV_ARG 		(1<<4)
+#define TEXBLENDARG_ONE 		0
+#define TEXBLENDARG_FACTOR		0x01
+#define TEXBLENDARG_ACCUM		0x02
+#define TEXBLENDARG_DIFFUSE		0x03
+#define TEXBLENDARG_SPEC		0x04
+#define TEXBLENDARG_CURRENT		0x05
+#define TEXBLENDARG_TEXEL0		0x06
+#define TEXBLENDARG_TEXEL1		0x07
+#define TEXBLENDARG_TEXEL2		0x08
+#define TEXBLENDARG_TEXEL3		0x09
+#define TEXBLENDARG_FACTOR_N		0x0e
+
+/* _3DSTATE_MAP_BLEND_OP, p155 */
+#define _3DSTATE_MAP_BLEND_OP_CMD(stage)	(CMD_3D|(0x0d<<24)|((stage)<<20))
+#if 0 /* disabled: the same TEXPIPE_* names are already defined in the MAP_BLEND_ARG group above */
+#   define TEXPIPE_COLOR		0
+#   define TEXPIPE_ALPHA		(1<<18)
+#   define TEXPIPE_KILL			(2<<18)
+#endif
+#define ENABLE_TEXOUTPUT_WRT_SEL	(1<<17)
+#define TEXOP_OUTPUT_CURRENT		0
+#define TEXOP_OUTPUT_ACCUM		(1<<15)
+#define ENABLE_TEX_CNTRL_STAGE		((1<<12)|(1<<11))
+#define DISABLE_TEX_CNTRL_STAGE		(1<<12)
+#define TEXOP_SCALE_SHIFT		9
+#define TEXOP_SCALE_1X			(0 << TEXOP_SCALE_SHIFT)
+#define TEXOP_SCALE_2X			(1 << TEXOP_SCALE_SHIFT)
+#define TEXOP_SCALE_4X			(2 << TEXOP_SCALE_SHIFT)
+#define TEXOP_MODIFY_PARMS		(1<<8)
+#define TEXOP_LAST_STAGE		(1<<7)
+#define TEXBLENDOP_KILLPIXEL		0x02 /* NOTE(review): same value as TEXBLENDOP_ARG2 below -- confirm */
+#define TEXBLENDOP_ARG1 		0x01
+#define TEXBLENDOP_ARG2 		0x02
+#define TEXBLENDOP_MODULATE		0x03
+#define TEXBLENDOP_ADD			0x06
+#define TEXBLENDOP_ADDSIGNED		0x07
+#define TEXBLENDOP_BLEND		0x08
+#define TEXBLENDOP_BLEND_AND_ADD	0x09
+#define TEXBLENDOP_SUBTRACT		0x0a
+#define TEXBLENDOP_DOT3 		0x0b
+#define TEXBLENDOP_DOT4 		0x0c
+#define TEXBLENDOP_MODULATE_AND_ADD	0x0d
+#define TEXBLENDOP_MODULATE_2X_AND_ADD	0x0e
+#define TEXBLENDOP_MODULATE_4X_AND_ADD	0x0f
+
+/* _3DSTATE_MAP_BUMP_TABLE, p160 TODO */
+/* _3DSTATE_MAP_COLOR_CHROMA_KEY, p161 TODO */
+
+#define _3DSTATE_MAP_COORD_TRANSFORM	((3<<29)|(0x1d<<24)|(0x8c<<16))
+#define DISABLE_TEX_TRANSFORM		(1<<28)
+#define TEXTURE_SET(x)			((x)<<29) /* argument parenthesized so an expression is shifted as a whole */
+
+#define _3DSTATE_VERTEX_TRANSFORM	((3<<29)|(0x1d<<24)|(0x8b<<16))
+#define DISABLE_VIEWPORT_TRANSFORM	(1<<31) /* defined again, with the same value, in the p207 group further down */
+#define DISABLE_PERSPECTIVE_DIVIDE	(1<<29) /* same value as DISABLE_PERSP_DIVIDE in the p207 group further down */
+
+
+/* _3DSTATE_MAP_COORD_SET_BINDINGS, p162 */
+#define _3DSTATE_MAP_COORD_SETBIND_CMD	(CMD_3D|(0x1d<<24)|(0x02<<16))
+#define TEXBIND_MASK3			((1<<15)|(1<<14)|(1<<13)|(1<<12))
+#define TEXBIND_MASK2			((1<<11)|(1<<10)|(1<<9)|(1<<8))
+#define TEXBIND_MASK1			((1<<7)|(1<<6)|(1<<5)|(1<<4))
+#define TEXBIND_MASK0			((1<<3)|(1<<2)|(1<<1)|1)
+
+#define TEXBIND_SET3(x) 		((x)<<12)
+#define TEXBIND_SET2(x) 		((x)<<8)
+#define TEXBIND_SET1(x) 		((x)<<4)
+#define TEXBIND_SET0(x) 		(x)
+
+#define TEXCOORDSRC_KEEP		0
+#define TEXCOORDSRC_DEFAULT		0x01
+#define TEXCOORDSRC_VTXSET_0		0x08
+#define TEXCOORDSRC_VTXSET_1		0x09
+#define TEXCOORDSRC_VTXSET_2		0x0a
+#define TEXCOORDSRC_VTXSET_3		0x0b
+#define TEXCOORDSRC_VTXSET_4		0x0c
+#define TEXCOORDSRC_VTXSET_5		0x0d
+#define TEXCOORDSRC_VTXSET_6		0x0e
+#define TEXCOORDSRC_VTXSET_7		0x0f
+
+#define MAP_UNIT(unit)			((unit)<<16)
+#define MAP_UNIT_MASK			(0x7<<16)
+
+/* _3DSTATE_MAP_COORD_SETS, p164 */
+#define _3DSTATE_MAP_COORD_SET_CMD	(CMD_3D|(0x1c<<24)|(0x01<<19))
+#define ENABLE_TEXCOORD_PARAMS		(1<<15)
+#define TEXCOORDS_ARE_NORMAL		(1<<14)
+#define TEXCOORDS_ARE_IN_TEXELUNITS	0
+#define TEXCOORDTYPE_CARTESIAN		0
+#define TEXCOORDTYPE_HOMOGENEOUS	(1<<11)
+#define TEXCOORDTYPE_VECTOR		(2<<11)
+#define TEXCOORDTYPE_MASK	        (0x7<<11)
+#define ENABLE_ADDR_V_CNTL		(1<<7)
+#define ENABLE_ADDR_U_CNTL		(1<<3)
+#define TEXCOORD_ADDR_V_MODE(x) 	((x)<<4)
+#define TEXCOORD_ADDR_U_MODE(x) 	(x)
+#define TEXCOORDMODE_WRAP		0
+#define TEXCOORDMODE_MIRROR		1
+#define TEXCOORDMODE_CLAMP		2
+#define TEXCOORDMODE_WRAP_SHORTEST	3
+#define TEXCOORDMODE_CLAMP_BORDER	4
+#define TEXCOORD_ADDR_V_MASK		0x70
+#define TEXCOORD_ADDR_U_MASK		0x7
+
+/* _3DSTATE_MAP_CUBE, p168 TODO */
+#define _3DSTATE_MAP_CUBE		(CMD_3D|(0x1c<<24)|(0x0a<<19))
+#define CUBE_NEGX_ENABLE                (1<<5)
+#define CUBE_POSX_ENABLE                (1<<4)
+#define CUBE_NEGY_ENABLE                (1<<3)
+#define CUBE_POSY_ENABLE                (1<<2)
+#define CUBE_NEGZ_ENABLE                (1<<1)
+#define CUBE_POSZ_ENABLE                (1<<0)
+
+
+/* _3DSTATE_MODES_1, p190 */
+#define _3DSTATE_MODES_1_CMD		(CMD_3D|(0x08<<24))
+#define BLENDFUNC_MASK			0x3f0000
+#define ENABLE_COLR_BLND_FUNC		(1<<21)
+#define BLENDFUNC_ADD			0
+#define BLENDFUNC_SUB			(1<<16)
+#define BLENDFUNC_RVRSE_SUB		(2<<16)
+#define BLENDFUNC_MIN			(3<<16)
+#define BLENDFUNC_MAX			(4<<16)
+#define SRC_DST_BLND_MASK		0xfff
+#define ENABLE_SRC_BLND_FACTOR		(1<<11)
+#define ENABLE_DST_BLND_FACTOR		(1<<5)
+#define SRC_BLND_FACT(x)		((x)<<6)
+#define DST_BLND_FACT(x)		(x)
+
+
+/* _3DSTATE_MODES_2, p192 */
+#define _3DSTATE_MODES_2_CMD		(CMD_3D|(0x0f<<24))
+#define ENABLE_GLOBAL_DEPTH_BIAS	(1<<22)
+#define GLOBAL_DEPTH_BIAS(x)		((x)<<14)
+#define ENABLE_ALPHA_TEST_FUNC		(1<<13)
+#define ENABLE_ALPHA_REF_VALUE		(1<<8)
+#define ALPHA_TEST_FUNC(x)		((x)<<9)
+#define ALPHA_REF_VALUE(x)		(x)
+
+#define ALPHA_TEST_REF_MASK		0x3fff
+
+/* _3DSTATE_MODES_3, p193 */
+#define _3DSTATE_MODES_3_CMD		(CMD_3D|(0x02<<24))
+#define DEPTH_TEST_FUNC_MASK		0x1f0000
+#define ENABLE_DEPTH_TEST_FUNC		(1<<20)
+/* Uses COMPAREFUNC */
+#define DEPTH_TEST_FUNC(x)		((x)<<16)
+#define ENABLE_ALPHA_SHADE_MODE 	(1<<11)
+#define ENABLE_FOG_SHADE_MODE		(1<<9)
+#define ENABLE_SPEC_SHADE_MODE		(1<<7)
+#define ENABLE_COLOR_SHADE_MODE 	(1<<5)
+#define ALPHA_SHADE_MODE(x)		((x)<<10)
+#define FOG_SHADE_MODE(x)		((x)<<8)
+#define SPEC_SHADE_MODE(x)		((x)<<6)
+#define COLOR_SHADE_MODE(x)		((x)<<4)
+#define CULLMODE_MASK			0xf
+#define ENABLE_CULL_MODE		(1<<3)
+#define CULLMODE_BOTH			0
+#define CULLMODE_NONE			1
+#define CULLMODE_CW			2
+#define CULLMODE_CCW			3
+
+#define SHADE_MODE_LINEAR		0
+#define SHADE_MODE_FLAT 		0x1
+
+/* _3DSTATE_MODES_4, p195 */
+#define _3DSTATE_MODES_4_CMD		(CMD_3D|(0x16<<24))
+#define ENABLE_LOGIC_OP_FUNC		(1<<23)
+#define LOGIC_OP_FUNC(x)		((x)<<18)
+#define LOGICOP_MASK			((1<<18)|(1<<19)|(1<<20)|(1<<21))
+#define LOGICOP_CLEAR			0
+#define LOGICOP_NOR			0x1
+#define LOGICOP_AND_INV 		0x2
+#define LOGICOP_COPY_INV		0x3
+#define LOGICOP_AND_RVRSE		0x4
+#define LOGICOP_INV			0x5
+#define LOGICOP_XOR			0x6
+#define LOGICOP_NAND			0x7
+#define LOGICOP_AND			0x8
+#define LOGICOP_EQUIV			0x9
+#define LOGICOP_NOOP			0xa
+#define LOGICOP_OR_INV			0xb
+#define LOGICOP_COPY			0xc
+#define LOGICOP_OR_RVRSE		0xd
+#define LOGICOP_OR			0xe
+#define LOGICOP_SET			0xf
+#define MODE4_ENABLE_STENCIL_TEST_MASK	((1<<17)|(0xff00))
+#define ENABLE_STENCIL_TEST_MASK	(1<<17)
+#define STENCIL_TEST_MASK(x)		((x)<<8)
+#define MODE4_ENABLE_STENCIL_WRITE_MASK	((1<<16)|(0x00ff))
+#define ENABLE_STENCIL_WRITE_MASK	(1<<16)
+#define STENCIL_WRITE_MASK(x)		((x)&0xff)
+
+/* _3DSTATE_MODES_5, p196 */
+#define _3DSTATE_MODES_5_CMD		(CMD_3D|(0x0c<<24))
+#define ENABLE_SPRITE_POINT_TEX 	(1<<23)
+#define SPRITE_POINT_TEX_ON		(1<<22)
+#define SPRITE_POINT_TEX_OFF		0
+#define FLUSH_RENDER_CACHE		(1<<18)
+#define FLUSH_TEXTURE_CACHE		(1<<16)
+#define FIXED_LINE_WIDTH_MASK		0xfc00
+#define ENABLE_FIXED_LINE_WIDTH 	(1<<15)
+#define FIXED_LINE_WIDTH(x)		((x)<<10)
+#define FIXED_POINT_WIDTH_MASK		0x3ff
+#define ENABLE_FIXED_POINT_WIDTH	(1<<9)
+#define FIXED_POINT_WIDTH(x)		(x)
+
+/* _3DSTATE_RASTERIZATION_RULES, p198 */
+#define _3DSTATE_RASTER_RULES_CMD	(CMD_3D|(0x07<<24))
+#define ENABLE_POINT_RASTER_RULE	(1<<15)
+#define OGL_POINT_RASTER_RULE		(1<<13)
+#define ENABLE_LINE_STRIP_PROVOKE_VRTX	(1<<8)
+#define ENABLE_TRI_FAN_PROVOKE_VRTX	(1<<5)
+#define ENABLE_TRI_STRIP_PROVOKE_VRTX	(1<<2)
+#define LINE_STRIP_PROVOKE_VRTX(x)	((x)<<6)
+#define TRI_FAN_PROVOKE_VRTX(x) 	((x)<<3)
+#define TRI_STRIP_PROVOKE_VRTX(x)	(x)
+
+/* _3DSTATE_SCISSOR_ENABLE, p200 */
+#define _3DSTATE_SCISSOR_ENABLE_CMD	(CMD_3D|(0x1c<<24)|(0x10<<19))
+#define ENABLE_SCISSOR_RECT		((1<<1) | 1)
+#define DISABLE_SCISSOR_RECT		(1<<1)
+
+/* _3DSTATE_SCISSOR_RECTANGLE_0, p201 */
+#define _3DSTATE_SCISSOR_RECT_0_CMD	(CMD_3D|(0x1d<<24)|(0x81<<16)|1)
+/* Dword 1 */
+#define SCISSOR_RECT_0_YMIN(x)		((x)<<16)
+#define SCISSOR_RECT_0_XMIN(x)		(x)
+/* Dword 2 */
+#define SCISSOR_RECT_0_YMAX(x)		((x)<<16)
+#define SCISSOR_RECT_0_XMAX(x)		(x)
+
+/* _3DSTATE_STENCIL_TEST, p202 */
+#define _3DSTATE_STENCIL_TEST_CMD	(CMD_3D|(0x09<<24))
+#define ENABLE_STENCIL_PARMS		(1<<23)
+#define STENCIL_OPS_MASK		(0xffc000)
+#define STENCIL_FAIL_OP(x)		((x)<<20)
+#define STENCIL_PASS_DEPTH_FAIL_OP(x)	((x)<<17)
+#define STENCIL_PASS_DEPTH_PASS_OP(x)	((x)<<14)
+
+#define ENABLE_STENCIL_TEST_FUNC_MASK	((1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9))
+#define ENABLE_STENCIL_TEST_FUNC	(1<<13)
+/* Uses COMPAREFUNC */
+#define STENCIL_TEST_FUNC(x)		((x)<<9)
+#define STENCIL_REF_VALUE_MASK		((1<<8)|0xff)
+#define ENABLE_STENCIL_REF_VALUE	(1<<8)
+#define STENCIL_REF_VALUE(x)		(x)
+
+/* _3DSTATE_VERTEX_FORMAT, p204 */
+#define _3DSTATE_VFT0_CMD	(CMD_3D|(0x05<<24))
+#define VFT0_POINT_WIDTH	(1<<12)
+#define VFT0_TEX_COUNT_MASK    	(7<<8)
+#define VFT0_TEX_COUNT_SHIFT    8
+#define VFT0_TEX_COUNT(x) 	((x)<<8)
+#define VFT0_SPEC		(1<<7)
+#define VFT0_DIFFUSE		(1<<6)
+#define VFT0_DEPTH_OFFSET  	(1<<5)
+#define VFT0_XYZ		(1<<1)
+#define VFT0_XYZW		(2<<1)
+#define VFT0_XY			(3<<1)
+#define VFT0_XYW		(4<<1)
+#define VFT0_XYZW_MASK          (7<<1)
+
+/* _3DSTATE_VERTEX_FORMAT_2, p206 */
+#define _3DSTATE_VFT1_CMD	(CMD_3D|(0x0a<<24))
+#define VFT1_TEX7_FMT(x)	((x)<<14)
+#define VFT1_TEX6_FMT(x)	((x)<<12)
+#define VFT1_TEX5_FMT(x)	((x)<<10)
+#define VFT1_TEX4_FMT(x)	((x)<<8)
+#define VFT1_TEX3_FMT(x)	((x)<<6)
+#define VFT1_TEX2_FMT(x)	((x)<<4)
+#define VFT1_TEX1_FMT(x)	((x)<<2)
+#define VFT1_TEX0_FMT(x)	(x)
+#define VFT1_TEX0_MASK          3
+#define VFT1_TEX1_SHIFT         2
+#define TEXCOORDFMT_2D		0
+#define TEXCOORDFMT_3D		1
+#define TEXCOORDFMT_4D		2
+#define TEXCOORDFMT_1D		3
+
+/* New stuff picked up along the way */
+
+#define MLC_LOD_BIAS_MASK ((1<<7)-1)
+
+
+/* _3DSTATE_VERTEX_TRANSFORM, p207 */
+#define _3DSTATE_VERTEX_TRANS_CMD	(CMD_3D|(0x1d<<24)|(0x8b<<16)|0)
+#define _3DSTATE_VERTEX_TRANS_MTX_CMD	(CMD_3D|(0x1d<<24)|(0x8b<<16)|6)
+/* Dword 1 */
+#define ENABLE_VIEWPORT_TRANSFORM	((1<<31)|(1<<30))
+#define DISABLE_VIEWPORT_TRANSFORM	(1<<31) /* identical redefinition of the earlier DISABLE_VIEWPORT_TRANSFORM */
+#define ENABLE_PERSP_DIVIDE		((1<<29)|(1<<28))
+#define DISABLE_PERSP_DIVIDE		(1<<29) /* same value as the earlier DISABLE_PERSPECTIVE_DIVIDE */
+#define VRTX_TRANS_LOAD_MATRICES	0x7421
+#define VRTX_TRANS_NO_LOAD_MATRICES	0x0000
+/* Dwords 2 through 7 are matrix elements */
+
+/* _3DSTATE_W_STATE, p209 */
+#define _3DSTATE_W_STATE_CMD		(CMD_3D|(0x1d<<24)|(0x8d<<16)|1)
+/* Dword 1 */
+#define MAGIC_W_STATE_DWORD1		0x00000008
+/* Dword 2 */
+#define WFAR_VALUE(x)			(x)
+
+
+/* Stipple command, carried over from the i810, apparently:
+ */
+#define _3DSTATE_STIPPLE           ((0x3<<29)|(0x1d<<24)|(0x83<<16))
+#define ST1_ENABLE               (1<<16)
+#define ST1_MASK                 (0xffff)
+
+
+
+#define _3DSTATE_LOAD_STATE_IMMEDIATE_2      ((0x3<<29)|(0x1d<<24)|(0x03<<16))
+#define LOAD_TEXTURE_MAP0                   (1<<11)
+#define LOAD_GLOBAL_COLOR_FACTOR            (1<<6)
+
+#define TM0S0_ADDRESS_MASK              0xfffffffc
+#define TM0S0_USE_FENCE                 (1<<1)
+
+#define TM0S1_HEIGHT_SHIFT              21
+#define TM0S1_WIDTH_SHIFT               10
+#define TM0S1_PALETTE_SELECT            (1<<9)
+#define TM0S1_MAPSURF_FORMAT_MASK       (0x7 << 6)
+#define TM0S1_MAPSURF_FORMAT_SHIFT      6
+#define    MAPSURF_8BIT_INDEXED		   (0<<6)
+#define    MAPSURF_8BIT		 	   (1<<6)
+#define    MAPSURF_16BIT		   (2<<6)
+#define    MAPSURF_32BIT		   (3<<6)
+#define    MAPSURF_411			   (4<<6)
+#define    MAPSURF_422			   (5<<6)
+#define    MAPSURF_COMPRESSED		   (6<<6)
+#define    MAPSURF_4BIT_INDEXED		   (7<<6)
+#define TM0S1_MT_FORMAT_MASK         (0x7 << 3)
+#define TM0S1_MT_FORMAT_SHIFT        3
+#define    MT_4BIT_IDX_ARGB8888	           (7<<3) /* SURFACE_4BIT_INDEXED */
+#define    MT_8BIT_IDX_RGB565	           (0<<3) /* SURFACE_8BIT_INDEXED */
+#define    MT_8BIT_IDX_ARGB1555	           (1<<3)
+#define    MT_8BIT_IDX_ARGB4444	           (2<<3)
+#define    MT_8BIT_IDX_AY88		   (3<<3)
+#define    MT_8BIT_IDX_ABGR8888	           (4<<3)
+#define    MT_8BIT_IDX_BUMP_88DVDU 	   (5<<3)
+#define    MT_8BIT_IDX_BUMP_655LDVDU	   (6<<3)
+#define    MT_8BIT_IDX_ARGB8888	           (7<<3)
+#define    MT_8BIT_I8		           (0<<3) /* SURFACE_8BIT */
+#define    MT_8BIT_L8		           (1<<3)
+#define    MT_16BIT_RGB565 		   (0<<3) /* SURFACE_16BIT */
+#define    MT_16BIT_ARGB1555		   (1<<3)
+#define    MT_16BIT_ARGB4444		   (2<<3)
+#define    MT_16BIT_AY88		   (3<<3)
+#define    MT_16BIT_DIB_ARGB1555_8888      (4<<3)
+#define    MT_16BIT_BUMP_88DVDU	           (5<<3)
+#define    MT_16BIT_BUMP_655LDVDU	   (6<<3)
+#define    MT_16BIT_DIB_RGB565_8888	   (7<<3)
+#define    MT_32BIT_ARGB8888		   (0<<3) /* SURFACE_32BIT */
+#define    MT_32BIT_ABGR8888		   (1<<3)
+#define    MT_32BIT_BUMP_XLDVDU_8888	   (6<<3)
+#define    MT_32BIT_DIB_8888		   (7<<3)
+#define    MT_411_YUV411		   (0<<3) /* SURFACE_411 */
+#define    MT_422_YCRCB_SWAPY	           (0<<3) /* SURFACE_422 */
+#define    MT_422_YCRCB_NORMAL	           (1<<3)
+#define    MT_422_YCRCB_SWAPUV	           (2<<3)
+#define    MT_422_YCRCB_SWAPUVY	           (3<<3)
+#define    MT_COMPRESS_DXT1		   (0<<3) /* SURFACE_COMPRESSED */
+#define    MT_COMPRESS_DXT2_3	           (1<<3)
+#define    MT_COMPRESS_DXT4_5	           (2<<3)
+#define    MT_COMPRESS_FXT1		   (3<<3)
+#define TM0S1_COLORSPACE_CONVERSION     (1 << 2)
+#define TM0S1_TILED_SURFACE             (1 << 1)
+#define TM0S1_TILE_WALK                 (1 << 0)
+
+#define TM0S2_PITCH_SHIFT               21
+#define TM0S2_CUBE_FACE_ENA_SHIFT       15
+#define TM0S2_CUBE_FACE_ENA_MASK        (1<<15)
+#define TM0S2_MAP_FORMAT                (1<<14)
+#define TM0S2_VERTICAL_LINE_STRIDE      (1<<13)
+#define TM0S2_VERITCAL_LINE_STRIDE_OFF  (1<<12) /* sic: "VERITCAL"; renaming would break users of this name */
+#define TM0S2_OUTPUT_CHAN_SHIFT         10
+#define TM0S2_OUTPUT_CHAN_MASK          (3<<10)
+
+#define TM0S3_MIP_FILTER_MASK           (0x3<<30)
+#define TM0S3_MIP_FILTER_SHIFT          30
+#define MIPFILTER_NONE		0
+#define MIPFILTER_NEAREST	1
+#define MIPFILTER_LINEAR	3
+#define TM0S3_MAG_FILTER_MASK           (0x3<<28)
+#define TM0S3_MAG_FILTER_SHIFT          28
+#define TM0S3_MIN_FILTER_MASK           (0x3<<26)
+#define TM0S3_MIN_FILTER_SHIFT          26
+#define FILTER_NEAREST		0
+#define FILTER_LINEAR		1
+#define FILTER_ANISOTROPIC	2
+
+#define TM0S3_LOD_BIAS_SHIFT		17
+#define TM0S3_LOD_BIAS_MASK		(0x1ff<<17)
+#define TM0S3_MAX_MIP_SHIFT		9
+#define TM0S3_MAX_MIP_MASK		(0xff<<9)
+#define TM0S3_MIN_MIP_SHIFT		3
+#define TM0S3_MIN_MIP_MASK		(0x3f<<3)
+#define TM0S3_KILL_PIXEL		(1<<2)
+#define TM0S3_KEYED_FILTER		(1<<1)
+#define TM0S3_CHROMA_KEY		(1<<0)
+
+
+/* _3DSTATE_MAP_TEXEL_STREAM, p188 */
+#define _3DSTATE_MAP_TEX_STREAM_CMD	(CMD_3D|(0x1c<<24)|(0x05<<19))
+#define DISABLE_TEX_STREAM_BUMP 	(1<<12)
+#define ENABLE_TEX_STREAM_BUMP		((1<<12)|(1<<11))
+#define TEX_MODIFY_UNIT_0		0
+#define TEX_MODIFY_UNIT_1		(1<<8)
+#define ENABLE_TEX_STREAM_COORD_SET	(1<<7)
+#define TEX_STREAM_COORD_SET(x) 	((x)<<4)
+#define ENABLE_TEX_STREAM_MAP_IDX	(1<<3)
+#define TEX_STREAM_MAP_IDX(x)		(x)
+
+
+#define MI_FLUSH           ((0<<29)|(4<<23))
+#define FLUSH_MAP_CACHE    (1<<0)
+
+#endif
diff --git a/i915/i830_state.c b/i915/i830_state.c
new file mode 100644
index 0000000..9512519
--- /dev/null
+++ b/i915/i830_state.c
@@ -0,0 +1,1092 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "glheader.h"
+#include "context.h"
+#include "macros.h"
+#include "enums.h"
+#include "dd.h"
+
+#include "texmem.h"
+
+#include "intel_screen.h"
+#include "intel_batchbuffer.h"
+
+#include "i830_context.h"
+#include "i830_reg.h"
+
+/**
+ * Program the stencil comparison function, reference value and test mask
+ * into the context state.
+ *
+ * \param ctx   GL context
+ * \param face  not used; the same state is written whatever the face
+ * \param func  GL comparison function, translated with
+ *              intel_translate_compare_func()
+ * \param ref   stencil reference value
+ * \param mask  stencil test mask; only the low 8 bits are kept
+ */
+static void
+i830StencilFuncSeparate(GLcontext *ctx, GLenum face, GLenum func, GLint ref,
+                        GLuint mask)
+{
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   int hw_func = intel_translate_compare_func(func);
+
+   mask &= 0xff;
+
+   if (INTEL_DEBUG & DEBUG_DRI) {
+      fprintf(stderr, "%s : func: %s, ref : 0x%x, mask: 0x%x\n", __FUNCTION__,
+              _mesa_lookup_enum_by_nr(func), ref, mask);
+   }
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+
+   /* Test mask lives in the MODES_4 word. */
+   i830->state.Ctx[I830_CTXREG_STATE4] =
+      (i830->state.Ctx[I830_CTXREG_STATE4] & ~MODE4_ENABLE_STENCIL_TEST_MASK)
+      | (ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(mask));
+
+   /* Reference value and compare function live in the stencil-test word. */
+   i830->state.Ctx[I830_CTXREG_STENCILTST] =
+      (i830->state.Ctx[I830_CTXREG_STENCILTST]
+       & ~(STENCIL_REF_VALUE_MASK | ENABLE_STENCIL_TEST_FUNC_MASK))
+      | (ENABLE_STENCIL_REF_VALUE |
+         ENABLE_STENCIL_TEST_FUNC |
+         STENCIL_REF_VALUE(ref) |
+         STENCIL_TEST_FUNC(hw_func));
+}
+
+/**
+ * Program the stencil write mask into the context state.
+ *
+ * \param ctx   GL context
+ * \param face  not used
+ * \param mask  stencil write mask; only the low 8 bits are kept.  The debug
+ *              trace prints the value before it is truncated.
+ */
+static void
+i830StencilMaskSeparate(GLcontext *ctx, GLenum face, GLuint mask)
+{
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+
+   if (INTEL_DEBUG & DEBUG_DRI) {
+      fprintf(stderr, "%s : mask 0x%x\n", __FUNCTION__, mask);
+   }
+
+   mask &= 0xff;
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+   i830->state.Ctx[I830_CTXREG_STATE4] =
+      (i830->state.Ctx[I830_CTXREG_STATE4] & ~MODE4_ENABLE_STENCIL_WRITE_MASK)
+      | (ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(mask));
+}
+
+/**
+ * Translate a GL stencil operation into the matching STENCILOP_* value.
+ *
+ * GL_INCR and GL_DECR map to the saturating hardware operations, while the
+ * *_WRAP variants map to the plain increment/decrement.  Any unrecognised
+ * enum yields 0.
+ */
+static int
+i830_stencil_op_from_gl(GLenum op)
+{
+   switch (op) {
+   case GL_KEEP:
+      return STENCILOP_KEEP;
+   case GL_ZERO:
+      return STENCILOP_ZERO;
+   case GL_REPLACE:
+      return STENCILOP_REPLACE;
+   case GL_INCR:
+      return STENCILOP_INCRSAT;
+   case GL_DECR:
+      return STENCILOP_DECRSAT;
+   case GL_INCR_WRAP:
+      return STENCILOP_INCR;
+   case GL_DECR_WRAP:
+      return STENCILOP_DECR;
+   case GL_INVERT:
+      return STENCILOP_INVERT;
+   default:
+      return 0;
+   }
+}
+
+/**
+ * Program the three stencil operations into the context state.
+ *
+ * \param ctx    GL context
+ * \param face   not used; the same state is written whatever the face
+ * \param fail   operation when the stencil test fails
+ * \param zfail  operation when the stencil test passes but depth fails
+ * \param zpass  operation when both stencil and depth tests pass
+ */
+static void
+i830StencilOpSeparate(GLcontext *ctx, GLenum face, GLenum fail, GLenum zfail,
+                      GLenum zpass)
+{
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   int fop, dfop, dpop;
+
+   if (INTEL_DEBUG&DEBUG_DRI)
+      fprintf(stderr, "%s: fail : %s, zfail: %s, zpass : %s\n", __FUNCTION__,
+              _mesa_lookup_enum_by_nr(fail),
+              _mesa_lookup_enum_by_nr(zfail),
+              _mesa_lookup_enum_by_nr(zpass));
+
+   fop = i830_stencil_op_from_gl(fail);
+   dfop = i830_stencil_op_from_gl(zfail);
+   dpop = i830_stencil_op_from_gl(zpass);
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+   i830->state.Ctx[I830_CTXREG_STENCILTST] &= ~(STENCIL_OPS_MASK);
+   i830->state.Ctx[I830_CTXREG_STENCILTST] |= (ENABLE_STENCIL_PARMS |
+                                               STENCIL_FAIL_OP(fop) |
+                                               STENCIL_PASS_DEPTH_FAIL_OP(dfop) |
+                                               STENCIL_PASS_DEPTH_PASS_OP(dpop));
+}
+
+/**
+ * Program the alpha-test comparison function and reference value.
+ *
+ * \param ctx   GL context
+ * \param func  GL comparison function, translated with
+ *              intel_translate_compare_func()
+ * \param ref   reference alpha as a float; converted to an unsigned byte
+ *              with UNCLAMPED_FLOAT_TO_UBYTE before being written
+ */
+static void i830AlphaFunc(GLcontext *ctx, GLenum func, GLfloat ref)
+{
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   int hw_func = intel_translate_compare_func(func);
+   GLubyte ref_ub;
+
+   UNCLAMPED_FLOAT_TO_UBYTE(ref_ub, ref);
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+   i830->state.Ctx[I830_CTXREG_STATE2] =
+      (i830->state.Ctx[I830_CTXREG_STATE2] & ~ALPHA_TEST_REF_MASK)
+      | (ENABLE_ALPHA_TEST_FUNC |
+         ENABLE_ALPHA_REF_VALUE |
+         ALPHA_TEST_FUNC(hw_func) |
+         ALPHA_REF_VALUE((GLuint) ref_ub));
+}
+
+/**
+ * Makes sure that the proper enables are set for LogicOp, Independent Alpha
+ * Blend, and Blending.  It needs to be called from numerous places where we
+ * could change the LogicOp or Independent Alpha Blend without subsequent
+ * calls to glEnable.
+ * 
+ * \todo
+ * This function is substantially different from the old i830-specific driver.
+ * I'm not sure which is correct.
+ */
+static void i830EvalLogicOpBlendState(GLcontext *ctx)
+{
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   GLuint enables;
+
+   /* Logic op takes priority over blending; the two are never both on. */
+   if (RGBA_LOGICOP_ENABLED(ctx))
+      enables = DISABLE_COLOR_BLEND | ENABLE_LOGIC_OP;
+   else if (ctx->Color.BlendEnabled)
+      enables = ENABLE_COLOR_BLEND | DISABLE_LOGIC_OP;
+   else
+      enables = DISABLE_COLOR_BLEND | DISABLE_LOGIC_OP;
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+
+   /* Every case clears the same two fields before setting its own bits. */
+   i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~(ENABLE_COLOR_BLEND |
+                                               ENABLE_LOGIC_OP_MASK);
+   i830->state.Ctx[I830_CTXREG_ENABLES_1] |= enables;
+}
+
+/**
+ * Program the constant blend colour.
+ *
+ * Each float component is converted to an unsigned byte with
+ * UNCLAMPED_FLOAT_TO_UBYTE and the four bytes are packed as A:R:G:B from the
+ * most to the least significant byte.
+ *
+ * \param ctx    GL context
+ * \param color  blend colour, indexed by RCOMP/GCOMP/BCOMP/ACOMP
+ */
+static void i830BlendColor(GLcontext *ctx, const GLfloat color[4])
+{
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   GLubyte r, g, b, a;
+
+   if (INTEL_DEBUG&DEBUG_DRI)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   UNCLAMPED_FLOAT_TO_UBYTE(r, color[RCOMP]);
+   UNCLAMPED_FLOAT_TO_UBYTE(g, color[GCOMP]);
+   UNCLAMPED_FLOAT_TO_UBYTE(b, color[BCOMP]);
+   UNCLAMPED_FLOAT_TO_UBYTE(a, color[ACOMP]);
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+
+   /* Widen before shifting: a GLubyte promotes to (signed) int, and shifting
+    * a value >= 128 left by 24 would move a bit into the sign position,
+    * which is undefined behaviour.
+    */
+   i830->state.Ctx[I830_CTXREG_BLENDCOLOR1] = (((GLuint) a << 24) |
+                                               ((GLuint) r << 16) |
+                                               ((GLuint) g << 8) |
+                                               (GLuint) b);
+}
+
+/**
+ * Sets both the blend equation (called "function" in i830 docs) and the
+ * blend function (called "factor" in i830 docs).  This is done in a single
+ * function because some blend equations (i.e., \c GL_MIN and \c GL_MAX)
+ * change the interpretation of the blend function.
+ */
+static void i830_set_blend_state( GLcontext * ctx )
+{
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   int funcA;
+   int funcRGB;
+   int eqnA;
+   int eqnRGB;
+   int iab;
+   int s1;
+
+
+   /* RGB factors as requested by the application; GL_MIN/GL_MAX override
+    * them below with ONE/ONE.
+    */
+   funcRGB = SRC_BLND_FACT( intel_translate_blend_factor( ctx->Color.BlendSrcRGB ) )
+       | DST_BLND_FACT( intel_translate_blend_factor( ctx->Color.BlendDstRGB ) );
+
+   switch(ctx->Color.BlendEquationRGB) {
+   case GL_FUNC_ADD:
+      eqnRGB = BLENDFUNC_ADD;
+      break;
+   case GL_MIN:
+      eqnRGB = BLENDFUNC_MIN;
+      funcRGB = SRC_BLND_FACT(BLENDFACT_ONE) | DST_BLND_FACT(BLENDFACT_ONE);
+      break;
+   case GL_MAX:
+      eqnRGB = BLENDFUNC_MAX;
+      funcRGB = SRC_BLND_FACT(BLENDFACT_ONE) | DST_BLND_FACT(BLENDFACT_ONE);
+      break;
+   case GL_FUNC_SUBTRACT:
+      eqnRGB = BLENDFUNC_SUB;
+      break;
+   case GL_FUNC_REVERSE_SUBTRACT:
+      eqnRGB = BLENDFUNC_RVRSE_SUB;
+      break;
+   default:
+      /* Leave the hardware state untouched on an unknown equation. */
+      fprintf( stderr, "[%s:%u] Invalid RGB blend equation (0x%04x).\n",
+               __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB );
+      return;
+   }
+
+
+   funcA = SRC_ABLEND_FACT( intel_translate_blend_factor( ctx->Color.BlendSrcA ) )
+       | DST_ABLEND_FACT( intel_translate_blend_factor( ctx->Color.BlendDstA ) );
+
+   switch(ctx->Color.BlendEquationA) {
+   case GL_FUNC_ADD:
+      eqnA = BLENDFUNC_ADD;
+      break;
+   case GL_MIN:
+      /* Use the alpha-factor macros here, matching how funcA is built
+       * above, rather than the RGB ones.
+       */
+      eqnA = BLENDFUNC_MIN;
+      funcA = SRC_ABLEND_FACT(BLENDFACT_ONE) | DST_ABLEND_FACT(BLENDFACT_ONE);
+      break;
+   case GL_MAX:
+      eqnA = BLENDFUNC_MAX;
+      funcA = SRC_ABLEND_FACT(BLENDFACT_ONE) | DST_ABLEND_FACT(BLENDFACT_ONE);
+      break;
+   case GL_FUNC_SUBTRACT:
+      eqnA = BLENDFUNC_SUB;
+      break;
+   case GL_FUNC_REVERSE_SUBTRACT:
+      eqnA = BLENDFUNC_RVRSE_SUB;
+      break;
+   default:
+      /* Leave the hardware state untouched on an unknown equation. */
+      fprintf( stderr, "[%s:%u] Invalid alpha blend equation (0x%04x).\n",
+               __FUNCTION__, __LINE__, ctx->Color.BlendEquationA );
+      return;
+   }
+
+   iab = eqnA | funcA
+       | _3DSTATE_INDPT_ALPHA_BLEND_CMD
+       | ENABLE_SRC_ABLEND_FACTOR | ENABLE_DST_ABLEND_FACTOR
+       | ENABLE_ALPHA_BLENDFUNC;
+   s1 = eqnRGB | funcRGB
+       | _3DSTATE_MODES_1_CMD
+       | ENABLE_SRC_BLND_FACTOR | ENABLE_DST_BLND_FACTOR
+       | ENABLE_COLR_BLND_FUNC;
+
+   /* Independent alpha blending is only needed when the alpha setup differs
+    * from the RGB one.
+    * NOTE(review): this comparison assumes the alpha and RGB factor macros
+    * use the same bit layout -- confirm against i830_reg.h.
+    */
+   if ( (eqnA | funcA) != (eqnRGB | funcRGB) )
+      iab |= ENABLE_INDPT_ALPHA_BLEND;
+   else
+      iab |= DISABLE_INDPT_ALPHA_BLEND;
+
+   /* Only flag a state change when one of the two words actually changes. */
+   if (iab != i830->state.Ctx[I830_CTXREG_IALPHAB] ||
+       s1 != i830->state.Ctx[I830_CTXREG_STATE1]) {
+      I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+      i830->state.Ctx[I830_CTXREG_IALPHAB] = iab;
+      i830->state.Ctx[I830_CTXREG_STATE1] = s1;
+   }
+
+   /* This will catch a logicop blend equation.  It will also ensure
+    * independent alpha blend is really in the correct state (either enabled
+    * or disabled) if blending is already enabled.
+    */
+
+   i830EvalLogicOpBlendState(ctx);
+
+   /* Developer trace, compiled out by default. */
+   if (0) {
+      fprintf(stderr, "[%s:%u] STATE1: 0x%08x IALPHAB: 0x%08x blend is %sabled\n",
+              __FUNCTION__, __LINE__,
+              i830->state.Ctx[I830_CTXREG_STATE1],
+              i830->state.Ctx[I830_CTXREG_IALPHAB],
+              (ctx->Color.BlendEnabled) ? "en" : "dis");
+   }
+}
+
+
+/**
+ * Blend-equation hook.  The arguments are used only for the debug trace;
+ * i830_set_blend_state() reads the equations from the context.
+ */
+static void i830BlendEquationSeparate(GLcontext *ctx, GLenum modeRGB,
+                                      GLenum modeA)
+{
+   if (INTEL_DEBUG & DEBUG_DRI) {
+      fprintf(stderr, "%s -> %s, %s\n", __FUNCTION__,
+              _mesa_lookup_enum_by_nr(modeRGB),
+              _mesa_lookup_enum_by_nr(modeA));
+   }
+
+   i830_set_blend_state(ctx);
+}
+
+
+/**
+ * Blend-factor hook.  The arguments are used only for the debug trace;
+ * i830_set_blend_state() reads the factors from the context.
+ */
+static void i830BlendFuncSeparate(GLcontext *ctx, GLenum sfactorRGB,
+                                  GLenum dfactorRGB, GLenum sfactorA,
+                                  GLenum dfactorA )
+{
+   if (INTEL_DEBUG & DEBUG_DRI) {
+      fprintf(stderr, "%s -> RGB(%s, %s) A(%s, %s)\n", __FUNCTION__,
+              _mesa_lookup_enum_by_nr(sfactorRGB),
+              _mesa_lookup_enum_by_nr(dfactorRGB),
+              _mesa_lookup_enum_by_nr(sfactorA),
+              _mesa_lookup_enum_by_nr(dfactorA));
+   }
+
+   i830_set_blend_state(ctx);
+}
+
+
+
+/**
+ * Program the depth comparison function.
+ *
+ * \param ctx   GL context
+ * \param func  GL comparison function, translated with
+ *              intel_translate_compare_func()
+ */
+static void i830DepthFunc(GLcontext *ctx, GLenum func)
+{
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   int hw_func = intel_translate_compare_func(func);
+
+   if (INTEL_DEBUG & DEBUG_DRI) {
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   }
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+   i830->state.Ctx[I830_CTXREG_STATE3] =
+      (i830->state.Ctx[I830_CTXREG_STATE3] & ~DEPTH_TEST_FUNC_MASK)
+      | (ENABLE_DEPTH_TEST_FUNC | DEPTH_TEST_FUNC(hw_func));
+}
+
+/**
+ * Enable or disable depth-buffer writes.
+ *
+ * Writes are enabled only when \p flag is set and the depth test is
+ * currently enabled in the context; otherwise they are disabled.
+ */
+static void i830DepthMask(GLcontext *ctx, GLboolean flag)
+{
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+
+   if (INTEL_DEBUG & DEBUG_DRI) {
+      fprintf(stderr, "%s flag (%d)\n", __FUNCTION__, flag);
+   }
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+
+   i830->state.Ctx[I830_CTXREG_ENABLES_2] &= ~ENABLE_DIS_DEPTH_WRITE_MASK;
+   i830->state.Ctx[I830_CTXREG_ENABLES_2] |=
+      ((flag && ctx->Depth.Test) ? ENABLE_DEPTH_WRITE : DISABLE_DEPTH_WRITE);
+}
+
+/* =============================================================
+ * Polygon stipple
+ *
+ * The i830 supports a 4x4 stipple natively, GL wants 32x32.
+ * Fortunately stipple is usually a repeating pattern.
+ */
+/**
+ * Try to express the GL polygon stipple pattern as a hardware 4x4 stipple.
+ *
+ * The 128 bytes at \p mask are accepted only if they consist of a single
+ * 4x4 tile repeated: every byte of a row must equal that row's low nibble
+ * duplicated into both nibbles, and the rows must repeat every four rows.
+ * On success the 16-bit pattern is stored in the stipple state word and
+ * intel.hw_stipple is set to 1; otherwise intel.hw_stipple is set to 0.
+ *
+ * \param ctx   GL context
+ * \param mask  stipple pattern; 128 bytes are read (4 bytes per row)
+ */
+static void i830PolygonStipple( GLcontext *ctx, const GLubyte *mask )
+{
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   const GLubyte *m = mask;
+   GLubyte p[4];
+   int i,j,k;
+   /* Stipple is live only when enabled and triangles are being drawn. */
+   int active = (ctx->Polygon.StippleFlag &&
+		 i830->intel.reduced_primitive == GL_TRIANGLES);
+   GLuint newMask;
+
+   /* Turn the hardware stipple off first; it is re-enabled at the end only
+    * if the new pattern turns out to be usable.  The early returns below
+    * therefore leave it disabled.
+    */
+   if (active) {
+      I830_STATECHANGE(i830, I830_UPLOAD_STIPPLE);
+      i830->state.Stipple[I830_STPREG_ST1] &= ~ST1_ENABLE;
+   }
+
+   /* Reference bytes: the low nibble of the first byte of rows 3, 2, 1 and
+    * 0, each replicated into the high nibble.
+    */
+   p[0] = mask[12] & 0xf; p[0] |= p[0] << 4;
+   p[1] = mask[8] & 0xf; p[1] |= p[1] << 4;
+   p[2] = mask[4] & 0xf; p[2] |= p[2] << 4;
+   p[3] = mask[0] & 0xf; p[3] |= p[3] << 4;
+
+   /* Walk all 8 * 4 * 4 = 128 bytes: k counts groups of four rows, j picks
+    * the reference byte for the row within the group (p[3] for the first
+    * row), and i walks the four bytes of the row.  Any mismatch means the
+    * pattern is not a repeated 4x4 tile.
+    */
+   for (k = 0 ; k < 8 ; k++)
+      for (j = 3 ; j >= 0; j--)
+	 for (i = 0 ; i < 4 ; i++, m++)
+	    if (*m != p[j]) {
+	       i830->intel.hw_stipple = 0;
+	       return;
+	    }
+
+   /* Pack the four row nibbles into 16 bits, p[0] in the lowest nibble. */
+   newMask = (((p[0] & 0xf) << 0) |
+	      ((p[1] & 0xf) << 4) |
+	      ((p[2] & 0xf) << 8) |
+	      ((p[3] & 0xf) << 12));
+
+
+   if (newMask == 0xffff || newMask == 0x0) {
+      /* this is needed to make conform pass */
+      i830->intel.hw_stipple = 0;
+      return;
+   }
+
+   /* Replace the low 16 bits of the stipple word with the new pattern. */
+   i830->state.Stipple[I830_STPREG_ST1] &= ~0xffff;
+   i830->state.Stipple[I830_STPREG_ST1] |= newMask;
+   i830->intel.hw_stipple = 1;
+
+   if (active)
+      i830->state.Stipple[I830_STPREG_ST1] |= ST1_ENABLE;
+}
+
+
+/* =============================================================
+ * Hardware clipping
+ */
+/**
+ * Program the scissor rectangle.
+ *
+ * The GL rectangle (origin at the bottom-left) is flipped vertically using
+ * the drawable height, converted to inclusive corner coordinates, and each
+ * corner is clamped to [0, screen size - 1] before being written to the
+ * buffer state.  Nothing is done when the context has no drawable.
+ *
+ * \param ctx   GL context
+ * \param x, y  lower-left corner of the GL scissor box
+ * \param w, h  size of the GL scissor box
+ */
+static void i830Scissor(GLcontext *ctx, GLint x, GLint y,
+                        GLsizei w, GLsizei h)
+{
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   intelScreenPrivate *screen = i830->intel.intelScreen;
+   int left, top, right, bottom;
+
+   if (!i830->intel.driDrawable)
+      return;
+
+   if (INTEL_DEBUG & DEBUG_DRI) {
+      fprintf(stderr, "[%s] x(%d) y(%d) w(%d) h(%d)\n", __FUNCTION__,
+              x, y, w, h);
+   }
+
+   /* Inclusive corners, with y measured from the top of the drawable. */
+   left = x;
+   top = i830->intel.driDrawable->h - (y + h);
+   right = x + w - 1;
+   bottom = top + h - 1;
+
+   /* Clamp each coordinate: lower bound first, then upper bound. */
+   if (left < 0)
+      left = 0;
+   if (left >= screen->width)
+      left = screen->width - 1;
+
+   if (top < 0)
+      top = 0;
+   if (top >= screen->height)
+      top = screen->height - 1;
+
+   if (right < 0)
+      right = 0;
+   if (right >= screen->width)
+      right = screen->width - 1;
+
+   if (bottom < 0)
+      bottom = 0;
+   if (bottom >= screen->height)
+      bottom = screen->height - 1;
+
+   I830_STATECHANGE(i830, I830_UPLOAD_BUFFERS);
+   i830->state.Buffer[I830_DESTREG_SR1] = (top << 16) | (left & 0xffff);
+   i830->state.Buffer[I830_DESTREG_SR2] = (bottom << 16) | (right & 0xffff);
+}
+
+/**
+ * Program the logic-op function.
+ *
+ * \param ctx     GL context
+ * \param opcode  GL logic op, translated with intel_translate_logic_op()
+ */
+static void i830LogicOp(GLcontext *ctx, GLenum opcode)
+{
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   int hw_op = intel_translate_logic_op(opcode);
+
+   if (INTEL_DEBUG & DEBUG_DRI) {
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   }
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+   i830->state.Ctx[I830_CTXREG_STATE4] =
+      (i830->state.Ctx[I830_CTXREG_STATE4] & ~LOGICOP_MASK)
+      | LOGIC_OP_FUNC(hw_op);
+}
+
+
+
+/**
+ * Recompute the hardware cull mode from the context's cull-face and
+ * front-face state.  Installed for both the CullFace and FrontFace hooks,
+ * so the enum argument is ignored and everything is read from \p ctx.
+ */
+static void i830CullFaceFrontFace(GLcontext *ctx, GLenum unused)
+{
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   GLuint mode;
+
+   if (INTEL_DEBUG & DEBUG_DRI) {
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   }
+
+   if (!ctx->Polygon.CullFlag) {
+      mode = CULLMODE_NONE;
+   }
+   else if (ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) {
+      mode = CULLMODE_BOTH;
+   }
+   else {
+      /* The default is to cull clockwise triangles.  Culling front faces
+       * flips the winding, and so does a front face that is not
+       * counter-clockwise; the two flips cancel each other out.
+       */
+      const GLboolean cull_front = (ctx->Polygon.CullFaceMode == GL_FRONT);
+      const GLboolean front_not_ccw = (ctx->Polygon.FrontFace != GL_CCW);
+
+      mode = (cull_front != front_not_ccw) ? CULLMODE_CCW : CULLMODE_CW;
+   }
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+   i830->state.Ctx[I830_CTXREG_STATE3] =
+      (i830->state.Ctx[I830_CTXREG_STATE3] & ~CULLMODE_MASK)
+      | (ENABLE_CULL_MODE | mode);
+}
+
+/**
+ * Program the fixed line width.
+ *
+ * The width is written as twice the GL width, truncated to an integer and
+ * clamped to [1, 15].  A state change is flagged only when the resulting
+ * word differs from the current one.
+ */
+static void i830LineWidth( GLcontext *ctx, GLfloat widthf )
+{
+   i830ContextPtr i830 = I830_CONTEXT( ctx );
+   int hw_width;
+   int new_state5;
+
+   if (INTEL_DEBUG & DEBUG_DRI) {
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   }
+
+   hw_width = (int)(widthf * 2);
+   CLAMP_SELF(hw_width, 1, 15);
+
+   new_state5 = i830->state.Ctx[I830_CTXREG_STATE5] & ~FIXED_LINE_WIDTH_MASK;
+   new_state5 |= (ENABLE_FIXED_LINE_WIDTH | FIXED_LINE_WIDTH(hw_width));
+
+   /* Nothing to upload when the word is unchanged. */
+   if (new_state5 == i830->state.Ctx[I830_CTXREG_STATE5])
+      return;
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+   i830->state.Ctx[I830_CTXREG_STATE5] = new_state5;
+}
+
+/**
+ * Program the fixed point width.
+ *
+ * The GL size is truncated to an integer and clamped to [1, 256] before
+ * being written.
+ */
+static void i830PointSize(GLcontext *ctx, GLfloat size)
+{
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   GLint hw_size = (int)size;
+
+   if (INTEL_DEBUG & DEBUG_DRI) {
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   }
+
+   CLAMP_SELF(hw_size, 1, 256);
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+   i830->state.Ctx[I830_CTXREG_STATE5] =
+      (i830->state.Ctx[I830_CTXREG_STATE5] & ~FIXED_POINT_WIDTH_MASK)
+      | (ENABLE_FIXED_POINT_WIDTH | FIXED_POINT_WIDTH(hw_size));
+}
+
+
+/* =============================================================
+ * Color masks
+ */
+
+/**
+ * Program the per-channel colour write mask.
+ *
+ * A write-mask bit is set for every channel whose write is disabled, hence
+ * the negation of each flag.  A state change is flagged only when the
+ * resulting word differs from the current one.
+ */
+static void i830ColorMask(GLcontext *ctx,
+                          GLboolean r, GLboolean g,
+                          GLboolean b, GLboolean a)
+{
+   i830ContextPtr i830 = I830_CONTEXT( ctx );
+   GLuint enables2;
+
+   if (INTEL_DEBUG & DEBUG_DRI) {
+      fprintf(stderr, "%s r(%d) g(%d) b(%d) a(%d)\n", __FUNCTION__, r, g, b, a);
+   }
+
+   enables2 = ((i830->state.Ctx[I830_CTXREG_ENABLES_2] & ~WRITEMASK_MASK) |
+               ENABLE_COLOR_MASK |
+               ENABLE_COLOR_WRITE |
+               ((!r) << WRITEMASK_RED_SHIFT) |
+               ((!g) << WRITEMASK_GREEN_SHIFT) |
+               ((!b) << WRITEMASK_BLUE_SHIFT) |
+               ((!a) << WRITEMASK_ALPHA_SHIFT));
+
+   /* Nothing to upload when the word is unchanged. */
+   if (enables2 == i830->state.Ctx[I830_CTXREG_ENABLES_2])
+      return;
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+   i830->state.Ctx[I830_CTXREG_ENABLES_2] = enables2;
+}
+
+/**
+ * Enable or disable specular add according to whether the context
+ * currently needs a secondary colour (NEED_SECONDARY_COLOR).
+ */
+static void update_specular( GLcontext *ctx )
+{
+   i830ContextPtr i830 = I830_CONTEXT( ctx );
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+
+   i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_SPEC_ADD_MASK;
+   i830->state.Ctx[I830_CTXREG_ENABLES_1] |=
+      (NEED_SECONDARY_COLOR(ctx) ? ENABLE_SPEC_ADD : DISABLE_SPEC_ADD);
+}
+
+/**
+ * Light-model hook.  Only GL_LIGHT_MODEL_COLOR_CONTROL is acted on: it
+ * triggers a refresh of the specular-add enable.  \p param is not read.
+ */
+static void i830LightModelfv(GLcontext *ctx, GLenum pname,
+                             const GLfloat *param)
+{
+   if (INTEL_DEBUG & DEBUG_DRI) {
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   }
+
+   if (pname != GL_LIGHT_MODEL_COLOR_CONTROL)
+      return;
+
+   update_specular( ctx );
+}
+
+/* In Mesa 3.5 we can reliably do native flatshading.
+ */
+static void i830ShadeModel(GLcontext *ctx, GLenum mode)
+{
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+
+/* One mode bit each for the alpha, fog, specular and colour interpolators. */
+#define SHADE_MODE_MASK ((1<<10)|(1<<8)|(1<<6)|(1<<4))
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+
+   i830->state.Ctx[I830_CTXREG_STATE3] &= ~SHADE_MODE_MASK;
+
+   /* GL_FLAT selects flat shading for all four; anything else is linear. */
+   if (mode != GL_FLAT) {
+      i830->state.Ctx[I830_CTXREG_STATE3] |=
+         (ALPHA_SHADE_MODE(SHADE_MODE_LINEAR) |
+          FOG_SHADE_MODE(SHADE_MODE_LINEAR) |
+          SPEC_SHADE_MODE(SHADE_MODE_LINEAR) |
+          COLOR_SHADE_MODE(SHADE_MODE_LINEAR));
+      return;
+   }
+
+   i830->state.Ctx[I830_CTXREG_STATE3] |=
+      (ALPHA_SHADE_MODE(SHADE_MODE_FLAT) |
+       FOG_SHADE_MODE(SHADE_MODE_FLAT) |
+       SPEC_SHADE_MODE(SHADE_MODE_FLAT) |
+       COLOR_SHADE_MODE(SHADE_MODE_FLAT));
+}
+
+/* =============================================================
+ * Fog
+ */
+/**
+ * Fog hook.  Only GL_FOG_COLOR is acted on: the context's fog colour is
+ * scaled by 255, truncated to bytes and packed as R:G:B into the fog-colour
+ * word.  \p param is not read; the colour comes from ctx->Fog.Color.
+ */
+static void i830Fogfv(GLcontext *ctx, GLenum pname, const GLfloat *param)
+{
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   GLubyte red, green, blue;
+   GLuint packed;
+
+   if (INTEL_DEBUG & DEBUG_DRI) {
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   }
+
+   if (pname != GL_FOG_COLOR)
+      return;
+
+   red = (GLubyte)(ctx->Fog.Color[0]*255.0F);
+   green = (GLubyte)(ctx->Fog.Color[1]*255.0F);
+   blue = (GLubyte)(ctx->Fog.Color[2]*255.0F);
+   packed = ((red << 16) | (green << 8) | (blue << 0));
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+   i830->state.Ctx[I830_CTXREG_FOGCOLOR] = (_3DSTATE_FOG_COLOR_CMD | packed);
+}
+
+/* =============================================================
+ */
+
+/**
+ * glEnable/glDisable hook: mirror a GL capability into the hardware state.
+ *
+ * \param ctx    GL context
+ * \param cap    capability being changed; unhandled values are ignored
+ * \param state  GL_TRUE to enable, GL_FALSE to disable
+ */
+static void i830Enable(GLcontext *ctx, GLenum cap, GLboolean state)
+{
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+
+   switch (cap) {
+   case GL_LIGHTING:
+   case GL_COLOR_SUM:
+      update_specular( ctx );
+      break;
+
+   case GL_ALPHA_TEST:
+      I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+      i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_DIS_ALPHA_TEST_MASK;
+      i830->state.Ctx[I830_CTXREG_ENABLES_1] |=
+         (state ? ENABLE_ALPHA_TEST : DISABLE_ALPHA_TEST);
+      break;
+
+   case GL_BLEND:
+   case GL_COLOR_LOGIC_OP:
+      /* Blend and logic op share one enable evaluation. */
+      i830EvalLogicOpBlendState(ctx);
+
+      /* Logicop doesn't seem to work at 16bpp:
+       */
+      if (cap == GL_COLOR_LOGIC_OP && i830->intel.intelScreen->cpp == 2) {
+         FALLBACK( &i830->intel, I830_FALLBACK_LOGICOP, state );
+      }
+      break;
+
+   case GL_DITHER:
+      I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+      i830->state.Ctx[I830_CTXREG_ENABLES_2] &= ~ENABLE_DITHER;
+      i830->state.Ctx[I830_CTXREG_ENABLES_2] |=
+         (state ? ENABLE_DITHER : DISABLE_DITHER);
+      break;
+
+   case GL_DEPTH_TEST:
+      I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+      i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_DIS_DEPTH_TEST_MASK;
+      i830->state.Ctx[I830_CTXREG_ENABLES_1] |=
+         (state ? ENABLE_DEPTH_TEST : DISABLE_DEPTH_TEST);
+
+      /* Also turn off depth writes when GL_DEPTH_TEST is disabled:
+       */
+      i830DepthMask( ctx, ctx->Depth.Mask );
+      break;
+
+   case GL_SCISSOR_TEST:
+      I830_STATECHANGE(i830, I830_UPLOAD_BUFFERS);
+      i830->state.Buffer[I830_DESTREG_SENABLE] =
+         (_3DSTATE_SCISSOR_ENABLE_CMD |
+          (state ? ENABLE_SCISSOR_RECT : DISABLE_SCISSOR_RECT));
+      break;
+
+   case GL_LINE_SMOOTH:
+      I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+      i830->state.Ctx[I830_CTXREG_AA] &= ~AA_LINE_ENABLE;
+      i830->state.Ctx[I830_CTXREG_AA] |=
+         (state ? AA_LINE_ENABLE : AA_LINE_DISABLE);
+      break;
+
+   case GL_FOG:
+      I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+      i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_DIS_FOG_MASK;
+      i830->state.Ctx[I830_CTXREG_ENABLES_1] |=
+         (state ? ENABLE_FOG : DISABLE_FOG);
+      break;
+
+   case GL_CULL_FACE:
+      i830CullFaceFrontFace(ctx, 0);
+      break;
+
+   case GL_TEXTURE_2D:
+      /* Nothing to do here. */
+      break;
+
+   case GL_STENCIL_TEST:
+      if (!i830->intel.hw_stencil) {
+         /* No hardware stencil: use the fallback path while it is enabled. */
+         FALLBACK( &i830->intel, I830_FALLBACK_STENCIL, state );
+         break;
+      }
+
+      I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+
+      if (state) {
+         i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_STENCIL_TEST;
+         i830->state.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_STENCIL_WRITE;
+      } else {
+         i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_STENCIL_TEST;
+         i830->state.Ctx[I830_CTXREG_ENABLES_2] &= ~ENABLE_STENCIL_WRITE;
+         i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_STENCIL_TEST;
+         i830->state.Ctx[I830_CTXREG_ENABLES_2] |= DISABLE_STENCIL_WRITE;
+      }
+      break;
+
+   case GL_POLYGON_STIPPLE:
+      /* The stipple command worked on an 855GM box but not on an 845G.
+       * NOTE(review): an earlier comment here said the feature was
+       * "disabled for now", yet this code does toggle the hardware stipple
+       * whenever hw_stipple is set and triangles are being drawn -- confirm
+       * which is intended.
+       */
+      if (i830->intel.hw_stipple &&
+          i830->intel.reduced_primitive == GL_TRIANGLES) {
+         I830_STATECHANGE(i830, I830_UPLOAD_STIPPLE);
+         i830->state.Stipple[I830_STPREG_ST1] &= ~ST1_ENABLE;
+         if (state) {
+            i830->state.Stipple[I830_STPREG_ST1] |= ST1_ENABLE;
+         }
+      }
+      break;
+
+   default:
+      break;
+   }
+}
+
+
+/**
+ * Fill i830->state with the driver's default hardware state: texture blend
+ * stage 0, the context words, the stipple command and the destination
+ * buffer words.  The whole state structure is zeroed first, so any word not
+ * assigned below stays 0.
+ */
+static void i830_init_packets( i830ContextPtr i830 )
+{
+   intelScreenPrivate *screen = i830->intel.intelScreen;
+
+   /* Start from a clean slate. */
+   memset(&i830->state, 0, sizeof(i830->state));
+
+   /* --- Texture blend: one stage passing the diffuse colour through --- */
+   i830->state.TexBlend[0][0] =
+      (_3DSTATE_MAP_BLEND_OP_CMD(0) |
+       TEXPIPE_COLOR |
+       ENABLE_TEXOUTPUT_WRT_SEL |
+       TEXOP_OUTPUT_CURRENT |
+       DISABLE_TEX_CNTRL_STAGE |
+       TEXOP_SCALE_1X |
+       TEXOP_MODIFY_PARMS |
+       TEXOP_LAST_STAGE |
+       TEXBLENDOP_ARG1);
+   i830->state.TexBlend[0][1] =
+      (_3DSTATE_MAP_BLEND_OP_CMD(0) |
+       TEXPIPE_ALPHA |
+       ENABLE_TEXOUTPUT_WRT_SEL |
+       TEXOP_OUTPUT_CURRENT |
+       TEXOP_SCALE_1X |
+       TEXOP_MODIFY_PARMS |
+       TEXBLENDOP_ARG1);
+   i830->state.TexBlend[0][2] =
+      (_3DSTATE_MAP_BLEND_ARG_CMD(0) |
+       TEXPIPE_COLOR |
+       TEXBLEND_ARG1 |
+       TEXBLENDARG_MODIFY_PARMS |
+       TEXBLENDARG_DIFFUSE);
+   i830->state.TexBlend[0][3] =
+      (_3DSTATE_MAP_BLEND_ARG_CMD(0) |
+       TEXPIPE_ALPHA |
+       TEXBLEND_ARG1 |
+       TEXBLENDARG_MODIFY_PARMS |
+       TEXBLENDARG_DIFFUSE);
+
+   i830->state.TexBlendWordsUsed[0] = 4;
+
+   /* --- Context words --- */
+   i830->state.Ctx[I830_CTXREG_VF] = 0;
+   i830->state.Ctx[I830_CTXREG_VF2] = 0;
+
+   i830->state.Ctx[I830_CTXREG_AA] =
+      (_3DSTATE_AA_CMD |
+       AA_LINE_ECAAR_WIDTH_ENABLE |
+       AA_LINE_ECAAR_WIDTH_1_0 |
+       AA_LINE_REGION_WIDTH_ENABLE |
+       AA_LINE_REGION_WIDTH_1_0 |
+       AA_LINE_DISABLE);
+
+   i830->state.Ctx[I830_CTXREG_ENABLES_1] =
+      (_3DSTATE_ENABLES_1_CMD |
+       DISABLE_LOGIC_OP |
+       DISABLE_STENCIL_TEST |
+       DISABLE_DEPTH_BIAS |
+       DISABLE_SPEC_ADD |
+       DISABLE_FOG |
+       DISABLE_ALPHA_TEST |
+       DISABLE_COLOR_BLEND |
+       DISABLE_DEPTH_TEST);
+
+   /* Stencil writes start enabled only when hardware stencil is available;
+    * the rest of the word is the same either way.  No colour components are
+    * write-disabled.
+    */
+   i830->state.Ctx[I830_CTXREG_ENABLES_2] =
+      (_3DSTATE_ENABLES_2_CMD |
+       (i830->intel.hw_stencil ? ENABLE_STENCIL_WRITE : DISABLE_STENCIL_WRITE) |
+       ENABLE_TEX_CACHE |
+       ENABLE_DITHER |
+       ENABLE_COLOR_MASK |
+       ENABLE_COLOR_WRITE |
+       ENABLE_DEPTH_WRITE);
+
+   i830->state.Ctx[I830_CTXREG_STATE1] =
+      (_3DSTATE_MODES_1_CMD |
+       ENABLE_COLR_BLND_FUNC |
+       BLENDFUNC_ADD |
+       ENABLE_SRC_BLND_FACTOR |
+       SRC_BLND_FACT(BLENDFACT_ONE) |
+       ENABLE_DST_BLND_FACTOR |
+       DST_BLND_FACT(BLENDFACT_ZERO) );
+
+   i830->state.Ctx[I830_CTXREG_STATE2] =
+      (_3DSTATE_MODES_2_CMD |
+       ENABLE_GLOBAL_DEPTH_BIAS |
+       GLOBAL_DEPTH_BIAS(0) |
+       ENABLE_ALPHA_TEST_FUNC |
+       ALPHA_TEST_FUNC(COMPAREFUNC_ALWAYS) |
+       ALPHA_REF_VALUE(0) );
+
+   i830->state.Ctx[I830_CTXREG_STATE3] =
+      (_3DSTATE_MODES_3_CMD |
+       ENABLE_DEPTH_TEST_FUNC |
+       DEPTH_TEST_FUNC(COMPAREFUNC_LESS) |
+       ENABLE_ALPHA_SHADE_MODE |
+       ALPHA_SHADE_MODE(SHADE_MODE_LINEAR) |
+       ENABLE_FOG_SHADE_MODE |
+       FOG_SHADE_MODE(SHADE_MODE_LINEAR) |
+       ENABLE_SPEC_SHADE_MODE |
+       SPEC_SHADE_MODE(SHADE_MODE_LINEAR) |
+       ENABLE_COLOR_SHADE_MODE |
+       COLOR_SHADE_MODE(SHADE_MODE_LINEAR) |
+       ENABLE_CULL_MODE |
+       CULLMODE_NONE);
+
+   i830->state.Ctx[I830_CTXREG_STATE4] =
+      (_3DSTATE_MODES_4_CMD |
+       ENABLE_LOGIC_OP_FUNC |
+       LOGIC_OP_FUNC(LOGICOP_COPY) |
+       ENABLE_STENCIL_TEST_MASK |
+       STENCIL_TEST_MASK(0xff) |
+       ENABLE_STENCIL_WRITE_MASK |
+       STENCIL_WRITE_MASK(0xff));
+
+   i830->state.Ctx[I830_CTXREG_STENCILTST] =
+      (_3DSTATE_STENCIL_TEST_CMD |
+       ENABLE_STENCIL_PARMS |
+       STENCIL_FAIL_OP(STENCILOP_KEEP) |
+       STENCIL_PASS_DEPTH_FAIL_OP(STENCILOP_KEEP) |
+       STENCIL_PASS_DEPTH_PASS_OP(STENCILOP_KEEP) |
+       ENABLE_STENCIL_TEST_FUNC |
+       STENCIL_TEST_FUNC(COMPAREFUNC_ALWAYS) |
+       ENABLE_STENCIL_REF_VALUE |
+       STENCIL_REF_VALUE(0) );
+
+   /* Line width 0x2 corresponds to a GL width of 1.0. */
+   i830->state.Ctx[I830_CTXREG_STATE5] =
+      (_3DSTATE_MODES_5_CMD |
+       FLUSH_TEXTURE_CACHE |
+       ENABLE_SPRITE_POINT_TEX |
+       SPRITE_POINT_TEX_OFF |
+       ENABLE_FIXED_LINE_WIDTH |
+       FIXED_LINE_WIDTH(0x2) |
+       ENABLE_FIXED_POINT_WIDTH |
+       FIXED_POINT_WIDTH(1) );
+
+   i830->state.Ctx[I830_CTXREG_IALPHAB] =
+      (_3DSTATE_INDPT_ALPHA_BLEND_CMD |
+       DISABLE_INDPT_ALPHA_BLEND |
+       ENABLE_ALPHA_BLENDFUNC |
+       ABLENDFUNC_ADD);
+
+   i830->state.Ctx[I830_CTXREG_FOGCOLOR] =
+      (_3DSTATE_FOG_COLOR_CMD |
+       FOG_COLOR_RED(0) |
+       FOG_COLOR_GREEN(0) |
+       FOG_COLOR_BLUE(0));
+
+   i830->state.Ctx[I830_CTXREG_BLENDCOLOR0] = _3DSTATE_CONST_BLEND_COLOR_CMD;
+   i830->state.Ctx[I830_CTXREG_BLENDCOLOR1] = 0;
+
+   /* Bind texture coordinate set N to vertex coordinate set N. */
+   i830->state.Ctx[I830_CTXREG_MCSB0] = _3DSTATE_MAP_COORD_SETBIND_CMD;
+   i830->state.Ctx[I830_CTXREG_MCSB1] =
+      (TEXBIND_SET3(TEXCOORDSRC_VTXSET_3) |
+       TEXBIND_SET2(TEXCOORDSRC_VTXSET_2) |
+       TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) |
+       TEXBIND_SET0(TEXCOORDSRC_VTXSET_0));
+
+   /* --- Stipple --- */
+   i830->state.Stipple[I830_STPREG_ST0] = _3DSTATE_STIPPLE;
+
+   /* --- Destination buffers (pitches are in bytes) --- */
+   i830->state.Buffer[I830_DESTREG_CBUFADDR0] = _3DSTATE_BUF_INFO_CMD;
+   i830->state.Buffer[I830_DESTREG_CBUFADDR1] =
+      (BUF_3D_ID_COLOR_BACK |
+       BUF_3D_PITCH(screen->front.pitch) |
+       BUF_3D_USE_FENCE);
+
+   i830->state.Buffer[I830_DESTREG_DBUFADDR0] = _3DSTATE_BUF_INFO_CMD;
+   i830->state.Buffer[I830_DESTREG_DBUFADDR1] =
+      (BUF_3D_ID_DEPTH |
+       BUF_3D_PITCH(screen->depth.pitch) |
+       BUF_3D_USE_FENCE);
+   i830->state.Buffer[I830_DESTREG_DBUFADDR2] = screen->depth.offset;
+
+   i830->state.Buffer[I830_DESTREG_DV0] = _3DSTATE_DST_BUF_VARS_CMD;
+
+   /* Origin bias 0x8 is .5 in both directions.  The depth format follows
+    * the colour format: 16-bit colour pairs with 16-bit depth, 8888 with
+    * 24-bit depth.  Any other format leaves DV1 at zero.
+    */
+   switch (screen->fbFormat) {
+   case DV_PF_555:
+   case DV_PF_565:
+      i830->state.Buffer[I830_DESTREG_DV1] =
+         (DSTORG_HORT_BIAS(0x8) |
+          DSTORG_VERT_BIAS(0x8) |
+          screen->fbFormat |
+          DEPTH_IS_Z |
+          DEPTH_FRMT_16_FIXED);
+      break;
+   case DV_PF_8888:
+      i830->state.Buffer[I830_DESTREG_DV1] =
+         (DSTORG_HORT_BIAS(0x8) |
+          DSTORG_VERT_BIAS(0x8) |
+          screen->fbFormat |
+          DEPTH_IS_Z |
+          DEPTH_FRMT_24_FIXED_8_OTHER);
+      break;
+   }
+
+   /* Scissor starts disabled with an empty rectangle. */
+   i830->state.Buffer[I830_DESTREG_SENABLE] =
+      (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
+   i830->state.Buffer[I830_DESTREG_SR0] = _3DSTATE_SCISSOR_RECT_0_CMD;
+   i830->state.Buffer[I830_DESTREG_SR1] = 0;
+   i830->state.Buffer[I830_DESTREG_SR2] = 0;
+}
+
+
+/**
+ * Install the i830 state-change handlers into a driver function table.
+ *
+ * \param functions  table to fill in; only the entries listed below are
+ *                   written
+ */
+void i830InitStateFuncs( struct dd_function_table *functions )
+{
+   /* Generic enable/disable */
+   functions->Enable = i830Enable;
+
+   /* Alpha test, blending and logic op */
+   functions->AlphaFunc = i830AlphaFunc;
+   functions->BlendColor = i830BlendColor;
+   functions->BlendEquationSeparate = i830BlendEquationSeparate;
+   functions->BlendFuncSeparate = i830BlendFuncSeparate;
+   functions->LogicOpcode = i830LogicOp;
+   functions->ColorMask = i830ColorMask;
+
+   /* Depth and stencil */
+   functions->DepthFunc = i830DepthFunc;
+   functions->DepthMask = i830DepthMask;
+   functions->StencilFuncSeparate = i830StencilFuncSeparate;
+   functions->StencilMaskSeparate = i830StencilMaskSeparate;
+   functions->StencilOpSeparate = i830StencilOpSeparate;
+
+   /* Rasterization; cull face and front face share one handler */
+   functions->CullFace = i830CullFaceFrontFace;
+   functions->FrontFace = i830CullFaceFrontFace;
+   functions->LineWidth = i830LineWidth;
+   functions->PointSize = i830PointSize;
+   functions->PolygonStipple = i830PolygonStipple;
+   functions->Scissor = i830Scissor;
+
+   /* Lighting, shading and fog */
+   functions->LightModelfv = i830LightModelfv;
+   functions->ShadeModel = i830ShadeModel;
+   functions->Fogfv = i830Fogfv;
+}
+
+/**
+ * Set up the initial hardware state of an i830 context.
+ *
+ * The default packets are built, intelInitState() is run on the GL context,
+ * and the resulting state is snapshotted into i830->initial.  Only after
+ * the snapshot is the live state made current and its emitted/active flags
+ * set, so the snapshot does not carry them.
+ */
+void i830InitState( i830ContextPtr i830 )
+{
+   i830_init_packets( i830 );
+
+   intelInitState( &i830->intel.ctx );
+
+   /* Snapshot before the bookkeeping fields below are written. */
+   memcpy( &i830->initial, &i830->state, sizeof(i830->state) );
+
+   i830->current = &i830->state;
+
+   /* Nothing has been emitted yet. */
+   i830->state.emitted = 0;
+   i830->state.active = (I830_UPLOAD_TEXBLEND(0) |
+                         I830_UPLOAD_STIPPLE |
+                         I830_UPLOAD_CTX |
+                         I830_UPLOAD_BUFFERS);
+}
+
+
+
+
+
diff --git a/i915/i830_tex.c b/i915/i830_tex.c
new file mode 100644
index 0000000..3c4aedb
--- /dev/null
+++ b/i915/i830_tex.c
@@ -0,0 +1,356 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "mtypes.h"
+#include "imports.h"
+#include "simple_list.h"
+#include "enums.h"
+#include "image.h"
+#include "texstore.h"
+#include "texformat.h"
+#include "texmem.h"
+#include "swrast/swrast.h"
+
+#include "mm.h"
+
+#include "intel_ioctl.h"
+
+#include "i830_context.h"
+#include "i830_reg.h"
+
+
+
+
+/**
+ * Program the U and V texture-coordinate address modes of a texture object.
+ * 
+ * The i830M (and related graphics cores) do not support GL_CLAMP.  The Intel
+ * drivers for "other operating systems" implement GL_CLAMP as
+ * GL_CLAMP_TO_EDGE, so the same is done here.
+ * An unrecognised wrap mode leaves that coordinate's mode field cleared.
+ * \param tex Texture object whose wrap modes are to be set
+ * \param swrap Wrap mode for the \a s texture coordinate
+ * \param twrap Wrap mode for the \a t texture coordinate
+ */
+static void i830SetTexWrapping(i830TextureObjectPtr tex,
+			       GLenum swrap, 
+			       GLenum twrap)
+{
+   GLuint u_bits = 0, v_bits = 0;   /* stay 0 for an unrecognised mode */
+
+   switch (swrap) {
+   case GL_REPEAT:
+      u_bits = TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_WRAP);
+      break;
+   case GL_CLAMP:		/* no native GL_CLAMP, see above */
+   case GL_CLAMP_TO_EDGE:
+      u_bits = TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_CLAMP);
+      break;
+   case GL_CLAMP_TO_BORDER:
+      u_bits = TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_CLAMP_BORDER);
+      break;
+   case GL_MIRRORED_REPEAT:
+      u_bits = TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_MIRROR);
+      break;
+   default:
+      break;
+   }
+
+   switch (twrap) {
+   case GL_REPEAT:
+      v_bits = TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_WRAP);
+      break;
+   case GL_CLAMP:		/* same substitution as for S */
+   case GL_CLAMP_TO_EDGE:
+      v_bits = TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_CLAMP);
+      break;
+   case GL_CLAMP_TO_BORDER:
+      v_bits = TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_CLAMP_BORDER);
+      break;
+   case GL_MIRRORED_REPEAT:
+      v_bits = TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_MIRROR);
+      break;
+   default:
+      break;
+   }
+
+   /* Clear both address-mode fields, then install the selected modes. */
+   tex->Setup[I830_TEXREG_MCS] &= ~(TEXCOORD_ADDR_U_MASK|TEXCOORD_ADDR_V_MASK);
+   tex->Setup[I830_TEXREG_MCS] |= u_bits | v_bits;
+}
+
+
+/**
+ * Set the texture magnification and minification modes.
+ * 
+ * \param t Texture whose filter modes are to be set
+ * \param minf Texture minification mode
+ * \param magf Texture magnification mode
+ * \param maxanisotropy Max anisotropy; values above 1.0 select anisotropic
+ */
+
+static void i830SetTexFilter( i830TextureObjectPtr t, GLenum minf, GLenum magf,
+			      GLfloat maxanisotropy )
+{
+   int min_sel = 0, mip_sel = 0, mag_sel = 0;
+
+   if (INTEL_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (maxanisotropy > 1.0) {
+      /* Anisotropic mode ignores minf/magf; the mip selector stays 0. */
+      min_sel = mag_sel = FILTER_ANISOTROPIC;
+   }
+   else {
+      switch (minf) {
+      case GL_NEAREST:
+      case GL_NEAREST_MIPMAP_NEAREST:
+      case GL_NEAREST_MIPMAP_LINEAR:
+	 min_sel = FILTER_NEAREST;
+	 break;
+      case GL_LINEAR:
+      case GL_LINEAR_MIPMAP_NEAREST:
+      case GL_LINEAR_MIPMAP_LINEAR:
+	 min_sel = FILTER_LINEAR;
+	 break;
+      default:
+	 break;
+      }
+      switch (minf) {
+      case GL_NEAREST:
+      case GL_LINEAR:
+	 mip_sel = MIPFILTER_NONE;
+	 break;
+      case GL_NEAREST_MIPMAP_NEAREST:
+      case GL_LINEAR_MIPMAP_NEAREST:
+	 mip_sel = MIPFILTER_NEAREST;
+	 break;
+      case GL_NEAREST_MIPMAP_LINEAR:
+      case GL_LINEAR_MIPMAP_LINEAR:
+	 mip_sel = MIPFILTER_LINEAR;
+	 break;
+      default:
+	 break;
+      }
+
+      switch (magf) {
+      case GL_NEAREST:
+	 mag_sel = FILTER_NEAREST;
+	 break;
+      case GL_LINEAR:
+	 mag_sel = FILTER_LINEAR;
+	 break;
+      default:
+	 break;
+      }
+   }
+
+   t->Setup[I830_TEXREG_TM0S3] &= ~(TM0S3_MIN_FILTER_MASK |
+				    TM0S3_MIP_FILTER_MASK |
+				    TM0S3_MAG_FILTER_MASK);
+   t->Setup[I830_TEXREG_TM0S3] |= ((min_sel << TM0S3_MIN_FILTER_SHIFT) |
+				   (mip_sel << TM0S3_MIP_FILTER_SHIFT) |
+				   (mag_sel << TM0S3_MAG_FILTER_SHIFT));
+}
+
+static void i830SetTexBorderColor(i830TextureObjectPtr t, GLubyte color[4])
+{
+   const GLubyte c0 = color[0], c1 = color[1], c2 = color[2], c3 = color[3];
+   /* The four bytes go to INTEL_PACKCOLOR8888 in array order -> TM0S4. */
+   if (INTEL_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   t->Setup[I830_TEXREG_TM0S4] = INTEL_PACKCOLOR8888(c0, c1, c2, c3);
+}
+
+
+/**
+ * Create the driver-private i830 texture object for \a texObj, attach it as
+ * texObj->DriverData, and seed its setup words from the object's current
+ * wrap, filter and border-colour parameters.  Returns NULL if CALLOC fails.
+ */
+
+intelTextureObjectPtr i830AllocTexObj( struct gl_texture_object *texObj )
+{
+   i830TextureObjectPtr tex = CALLOC_STRUCT( i830_texture_object );
+   if ( tex == NULL )
+      return NULL;
+
+   /* Link the GL object and the driver object to each other. */
+   tex->intel.base.tObj = texObj;
+   texObj->DriverData = tex;
+   tex->intel.dirty = I830_UPLOAD_TEX_ALL;
+   make_empty_list( &tex->intel.base );
+   /* The texture-map words start out as zero (TM0LI is not used). */
+   tex->Setup[I830_TEXREG_TM0LI] = 0;
+   tex->Setup[I830_TEXREG_TM0S0] = 0;
+   tex->Setup[I830_TEXREG_TM0S1] = 0;
+   tex->Setup[I830_TEXREG_TM0S2] = 0;
+   tex->Setup[I830_TEXREG_TM0S3] = 0;
+   /* Default coordinate-set word: unit 0, wrap addressing on U and V. */
+   tex->Setup[I830_TEXREG_MCS] = (_3DSTATE_MAP_COORD_SET_CMD |
+				  MAP_UNIT(0) |
+				  ENABLE_TEXCOORD_PARAMS |
+				  TEXCOORDS_ARE_NORMAL |
+				  TEXCOORDTYPE_CARTESIAN |
+				  ENABLE_ADDR_V_CNTL |
+				  TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_WRAP) |
+				  ENABLE_ADDR_U_CNTL |
+				  TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_WRAP));
+
+   i830SetTexWrapping( tex, texObj->WrapS, texObj->WrapT );
+   i830SetTexFilter( tex, texObj->MinFilter, texObj->MagFilter,
+		     texObj->MaxAnisotropy );
+   i830SetTexBorderColor( tex, texObj->_BorderChan );
+   return &tex->intel;
+}
+
+
+static void i830TexParameter( GLcontext *ctx, GLenum target,
+			      struct gl_texture_object *tObj,
+			      GLenum pname, const GLfloat *params )
+{
+   i830TextureObjectPtr tex = (i830TextureObjectPtr) tObj->DriverData;
+   if (tex == NULL)
+      return;		/* no driver object yet, nothing to update */
+
+   switch (pname) {
+   default:
+      return;		/* parameter is not tracked in the setup words */
+   case GL_TEXTURE_BASE_LEVEL:
+   case GL_TEXTURE_MAX_LEVEL:
+   case GL_TEXTURE_MIN_LOD:
+   case GL_TEXTURE_MAX_LOD:
+      /* The i830 and its successors can do a lot of this without
+       * reloading the textures.  A project for someone?
+       */
+      intelFlush( ctx );
+      driSwapOutTextureObject( (driTextureObject *) tex );
+      break;
+
+   case GL_TEXTURE_BORDER_COLOR:
+      i830SetTexBorderColor( tex, tObj->_BorderChan );
+      break;
+
+   case GL_TEXTURE_WRAP_S:
+   case GL_TEXTURE_WRAP_T:
+      i830SetTexWrapping( tex, tObj->WrapS, tObj->WrapT );
+      break;
+
+   case GL_TEXTURE_MIN_FILTER:
+   case GL_TEXTURE_MAG_FILTER:
+   case GL_TEXTURE_MAX_ANISOTROPY_EXT:
+      i830SetTexFilter( tex, tObj->MinFilter, tObj->MagFilter,
+			tObj->MaxAnisotropy);
+      break;
+   }
+
+   /* Every handled parameter marks the whole object dirty. */
+   tex->intel.dirty = I830_UPLOAD_TEX_ALL;
+}
+
+
+static void i830TexEnv( GLcontext *ctx, GLenum target, 
+			GLenum pname, const GLfloat *param )
+{
+   i830ContextPtr i830 = I830_CONTEXT( ctx );
+   GLuint unit = ctx->Texture.CurrentUnit;
+   /* Only GL_TEXTURE_LOD_BIAS changes state here; other pnames are ignored. */
+   switch (pname) {
+   case GL_TEXTURE_ENV_COLOR: 
+#if 0
+   {
+      GLubyte r, g, b, a;
+      GLuint col;
+      
+      UNCLAMPED_FLOAT_TO_UBYTE(r, param[RCOMP]);
+      UNCLAMPED_FLOAT_TO_UBYTE(g, param[GCOMP]);
+      UNCLAMPED_FLOAT_TO_UBYTE(b, param[BCOMP]);
+      UNCLAMPED_FLOAT_TO_UBYTE(a, param[ACOMP]);
+
+      col = ((a << 24) | (r << 16) | (g << 8) | b);
+
+      if (col != i830->state.TexEnv[unit][I830_TEXENVREG_COL1]) {
+	 I830_STATECHANGE(i830, I830_UPLOAD_TEXENV);
+	 i830->state.TexEnv[unit][I830_TEXENVREG_COL1] = col;
+      }
+
+      break;
+   }
+#endif
+   case GL_TEXTURE_ENV_MODE:
+   case GL_COMBINE_RGB:
+   case GL_COMBINE_ALPHA:
+   case GL_SOURCE0_RGB:
+   case GL_SOURCE1_RGB:
+   case GL_SOURCE2_RGB:
+   case GL_SOURCE0_ALPHA:
+   case GL_SOURCE1_ALPHA:
+   case GL_SOURCE2_ALPHA:
+   case GL_OPERAND0_RGB:
+   case GL_OPERAND1_RGB:
+   case GL_OPERAND2_RGB:
+   case GL_OPERAND0_ALPHA:
+   case GL_OPERAND1_ALPHA:
+   case GL_OPERAND2_ALPHA:
+   case GL_RGB_SCALE:
+   case GL_ALPHA_SCALE:
+      break;
+   /* LOD bias: scaled by 16, clamped to [-64, 63], stored in TM0S3. */
+   case GL_TEXTURE_LOD_BIAS: {
+      /* Clamp before converting: out-of-range float -> int is undefined. */
+      const double scaled = (*param) * 16.0;
+      int b = (scaled > 63.0) ? 63 : (scaled >= -64.0) ? (int) scaled : -64;
+      I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit));
+      i830->state.Tex[unit][I830_TEXREG_TM0S3] &= ~TM0S3_LOD_BIAS_MASK;
+      /* Shift as unsigned: left-shifting a negative int is undefined. */
+      i830->state.Tex[unit][I830_TEXREG_TM0S3] |= 
+	 (((GLuint) b << TM0S3_LOD_BIAS_SHIFT) & TM0S3_LOD_BIAS_MASK);
+      break;
+   }
+   default:
+      break;
+   }
+}
+
+static void i830BindTexture( GLcontext *ctx, GLenum target,
+			    struct gl_texture_object *texObj )
+{
+   /* Lazily create the driver-private texture object on first bind.
+    * i830AllocTexObj() stores it in texObj->DriverData itself; if the
+    * allocation fails, DriverData simply stays NULL.
+    */
+   if (!texObj->DriverData)
+      i830AllocTexObj( texObj );
+}
+
+
+/* Install the i830 texture entry points into the driver function table. */
+void i830InitTextureFuncs(struct dd_function_table *functions)
+{
+   functions->TexParameter = i830TexParameter;
+   functions->TexEnv = i830TexEnv;
+   functions->BindTexture = i830BindTexture;
+}
diff --git a/i915/i830_texblend.c b/i915/i830_texblend.c
new file mode 100644
index 0000000..49e0347
--- /dev/null
+++ b/i915/i830_texblend.c
@@ -0,0 +1,465 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "macros.h"
+#include "mtypes.h"
+#include "simple_list.h"
+#include "enums.h"
+#include "texformat.h"
+#include "texstore.h"
+
+#include "mm.h"
+
+#include "intel_screen.h"
+#include "intel_ioctl.h"
+#include "intel_tex.h"
+
+#include "i830_context.h"
+#include "i830_reg.h"
+
+
+/* ================================================================
+ * Texture combine functions
+ */
+static GLuint pass_through( GLuint *state, GLuint blendUnit )
+{
+   /* Write the fixed four-word blend stage for blendUnit: the op is
+    * TEXBLENDOP_ARG1 and ARG1 is TEXBLENDARG_CURRENT on both pipes.
+    * The locals hold the bits shared by the colour and alpha word of each
+    * pair; the per-pipe bits are OR'ed in below.  OR is order-independent,
+    * so the words equal the ones built by listing every flag per word.
+    */
+   const GLuint op = (_3DSTATE_MAP_BLEND_OP_CMD(blendUnit) |
+		      ENABLE_TEXOUTPUT_WRT_SEL |
+		      TEXOP_OUTPUT_CURRENT |
+		      TEXOP_SCALE_1X |
+		      TEXOP_MODIFY_PARMS |
+		      TEXBLENDOP_ARG1);
+   const GLuint arg = (_3DSTATE_MAP_BLEND_ARG_CMD(blendUnit) |
+		       TEXBLEND_ARG1 |
+		       TEXBLENDARG_MODIFY_PARMS |
+		       TEXBLENDARG_CURRENT);
+
+   /* Blend-op words; only the colour one carries DISABLE_TEX_CNTRL_STAGE. */
+   state[0] = op | TEXPIPE_COLOR | DISABLE_TEX_CNTRL_STAGE;
+   state[1] = op | TEXPIPE_ALPHA;
+
+   /* Blend-arg words, one per pipe. */
+   state[2] = arg | TEXPIPE_COLOR;
+   state[3] = arg | TEXPIPE_ALPHA;
+
+   return 4;			/* GLuint words written */
+}
+
+
+static GLuint emit_factor( GLuint blendUnit, GLuint *state, GLuint count, 
+			   const GLfloat *factor )
+{
+   GLubyte r, g, b, a;
+   GLuint col;
+   /* Append a 2-word colour-factor packet at state[count]; return new count. */
+   if (0)
+      fprintf(stderr, "emit constant %d: %.2f %.2f %.2f %.2f\n",
+	  blendUnit, factor[0], factor[1], factor[2], factor[3]);
+
+   UNCLAMPED_FLOAT_TO_UBYTE(r, factor[0]);
+   UNCLAMPED_FLOAT_TO_UBYTE(g, factor[1]);
+   UNCLAMPED_FLOAT_TO_UBYTE(b, factor[2]);
+   UNCLAMPED_FLOAT_TO_UBYTE(a, factor[3]);
+   /* Widen first: a GLubyte promotes to int, so (a << 24) overflows for a >= 128. */
+   col = (((GLuint) a << 24) | ((GLuint) r << 16) | ((GLuint) g << 8) | b);
+
+   state[count++] = _3DSTATE_COLOR_FACTOR_N_CMD(blendUnit); 
+   state[count++] = col;
+
+   return count;
+}
+
+
+static __inline__ GLuint GetTexelOp(GLint unit)
+{
+   /* Texel source selector per texture unit; anything else maps to unit 0. */
+   const GLuint texel_arg[4] = {
+      TEXBLENDARG_TEXEL0, TEXBLENDARG_TEXEL1,
+      TEXBLENDARG_TEXEL2, TEXBLENDARG_TEXEL3,
+   };
+
+   return (unit >= 0 && unit < 4) ? texel_arg[unit] : TEXBLENDARG_TEXEL0;
+}
+
+
+/**
+ * Calculate the hardware instructions that set up the current texture
+ * environment settings.  Since \c gl_texture_unit::_CurrentCombine is used,
+ * both "classic" texture environments and GL_ARB_texture_env_combine type
+ * texture environments are treated identically.
+ *
+ * \todo
+ * This function should return \c GLboolean.  When \c GL_FALSE is returned,
+ * it means that an environment is selected that the hardware cannot do.  This
+ * is the way the Radeon and R200 drivers work.
+ * \return Number of GLuint words written to \a state.
+ * \todo
+ * Looking at i830_3d_regs.h, it seems the i830 can do part of
+ * GL_ATI_texture_env_combine3.  It can handle using \c GL_ONE and
+ * \c GL_ZERO as combine inputs (which the code already supports).  It can
+ * also handle the \c GL_MODULATE_ADD_ATI mode.  Is it worth investigating
+ * partial support for the extension?
+ */
+GLuint
+i830SetTexEnvCombine(i830ContextPtr i830,
+		     const struct gl_tex_env_combine_state * combine,
+		     GLint blendUnit,
+		     GLuint texel_op,
+		     GLuint *state,
+		     const GLfloat *factor )
+{
+   const GLuint numColorArgs = combine->_NumArgsRGB;
+   const GLuint numAlphaArgs = combine->_NumArgsA;
+
+   GLuint blendop;
+   GLuint ablendop;
+   GLuint args_RGB[3];
+   GLuint args_A[3];
+   GLuint rgb_shift;
+   GLuint alpha_shift;
+   GLboolean need_factor = 0;
+   int i;
+   unsigned used;
+   static const GLuint tex_blend_rgb[3] = {
+      TEXPIPE_COLOR | TEXBLEND_ARG1 | TEXBLENDARG_MODIFY_PARMS,
+      TEXPIPE_COLOR | TEXBLEND_ARG2 | TEXBLENDARG_MODIFY_PARMS,
+      TEXPIPE_COLOR | TEXBLEND_ARG0 | TEXBLENDARG_MODIFY_PARMS,
+   };
+   static const GLuint tex_blend_a[3] = {
+      TEXPIPE_ALPHA | TEXBLEND_ARG1 | TEXBLENDARG_MODIFY_PARMS,
+      TEXPIPE_ALPHA | TEXBLEND_ARG2 | TEXBLENDARG_MODIFY_PARMS,
+      TEXPIPE_ALPHA | TEXBLEND_ARG0 | TEXBLENDARG_MODIFY_PARMS,
+   };
+
+   if(INTEL_DEBUG&DEBUG_TEXTURE)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+
+   /* The EXT version of the DOT3 extension does not support the
+    * scale factor, but the ARB version (and the version in OpenGL
+    * 1.3) does.
+    */
+   switch (combine->ModeRGB) {
+   case GL_DOT3_RGB_EXT:
+      alpha_shift = combine->ScaleShiftA;
+      rgb_shift = 0;
+      break;
+
+   case GL_DOT3_RGBA_EXT:
+      alpha_shift = 0;
+      rgb_shift = 0;
+      break;
+
+   default:
+      rgb_shift = combine->ScaleShiftRGB;
+      alpha_shift = combine->ScaleShiftA;
+      break;
+   }
+
+   /* Pick the RGB blend op; an unhandled mode returns the pass-through stage. */
+   switch(combine->ModeRGB) {
+   case GL_REPLACE: 
+      blendop = TEXBLENDOP_ARG1;
+      break;
+   case GL_MODULATE: 
+      blendop = TEXBLENDOP_MODULATE;
+      break;
+   case GL_ADD: 
+      blendop = TEXBLENDOP_ADD;
+      break;
+   case GL_ADD_SIGNED:
+      blendop = TEXBLENDOP_ADDSIGNED; 
+      break;
+   case GL_INTERPOLATE:
+      blendop = TEXBLENDOP_BLEND; 
+      break;
+   case GL_SUBTRACT: 
+      blendop = TEXBLENDOP_SUBTRACT;
+      break;
+   case GL_DOT3_RGB_EXT:
+   case GL_DOT3_RGB:
+      blendop = TEXBLENDOP_DOT3;
+      break;
+   case GL_DOT3_RGBA_EXT:
+   case GL_DOT3_RGBA:
+      blendop = TEXBLENDOP_DOT3;
+      break;
+   default: 
+      return pass_through( state, blendUnit );
+   }
+
+   blendop |= (rgb_shift << TEXOP_SCALE_SHIFT);
+
+
+   /* Handle RGB args */
+   for(i = 0; i < 3; i++) {
+      switch(combine->SourceRGB[i]) {
+      case GL_TEXTURE: 
+	 args_RGB[i] = texel_op;
+	 break;
+      case GL_TEXTURE0:
+      case GL_TEXTURE1:
+      case GL_TEXTURE2:
+      case GL_TEXTURE3:
+	 args_RGB[i] = GetTexelOp( combine->SourceRGB[i] - GL_TEXTURE0 );
+	 break;
+      case GL_CONSTANT:
+	 args_RGB[i] = TEXBLENDARG_FACTOR_N; 
+	 need_factor = 1;
+	 break;
+      case GL_PRIMARY_COLOR:
+	 args_RGB[i] = TEXBLENDARG_DIFFUSE;
+	 break;
+      case GL_PREVIOUS:
+	 args_RGB[i] = TEXBLENDARG_CURRENT; 
+	 break;
+      default: 
+	 return pass_through( state, blendUnit );
+      }
+
+      switch(combine->OperandRGB[i]) {
+      case GL_SRC_COLOR: 
+	 args_RGB[i] |= 0;
+	 break;
+      case GL_ONE_MINUS_SRC_COLOR: 
+	 args_RGB[i] |= TEXBLENDARG_INV_ARG;
+	 break;
+      case GL_SRC_ALPHA: 
+	 args_RGB[i] |= TEXBLENDARG_REPLICATE_ALPHA;
+	 break;
+      case GL_ONE_MINUS_SRC_ALPHA: 
+	 args_RGB[i] |= (TEXBLENDARG_REPLICATE_ALPHA | 
+			 TEXBLENDARG_INV_ARG);
+	 break;
+      default: 
+	 return pass_through( state, blendUnit );
+      }
+   }
+
+
+   /* Need to knobble the alpha calculations of TEXBLENDOP_DOT4 to
+    * match the spec.  Can't use DOT3 as it won't propagate values
+    * into alpha as required:
+    *
+    * Note - the global factor is set up with alpha == .5, so 
+    * the alpha part of the DOT4 calculation should be zero.
+    */
+   if ( combine->ModeRGB == GL_DOT3_RGBA_EXT || 
+	combine->ModeRGB == GL_DOT3_RGBA ) {
+      ablendop = TEXBLENDOP_DOT4;
+      args_A[0] = TEXBLENDARG_FACTOR; /* the global factor */
+      args_A[1] = TEXBLENDARG_FACTOR;
+      args_A[2] = TEXBLENDARG_FACTOR;
+   }
+   else {
+      switch(combine->ModeA) {
+      case GL_REPLACE: 
+	 ablendop = TEXBLENDOP_ARG1;
+	 break;
+      case GL_MODULATE: 
+	 ablendop = TEXBLENDOP_MODULATE;
+	 break;
+      case GL_ADD: 
+	 ablendop = TEXBLENDOP_ADD;
+	 break;
+      case GL_ADD_SIGNED:
+	 ablendop = TEXBLENDOP_ADDSIGNED; 
+	 break;
+      case GL_INTERPOLATE:
+	 ablendop = TEXBLENDOP_BLEND; 
+	 break;
+      case GL_SUBTRACT: 
+	 ablendop = TEXBLENDOP_SUBTRACT;
+	 break;
+      default:
+	 return pass_through( state, blendUnit );
+      }
+
+
+      ablendop |= (alpha_shift << TEXOP_SCALE_SHIFT);
+
+      /* Handle A args */
+      for(i = 0; i < 3; i++) {
+	 switch(combine->SourceA[i]) {
+	 case GL_TEXTURE: 
+	    args_A[i] = texel_op;
+	    break;
+	 case GL_TEXTURE0:
+	 case GL_TEXTURE1:
+	 case GL_TEXTURE2:
+	 case GL_TEXTURE3:
+	    args_A[i] = GetTexelOp( combine->SourceA[i] - GL_TEXTURE0 );
+	    break;
+	 case GL_CONSTANT:
+	    args_A[i] = TEXBLENDARG_FACTOR_N; 
+	    need_factor = 1;
+	    break;
+	 case GL_PRIMARY_COLOR:
+	    args_A[i] = TEXBLENDARG_DIFFUSE; 
+	    break;
+	 case GL_PREVIOUS:
+	    args_A[i] = TEXBLENDARG_CURRENT; 
+	    break;
+	 default: 
+	    return pass_through( state, blendUnit );
+	 }
+
+	 switch(combine->OperandA[i]) {
+	 case GL_SRC_ALPHA: 
+	    args_A[i] |= 0;
+	    break;
+	 case GL_ONE_MINUS_SRC_ALPHA: 
+	    args_A[i] |= TEXBLENDARG_INV_ARG;
+	    break;
+	 default: 
+	    return pass_through( state, blendUnit );
+	 }
+      }
+   }
+
+
+
+   /* Native Arg1 == Arg0 in GL_EXT_texture_env_combine spec */
+   /* Native Arg2 == Arg1 in GL_EXT_texture_env_combine spec */
+   /* Native Arg0 == Arg2 in GL_EXT_texture_env_combine spec */
+
+   /* When we render we need to figure out which is the last really enabled
+    * tex unit, and put last stage on it
+    */
+
+
+   /* Build color & alpha pipelines */
+
+   used = 0;
+   state[used++] = (_3DSTATE_MAP_BLEND_OP_CMD(blendUnit) |
+		    TEXPIPE_COLOR |
+		    ENABLE_TEXOUTPUT_WRT_SEL |
+		    TEXOP_OUTPUT_CURRENT |
+		    DISABLE_TEX_CNTRL_STAGE |
+		    TEXOP_MODIFY_PARMS |
+		    blendop);
+   state[used++] = (_3DSTATE_MAP_BLEND_OP_CMD(blendUnit) |
+		    TEXPIPE_ALPHA |
+		    ENABLE_TEXOUTPUT_WRT_SEL |
+		    TEXOP_OUTPUT_CURRENT |
+		    TEXOP_MODIFY_PARMS |
+		    ablendop);
+
+   for ( i = 0 ; i < numColorArgs ; i++ ) {
+      state[used++] = (_3DSTATE_MAP_BLEND_ARG_CMD(blendUnit) |
+		       tex_blend_rgb[i] | args_RGB[i]);
+   }
+
+   for ( i = 0 ; i < numAlphaArgs ; i++ ) {
+      state[used++] = (_3DSTATE_MAP_BLEND_ARG_CMD(blendUnit) |
+		       tex_blend_a[i] | args_A[i]);
+   }
+
+   /* A GL_CONSTANT source means the colour factor packet must be appended. */
+   if (need_factor) 
+      return emit_factor( blendUnit, state, used, factor );
+   else 
+      return used;
+}
+
+
+static void emit_texblend( i830ContextPtr i830, GLuint unit, GLuint blendUnit,
+			   GLboolean last_stage )
+{
+   struct gl_texture_unit *texUnit = &i830->intel.ctx.Texture.Unit[unit];
+   GLuint words[I830_TEXBLEND_SIZE], n_words;
+
+   if (0) fprintf(stderr, "%s unit %d\n", __FUNCTION__, unit);
+
+   /* Build the blend words for GL unit `unit` into scratch storage,
+    * addressed to hardware blend stage `blendUnit`. */
+   n_words = i830SetTexEnvCombine( i830, texUnit->_CurrentCombine, blendUnit,
+				   GetTexelOp(unit), words, texUnit->EnvColor );
+
+   if (last_stage)
+      words[0] |= TEXOP_LAST_STAGE;
+
+   /* Touch the cached copy (and flag an upload) only when it differs. */
+   if (n_words != i830->state.TexBlendWordsUsed[blendUnit] ||
+       memcmp( words, i830->state.TexBlend[blendUnit],
+	       n_words * sizeof(GLuint) ) != 0) {
+      I830_STATECHANGE( i830, I830_UPLOAD_TEXBLEND(blendUnit) );
+      memcpy( i830->state.TexBlend[blendUnit], words,
+	      n_words * sizeof(GLuint) );
+      i830->state.TexBlendWordsUsed[blendUnit] = n_words;
+   }
+
+   I830_ACTIVESTATE(i830, I830_UPLOAD_TEXBLEND(blendUnit), GL_TRUE);
+}
+
+static void emit_passthrough( i830ContextPtr i830 )
+{
+   const GLuint stage = 0;	/* the pass-through always uses blend stage 0 */
+   GLuint words[I830_TEXBLEND_SIZE];
+   GLuint n_words = pass_through( words, stage );
+   words[0] |= TEXOP_LAST_STAGE;
+
+   /* Refresh the cached stage only when its contents actually change. */
+   if (n_words != i830->state.TexBlendWordsUsed[stage] ||
+       memcmp( words, i830->state.TexBlend[stage],
+	       n_words * sizeof(GLuint) ) != 0) {
+      I830_STATECHANGE( i830, I830_UPLOAD_TEXBLEND(stage) );
+      memcpy( i830->state.TexBlend[stage], words, n_words * sizeof(GLuint) );
+      i830->state.TexBlendWordsUsed[stage] = n_words;
+   }
+
+   I830_ACTIVESTATE(i830, I830_UPLOAD_TEXBLEND(stage), GL_TRUE);
+}
+
+void i830EmitTextureBlend( i830ContextPtr i830 )
+{
+   GLcontext *ctx = &i830->intel.ctx;
+   GLuint unit, last_enabled = 0, stage = 0;
+
+   I830_ACTIVESTATE(i830, I830_UPLOAD_TEXBLEND_ALL, GL_FALSE);
+   if (!ctx->Texture._EnabledUnits) {
+      emit_passthrough( i830 );	/* no texturing: one pass-through stage */
+      return;
+   }
+
+   /* The highest enabled unit is the one whose stage gets flagged as last. */
+   for (unit = 0 ; unit < ctx->Const.MaxTextureUnits ; unit++)
+      if (ctx->Texture.Unit[unit]._ReallyEnabled)
+	 last_enabled = unit;
+   /* Enabled units are packed into consecutive blend stages. */
+   for (unit = 0 ; unit < ctx->Const.MaxTextureUnits ; unit++)
+      if (ctx->Texture.Unit[unit]._ReallyEnabled)
+	 emit_texblend( i830, unit, stage++, last_enabled == unit );
+}
+
diff --git a/i915/i830_texstate.c b/i915/i830_texstate.c
new file mode 100644
index 0000000..ba972da
--- /dev/null
+++ b/i915/i830_texstate.c
@@ -0,0 +1,483 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "macros.h"
+#include "mtypes.h"
+#include "simple_list.h"
+#include "enums.h"
+#include "texformat.h"
+#include "texstore.h"
+
+#include "mm.h"
+
+#include "intel_screen.h"
+#include "intel_ioctl.h"
+#include "intel_tex.h"
+
+#include "i830_context.h"
+#include "i830_reg.h"
+
+static const GLint initial_offsets[6][2] = { {0,0},	/* per cube face: {x,y} of  */
+				       {0,2},	/* mip level 0, in multiples */
+				       {1,0},	/* of the base face size     */
+				       {1,2},
+				       {1,1},
+				       {1,3} };
+
+static const GLint step_offsets[6][2] = { {0,2},	/* per cube face: {x,y} step */
+				    {0,2},	/* to the next mip level, in */
+				    {-1,2},	/* multiples of that level's */
+				    {-1,2},	/* (halved) size             */
+				    {-1,1},
+				    {-1,1} };
+
+#define I830_TEX_UNIT_ENABLED(unit)		(1<<(unit))	/* bit for one unit; arg parenthesized */
+
+static GLboolean i830SetTexImages( i830ContextPtr i830, 
+				  struct gl_texture_object *tObj )
+{
+   GLuint total_height, pitch, i, textureFormat;
+   i830TextureObjectPtr t = (i830TextureObjectPtr) tObj->DriverData;
+   const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
+   GLint firstLevel, lastLevel, numLevels;
+   /* Translate the base image's Mesa format into hardware format bits. */
+   switch( baseImage->TexFormat->MesaFormat ) {
+   case MESA_FORMAT_L8:
+      t->intel.texelBytes = 1;
+      textureFormat = MAPSURF_8BIT | MT_8BIT_L8;
+      break;
+
+   case MESA_FORMAT_I8:
+      t->intel.texelBytes = 1;
+      textureFormat = MAPSURF_8BIT | MT_8BIT_I8;
+      break;
+
+   case MESA_FORMAT_A8:
+      t->intel.texelBytes = 1;
+      textureFormat = MAPSURF_8BIT | MT_8BIT_I8; /* Kludge -- check with conform, glean */
+      break;
+
+   case MESA_FORMAT_AL88:
+      t->intel.texelBytes = 2;
+      textureFormat = MAPSURF_16BIT | MT_16BIT_AY88;
+      break;
+
+   case MESA_FORMAT_RGB565:
+      t->intel.texelBytes = 2;
+      textureFormat = MAPSURF_16BIT | MT_16BIT_RGB565;
+      break;
+
+   case MESA_FORMAT_ARGB1555:
+      t->intel.texelBytes = 2;
+      textureFormat = MAPSURF_16BIT | MT_16BIT_ARGB1555;
+      break;
+
+   case MESA_FORMAT_ARGB4444:
+      t->intel.texelBytes = 2;
+      textureFormat = MAPSURF_16BIT | MT_16BIT_ARGB4444;
+      break;
+
+   case MESA_FORMAT_ARGB8888:
+      t->intel.texelBytes = 4;
+      textureFormat = MAPSURF_32BIT | MT_32BIT_ARGB8888;
+      break;
+
+   case MESA_FORMAT_YCBCR_REV:
+      t->intel.texelBytes = 2;
+      textureFormat = (MAPSURF_422 | MT_422_YCRCB_NORMAL | 
+		       TM0S1_COLORSPACE_CONVERSION);
+      break;
+
+   case MESA_FORMAT_YCBCR:
+      t->intel.texelBytes = 2;
+      textureFormat = (MAPSURF_422 | MT_422_YCRCB_SWAPY | /* ??? */
+		       TM0S1_COLORSPACE_CONVERSION);
+      break;
+
+   case MESA_FORMAT_RGB_FXT1:
+   case MESA_FORMAT_RGBA_FXT1:
+     t->intel.texelBytes = 2;
+     textureFormat = MAPSURF_COMPRESSED | MT_COMPRESS_FXT1;
+     break;
+
+   case MESA_FORMAT_RGBA_DXT1:
+   case MESA_FORMAT_RGB_DXT1:
+     /* 
+      * DXTn pitches are Width/4 * blocksize in bytes 
+      * for DXT1: blocksize=8 so Width/4*8 = Width * 2 
+      * for DXT3/5: blocksize=16 so Width/4*16 = Width * 4
+      */
+     t->intel.texelBytes = 2;
+     textureFormat = (MAPSURF_COMPRESSED | MT_COMPRESS_DXT1);
+     break;
+   case MESA_FORMAT_RGBA_DXT3:
+     t->intel.texelBytes = 4;
+     textureFormat = (MAPSURF_COMPRESSED | MT_COMPRESS_DXT2_3);
+     break;
+   case MESA_FORMAT_RGBA_DXT5:
+     t->intel.texelBytes = 4;
+     textureFormat = (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5);
+     break;
+
+   default:
+      fprintf(stderr, "%s: bad image format\n", __FUNCTION__);
+      abort();
+   }
+
+   /* Compute which mipmap levels we really want to send to the hardware.
+    * This depends on the base image size, GL_TEXTURE_MIN_LOD,
+    * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, and GL_TEXTURE_MAX_LEVEL.
+    * Yes, this looks overly complicated, but it's all needed.
+    */
+   driCalculateTextureFirstLastLevel( (driTextureObject *) t );
+
+
+   /* Figure out the amount of memory required to hold all the mipmap
+    * levels.  Choose the smallest pitch to accommodate the largest
+    * mipmap:
+    */
+   firstLevel = t->intel.base.firstLevel;
+   lastLevel = t->intel.base.lastLevel;
+   numLevels = lastLevel - firstLevel + 1;
+
+
+   /* All images must be loaded at this pitch.  Count the number of
+    * lines required:
+    */
+   switch (tObj->Target) {
+   case GL_TEXTURE_CUBE_MAP: {
+      const GLuint dim = tObj->Image[0][firstLevel]->Width;
+      GLuint face;
+
+      pitch = dim * t->intel.texelBytes;
+      pitch *= 2;		/* double pitch for cube layouts */
+      pitch = (pitch + 3) & ~3;
+      
+      total_height = dim * 4;
+
+      for ( face = 0 ; face < 6 ; face++) {
+	 GLuint x = initial_offsets[face][0] * dim;
+	 GLuint y = initial_offsets[face][1] * dim;
+	 GLuint d = dim;
+	 
+	 t->intel.base.dirty_images[face] = ~0;
+
+	 assert(tObj->Image[face][firstLevel]->Width == dim);
+	 assert(tObj->Image[face][firstLevel]->Height == dim);
+
+	 for (i = 0; i < numLevels; i++) {
+	    t->intel.image[face][i].image = tObj->Image[face][firstLevel + i];
+	    if (!t->intel.image[face][i].image) {
+	       fprintf(stderr, "no image %d %d\n", face, i);
+	       break;		/* can't happen */
+	    }
+	 
+	    t->intel.image[face][i].offset = 
+	       y * pitch + x * t->intel.texelBytes;
+	    t->intel.image[face][i].internalFormat = baseImage->_BaseFormat;
+
+	    d >>= 1;
+	    x += step_offsets[face][0] * d;
+	    y += step_offsets[face][1] * d;
+	 }
+      }
+      break;
+   }
+   default:
+      pitch = tObj->Image[0][firstLevel]->Width * t->intel.texelBytes;
+      pitch = (pitch + 3) & ~3;
+      t->intel.base.dirty_images[0] = ~0;
+
+      for ( total_height = i = 0 ; i < numLevels ; i++ ) {
+	 t->intel.image[0][i].image = tObj->Image[0][firstLevel + i];
+	 if (!t->intel.image[0][i].image) 
+	    break;
+	 
+	 t->intel.image[0][i].offset = total_height * pitch;
+	 t->intel.image[0][i].internalFormat = baseImage->_BaseFormat;
+	 if (t->intel.image[0][i].image->IsCompressed)
+	 {
+	   if (t->intel.image[0][i].image->Height > 4)
+	     total_height += t->intel.image[0][i].image->Height/4;
+	   else
+	     total_height += 1;
+	 }
+	 else
+	   total_height += MAX2(2, t->intel.image[0][i].image->Height);
+      }
+      break;
+   }
+   /* Store the layout and rebuild the TM0S1..TM0S3 setup words from it. */
+   t->intel.Pitch = pitch;
+   t->intel.base.totalSize = total_height*pitch;
+   t->intel.max_level = i-1;	/* i: level count reached by the last loop above */
+   t->Setup[I830_TEXREG_TM0S1] = 
+      (((tObj->Image[0][firstLevel]->Height - 1) << TM0S1_HEIGHT_SHIFT) |
+       ((tObj->Image[0][firstLevel]->Width - 1) << TM0S1_WIDTH_SHIFT) |
+       textureFormat);
+   t->Setup[I830_TEXREG_TM0S2] = 
+      (((pitch / 4) - 1) << TM0S2_PITCH_SHIFT) |
+      TM0S2_CUBE_FACE_ENA_MASK;
+   t->Setup[I830_TEXREG_TM0S3] &= ~TM0S3_MAX_MIP_MASK;
+   t->Setup[I830_TEXREG_TM0S3] &= ~TM0S3_MIN_MIP_MASK;
+   t->Setup[I830_TEXREG_TM0S3] |= ((numLevels - 1)*4) << TM0S3_MIN_MIP_SHIFT;
+   t->intel.dirty = I830_UPLOAD_TEX_ALL;
+   /* Upload via the shared intel path; its result is this function's result. */
+   return intelUploadTexImages( &i830->intel, &t->intel, 0 );
+}
+
+
+static void i830_import_tex_unit( i830ContextPtr i830, 
+			   i830TextureObjectPtr t,
+			   GLuint unit )
+{
+   if(INTEL_DEBUG&DEBUG_TEXTURE)
+      fprintf(stderr, "%s unit(%d)\n", __FUNCTION__, unit);
+   /* Make t current on this unit: first unbind the object it replaces. */
+   if (i830->intel.CurrentTexObj[unit]) 
+      i830->intel.CurrentTexObj[unit]->base.bound &= ~(1U << unit);
+   /* Then bind t and record the unit in its bound mask. */
+   i830->intel.CurrentTexObj[unit] = (intelTextureObjectPtr)t;
+   t->intel.base.bound |= (1 << unit);
+   /* Signal the state change before rewriting this unit's registers. */
+   I830_STATECHANGE( i830, I830_UPLOAD_TEX(unit) );
+   /* TM0LI selects this unit's map; TM0S0 carries the texture offset. */
+   i830->state.Tex[unit][I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 | 
+					       (LOAD_TEXTURE_MAP0 << unit) | 4);
+   i830->state.Tex[unit][I830_TEXREG_TM0S0] = (TM0S0_USE_FENCE |
+					       t->intel.TextureOffset);
+   /* TM0S1 and TM0S2 are taken unchanged from the texture's Setup[]. */
+   i830->state.Tex[unit][I830_TEXREG_TM0S1] = t->Setup[I830_TEXREG_TM0S1];
+   i830->state.Tex[unit][I830_TEXREG_TM0S2] = t->Setup[I830_TEXREG_TM0S2];
+   /* TM0S3: keep the unit's LOD bias bits, take the rest from Setup[]. */
+   i830->state.Tex[unit][I830_TEXREG_TM0S3] &= TM0S3_LOD_BIAS_MASK;
+   i830->state.Tex[unit][I830_TEXREG_TM0S3] |= (t->Setup[I830_TEXREG_TM0S3] &
+						~TM0S3_LOD_BIAS_MASK);
+   /* TM0S4 and CUBE are copied; MCS gets this unit's map-unit field. */
+   i830->state.Tex[unit][I830_TEXREG_TM0S4] = t->Setup[I830_TEXREG_TM0S4];
+   i830->state.Tex[unit][I830_TEXREG_MCS] = (t->Setup[I830_TEXREG_MCS] & 
+					     ~MAP_UNIT_MASK);   
+   i830->state.Tex[unit][I830_TEXREG_CUBE] = t->Setup[I830_TEXREG_CUBE];
+   i830->state.Tex[unit][I830_TEXREG_MCS] |= MAP_UNIT(unit);
+   /* The unit now reflects t, so clear t's dirty bit for it. */
+   t->intel.dirty &= ~I830_UPLOAD_TEX(unit);
+}
+
+
+
+static GLboolean enable_tex_common( GLcontext *ctx, GLuint unit )
+{
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   struct gl_texture_object *tObj = texUnit->_Current;
+   i830TextureObjectPtr t = (i830TextureObjectPtr)tObj->DriverData;
+   GLboolean need_import;
+
+   if (0) fprintf(stderr, "%s\n", __FUNCTION__);
+
+   /* Bordered textures are not handled here: report and fall back. */
+   if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) {
+      fprintf(stderr, "Texture border\n");
+      return GL_FALSE;
+   }
+
+   /* Rebuild and upload the images while face 0 has dirty images
+    * (not pipelined); give up if that fails.
+    */
+   if (t->intel.base.dirty_images[0] && !i830SetTexImages( i830, tObj ))
+      return GL_FALSE;
+
+   /* Re-import the unit's registers when a different texture object is
+    * current on it than last time, or this one is dirty for the unit.
+    */
+   need_import = (i830->intel.CurrentTexObj[unit] != &t->intel);
+   if (!need_import)
+      need_import = (t->intel.dirty & I830_UPLOAD_TEX(unit)) != 0;
+   if (need_import)
+      i830_import_tex_unit( i830, t, unit);
+
+   I830_ACTIVESTATE(i830, I830_UPLOAD_TEX(unit), GL_TRUE);
+
+   return GL_TRUE;
+}
+
+static GLboolean enable_tex_rect( GLcontext *ctx, GLuint unit )
+{
+   /* Rectangle textures: coordinates in texel units, no cube faces. */
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   const GLuint old_mcs = i830->state.Tex[unit][I830_TEXREG_MCS];
+   const GLuint new_mcs = ((old_mcs & ~TEXCOORDS_ARE_NORMAL) |
+			   (TEXCOORDS_ARE_IN_TEXELUNITS));
+
+   /* Skip the state change when both registers are already right. */
+   if (new_mcs == old_mcs && i830->state.Tex[unit][I830_TEXREG_CUBE] == 0)
+      return GL_TRUE;
+
+   I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit));
+   i830->state.Tex[unit][I830_TEXREG_MCS] = new_mcs;
+   i830->state.Tex[unit][I830_TEXREG_CUBE] = 0;
+   return GL_TRUE;
+}
+
+
+static GLboolean enable_tex_2d( GLcontext *ctx, GLuint unit )
+{
+   /* 1D/2D textures: normalized coordinates, no cube faces. */
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   const GLuint old_mcs = i830->state.Tex[unit][I830_TEXREG_MCS];
+   const GLuint new_mcs = ((old_mcs & ~TEXCOORDS_ARE_IN_TEXELUNITS) |
+			   (TEXCOORDS_ARE_NORMAL));
+
+   /* Skip the state change when both registers are already right. */
+   if (new_mcs == old_mcs && i830->state.Tex[unit][I830_TEXREG_CUBE] == 0)
+      return GL_TRUE;
+
+   I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit));
+   i830->state.Tex[unit][I830_TEXREG_MCS] = new_mcs;
+   i830->state.Tex[unit][I830_TEXREG_CUBE] = 0;
+   return GL_TRUE;
+}
+
+ 
+static GLboolean enable_tex_cube( GLcontext *ctx, GLuint unit )
+{
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   struct gl_texture_object *tObj = texUnit->_Current;
+   i830TextureObjectPtr t = (i830TextureObjectPtr)tObj->DriverData;
+   GLuint mcs = i830->state.Tex[unit][I830_TEXREG_MCS];
+   const GLuint cube = CUBE_NEGX_ENABLE | CUBE_POSX_ENABLE
+     | CUBE_NEGY_ENABLE | CUBE_POSY_ENABLE
+     | CUBE_NEGZ_ENABLE | CUBE_POSZ_ENABLE;
+   GLuint face;
+   /* Cube maps: normalized coordinates with all six faces enabled. */
+   mcs &= ~TEXCOORDS_ARE_IN_TEXELUNITS;
+   mcs |= TEXCOORDS_ARE_NORMAL;
+
+   if ((mcs != i830->state.Tex[unit][I830_TEXREG_MCS])
+       || (cube != i830->state.Tex[unit][I830_TEXREG_CUBE])) {
+      I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit));
+      i830->state.Tex[unit][I830_TEXREG_MCS] = mcs;
+      i830->state.Tex[unit][I830_TEXREG_CUBE] = cube;
+   }
+
+   /* Upload teximages (not pipelined); a failure is passed to the caller.
+    */
+   if ( t->intel.base.dirty_images[0] || t->intel.base.dirty_images[1] ||
+        t->intel.base.dirty_images[2] || t->intel.base.dirty_images[3] ||
+        t->intel.base.dirty_images[4] || t->intel.base.dirty_images[5] ) {
+      if (!i830SetTexImages( i830, tObj ))
+	 return GL_FALSE;
+   }
+
+   /* upload (per face) */
+   for (face = 0; face < 6; face++) {
+      if (t->intel.base.dirty_images[face]) {
+	 if (!intelUploadTexImages( &i830->intel, &t->intel, face )) {
+	    return GL_FALSE;
+	 }
+      }
+   }
+
+   return GL_TRUE;
+}
+
+
+static GLboolean disable_tex( GLcontext *ctx, GLuint unit )
+{
+   i830ContextPtr i830 = I830_CONTEXT(ctx);
+
+   /* This is happening too often.  I need to conditionally send diffuse
+    * state to the card.  Perhaps a diffuse dirty flag of some kind.
+    * Will need to change this logic if more than 2 texture units are
+    * used.  We need to only do this up to the last unit enabled, or unit
+    * one if nothing is enabled.
+    */
+
+   if ( i830->intel.CurrentTexObj[unit] != NULL ) {
+      /* The old texture is no longer bound to this texture unit.
+       * Clear this unit's bit (the one i830_import_tex_unit() set) from
+       * its bound mask; always returns GL_TRUE.
+       */
+      i830->intel.CurrentTexObj[unit]->base.bound &= ~(1U << unit);
+      i830->intel.CurrentTexObj[unit] = NULL;
+   }
+
+   return GL_TRUE;
+}
+
+static GLboolean i830UpdateTexUnit( GLcontext *ctx, GLuint unit )
+{
+   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   GLboolean ok = GL_FALSE;
+   /* Nothing enabled on this unit: just unbind its texture. */
+   if (!texUnit->_ReallyEnabled)
+      return disable_tex( ctx, unit );
+   /* Enabled units are refused when tex.size is below 2048 * 1024. */
+   if (INTEL_CONTEXT(ctx)->intelScreen->tex.size < 2048 * 1024)
+      return GL_FALSE;
+   switch(texUnit->_ReallyEnabled) {
+   case TEXTURE_1D_BIT:
+   case TEXTURE_2D_BIT:
+      ok = enable_tex_common( ctx, unit ) && enable_tex_2d( ctx, unit );
+      break;
+   case TEXTURE_RECT_BIT:
+      ok = enable_tex_common( ctx, unit ) && enable_tex_rect( ctx, unit );
+      break;
+   case TEXTURE_CUBE_BIT:
+      ok = enable_tex_common( ctx, unit ) && enable_tex_cube( ctx, unit );
+      break;
+   default: break;		/* any other enable value: report failure */
+   }
+   return ok;
+}
+
+
+void i830UpdateTextureState( intelContextPtr intel )
+{
+   i830ContextPtr i830 = I830_CONTEXT(intel);
+   GLcontext *ctx = &intel->ctx;
+   GLboolean ok = GL_TRUE;
+   GLuint unit;
+
+   if (0) fprintf(stderr, "%s\n", __FUNCTION__);
+
+   /* Deactivate all texture units; enable_tex_common() re-activates. */
+   I830_ACTIVESTATE(i830, I830_UPLOAD_TEX_ALL, GL_FALSE);
+
+   /* Validate units 0..3 in order, stopping at the first failure. */
+   for (unit = 0; ok && unit < 4; unit++)
+      ok = i830UpdateTexUnit( ctx, unit );
+
+   FALLBACK( intel, I830_FALLBACK_TEXTURE, !ok );
+   if (ok)
+      i830EmitTextureBlend( i830 );
+}
+
+
+
diff --git a/i915/i830_vtbl.c b/i915/i830_vtbl.c
new file mode 100644
index 0000000..d40cf70
--- /dev/null
+++ b/i915/i830_vtbl.c
@@ -0,0 +1,534 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "i830_context.h"
+#include "i830_reg.h"
+
+#include "intel_batchbuffer.h"
+
+#include "tnl/t_context.h"
+#include "tnl/t_vertex.h"
+
+static GLboolean i830_check_vertex_size( intelContextPtr intel,
+					 GLuint expected );
+
+#define SZ_TO_HW(sz)  (((sz)-2)&0x3)	/* 2-bit size code: (sz - 2) & 3 */
+#define EMIT_SZ(sz)   (EMIT_1F + (sz) - 1)
+#define EMIT_ATTR( ATTR, STYLE, V0 )					\
+do {	/* append an attribute entry and OR V0 into the local v0 */	\
+   intel->vertex_attrs[intel->vertex_attr_count].attrib = (ATTR);	\
+   intel->vertex_attrs[intel->vertex_attr_count].format = (STYLE);	\
+   intel->vertex_attr_count++;						\
+   v0 |= (V0);								\
+} while (0)
+/* Append a padding entry (format EMIT_PAD) whose offset field is N. */
+#define EMIT_PAD( N )							\
+do {									\
+   intel->vertex_attrs[intel->vertex_attr_count].attrib = 0;		\
+   intel->vertex_attrs[intel->vertex_attr_count].format = EMIT_PAD;	\
+   intel->vertex_attrs[intel->vertex_attr_count].offset = (N);		\
+   intel->vertex_attr_count++;						\
+} while (0)
+
+/* Shift x into the n-th 2-bit field and the n-th 4-bit field respectively. */
+#define VRTX_TEX_SET_FMT(n, x)          ((x)<<((n)*2))
+#define TEXBIND_SET(n, x) 		((x)<<((n)*4))
+
+static void i830_render_start( intelContextPtr intel )
+{
+   GLcontext *ctx = &intel->ctx;
+   i830ContextPtr i830 = I830_CONTEXT(intel);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   DECLARE_RENDERINPUTS(index_bitset);
+   GLuint v0 = _3DSTATE_VFT0_CMD;
+   GLuint v2 = _3DSTATE_VFT1_CMD;
+   GLuint mcsb1 = 0;
+   /* Rebuild the vertex attribute list and format words (v0, v2, mcsb1). */
+   RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset );
+
+   /* Important:
+    */
+   VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
+   intel->vertex_attr_count = 0;
+
+   /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
+    * build up a hardware vertex.
+    */
+   if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
+      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, VFT0_XYZW );
+      intel->coloroffset = 4;
+   }
+   else {
+      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, VFT0_XYZ );
+      intel->coloroffset = 3;
+   }
+
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) {
+      EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F, VFT0_POINT_WIDTH );
+   }
+   /* Diffuse colour is emitted unconditionally. */
+   EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, VFT0_DIFFUSE );
+   /* Specular and fog are both flagged by VFT0_SPEC; a missing one is padded. */
+   intel->specoffset = 0;
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 ) ||
+       RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) {
+      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
+         intel->specoffset = intel->coloroffset + 1;
+         EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, VFT0_SPEC );
+      }
+      else
+         EMIT_PAD( 3 );
+
+      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG ))
+         EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, VFT0_SPEC );
+      else
+         EMIT_PAD( 1 );
+   }
+
+   if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
+      int i, count = 0;
+      /* i walks the texture units; count numbers the coord sets emitted. */
+      for (i = 0; i < I830_TEX_UNITS; i++) {
+         if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) {
+            GLuint sz = VB->TexCoordPtr[i]->size;
+            GLuint emit;
+            GLuint mcs = (i830->state.Tex[i][I830_TEXREG_MCS] & 
+                          ~TEXCOORDTYPE_MASK);
+
+	    switch (sz) {
+	    case 1: 
+	    case 2: 
+	       emit = EMIT_2F; 
+	       sz = 2; 
+	       mcs |= TEXCOORDTYPE_CARTESIAN; 
+	       break;
+	    case 3:
+	       emit = EMIT_3F; 
+	       sz = 3;
+	       mcs |= TEXCOORDTYPE_VECTOR;
+	       break;
+	    case 4: 
+	       emit = EMIT_3F_XYW; 
+	       sz = 3;     
+	       mcs |= TEXCOORDTYPE_HOMOGENEOUS;
+	       break;
+	    default: 
+	       continue;
+	    };
+	    /* Emit the coords; store (count+8) in unit i's 4-bit mcsb1 field. */
+
+	    EMIT_ATTR( _TNL_ATTRIB_TEX0+i, emit, 0 );	       
+	    v2 |= VRTX_TEX_SET_FMT(count, SZ_TO_HW(sz));
+	    mcsb1 |= (count+8)<<(i*4);
+
+	    if (mcs != i830->state.Tex[i][I830_TEXREG_MCS]) {
+	       I830_STATECHANGE(i830, I830_UPLOAD_TEX(i));
+	       i830->state.Tex[i][I830_TEXREG_MCS] = mcs;
+	    }
+
+	    count++;
+	 }
+      }
+
+      v0 |= VFT0_TEX_COUNT(count);
+   }
+   
+   /* Only need to change the vertex emit code if there has been a
+    * statechange to a new hardware vertex format:
+    */
+   if (v0 != i830->state.Ctx[I830_CTXREG_VF] ||
+       v2 != i830->state.Ctx[I830_CTXREG_VF2] ||
+       mcsb1 != i830->state.Ctx[I830_CTXREG_MCSB1] ||
+       !RENDERINPUTS_EQUAL( index_bitset, i830->last_index_bitset )) {
+    
+      I830_STATECHANGE( i830, I830_UPLOAD_CTX );
+
+      /* Must do this *after* statechange, so as not to affect
+       * buffered vertices reliant on the old state:
+       */
+      intel->vertex_size = 
+	 _tnl_install_attrs( ctx, 
+			     intel->vertex_attrs, 
+			     intel->vertex_attr_count,
+			     intel->ViewportMatrix.m, 0 );
+
+      intel->vertex_size >>= 2;
+
+      i830->state.Ctx[I830_CTXREG_VF] = v0;
+      i830->state.Ctx[I830_CTXREG_VF2] = v2;
+      i830->state.Ctx[I830_CTXREG_MCSB1] = mcsb1;
+      RENDERINPUTS_COPY( i830->last_index_bitset, index_bitset );
+
+      assert(i830_check_vertex_size( intel, intel->vertex_size ));
+   }
+}
+
+static void i830_reduced_primitive_state( intelContextPtr intel,
+					  GLenum rprim )
+{
+    i830ContextPtr i830 = I830_CONTEXT(intel);
+    const GLuint old_st1 = i830->state.Stipple[I830_STPREG_ST1];
+    GLuint new_st1 = old_st1 & ~ST1_ENABLE;
+
+    /* Stipple is enabled in ST1 only for triangles, and only when GL
+     * polygon stipple is on and intel->hw_stipple is set; lines, points
+     * and anything else leave it disabled.
+     */
+    if (rprim == GL_TRIANGLES &&
+	intel->ctx.Polygon.StippleFlag &&
+	intel->hw_stipple) {
+       new_st1 |= ST1_ENABLE;
+    }
+
+    /* Record the reduced primitive on the context. */
+    i830->intel.reduced_primitive = rprim;
+
+    /* Leave the stipple state alone unless ST1 actually changes. */
+    if (new_st1 == old_st1)
+       return;
+
+    I830_STATECHANGE(i830, I830_UPLOAD_STIPPLE);
+    i830->state.Stipple[I830_STPREG_ST1] = new_st1;
+}
+
+/* Pull apart the vertex format registers and figure out how large a
+ * vertex is supposed to be. 
+ */
+static GLboolean i830_check_vertex_size( intelContextPtr intel,
+					 GLuint expected )
+{
+   i830ContextPtr i830 = I830_CONTEXT(intel);
+   int vft0 = i830->current->Ctx[I830_CTXREG_VF];
+   int vft1 = i830->current->Ctx[I830_CTXREG_VF2];
+   int nrtex = (vft0 & VFT0_TEX_COUNT_MASK) >> VFT0_TEX_COUNT_SHIFT;
+   int tex, sz;
+
+   /* Position contributes 2, 3 or 4 components. */
+   switch (vft0 & VFT0_XYZW_MASK) {
+   case VFT0_XY:   sz = 2; break;
+   case VFT0_XYZ:  sz = 3; break;
+   case VFT0_XYW:  sz = 3; break;
+   case VFT0_XYZW: sz = 4; break;
+   default:
+      fprintf(stderr, "no xyzw specified\n");
+      return 0;
+   }
+   /* Each of these optional fields adds one to the size. */
+   sz += (vft0 & VFT0_SPEC) ? 1 : 0;
+   sz += (vft0 & VFT0_DIFFUSE) ? 1 : 0;
+   sz += (vft0 & VFT0_DEPTH_OFFSET) ? 1 : 0;
+   sz += (vft0 & VFT0_POINT_WIDTH) ? 1 : 0;
+
+   /* One texcoord format per set, read from the low VFT1 field first. */
+   for (tex = 0; tex < nrtex; tex++, vft1 >>= VFT1_TEX1_SHIFT) {
+      switch (vft1 & VFT1_TEX0_MASK) {
+      case TEXCOORDFMT_1D: sz += 1; break;
+      case TEXCOORDFMT_2D: sz += 2; break;
+      case TEXCOORDFMT_3D: sz += 3; break;
+      case TEXCOORDFMT_4D: sz += 4; break;
+      }
+   }
+   if (sz == expected)
+      return GL_TRUE;
+   fprintf(stderr, "vertex size mismatch %d/%d\n", sz, expected);
+   return GL_FALSE;
+}
+
+static void i830_emit_invarient_state( intelContextPtr intel )
+{
+   BATCH_LOCALS;
+   /* Emit the invariant state; get_state_size() budgets 40 dwords for it. */
+   BEGIN_BATCH( 40 );
+   /* NOTE(review): 38 dwords are written below, 2 fewer than reserved. */
+   OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(0));
+   OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(1));
+   OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(2));
+   OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(3));
+   /* Default diffuse, specular and Z values are all written as zero. */
+   OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
+   OUT_BATCH(0);
+
+   OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
+   OUT_BATCH(0);
+
+   OUT_BATCH(_3DSTATE_DFLT_Z_CMD);
+   OUT_BATCH(0);
+
+   OUT_BATCH(_3DSTATE_FOG_MODE_CMD);
+   OUT_BATCH(FOGFUNC_ENABLE |
+	     FOG_LINEAR_CONST | 
+	     FOGSRC_INDEX_Z | 
+	     ENABLE_FOG_DENSITY);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+
+   /* Texture stream n: coordinate set n, map index n, bump disabled. */
+   OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
+	     MAP_UNIT(0) |
+	     DISABLE_TEX_STREAM_BUMP |
+	     ENABLE_TEX_STREAM_COORD_SET |
+	     TEX_STREAM_COORD_SET(0) |
+	     ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(0));
+   OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
+	     MAP_UNIT(1) |
+	     DISABLE_TEX_STREAM_BUMP |
+	     ENABLE_TEX_STREAM_COORD_SET |
+	     TEX_STREAM_COORD_SET(1) |
+	     ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(1));
+   OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
+	     MAP_UNIT(2) |
+	     DISABLE_TEX_STREAM_BUMP |
+	     ENABLE_TEX_STREAM_COORD_SET |
+	     TEX_STREAM_COORD_SET(2) |
+	     ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(2));
+   OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
+	     MAP_UNIT(3) |
+	     DISABLE_TEX_STREAM_BUMP |
+	     ENABLE_TEX_STREAM_COORD_SET |
+	     TEX_STREAM_COORD_SET(3) |
+	     ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(3));
+   /* Texture coordinate transform is disabled for all four sets. */
+   OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
+   OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(0));
+   OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
+   OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(1));
+   OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
+   OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(2));
+   OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
+   OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(3));
+
+   OUT_BATCH(_3DSTATE_RASTER_RULES_CMD |
+	     ENABLE_POINT_RASTER_RULE |
+	     OGL_POINT_RASTER_RULE |
+	     ENABLE_LINE_STRIP_PROVOKE_VRTX |
+	     ENABLE_TRI_FAN_PROVOKE_VRTX |
+	     ENABLE_TRI_STRIP_PROVOKE_VRTX |
+	     LINE_STRIP_PROVOKE_VRTX(1) |
+	     TRI_FAN_PROVOKE_VRTX(2) | 
+	     TRI_STRIP_PROVOKE_VRTX(2));
+
+   OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | 
+	     DISABLE_SCISSOR_RECT);
+
+   OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   /* Viewport transform and perspective divide are both disabled. */
+   OUT_BATCH(_3DSTATE_VERTEX_TRANSFORM);
+   OUT_BATCH(DISABLE_VIEWPORT_TRANSFORM | DISABLE_PERSPECTIVE_DIVIDE);
+
+   OUT_BATCH(_3DSTATE_W_STATE_CMD);
+   OUT_BATCH(MAGIC_W_STATE_DWORD1);
+   OUT_BATCH(0x3f800000 /* 1.0 in IEEE float */ );
+
+
+   OUT_BATCH(_3DSTATE_COLOR_FACTOR_CMD);
+   OUT_BATCH(0x80808080);	/* .5 required in alpha for GL_DOT3_RGBA_EXT */
+
+   ADVANCE_BATCH();
+}
+
+
+#define emit( intel, state, size )			\
+do {	/* copy size bytes of state[] into the batch */	\
+   int k;						\
+   BEGIN_BATCH( (size) / sizeof(GLuint));		\
+   for (k = 0 ; k < (size) / sizeof(GLuint) ; k++)	\
+      OUT_BATCH((state)[k]);				\
+   ADVANCE_BATCH();					\
+} while (0)
+
+static GLuint get_state_size( struct i830_hw_state *state )
+{
+   /* Bytes needed to emit every state group that is active but not yet
+    * emitted, matching what i830_emit_state() writes for each group.
+    */
+   const GLuint pending = state->active & ~state->emitted;
+   GLuint bytes = 0;
+   GLuint unit;
+
+   if (pending & I830_UPLOAD_INVARIENT)
+      bytes += 40 * sizeof(int);
+   if (pending & I830_UPLOAD_CTX)
+      bytes += sizeof(state->Ctx);
+   if (pending & I830_UPLOAD_BUFFERS)
+      bytes += sizeof(state->Buffer);
+   if (pending & I830_UPLOAD_STIPPLE)
+      bytes += sizeof(state->Stipple);
+
+   /* Per unit: the texture registers, then the blend words in use. */
+   for (unit = 0; unit < I830_TEX_UNITS; unit++) {
+      if (pending & I830_UPLOAD_TEX(unit))
+	 bytes += sizeof(state->Tex[unit]);
+      if (pending & I830_UPLOAD_TEXBLEND(unit))
+	 bytes += state->TexBlendWordsUsed[unit] * 4;
+   }
+
+   return bytes;
+}
+
+
+/* Emit every active, not-yet-emitted state group into the batch buffer.
+ */
+static void i830_emit_state( intelContextPtr intel )
+{
+   i830ContextPtr i830 = I830_CONTEXT(intel);
+   struct i830_hw_state *state = i830->current;
+   int i;
+   GLuint dirty = state->active & ~state->emitted;
+   GLuint counter = intel->batch.counter;
+   BATCH_LOCALS;
+   /* If the pending state won't fit, flush, then re-read dirty and counter. */
+   if (intel->batch.space < get_state_size(state)) {
+      intelFlushBatch(intel, GL_TRUE);
+      dirty = state->active & ~state->emitted;
+      counter = intel->batch.counter;
+   }
+
+   if (dirty & I830_UPLOAD_INVARIENT) {
+      if (VERBOSE) fprintf(stderr, "I830_UPLOAD_INVARIENT:\n"); 
+      i830_emit_invarient_state( intel );
+   }
+
+   if (dirty & I830_UPLOAD_CTX) {
+      if (VERBOSE) fprintf(stderr, "I830_UPLOAD_CTX:\n"); 
+      emit( i830, state->Ctx, sizeof(state->Ctx) );
+   }
+
+   if (dirty & I830_UPLOAD_BUFFERS) {
+      if (VERBOSE) fprintf(stderr, "I830_UPLOAD_BUFFERS:\n"); 
+      emit( i830, state->Buffer, sizeof(state->Buffer) );
+   }
+
+   if (dirty & I830_UPLOAD_STIPPLE) {
+      if (VERBOSE) fprintf(stderr, "I830_UPLOAD_STIPPLE:\n"); 
+      emit( i830, state->Stipple, sizeof(state->Stipple) );
+   }
+
+   for (i = 0; i < I830_TEX_UNITS; i++) {
+      if ((dirty & I830_UPLOAD_TEX(i))) { 
+ 	 if (VERBOSE) fprintf(stderr, "I830_UPLOAD_TEX(%d):\n", i); 
+	 emit( i830, state->Tex[i], sizeof(state->Tex[i])); 
+      } 
+
+      if (dirty & I830_UPLOAD_TEXBLEND(i)) {
+	 if (VERBOSE) fprintf(stderr, "I830_UPLOAD_TEXBLEND(%d):\n", i); 
+	 emit( i830, state->TexBlend[i], 
+	       state->TexBlendWordsUsed[i] * 4 );
+      }
+   }
+   /* Mark the groups emitted; the batch counter must not have moved. */
+   state->emitted |= dirty;
+   intel->batch.last_emit_state = counter;
+   assert(counter == intel->batch.counter);
+}
+
+static void i830_destroy_context( intelContextPtr intel )
+{  /* vtbl.destroy hook: calls _tnl_free_vertices() on the GL context. */
+   _tnl_free_vertices(&intel->ctx);
+}
+
+static void
+i830_set_color_region(intelContextPtr intel, const intelRegion *region)
+{  /* Point the colour-buffer words of the buffer state at region. */
+   i830ContextPtr i830 = I830_CONTEXT(intel);
+   I830_STATECHANGE( i830, I830_UPLOAD_BUFFERS );   /* before editing Buffer[] */
+   i830->state.Buffer[I830_DESTREG_CBUFADDR1] =
+      (BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(region->pitch) | BUF_3D_USE_FENCE);
+   i830->state.Buffer[I830_DESTREG_CBUFADDR2] = region->offset;
+}
+
+
+static void
+i830_set_z_region(intelContextPtr intel, const intelRegion *region)
+{  /* Point the depth-buffer words of the buffer state at region. */
+   i830ContextPtr i830 = I830_CONTEXT(intel);
+   I830_STATECHANGE( i830, I830_UPLOAD_BUFFERS );   /* before editing Buffer[] */
+   i830->state.Buffer[I830_DESTREG_DBUFADDR1] =
+      (BUF_3D_ID_DEPTH | BUF_3D_PITCH(region->pitch) | BUF_3D_USE_FENCE);
+   i830->state.Buffer[I830_DESTREG_DBUFADDR2] = region->offset;
+}
+
+
+static void
+i830_update_color_z_regions(intelContextPtr intel,
+                            const intelRegion *colorRegion,
+                            const intelRegion *depthRegion)
+{
+   i830ContextPtr i830 = I830_CONTEXT(intel);
+   /* Rewrites colour and depth buffer words without I830_STATECHANGE. */
+   i830->state.Buffer[I830_DESTREG_CBUFADDR1] =
+      (BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(colorRegion->pitch) | BUF_3D_USE_FENCE);
+   i830->state.Buffer[I830_DESTREG_CBUFADDR2] = colorRegion->offset;
+   /* NOTE(review): presumably the caller forces a re-emit - confirm. */
+   i830->state.Buffer[I830_DESTREG_DBUFADDR1] =
+      (BUF_3D_ID_DEPTH | BUF_3D_PITCH(depthRegion->pitch) | BUF_3D_USE_FENCE);
+   i830->state.Buffer[I830_DESTREG_DBUFADDR2] = depthRegion->offset;
+}
+
+
+/* This isn't really handled at the moment.
+ */
+static void i830_lost_hardware( intelContextPtr intel )
+{
+   I830_CONTEXT(intel)->state.emitted = 0;   /* no state group counts as emitted */
+}
+
+
+
+static void i830_emit_flush( intelContextPtr intel )
+{
+   BATCH_LOCALS;
+   /* Two dwords: MI_FLUSH with FLUSH_MAP_CACHE set, then a zero dword. */
+   BEGIN_BATCH(2);
+   OUT_BATCH( MI_FLUSH | FLUSH_MAP_CACHE ); 
+   OUT_BATCH( 0 );
+   ADVANCE_BATCH();
+}
+
+
+
+
+void i830InitVtbl( i830ContextPtr i830 )
+{  /* Fill the intel vtbl with the i830 implementations of each hook. */
+   i830->intel.vtbl.alloc_tex_obj = i830AllocTexObj;
+   i830->intel.vtbl.check_vertex_size = i830_check_vertex_size;
+   i830->intel.vtbl.clear_with_tris = i830ClearWithTris;
+   i830->intel.vtbl.rotate_window = i830RotateWindow;
+   i830->intel.vtbl.destroy = i830_destroy_context;
+   i830->intel.vtbl.emit_state = i830_emit_state;
+   i830->intel.vtbl.lost_hardware = i830_lost_hardware;
+   i830->intel.vtbl.reduced_primitive_state = i830_reduced_primitive_state;
+   i830->intel.vtbl.set_color_region = i830_set_color_region;
+   i830->intel.vtbl.set_z_region = i830_set_z_region;
+   i830->intel.vtbl.update_color_z_regions = i830_update_color_z_regions;
+   i830->intel.vtbl.update_texture_state = i830UpdateTextureState;
+   i830->intel.vtbl.emit_flush = i830_emit_flush;
+   i830->intel.vtbl.render_start = i830_render_start;
+}
diff --git a/i915/i915_context.c b/i915/i915_context.c
new file mode 100644
index 0000000..2bc1cae
--- /dev/null
+++ b/i915/i915_context.c
@@ -0,0 +1,186 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "i915_context.h"
+#include "imports.h"
+#include "intel_tex.h"
+#include "intel_tris.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+#include "tnl/t_vertex.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+#include "vbo/vbo.h"
+
+
+#include "utils.h"
+#include "i915_reg.h"
+
+/***************************************
+ * Mesa's Driver Functions
+ ***************************************/
+
+static const struct dri_extension i915_extensions[] =
+{   /* Passed to driInitExtensions(); the table ends with a NULL name. */
+    { "GL_ARB_depth_texture",              NULL },
+    { "GL_ARB_fragment_program",           NULL },
+    { "GL_ARB_shadow",                     NULL },
+    { "GL_ARB_texture_env_crossbar",       NULL },
+    { "GL_EXT_shadow_funcs",               NULL },
+    /* ARB extn won't work if not enabled */
+    { "GL_SGIX_depth_texture",             NULL },
+    { NULL,                                NULL }
+};
+
+/* Override intel default.
+ */
+static void i915InvalidateState( GLcontext *ctx, GLuint new_state )
+{  /* UpdateState hook: pass new_state on to swrast, swsetup, vbo and tnl. */
+   _swrast_InvalidateState( ctx, new_state );
+   _swsetup_InvalidateState( ctx, new_state );
+   _vbo_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+   _tnl_invalidate_vertex_state( ctx, new_state );
+   INTEL_CONTEXT(ctx)->NewGLState |= new_state;
+
+   /* Todo: gather state values under which tracked parameters become
+    * invalidated, add callbacks for things like ProgramLocalParameters,
+    * etc.  Until then every call marks the current program's params stale.
+    */
+   {
+      struct i915_fragment_program *p = 
+	 (struct i915_fragment_program *)ctx->FragmentProgram._Current;
+      if (p && p->nr_params)
+	 p->params_uptodate = 0;
+   }
+   /* Fog is re-evaluated on fog, hint or program changes. */
+   if (new_state & (_NEW_FOG|_NEW_HINT|_NEW_PROGRAM))
+      i915_update_fog(ctx);
+}
+
+
+static void i915InitDriverFunctions( struct dd_function_table *functions )
+{  /* Fill the table from the shared intel code, then the i915 parts. */
+   intelInitDriverFunctions( functions );
+   i915InitStateFunctions( functions );
+   i915InitTextureFuncs( functions );
+   i915InitFragProgFuncs( functions );
+   functions->UpdateState = i915InvalidateState;   /* override the intel default */
+}
+
+
+
+GLboolean i915CreateContext( const __GLcontextModes *mesaVis,
+			    __DRIcontextPrivate *driContextPriv,
+			    void *sharedContextPrivate)
+{
+   struct dd_function_table functions;
+   i915ContextPtr i915 = (i915ContextPtr) CALLOC_STRUCT(i915_context);
+   intelContextPtr intel;
+   GLcontext *ctx;
+   GLuint i;
+
+   /* Allocate and set up an i915 context.  Returns GL_FALSE when the
+    * allocation or intelInitContext() fails, GL_TRUE otherwise.
+    */
+   if (!i915) return GL_FALSE;
+
+   /* Form the embedded pointers only once i915 is known to be non-NULL. */
+   intel = &i915->intel;
+   ctx = &intel->ctx;
+
+   i915InitVtbl( i915 );
+   i915InitDriverFunctions( &functions );
+
+   if (!intelInitContext( intel, mesaVis, driContextPriv,
+			  sharedContextPrivate, &functions )) {
+      FREE(i915);
+      return GL_FALSE;
+   }
+
+   ctx->Const.MaxTextureUnits = I915_TEX_UNITS;
+   ctx->Const.MaxTextureImageUnits = I915_TEX_UNITS;
+   ctx->Const.MaxTextureCoordUnits = I915_TEX_UNITS;
+
+   intel->nr_heaps = 1;
+   intel->texture_heaps[0] = 
+      driCreateTextureHeap( 0, intel,
+			    intel->intelScreen->tex.size,
+			    12,
+			    I830_NR_TEX_REGIONS,
+			    intel->sarea->texList,
+			    (unsigned *) & intel->sarea->texAge,
+			    & intel->swapped,
+			    sizeof( struct i915_texture_object ),
+			    (destroy_texture_object_t *)intelDestroyTexObj );
+
+   /* FIXME: driCalculateMaxTextureLevels assumes that mipmaps are
+    * tightly packed, but they're not in Intel graphics
+    * hardware.
+    */
+   i = driQueryOptioni( &intel->optionCache, "allow_large_textures");
+   driCalculateMaxTextureLevels( intel->texture_heaps,
+				 intel->nr_heaps,
+				 &intel->ctx.Const,
+				 4,
+				 11, /* max 2D texture size is 2048x2048 */
+				 8,  /* 3D texture */
+				 11, /* cube texture. */
+				 11, /* rect texture */
+				 12,
+				 GL_FALSE,
+				 i );
+
+   /* GL_ARB_fragment_program limits - don't think Mesa actually
+    * validates programs against these, and in any case one ARB
+    * instruction can translate to more than one HW instruction, so
+    * we'll still have to check and fallback each time.
+    */
+   ctx->Const.FragmentProgram.MaxNativeTemps = I915_MAX_TEMPORARY;
+   ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* 8 tex, 2 color, fog */
+   ctx->Const.FragmentProgram.MaxNativeParameters = I915_MAX_CONSTANT;
+   ctx->Const.FragmentProgram.MaxNativeAluInstructions = I915_MAX_ALU_INSN;
+   ctx->Const.FragmentProgram.MaxNativeTexInstructions = I915_MAX_TEX_INSN;
+   ctx->Const.FragmentProgram.MaxNativeInstructions = (I915_MAX_ALU_INSN + 
+						I915_MAX_TEX_INSN);
+   ctx->Const.FragmentProgram.MaxNativeTexIndirections = I915_MAX_TEX_INDIRECT;
+   ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* I don't think we have one */
+   ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
+   ctx->FragmentProgram._UseTexEnvProgram = GL_TRUE;
+
+   driInitExtensions( ctx, i915_extensions, GL_FALSE );
+   _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 
+		       36 * sizeof(GLfloat) );
+
+   intel->verts = TNL_CONTEXT(ctx)->clipspace.vertex_buf;
+   i915InitState( i915 );
+
+   return GL_TRUE;
+}
+
diff --git a/i915/i915_context.h b/i915/i915_context.h
new file mode 100644
index 0000000..ec15501
--- /dev/null
+++ b/i915/i915_context.h
@@ -0,0 +1,358 @@
+ /**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef I915CONTEXT_INC
+#define I915CONTEXT_INC
+
+#include "intel_context.h"
+
+#define I915_FALLBACK_TEXTURE		 0x1000
+#define I915_FALLBACK_COLORMASK		 0x2000
+#define I915_FALLBACK_STENCIL		 0x4000
+#define I915_FALLBACK_STIPPLE		 0x8000
+#define I915_FALLBACK_PROGRAM		 0x10000
+#define I915_FALLBACK_LOGICOP		 0x20000
+#define I915_FALLBACK_POLYGON_SMOOTH	 0x40000
+#define I915_FALLBACK_POINT_SMOOTH	 0x80000
+
+#define I915_UPLOAD_CTX              0x1
+#define I915_UPLOAD_BUFFERS          0x2
+#define I915_UPLOAD_STIPPLE          0x4
+#define I915_UPLOAD_PROGRAM          0x8
+#define I915_UPLOAD_CONSTANTS        0x10
+#define I915_UPLOAD_FOG              0x20
+#define I915_UPLOAD_INVARIENT        0x40
+#define I915_UPLOAD_TEX(i)           (0x00010000<<(i))
+#define I915_UPLOAD_TEX_ALL          (0x00ff0000)
+#define I915_UPLOAD_TEX_0_SHIFT      16
+
+
+/* State structure offsets - these will probably disappear.
+ */
+#define I915_DESTREG_CBUFADDR0 0
+#define I915_DESTREG_CBUFADDR1 1
+#define I915_DESTREG_CBUFADDR2 2
+#define I915_DESTREG_DBUFADDR0 3
+#define I915_DESTREG_DBUFADDR1 4
+#define I915_DESTREG_DBUFADDR2 5
+#define I915_DESTREG_DV0 6
+#define I915_DESTREG_DV1 7
+#define I915_DESTREG_SENABLE 8
+#define I915_DESTREG_SR0 9
+#define I915_DESTREG_SR1 10
+#define I915_DESTREG_SR2 11
+#define I915_DEST_SETUP_SIZE 12
+
+#define I915_CTXREG_STATE4		0
+#define I915_CTXREG_LI	        	1
+#define I915_CTXREG_LIS2		        2
+#define I915_CTXREG_LIS4	        	3
+#define I915_CTXREG_LIS5	        	4
+#define I915_CTXREG_LIS6	         	5
+#define I915_CTXREG_IAB   	 	6
+#define I915_CTXREG_BLENDCOLOR0		7
+#define I915_CTXREG_BLENDCOLOR1		8
+#define I915_CTX_SETUP_SIZE		9
+
+#define I915_FOGREG_COLOR		0
+#define I915_FOGREG_MODE0		1
+#define I915_FOGREG_MODE1		2
+#define I915_FOGREG_MODE2		3
+#define I915_FOGREG_MODE3		4
+#define I915_FOG_SETUP_SIZE		5
+
+#define I915_STPREG_ST0        0
+#define I915_STPREG_ST1        1
+#define I915_STP_SETUP_SIZE    2
+
+#define I915_TEXREG_MS2        0
+#define I915_TEXREG_MS3        1
+#define I915_TEXREG_MS4        2
+#define I915_TEXREG_SS2        3
+#define I915_TEXREG_SS3        4
+#define I915_TEXREG_SS4        5
+#define I915_TEX_SETUP_SIZE    6
+
+#define I915_MAX_CONSTANT      32
+#define I915_CONSTANT_SIZE     (2+(4*I915_MAX_CONSTANT))
+
+
+#define I915_PROGRAM_SIZE      192
+
+
+/* Hardware version of a parsed fragment program.  "Derived" from the
+ * mesa fragment_program struct.
+ */
+struct i915_fragment_program {
+   struct gl_fragment_program FragProg;
+
+   GLboolean translated;
+   GLboolean params_uptodate;
+   GLboolean on_hardware;
+   GLboolean error;		/* If program is malformed for any reason. */
+
+   GLuint nr_tex_indirect;
+   GLuint nr_tex_insn;
+   GLuint nr_alu_insn;
+   GLuint nr_decl_insn;
+
+
+
+
+   /* TODO: split between the stored representation of a program and
+    * the state used to build that representation.
+    */
+   GLcontext *ctx;
+
+   GLuint declarations[I915_PROGRAM_SIZE];
+   GLuint program[I915_PROGRAM_SIZE];
+
+   GLfloat constant[I915_MAX_CONSTANT][4];
+   GLuint constant_flags[I915_MAX_CONSTANT];
+   GLuint nr_constants;
+
+   GLuint *csr;			/* Cursor, points into program.
+				 */
+
+   GLuint *decl;		/* Cursor, points into declarations.
+				 */
+   
+   GLuint decl_s;		/* flags for which s regs need to be decl'd */
+   GLuint decl_t;		/* flags for which t regs need to be decl'd */
+
+   GLuint temp_flag;		/* Tracks temporary regs which are in
+				 * use.
+				 */
+
+   GLuint utemp_flag;		/* Tracks TYPE_U temporary regs which are in
+				 * use.
+				 */
+
+
+
+   /* Helpers for i915_fragprog.c:
+    */
+   GLuint wpos_tex;
+   GLboolean depth_written;
+
+   struct { 
+      GLuint reg;		/* Hardware constant idx */
+      const GLfloat *values; 	/* Pointer to tracked values */
+   } param[I915_MAX_CONSTANT];
+   GLuint nr_params;
+      
+
+
+
+   /* Helpers for i915_texprog.c:
+    */
+   GLuint src_texture;		/* Reg containing sampled texture color,
+				 * else UREG_BAD.
+				 */
+
+   GLuint src_previous;		/* Reg containing color from previous 
+				 * stage.  May need to be decl'd.
+				 */
+
+   GLuint last_tex_stage;	/* Number of last enabled texture unit */
+
+   struct vertex_buffer *VB;
+};
+
+
+
+
+
+
+struct i915_texture_object
+{
+   struct intel_texture_object intel;
+   GLenum lastTarget;
+   GLboolean refs_border_color;
+   GLuint Setup[I915_TEX_SETUP_SIZE];
+};
+
+#define I915_TEX_UNITS 8
+
+
+struct i915_hw_state {		/* hw state image; i915_context holds meta, initial and state copies */
+   GLuint Ctx[I915_CTX_SETUP_SIZE];		/* sized for the I915_CTXREG_* indices */
+   GLuint Buffer[I915_DEST_SETUP_SIZE];		/* sized for the I915_DESTREG_* indices */
+   GLuint Stipple[I915_STP_SETUP_SIZE];		/* sized for the I915_STPREG_* indices */
+   GLuint Fog[I915_FOG_SETUP_SIZE];		/* sized for the I915_FOGREG_* indices */
+   GLuint Tex[I915_TEX_UNITS][I915_TEX_SETUP_SIZE];	/* per unit, sized for I915_TEXREG_* */
+   GLuint Constant[I915_CONSTANT_SIZE];		/* 2 + 4 * I915_MAX_CONSTANT dwords */
+   GLuint ConstantSize;		/* presumably dwords in use in Constant[] -- confirm */
+   GLuint Program[I915_PROGRAM_SIZE];
+   GLuint ProgramSize;		/* presumably dwords in use in Program[] -- confirm */
+   GLuint active;		/* I915_UPLOAD_* */
+   GLuint emitted;		/* I915_UPLOAD_* */
+};
+
+#define I915_FOG_PIXEL  2
+#define I915_FOG_VERTEX 1
+#define I915_FOG_NONE   0
+
+struct i915_context 
+{
+   struct intel_context intel;
+
+   GLuint last_ReallyEnabled;
+   GLuint vertex_fog;
+
+   struct i915_fragment_program tex_program;
+   struct i915_fragment_program *current_program;
+
+   struct i915_hw_state meta, initial, state, *current;
+};
+
+
+typedef struct i915_context *i915ContextPtr;
+typedef struct i915_texture_object *i915TextureObjectPtr;
+
+#define I915_CONTEXT(ctx)	((i915ContextPtr)(ctx))
+
+
+
+#define I915_STATECHANGE(i915, flag)					\
+do {	/* INTEL_FIREVERTICES first, then clear the flag bits in state.emitted */	\
+   if (0) fprintf(stderr, "I915_STATECHANGE %x in %s\n", flag, __FUNCTION__);	\
+   INTEL_FIREVERTICES( &(i915)->intel );					\
+   (i915)->state.emitted &= ~(flag);					\
+} while (0)
+/* INTEL_FIREVERTICES first, then set (mode != 0) or clear the flag bits in state.active. */
+#define I915_ACTIVESTATE(i915, flag, mode)			\
+do {								\
+   if (0) fprintf(stderr, "I915_ACTIVESTATE %x %d in %s\n",	\
+		  flag, mode, __FUNCTION__);			\
+   INTEL_FIREVERTICES( &(i915)->intel );				\
+   if (mode)							\
+      (i915)->state.active |= (flag);				\
+   else								\
+      (i915)->state.active &= ~(flag);				\
+} while (0)
+
+
+/*======================================================================
+ * i915_vtbl.c
+ */
+extern void i915InitVtbl( i915ContextPtr i915 );
+
+
+
+#define SZ_TO_HW(sz)  (((sz)-2)&0x3)	/* (sz - 2), kept to two bits */
+#define EMIT_SZ(sz)   (EMIT_1F + (sz) - 1)	/* EMIT_1F for sz == 1, counting up from there */
+#define EMIT_ATTR( ATTR, STYLE, S4, SZ )				\
+do {	/* needs intel, s4 and offset in scope where it is expanded */	\
+   intel->vertex_attrs[intel->vertex_attr_count].attrib = (ATTR);	\
+   intel->vertex_attrs[intel->vertex_attr_count].format = (STYLE);	\
+   s4 |= (S4);								\
+   intel->vertex_attr_count++;						\
+   offset += (SZ);							\
+} while (0)
+/* Append an EMIT_PAD-format entry whose offset field is N, then advance offset by N. */
+#define EMIT_PAD( N )							\
+do {									\
+   intel->vertex_attrs[intel->vertex_attr_count].attrib = 0;		\
+   intel->vertex_attrs[intel->vertex_attr_count].format = EMIT_PAD;	\
+   intel->vertex_attrs[intel->vertex_attr_count].offset = (N);		\
+   intel->vertex_attr_count++;						\
+   offset += (N);							\
+} while (0)
+
+
+
+/*======================================================================
+ * i915_context.c
+ */
+extern GLboolean i915CreateContext( const __GLcontextModes *mesaVis,
+				    __DRIcontextPrivate *driContextPriv,
+				    void *sharedContextPrivate);
+
+
+/*======================================================================
+ * i915_texprog.c
+ */
+extern void i915ValidateTextureProgram( i915ContextPtr i915 );
+
+
+/*======================================================================
+ * i915_debug.c
+ */
+extern void i915_disassemble_program( const GLuint *program, GLuint sz );
+extern void i915_print_ureg( const char *msg, GLuint ureg );
+
+
+/*======================================================================
+ * i915_state.c
+ */
+extern void i915InitStateFunctions( struct dd_function_table *functions );
+extern void i915InitState( i915ContextPtr i915 );
+extern void i915_update_fog(GLcontext *ctxx);
+
+
+/*======================================================================
+ * i915_tex.c
+ */
+extern void i915UpdateTextureState( intelContextPtr intel );
+extern void i915InitTextureFuncs( struct dd_function_table *functions );
+extern intelTextureObjectPtr i915AllocTexObj( struct gl_texture_object *texObj );
+
+/*======================================================================
+ * i915_metaops.c
+ */
+extern GLboolean
+i915TryTextureReadPixels( GLcontext *ctx,
+			  GLint x, GLint y, GLsizei width, GLsizei height,
+			  GLenum format, GLenum type,
+			  const struct gl_pixelstore_attrib *pack,
+			  GLvoid *pixels );
+
+extern GLboolean
+i915TryTextureDrawPixels( GLcontext *ctx,
+			  GLint x, GLint y, GLsizei width, GLsizei height,
+			  GLenum format, GLenum type,
+			  const struct gl_pixelstore_attrib *unpack,
+			  const GLvoid *pixels );
+
+extern void 
+i915ClearWithTris( intelContextPtr intel, GLbitfield mask,
+		   GLboolean all, GLint cx, GLint cy, GLint cw, GLint ch);
+
+
+extern void
+i915RotateWindow(intelContextPtr intel, __DRIdrawablePrivate *dPriv,
+                 GLuint srcBuf);
+
+/*======================================================================
+ * i915_fragprog.c
+ */
+extern void i915ValidateFragmentProgram( i915ContextPtr i915 );
+extern void i915InitFragProgFuncs( struct dd_function_table *functions );
+	
+#endif
+
diff --git a/i915/i915_debug.c b/i915/i915_debug.c
new file mode 100644
index 0000000..054b561
--- /dev/null
+++ b/i915/i915_debug.c
@@ -0,0 +1,299 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "i915_reg.h"
+#include "i915_context.h"
+#include <stdio.h>
+
+
+static const char *opcodes[0x20] = {
+   "NOP",
+   "ADD",
+   "MOV",
+   "MUL",
+   "MAD",
+   "DP2ADD",
+   "DP3",
+   "DP4",
+   "FRC",
+   "RCP",
+   "RSQ",
+   "EXP",
+   "LOG",
+   "CMP",
+   "MIN",
+   "MAX",
+   "FLR",
+   "MOD",
+   "TRC",
+   "SGE",
+   "SLT",
+   "TEXLD",
+   "TEXLDP",
+   "TEXLDB",
+   "TEXKILL",
+   "DCL",
+   "0x1a",
+   "0x1b",
+   "0x1c",
+   "0x1d",
+   "0x1e",
+   "0x1f",
+};
+
+
+static const int args[0x20] = {	/* source-operand count per opcode, indexed like opcodes[]; read by print_arith_op() */
+   0,				/* 0 nop */
+   2,				/* 1 add */
+   1,				/* 2 mov */
+   2,				/* 3 mul */
+   3, 				/* 4 mad */
+   3,				/* 5 dp2add */
+   2,				/* 6 dp3 */
+   2,				/* 7 dp4 */
+   1,				/* 8 frc */
+   1,				/* 9 rcp */
+   1,				/* a rsq */
+   1,				/* b exp */
+   1,				/* c log */
+   3,				/* d cmp */
+   2,				/* e min */
+   2,				/* f max */
+   1,				/* 10 flr */
+   1,				/* 11 mod */
+   1,				/* 12 trc */
+   2,				/* 13 sge */
+   2,				/* 14 slt */
+   1,				/* 15 texld */
+   1,				/* 16 texldp */
+   1,				/* 17 texldb */
+   1,				/* 18 texkill */
+   0,				/* 19 dcl */
+   0,				/* 1a */
+   0,				/* 1b */
+   0,				/* 1c */
+   0,				/* 1d */
+   0,				/* 1e */
+   0,				/* 1f */
+};
+
+
+static const char *regname[0x8] = {
+   "R",
+   "T",
+   "CONST",
+   "S",
+   "OC",
+   "OD",
+   "U",
+   "UNKNOWN",
+};
+
+static void print_reg_type_nr( GLuint type, GLuint nr )
+{
+   /* REG_TYPE_T: three register numbers have symbolic names, any other
+    * number is printed as T_TEX<nr>. */
+   if (type == REG_TYPE_T) {
+      if (nr == T_DIFFUSE)
+         fprintf(stderr, "T_DIFFUSE");
+      else if (nr == T_SPECULAR)
+         fprintf(stderr, "T_SPECULAR");
+      else if (nr == T_FOG_W)
+         fprintf(stderr, "T_FOG_W");
+      else
+         fprintf(stderr, "T_TEX%d", nr);
+      return;
+   }
+
+   /* Register 0 of REG_TYPE_OC / REG_TYPE_OD has a short alias. */
+   if (nr == 0 && type == REG_TYPE_OC) {
+      fprintf(stderr, "oC");
+      return;
+   }
+   if (nr == 0 && type == REG_TYPE_OD) {
+      fprintf(stderr, "oD");
+      return;
+   }
+
+   fprintf(stderr, "%s[%d]", regname[type], nr);
+}
+
+#define REG_SWIZZLE_MASK 0x7777
+#define REG_NEGATE_MASK 0x8888
+
+#define REG_SWIZZLE_XYZW ((SRC_X << A2_SRC2_CHANNEL_X_SHIFT) |	\
+		      (SRC_Y << A2_SRC2_CHANNEL_Y_SHIFT) |	\
+		      (SRC_Z << A2_SRC2_CHANNEL_Z_SHIFT) |	\
+		      (SRC_W << A2_SRC2_CHANNEL_W_SHIFT))
+
+
+static void print_reg_neg_swizzle( GLuint reg )
+{
+   /* Text for each 3-bit channel select; values 6 and 7 print as "?". */
+   static const char *const select_name[8] = {
+      "x", "y", "z", "w", "0", "1", "?", "?"
+   };
+   int chan;
+
+   /* Print nothing for an un-negated .xyzw swizzle. */
+   if ((reg & REG_NEGATE_MASK) == 0 &&
+       (reg & REG_SWIZZLE_MASK) == REG_SWIZZLE_XYZW)
+      return;
+
+   fputs(".", stderr);
+
+   /* Four 4-bit fields, highest first: bit 3 is negate, bits 2:0 select. */
+   for (chan = 3; chan >= 0; chan--) {
+      const GLuint field = (reg >> (chan * 4)) & 0xf;
+
+      if (field & 0x8)
+         fputs("-", stderr);
+
+      fputs(select_name[field & 0x7], stderr);
+   }
+}
+
+
+static void print_src_reg( GLuint dword )
+{
+   /* Register type and number first, then any negate/swizzle suffix. */
+   print_reg_type_nr( (dword >> A2_SRC2_TYPE_SHIFT) & REG_TYPE_MASK,
+                      (dword >> A2_SRC2_NR_SHIFT) & REG_NR_MASK );
+   print_reg_neg_swizzle( dword );
+}
+
+void i915_print_ureg( const char *msg, GLuint ureg )	/* writes "<msg>: <register>\n" to stderr */
+{
+   fprintf(stderr, "%s: ", msg);
+   print_src_reg( ureg >> 8 );	/* NOTE(review): the >> 8 presumably maps the ureg onto the A2 src2 layout -- confirm against UREG() */
+   fprintf(stderr, "\n");
+}
+
+static void print_dest_reg( GLuint dword )
+{
+   print_reg_type_nr( (dword >> A0_DEST_TYPE_SHIFT) & REG_TYPE_MASK,
+                      (dword >> A0_DEST_NR_SHIFT) & REG_NR_MASK );
+   /* The writemask suffix is left out when every channel is enabled. */
+   if ((dword & A0_DEST_CHANNEL_ALL) != A0_DEST_CHANNEL_ALL) {
+      fprintf(stderr, ".");
+      if (dword & A0_DEST_CHANNEL_X) fprintf(stderr, "x");
+      if (dword & A0_DEST_CHANNEL_Y) fprintf(stderr, "y");
+      if (dword & A0_DEST_CHANNEL_Z) fprintf(stderr, "z");
+      if (dword & A0_DEST_CHANNEL_W) fprintf(stderr, "w");
+   }
+}
+
+
+#define GET_SRC0_REG(r0, r1) (((r0)<<14)|((r1)>>A1_SRC0_CHANNEL_W_SHIFT)) /* src0 straddles dwords 0 and 1 */
+#define GET_SRC1_REG(r0, r1) (((r0)<<8)|((r1)>>A2_SRC1_CHANNEL_W_SHIFT))  /* src1 straddles dwords 1 and 2 */
+#define GET_SRC2_REG(r)      (r)                                          /* src2 is used as stored in dword 2 */
+
+
+/* Print one arithmetic instruction (the three dwords at `program`) to
+ * stderr as "dest = [SATURATE ]OP src0[, src1[, src2]]".  `opcode` is the
+ * opcode number used to index opcodes[] and args[]; no dest is shown for NOP.
+ */
+static void print_arith_op( GLuint opcode, const GLuint *program )
+{
+   const int nr_args = args[opcode];
+
+   if (opcode != A0_NOP) {
+      print_dest_reg(program[0]);
+      if (program[0] & A0_DEST_SATURATE)
+	 fprintf(stderr, " = SATURATE ");
+      else
+	 fprintf(stderr, " = ");
+   }
+
+   fprintf(stderr, "%s ", opcodes[opcode]);
+   /* Decode only as many sources as the opcode takes; a NOP has none. */
+   if (nr_args >= 1)
+      print_src_reg(GET_SRC0_REG(program[0], program[1]));
+   if (nr_args >= 2) {
+      fprintf(stderr, ", ");
+      print_src_reg(GET_SRC1_REG(program[1], program[2]));
+   }
+   if (nr_args >= 3) {
+      fprintf(stderr, ", ");
+      print_src_reg(GET_SRC2_REG(program[2]));
+   }
+   fprintf(stderr, "\n");
+}
+
+
+static void print_tex_op( GLuint opcode, const GLuint *program )
+{
+   const GLuint sampler = program[0] & T0_SAMPLER_NR_MASK;
+   const GLuint addr_type = (program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) & REG_TYPE_MASK;
+   const GLuint addr_nr = (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK;
+
+   /* Forcing all channel bits on makes print_dest_reg() omit the suffix. */
+   print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL);
+   fprintf(stderr, " = ");
+   fprintf(stderr, "%s ", opcodes[opcode]);
+   fprintf(stderr, "S[%d],", sampler);
+   print_reg_type_nr( addr_type, addr_nr );
+   fprintf(stderr, "\n");
+}
+
+static void print_dcl_op( GLuint opcode, const GLuint *program )	/* writes "<mnemonic> <register>\n" to stderr */
+{
+   fprintf(stderr, "%s ", opcodes[opcode]);
+   print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL);	/* all channel bits set: no writemask suffix is printed */
+   fprintf(stderr, "\n");
+}
+
+
+void i915_disassemble_program( const GLuint *program, GLuint sz )
+{
+   /* Dump a sz-dword program to stderr; exits if sz disagrees with the header. */
+   GLuint size = program[0] & 0x1ff;
+   GLuint i;
+
+   fprintf(stderr, "BEGIN\n");
+   /* The low 9 bits of the first dword are expected to hold sz - 2. */
+   if (size+2 != sz) {
+      fprintf(stderr, "%s: program size mismatch %u/%u\n", __FUNCTION__,
+	      size+2, sz);
+      exit(1);
+   }
+   /* Header dword first, then 3-dword instructions; stop before a truncated one. */
+   program ++;
+   for (i = 1 ; i + 3 <= sz ; i+=3, program+=3) {
+      GLuint opcode = program[0] & (0x1f<<24);
+      if ((GLint) opcode >= A0_NOP && opcode <= A0_SLT)
+	 print_arith_op(opcode >> 24, program);
+      else if (opcode >= T0_TEXLD && opcode <= T0_TEXKILL)
+	 print_tex_op(opcode >> 24, program);
+      else if (opcode == D0_DCL)
+	 print_dcl_op(opcode >> 24, program);
+      else 
+	 fprintf(stderr, "Unknown opcode 0x%x\n", opcode);
+   }
+
+   fprintf(stderr, "END\n\n");
+}
diff --git a/i915/i915_fragprog.c b/i915/i915_fragprog.c
new file mode 100644
index 0000000..a28c8bb
--- /dev/null
+++ b/i915/i915_fragprog.c
@@ -0,0 +1,1087 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "intel_batchbuffer.h"
+
+#include "i915_reg.h"
+#include "i915_context.h"
+#include "i915_program.h"
+
+#include "prog_instruction.h"
+#include "prog_parameter.h"
+#include "program.h"
+#include "programopt.h"
+
+
+
+/* 1, -1/3!, 1/5!, -1/7! */
+static const GLfloat sin_constants[4] = {  1.0, 
+					   -1.0/(3*2*1),
+					   1.0/(5*4*3*2*1),
+					   -1.0/(7*6*5*4*3*2*1) };
+
+/* 1, -1/2!, 1/4!, -1/6! */
+static const GLfloat cos_constants[4] = {  1.0, 
+					   -1.0/(2*1),
+					   1.0/(4*3*2*1),
+					   -1.0/(6*5*4*3*2*1) };
+
+/**
+ * Retrieve a ureg for the given source register.  Emits declarations/constants,
+ * applies swizzle and negation; on a bad register flags a program error, returns 0.
+ */
+static GLuint src_vector( struct i915_fragment_program *p,
+			  const struct prog_src_register *source,
+			  const struct gl_fragment_program *program )
+{
+   GLuint src;
+
+   switch (source->File) {
+
+      /* Registers:
+       */
+      case PROGRAM_TEMPORARY:
+	 if (source->Index >= I915_MAX_TEMPORARY) {
+	    i915_program_error( p, "Exceeded max temporary reg" );
+	    return 0;
+	 }
+	 src = UREG( REG_TYPE_R, source->Index );
+         break;
+      case PROGRAM_INPUT:
+	 switch (source->Index) {
+	 case FRAG_ATTRIB_WPOS:
+	    src = i915_emit_decl( p,  REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL ); 
+	    break;
+	 case FRAG_ATTRIB_COL0:
+	    src = i915_emit_decl( p,  REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL ); 
+	    break;
+	 case FRAG_ATTRIB_COL1:
+	    src = i915_emit_decl( p,  REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ ); 
+	    src = swizzle( src, X, Y, Z, ONE );	/* only xyz is declared; w is forced to ONE */
+	    break;
+	 case FRAG_ATTRIB_FOGC:
+	    src = i915_emit_decl( p,  REG_TYPE_T, T_FOG_W, D0_CHANNEL_W ); 
+	    src = swizzle( src, W, W, W, W );	/* only w is declared; replicate it to every channel */
+	    break;
+	 case FRAG_ATTRIB_TEX0:
+	 case FRAG_ATTRIB_TEX1:
+	 case FRAG_ATTRIB_TEX2:
+	 case FRAG_ATTRIB_TEX3:
+	 case FRAG_ATTRIB_TEX4:
+	 case FRAG_ATTRIB_TEX5:
+	 case FRAG_ATTRIB_TEX6:
+	 case FRAG_ATTRIB_TEX7:
+	    src = i915_emit_decl( p,  REG_TYPE_T, 
+				 T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0),
+				 D0_CHANNEL_ALL ); 
+	    break;
+
+	 default:
+	    i915_program_error( p, "Bad source->Index" ); 
+	    return 0;
+	 }
+         break;
+
+	 /* Various parameters and env values.  All emitted to
+	  * hardware as program constants.
+	  */
+      case PROGRAM_LOCAL_PARAM:
+         src = i915_emit_param4fv( 
+	    p, program->Base.LocalParams[source->Index]);
+	 break;
+
+      case PROGRAM_ENV_PARAM:
+         src = i915_emit_param4fv( 
+	    p, p->ctx->FragmentProgram.Parameters[source->Index]);
+	 break;
+
+      case PROGRAM_CONSTANT:
+      case PROGRAM_STATE_VAR:
+      case PROGRAM_NAMED_PARAM:
+         src = i915_emit_param4fv( 
+	    p, program->Base.Parameters->ParameterValues[source->Index] );
+	 break;
+
+      default:
+	 i915_program_error( p, "Bad source->File" ); 
+	 return 0;
+   }
+   /* Apply the operand's own swizzle on top of whatever the case above selected. */
+   src = swizzle(src, 
+		 GET_SWZ(source->Swizzle, 0),
+		 GET_SWZ(source->Swizzle, 1),
+		 GET_SWZ(source->Swizzle, 2),
+		 GET_SWZ(source->Swizzle, 3));
+
+   if (source->NegateBase)
+      src = negate( src, 
+		    GET_BIT(source->NegateBase, 0),
+		    GET_BIT(source->NegateBase, 1),
+		    GET_BIT(source->NegateBase, 2),
+		    GET_BIT(source->NegateBase, 3));
+
+   return src;
+}
+
+
+/* Map inst->DstReg to a destination ureg; flags a program error and returns 0 otherwise. */
+static GLuint get_result_vector( struct i915_fragment_program *p,
+				 const struct prog_instruction *inst )
+{
+   switch (inst->DstReg.File) {
+   case PROGRAM_OUTPUT:
+      switch (inst->DstReg.Index) {
+      case FRAG_RESULT_COLR: return UREG(REG_TYPE_OC, 0);
+      case FRAG_RESULT_DEPR: p->depth_written = 1; return UREG(REG_TYPE_OD, 0);
+      default: i915_program_error( p, "Bad inst->DstReg.Index" ); return 0;
+      }
+   case PROGRAM_TEMPORARY:
+      if (inst->DstReg.Index >= I915_MAX_TEMPORARY) {	/* same limit src_vector() applies to sources */
+	 i915_program_error( p, "Exceeded max temporary reg" );
+	 return 0;
+      }
+      return UREG(REG_TYPE_R, inst->DstReg.Index);
+   default:
+      i915_program_error( p, "Bad inst->DstReg.File" ); 
+      return 0;
+   }
+}
+   
+static GLuint get_result_flags( const struct prog_instruction *inst )
+{
+   /* Saturate bit plus one A0_DEST_CHANNEL_* bit per enabled writemask channel. */
+   const GLuint mask = inst->DstReg.WriteMask;
+   GLuint dest = (inst->SaturateMode == SATURATE_ZERO_ONE) ? A0_DEST_SATURATE : 0;
+
+   dest |= (mask & WRITEMASK_X) ? A0_DEST_CHANNEL_X : 0;
+   dest |= (mask & WRITEMASK_Y) ? A0_DEST_CHANNEL_Y : 0;
+   dest |= (mask & WRITEMASK_Z) ? A0_DEST_CHANNEL_Z : 0;
+   dest |= (mask & WRITEMASK_W) ? A0_DEST_CHANNEL_W : 0;
+   return dest;
+}
+
+static GLuint translate_tex_src_target( struct i915_fragment_program *p,
+				     GLubyte bit )
+{
+   if (bit == TEXTURE_1D_INDEX || bit == TEXTURE_2D_INDEX ||
+       bit == TEXTURE_RECT_INDEX)
+      return D0_SAMPLE_TYPE_2D;	/* 1D, 2D and RECT all map to the 2D sample type */
+   if (bit == TEXTURE_3D_INDEX)
+      return D0_SAMPLE_TYPE_VOLUME;
+   if (bit == TEXTURE_CUBE_INDEX)
+      return D0_SAMPLE_TYPE_CUBE;
+   i915_program_error(p, "TexSrcBit"); return 0;	/* any other target index */
+}
+
+#define EMIT_TEX( OP )						\
+do {	/* needs p, inst and program in scope where it is expanded */	\
+   GLuint dim = translate_tex_src_target( p, inst->TexSrcTarget );	\
+   GLuint sampler = i915_emit_decl(p, REG_TYPE_S,		\
+				  inst->TexSrcUnit, dim);	\
+   GLuint coord = src_vector( p, &inst->SrcReg[0], program);	\
+   /* Texel lookup */						\
+								\
+   i915_emit_texld( p,						\
+	       get_result_vector( p, inst ),			\
+	       get_result_flags( inst ),			\
+	       sampler,						\
+	       coord,						\
+	       OP);						\
+} while (0)
+
+#define EMIT_ARITH( OP, N )						\
+do {	/* needs p, inst, program in scope; sources past the first N are 0 */	\
+   i915_emit_arith( p,							\
+	       OP,							\
+	       get_result_vector( p, inst ), 				\
+	       get_result_flags( inst ), 0,			\
+	       ((N)<1)?0:src_vector( p, &inst->SrcReg[0], program),	\
+	       ((N)<2)?0:src_vector( p, &inst->SrcReg[1], program),	\
+	       ((N)<3)?0:src_vector( p, &inst->SrcReg[2], program));	\
+} while (0)
+
+#define EMIT_1ARG_ARITH( OP ) EMIT_ARITH( OP, 1 )
+#define EMIT_2ARG_ARITH( OP ) EMIT_ARITH( OP, 2 )
+#define EMIT_3ARG_ARITH( OP ) EMIT_ARITH( OP, 3 )
+
+
+/* Possible concerns:
+ *
+ * SIN, COS -- could use another taylor step?
+ * LIT      -- results seem a little different to sw mesa
+ * LOG      -- different to mesa on negative numbers, but this is conformant.
+ * 
+ * Parse failures -- Mesa doesn't currently give a good indication
+ * internally whether a particular program string parsed or not.  This
+ * can lead to confusion -- hopefully we cope with it ok now.
+ *
+ */
+static void upload_program( struct i915_fragment_program *p )
+{
+   const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current;
+   const struct prog_instruction *inst = program->Base.Instructions;
+
+/*    _mesa_debug_fp_inst(program->Base.NumInstructions, inst); */
+
+   /* Is this a parse-failed program?  Ensure a valid program is
+    * loaded, as the flagging of an error isn't sufficient to stop
+    * this being uploaded to hardware.
+    */
+   if (inst[0].Opcode == OPCODE_END) {
+      GLuint tmp = i915_get_utemp( p );
+      i915_emit_arith( p,
+		      A0_MOV,
+		      UREG(REG_TYPE_OC, 0), 
+		      A0_DEST_CHANNEL_ALL, 0,
+		      swizzle(tmp,ONE,ZERO,ONE,ONE), 0, 0);
+      return;
+   }
+
+   while (1) {
+      GLuint src0, src1, src2, flags;
+      GLuint tmp = 0;
+
+      switch (inst->Opcode) {
+      case OPCODE_ABS: 
+	 src0 = src_vector( p, &inst->SrcReg[0], program);
+	 i915_emit_arith( p, 
+			 A0_MAX,
+			 get_result_vector( p, inst ), 
+			 get_result_flags( inst ), 0,
+			 src0, negate(src0, 1,1,1,1), 0);
+	 break;
+
+      case OPCODE_ADD: 
+	 EMIT_2ARG_ARITH( A0_ADD );
+	 break;
+
+      case OPCODE_CMP: 
+	 src0 = src_vector( p, &inst->SrcReg[0], program);
+	 src1 = src_vector( p, &inst->SrcReg[1], program);
+	 src2 = src_vector( p, &inst->SrcReg[2], program);
+	 i915_emit_arith( p, 
+			 A0_CMP,
+			 get_result_vector( p, inst ), 
+			 get_result_flags( inst ), 0,
+			 src0, src2, src1);	/* NOTE: order of src2, src1 */
+	 break;
+
+      case OPCODE_COS:
+	 src0 = src_vector( p, &inst->SrcReg[0], program);
+	 tmp = i915_get_utemp( p );
+
+	 i915_emit_arith( p, 
+			 A0_MUL,
+			 tmp, A0_DEST_CHANNEL_X, 0,
+			 src0, 
+			 i915_emit_const1f(p, 1.0/(M_PI * 2)),
+			 0);
+
+	 i915_emit_arith( p, 
+			 A0_MOD,
+			 tmp, A0_DEST_CHANNEL_X, 0,
+			 tmp, 
+			 0, 0 );
+
+	 /* By choosing different taylor constants, could get rid of this mul:
+	  */
+	 i915_emit_arith( p, 
+			 A0_MUL,
+			 tmp, A0_DEST_CHANNEL_X, 0,
+			 tmp, 
+			 i915_emit_const1f(p, (M_PI * 2)),
+			 0);
+
+	 /* 
+	  * t0.xy = MUL x.xx11, x.x111  ; x^2, x, 1, 1
+	  * t0 = MUL t0.xyx1 t0.xx11 ; x^4, x^3, x^2, 1
+	  * t0 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
+	  * result = DP4 t0, cos_constants
+	  */
+	 i915_emit_arith( p, 
+			 A0_MUL,
+			 tmp, A0_DEST_CHANNEL_XY, 0,
+			 swizzle(tmp, X,X,ONE,ONE), 
+			 swizzle(tmp, X,ONE,ONE,ONE), 0);
+
+	 i915_emit_arith( p, 
+			 A0_MUL,
+			 tmp, A0_DEST_CHANNEL_XYZ, 0,
+			 swizzle(tmp, X,Y,X,ONE), 
+			 swizzle(tmp, X,X,ONE,ONE), 0);
+
+	 i915_emit_arith( p, 
+			 A0_MUL,
+			 tmp, A0_DEST_CHANNEL_XYZ, 0,
+			 swizzle(tmp, X,X,Z,ONE), 
+			 swizzle(tmp, Z,ONE,ONE,ONE), 0);
+	    
+	 i915_emit_arith( p, 
+			 A0_DP4,
+			 get_result_vector( p, inst ), 
+			 get_result_flags( inst ), 0,
+			 swizzle(tmp, ONE,Z,Y,X),
+			 i915_emit_const4fv( p, cos_constants ), 0);
+
+	 break;
+
+      case OPCODE_DP3: 
+	 EMIT_2ARG_ARITH( A0_DP3 );
+	 break;
+
+      case OPCODE_DP4: 
+	 EMIT_2ARG_ARITH( A0_DP4 );
+	 break;
+
+      case OPCODE_DPH:  
+	 src0 = src_vector( p, &inst->SrcReg[0], program);
+	 src1 = src_vector( p, &inst->SrcReg[1], program);
+
+	 i915_emit_arith( p, 
+			 A0_DP4,
+			 get_result_vector( p, inst ), 
+			 get_result_flags( inst ), 0,
+			 swizzle(src0, X,Y,Z,ONE), src1, 0);
+	 break;
+
+      case OPCODE_DST: 
+	 src0 = src_vector( p, &inst->SrcReg[0], program);
+	 src1 = src_vector( p, &inst->SrcReg[1], program);
+
+	 /* result[0] = 1    * 1;
+	  * result[1] = a[1] * b[1];
+	  * result[2] = a[2] * 1;
+	  * result[3] = 1    * b[3];
+	  */
+	 i915_emit_arith( p, 
+			 A0_MUL,
+			 get_result_vector( p, inst ), 
+			 get_result_flags( inst ), 0,
+			 swizzle(src0, ONE, Y, Z,   ONE), 
+			 swizzle(src1, ONE, Y, ONE, W  ),
+			 0);
+	 break;
+
+      case OPCODE_EX2: 
+	 src0 = src_vector( p, &inst->SrcReg[0], program);
+
+	 i915_emit_arith( p, 
+			 A0_EXP,
+			 get_result_vector( p, inst ), 
+			 get_result_flags( inst ), 0,
+			 swizzle(src0,X,X,X,X), 0, 0);
+	 break;
+
+      case OPCODE_FLR: 
+	 EMIT_1ARG_ARITH( A0_FLR );
+	 break;
+
+      case OPCODE_FRC: 
+	 EMIT_1ARG_ARITH( A0_FRC );
+	 break;
+
+      case OPCODE_KIL:
+	 src0 = src_vector( p, &inst->SrcReg[0], program);
+	 tmp = i915_get_utemp( p );
+
+	 i915_emit_texld( p,
+			 tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */
+			 0,
+			 src0,
+			 T0_TEXKILL );
+	 break;
+
+      case OPCODE_LG2: 
+	 src0 = src_vector( p, &inst->SrcReg[0], program);
+
+	 i915_emit_arith( p, 
+			 A0_LOG,
+			 get_result_vector( p, inst ), 
+			 get_result_flags( inst ), 0,
+			 swizzle(src0,X,X,X,X), 0, 0);
+	 break;
+
+      case OPCODE_LIT: 
+	 src0 = src_vector( p, &inst->SrcReg[0], program);
+	 tmp = i915_get_utemp( p );
+
+	 /* tmp = max( a.xyzw, a.00zw )
+	  * XXX: Clamp tmp.w to -128..128
+	  * tmp.y = log(tmp.y)
+	  * tmp.y = tmp.w * tmp.y
+	  * tmp.y = exp(tmp.y)
+	  * result = cmp (a.11-x1, a.1x01, a.1xy1 )
+	  */
+	 i915_emit_arith( p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, 
+			 src0, swizzle(src0, ZERO, ZERO, Z, W), 0 );
+
+	 i915_emit_arith( p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, 
+			 swizzle(tmp, Y, Y, Y, Y), 0, 0 );
+
+	 i915_emit_arith( p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, 
+			 swizzle(tmp, ZERO, Y, ZERO, ZERO), 
+			 swizzle(tmp, ZERO, W, ZERO, ZERO), 0 );
+
+	 i915_emit_arith( p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, 
+			 swizzle(tmp, Y, Y, Y, Y), 0, 0 );
+
+	 i915_emit_arith( p, A0_CMP,
+			 get_result_vector( p, inst ), 
+			 get_result_flags( inst ), 0,
+			 negate(swizzle(tmp, ONE, ONE, X, ONE),0,0,1,0),
+			 swizzle(tmp, ONE, X, ZERO, ONE),
+			 swizzle(tmp, ONE, X, Y, ONE));
+		     
+	 break;
+
+      case OPCODE_LRP: 
+	 src0 = src_vector( p, &inst->SrcReg[0], program);
+	 src1 = src_vector( p, &inst->SrcReg[1], program);
+	 src2 = src_vector( p, &inst->SrcReg[2], program);
+	 flags = get_result_flags( inst );
+	 tmp = i915_get_utemp( p );
+
+	 /* b*a + c*(1-a)
+	  *
+	  * b*a + c - ca 
+	  *
+	  * tmp = b*a + c, 
+	  * result = (-c)*a + tmp 
+	  */
+	 i915_emit_arith( p, A0_MAD, tmp, 
+			 flags & A0_DEST_CHANNEL_ALL, 0,
+			 src1, src0, src2 );
+
+	 i915_emit_arith( p, A0_MAD, 
+			 get_result_vector( p, inst ), 
+			 flags, 0, 
+			 negate(src2, 1,1,1,1), src0, tmp );
+	 break;
+
+      case OPCODE_MAD:
+	 EMIT_3ARG_ARITH( A0_MAD );
+	 break;
+
+      case OPCODE_MAX:
+	 EMIT_2ARG_ARITH( A0_MAX );
+	 break;
+
+      case OPCODE_MIN: 
+	 src0 = src_vector( p, &inst->SrcReg[0], program);
+	 src1 = src_vector( p, &inst->SrcReg[1], program);
+	 tmp = i915_get_utemp( p );
+	 flags = get_result_flags( inst );
+
+	 i915_emit_arith( p, 
+			 A0_MAX,
+			 tmp, flags & A0_DEST_CHANNEL_ALL, 0,
+			 negate(src0,1,1,1,1), 
+			 negate(src1,1,1,1,1), 0);
+
+	 i915_emit_arith( p,
+			 A0_MOV,
+			 get_result_vector( p, inst ), 
+			 flags, 0,
+			 negate(tmp, 1,1,1,1), 0, 0);
+	 break;
+
+      case OPCODE_MOV: 
+	 EMIT_1ARG_ARITH( A0_MOV );
+	 break;
+
+      case OPCODE_MUL: 
+	 EMIT_2ARG_ARITH( A0_MUL );
+	 break;
+
+      case OPCODE_POW: 
+	 src0 = src_vector( p, &inst->SrcReg[0], program);
+	 src1 = src_vector( p, &inst->SrcReg[1], program);
+	 tmp = i915_get_utemp( p );
+	 flags = get_result_flags( inst );
+
+	 /* XXX: masking on intermediate values, here and elsewhere.
+	  */
+	 i915_emit_arith( p, 
+			 A0_LOG,
+			 tmp, A0_DEST_CHANNEL_X, 0,
+			 swizzle(src0,X,X,X,X), 0, 0);
+
+	 i915_emit_arith( p,
+			 A0_MUL,
+			 tmp, A0_DEST_CHANNEL_X, 0,
+			 tmp, src1, 0);
+
+
+	 i915_emit_arith( p,
+			 A0_EXP,
+			 get_result_vector( p, inst ), 
+			 flags, 0,
+			 swizzle(tmp,X,X,X,X), 0, 0);
+
+	 break;
+
+      case OPCODE_RCP: 
+	 src0 = src_vector( p, &inst->SrcReg[0], program);
+
+	 i915_emit_arith( p, 
+			 A0_RCP,
+			 get_result_vector( p, inst ), 
+			 get_result_flags( inst ), 0,
+			 swizzle(src0,X,X,X,X), 0, 0);
+	 break;
+
+      case OPCODE_RSQ: 
+
+	 src0 = src_vector( p, &inst->SrcReg[0], program);
+
+	 i915_emit_arith( p, 
+			 A0_RSQ,
+			 get_result_vector( p, inst ), 
+			 get_result_flags( inst ), 0,
+			 swizzle(src0,X,X,X,X), 0, 0);
+	 break;
+	 
+      case OPCODE_SCS:
+	 src0 = src_vector( p, &inst->SrcReg[0], program);
+	 tmp = i915_get_utemp( p );
+
+	 /* 
+	  * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
+	  * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
+	  * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
+	  * scs.x = DP4 t1, sin_constants
+	  * t1 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
+	  * scs.y = DP4 t1, cos_constants
+	  */
+	 i915_emit_arith( p, 
+			 A0_MUL,
+			 tmp, A0_DEST_CHANNEL_XY, 0,
+			 swizzle(src0, X,X,ONE,ONE), 
+			 swizzle(src0, X,ONE,ONE,ONE), 0);
+
+	 i915_emit_arith( p, 
+			 A0_MUL,
+			 tmp, A0_DEST_CHANNEL_ALL, 0,
+			 swizzle(tmp, X,Y,X,Y), 
+			 swizzle(tmp, X,X,ONE,ONE), 0);
+
+	 if (inst->DstReg.WriteMask & WRITEMASK_Y) {
+	    GLuint tmp1;
+	    
+	    if (inst->DstReg.WriteMask & WRITEMASK_X)
+	       tmp1 = i915_get_utemp( p );
+	    else
+	       tmp1 = tmp;
+
+	    i915_emit_arith( p, 
+			    A0_MUL,
+			    tmp1, A0_DEST_CHANNEL_ALL, 0,
+			    swizzle(tmp, X,Y,Y,W), 
+			    swizzle(tmp, X,Z,ONE,ONE), 0);
+	    
+	    i915_emit_arith( p, 
+			    A0_DP4,
+			    get_result_vector( p, inst ), 
+			    A0_DEST_CHANNEL_Y, 0,
+			    swizzle(tmp1, W,Z,Y,X),
+			    i915_emit_const4fv( p, sin_constants ), 0);
+	 }
+
+	 if (inst->DstReg.WriteMask & WRITEMASK_X) {
+	    i915_emit_arith( p, 
+			    A0_MUL,
+			    tmp, A0_DEST_CHANNEL_XYZ, 0,
+			    swizzle(tmp, X,X,Z,ONE), 
+			    swizzle(tmp, Z,ONE,ONE,ONE), 0);
+	    
+	    i915_emit_arith( p, 
+			    A0_DP4,
+			    get_result_vector( p, inst ), 
+			    A0_DEST_CHANNEL_X, 0,
+			    swizzle(tmp, ONE,Z,Y,X), 
+			    i915_emit_const4fv( p, cos_constants ), 0);
+	 }
+	 break;
+
+      case OPCODE_SGE: 
+	 EMIT_2ARG_ARITH( A0_SGE );
+	 break;
+
+      case OPCODE_SIN:
+	 src0 = src_vector( p, &inst->SrcReg[0], program);
+	 tmp = i915_get_utemp( p );
+
+	 i915_emit_arith( p, 
+			 A0_MUL,
+			 tmp, A0_DEST_CHANNEL_X, 0,
+			 src0, 
+			 i915_emit_const1f(p, 1.0/(M_PI * 2)),
+			 0);
+
+	 i915_emit_arith( p, 
+			 A0_MOD,
+			 tmp, A0_DEST_CHANNEL_X, 0,
+			 tmp, 
+			 0, 0 );
+
+	 /* By choosing different taylor constants, could get rid of this mul:
+	  */
+	 i915_emit_arith( p, 
+			 A0_MUL,
+			 tmp, A0_DEST_CHANNEL_X, 0,
+			 tmp, 
+			 i915_emit_const1f(p, (M_PI * 2)),
+			 0);
+
+	 /* 
+	  * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
+	  * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
+	  * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
+	  * result = DP4 t1.wzyx, sin_constants
+	  */
+	 i915_emit_arith( p, 
+			 A0_MUL,
+			 tmp, A0_DEST_CHANNEL_XY, 0,
+			 swizzle(tmp, X,X,ONE,ONE), 
+			 swizzle(tmp, X,ONE,ONE,ONE), 0);
+
+	 i915_emit_arith( p, 
+			 A0_MUL,
+			 tmp, A0_DEST_CHANNEL_ALL, 0,
+			 swizzle(tmp, X,Y,X,Y), 
+			 swizzle(tmp, X,X,ONE,ONE), 0);
+
+	 i915_emit_arith( p, 
+			 A0_MUL,
+			 tmp, A0_DEST_CHANNEL_ALL, 0,
+			 swizzle(tmp, X,Y,Y,W), 
+			 swizzle(tmp, X,Z,ONE,ONE), 0);
+	    
+	 i915_emit_arith( p, 
+			 A0_DP4,
+			 get_result_vector( p, inst ), 
+			 get_result_flags( inst ), 0,
+			 swizzle(tmp, W, Z, Y, X ),
+			 i915_emit_const4fv( p, sin_constants ), 0);
+	 break;
+
+      case OPCODE_SLT: 
+	 EMIT_2ARG_ARITH( A0_SLT );
+	 break;
+
+      case OPCODE_SUB: 
+	 src0 = src_vector( p, &inst->SrcReg[0], program);
+	 src1 = src_vector( p, &inst->SrcReg[1], program);
+
+	 i915_emit_arith( p, 
+			 A0_ADD,
+			 get_result_vector( p, inst ), 
+			 get_result_flags( inst ), 0,
+			 src0, negate(src1, 1,1,1,1), 0);
+	 break;
+
+      case OPCODE_SWZ: 
+	 EMIT_1ARG_ARITH( A0_MOV ); /* extended swizzle handled natively */
+	 break;
+
+      case OPCODE_TEX: 
+	 EMIT_TEX( T0_TEXLD );
+	 break;
+
+      case OPCODE_TXB:
+	 EMIT_TEX( T0_TEXLDB );
+	 break;
+
+      case OPCODE_TXP:
+	 EMIT_TEX( T0_TEXLDP );
+	 break;
+
+      case OPCODE_XPD:
+	 /* Cross product:
+	  *      result.x = src0.y * src1.z - src0.z * src1.y;
+	  *      result.y = src0.z * src1.x - src0.x * src1.z;
+	  *      result.z = src0.x * src1.y - src0.y * src1.x;
+	  *      result.w = undef;
+	  */
+	 src0 = src_vector( p, &inst->SrcReg[0], program);
+	 src1 = src_vector( p, &inst->SrcReg[1], program);
+	 tmp = i915_get_utemp( p );
+	 
+	 i915_emit_arith( p, 
+			 A0_MUL,
+			 tmp, A0_DEST_CHANNEL_ALL, 0,
+			 swizzle(src0,Z,X,Y,ONE), 
+			 swizzle(src1,Y,Z,X,ONE), 0);
+
+	 i915_emit_arith( p, 
+			 A0_MAD,
+			 get_result_vector( p, inst ), 
+			 get_result_flags( inst ), 0,
+			 swizzle(src0,Y,Z,X,ONE), 
+			 swizzle(src1,Z,X,Y,ONE), 
+			 negate(tmp,1,1,1,0));
+	 break;
+
+      case OPCODE_END:
+	 return;
+	 
+      default:
+	 i915_program_error( p, "bad opcode" );
+	 return;
+      }
+
+      inst++;
+      i915_release_utemps( p ); 
+   }
+}
+
+/* Depth results are not redirected while instructions are emitted.
+ * Instead, when the program wrote depth, one MOV is appended that
+ * copies the Z channel of the depth register into its W channel.
+ */
+static void fixup_depth_write( struct i915_fragment_program *p )
+{
+   GLuint od;
+
+   if (!p->depth_written)
+      return;
+
+   /* W <- Z on output register OD 0; only the W channel is written. */
+   od = UREG(REG_TYPE_OD, 0);
+   i915_emit_arith( p, A0_MOV, od, A0_DEST_CHANNEL_W, 0,
+                    swizzle(od, X, Y, Z, Z), 0, 0 );
+}
+
+/* Pick a texcoord unit the program does not read to carry WPOS. */
+static void check_wpos( struct i915_fragment_program *p )
+{
+   GLuint pending = p->FragProg.Base.InputsRead;
+   GLint unit;
+
+   p->wpos_tex = -1;   /* no unit assigned */
+
+   for (unit = 0; unit < p->ctx->Const.MaxTextureCoordUnits; unit++) {
+      /* The first unit not read as a texcoord receives WPOS; clearing
+       * the bit keeps later free units from being chosen. */
+      if (!(pending & FRAG_BIT_TEX(unit)) && (pending & FRAG_BIT_WPOS)) {
+         p->wpos_tex = unit;
+         pending &= ~FRAG_BIT_WPOS;
+      }
+   }
+
+   if (pending & FRAG_BIT_WPOS) {
+      i915_program_error(p, "No free texcoord for wpos value");
+   }
+}
+
+/* Translate p into hardware instructions after (re)initialising its
+ * program state, and record that the translation has been done. */
+static void translate_program( struct i915_fragment_program *p )
+{
+   i915_init_program( I915_CONTEXT(p->ctx), p );
+
+   check_wpos( p );          /* wpos slot is chosen before emission */
+   upload_program( p );
+   fixup_depth_write( p );   /* appended after the program body */
+   i915_fini_program( p );
+
+   p->translated = 1;
+}
+
+/* Refresh tracked state parameters and copy them into p->constant[]. */
+static void track_params( struct i915_fragment_program *p )
+{
+   GLint n;
+
+   if (p->nr_params != 0) {
+      _mesa_load_state_parameters(p->ctx, p->FragProg.Base.Parameters);
+   }
+
+   for (n = 0; n < p->nr_params; n++) {
+      COPY_4V( p->constant[p->param[n].reg], p->param[n].values );
+   }
+
+   p->params_uptodate = 1;
+   p->on_hardware = 0;   /* forces a re-upload; flagged "overkill" upstream */
+}
+
+/* BindProgram hook: remember the newly bound fragment program. */
+static void i915BindProgram( GLcontext *ctx,
+                             GLenum target,
+                             struct gl_program *prog )
+{
+   i915ContextPtr i915;
+   struct i915_fragment_program *fp;
+
+   if (target != GL_FRAGMENT_PROGRAM_ARB)
+      return;
+
+   i915 = I915_CONTEXT(ctx);
+   fp = (struct i915_fragment_program *)prog;
+   if (fp == i915->current_program)
+      return;                   /* already current */
+
+   /* The outgoing program has to be refreshed when it is used again. */
+   if (i915->current_program != NULL) {
+      i915->current_program->params_uptodate = 0;
+      i915->current_program->on_hardware = 0;
+   }
+
+   i915->current_program = fp;
+   assert(fp->on_hardware == 0);
+   assert(fp->params_uptodate == 0);
+   /* Hack: re-run the enable hook so fog follows this program's options. */
+   ctx->Driver.Enable( ctx, GL_FRAGMENT_PROGRAM_ARB,
+                       ctx->FragmentProgram.Enabled );
+}
+
+static struct gl_program *i915NewProgram( GLcontext *ctx,
+                                          GLenum target,
+                                          GLuint id )
+{
+   /* Vertex programs use the stock Mesa object. */
+   if (target == GL_VERTEX_PROGRAM_ARB) {
+      struct gl_vertex_program *vp = CALLOC_STRUCT(gl_vertex_program);
+
+      return _mesa_init_vertex_program( ctx, vp, target, id );
+   }
+
+   /* Fragment programs get the driver's extended structure, which is
+    * initialised before Mesa fills in the embedded FragProg member.
+    */
+   if (target == GL_FRAGMENT_PROGRAM_ARB) {
+      struct i915_fragment_program *fp = CALLOC_STRUCT(i915_fragment_program);
+
+      if (fp == NULL)
+         return NULL;
+
+      i915_init_program( I915_CONTEXT(ctx), fp );
+      return _mesa_init_fragment_program( ctx, &fp->FragProg, target, id );
+   }
+
+   /* Any other target: fall back to _mesa_new_program(). */
+   return _mesa_new_program( ctx, target, id );
+}
+
+static void i915DeleteProgram( GLcontext *ctx,
+                               struct gl_program *prog )
+{
+   /* Drop the driver's cached pointer before Mesa deletes the object. */
+   if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+      struct i915_fragment_program *doomed = (struct i915_fragment_program *)prog;
+      i915ContextPtr i915 = I915_CONTEXT(ctx);
+
+      if (doomed == i915->current_program)
+         i915->current_program = NULL;
+   }
+   _mesa_delete_program( ctx, prog );
+}
+
+/* IsProgramNative hook: a fragment program is native iff it translates. */
+static GLboolean i915IsProgramNative( GLcontext *ctx,
+                                      GLenum target,
+                                      struct gl_program *prog )
+{
+   struct i915_fragment_program *fp;
+
+   /* Every non-fragment target is reported as native. */
+   if (target != GL_FRAGMENT_PROGRAM_ARB)
+      return GL_TRUE;
+
+   fp = (struct i915_fragment_program *)prog;
+   if (!fp->translated)
+      translate_program( fp );   /* translate lazily on first query */
+   return !fp->error;
+}
+
+static void i915ProgramStringNotify( GLcontext *ctx,
+                                     GLenum target,
+                                     struct gl_program *prog )
+{
+   if (target == GL_FRAGMENT_PROGRAM_ARB) {
+      struct i915_fragment_program *fp = (struct i915_fragment_program *)prog;
+
+      fp->translated = 0;   /* new program text: translation is stale */
+
+      /* Hack: re-run the enable hook so fog follows this program's options. */
+      ctx->Driver.Enable( ctx, GL_FRAGMENT_PROGRAM_ARB,
+                          ctx->FragmentProgram.Enabled );
+
+      /* Fog requested through FogOption is appended to the program as
+       * extra instructions, after which the option field is cleared. */
+      if (fp->FragProg.FogOption != GL_NONE) {
+         _mesa_append_fog_code(ctx, &fp->FragProg);
+         fp->FragProg.FogOption = GL_NONE;
+      }
+   }
+
+   _tnl_program_string(ctx, target, prog);
+}
+
+/* Validate fragment-program state: vertex layout, constants and program upload. */
+void i915ValidateFragmentProgram( i915ContextPtr i915 )
+{
+   GLcontext *ctx = &i915->intel.ctx;
+   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+
+   struct i915_fragment_program *p = 
+      (struct i915_fragment_program *)ctx->FragmentProgram._Current;
+
+   const GLuint inputsRead = p->FragProg.Base.InputsRead;
+   GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
+   GLuint s2 = S2_TEXCOORD_NONE;
+   int i, offset = 0;
+   /* Same program-switch bookkeeping as in i915BindProgram: */
+   if (i915->current_program != p) 
+   {
+      if (i915->current_program) {
+	 i915->current_program->on_hardware = 0;
+	 i915->current_program->params_uptodate = 0;
+      }
+      
+      i915->current_program = p;
+   }
+
+
+   /* Important: position data comes from VB->NdcPtr:
+    */
+   VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
+
+   if (!p->translated) 
+      translate_program( p );
+   /* Start a fresh vertex layout; filled in below (presumably by EMIT_ATTR/EMIT_PAD). */
+   intel->vertex_attr_count = 0;
+   intel->wpos_offset = 0;
+   intel->wpos_size = 0;
+   intel->coloroffset = 0;
+   intel->specoffset = 0;
+   /* Position: XYZW when any texcoord is read, otherwise XYZ. */
+   if (inputsRead & FRAG_BITS_TEX_ANY) {
+      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16 );
+   }
+   else {
+      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12 );
+   }
+
+   if (inputsRead & FRAG_BIT_COL0) {
+      intel->coloroffset = offset / 4;
+      EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4 );
+   }
+   /* COLOR1 and fog share S4_VFMT_SPEC_FOG: 3 + 1 units, padded when absent. */
+   if ((inputsRead & (FRAG_BIT_COL1|FRAG_BIT_FOGC)) || 
+       i915->vertex_fog != I915_FOG_NONE) {
+
+      if (inputsRead & FRAG_BIT_COL1) {
+	 intel->specoffset = offset / 4;
+	 EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, S4_VFMT_SPEC_FOG, 3 );
+      }
+      else
+	 EMIT_PAD(3);
+
+      if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE) 
+	 EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, S4_VFMT_SPEC_FOG, 1 );
+      else
+	 EMIT_PAD( 1 );
+   }
+
+   /* XXX this was disabled, but enabling this code helped fix the Glean
+    * tfragprog1 fog tests.
+    */
+#if 1
+   if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE) {
+      EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4 );
+   }
+#endif
+   /* Texcoords read by the program, plus padding for the WPOS slot if any. */
+   for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
+      if (inputsRead & FRAG_BIT_TEX(i)) {
+	 int sz = VB->TexCoordPtr[i]->size;
+	    
+	 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
+	 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
+
+	 EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_SZ(sz), 0, sz * 4 );
+      }
+      else if (i == p->wpos_tex) {
+	
+	 /* If WPOS is required, duplicate the XYZ position data in an
+	  * unused texture coordinate:
+	  */
+	 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
+	 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(3));
+
+	 intel->wpos_offset = offset;
+	 intel->wpos_size = 3 * sizeof(GLuint);
+
+	 EMIT_PAD( intel->wpos_size );
+      }   
+   }
+   /* Reinstall the vertex attributes only when LIS2 or LIS4 changed. */
+   if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] ||
+       s4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
+    
+      I915_STATECHANGE( i915, I915_UPLOAD_CTX );
+
+      /* Must do this *after* statechange, so as not to affect
+       * buffered vertices reliant on the old state:
+       */
+      intel->vertex_size = _tnl_install_attrs( &intel->ctx, 
+					       intel->vertex_attrs, 
+					       intel->vertex_attr_count,
+					       intel->ViewportMatrix.m, 0 ); 
+
+      intel->vertex_size >>= 2;
+
+      i915->state.Ctx[I915_CTXREG_LIS2] = s2;
+      i915->state.Ctx[I915_CTXREG_LIS4] = s4;
+
+      assert(intel->vtbl.check_vertex_size( intel, intel->vertex_size ));
+   }
+   /* Refresh constants if stale, then upload if not already on hardware. */
+   if (!p->params_uptodate) 
+      track_params( p );
+
+   if (!p->on_hardware) 
+      i915_upload_program( i915, p );
+}
+
+void i915InitFragProgFuncs( struct dd_function_table *functions )
+{
+   functions->NewProgram = i915NewProgram;                   /* create */
+   functions->DeleteProgram = i915DeleteProgram;             /* destroy */
+   functions->BindProgram = i915BindProgram;                 /* make current */
+   functions->ProgramStringNotify = i915ProgramStringNotify; /* new text */
+   functions->IsProgramNative = i915IsProgramNative;         /* translatable? */
+}
diff --git a/i915/i915_metaops.c b/i915/i915_metaops.c
new file mode 100644
index 0000000..1be7ac4
--- /dev/null
+++ b/i915/i915_metaops.c
@@ -0,0 +1,711 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "enums.h"
+#include "mtypes.h"
+#include "macros.h"
+#include "utils.h"
+
+#include "intel_screen.h"
+#include "intel_batchbuffer.h"
+#include "intel_ioctl.h"
+#include "intel_rotate.h"
+
+#include "i915_context.h"
+#include "i915_reg.h"
+
+/* State groups kept active for the meta state; a large amount of
+ * other state doesn't need to be uploaded. */
+#define ACTIVE (I915_UPLOAD_INVARIENT | I915_UPLOAD_PROGRAM |	\
+		I915_UPLOAD_STIPPLE | I915_UPLOAD_CTX |		\
+		I915_UPLOAD_BUFFERS | I915_UPLOAD_TEX(0))
+
+/* Make i915->STATE the current state.  The ACTIVE bits are cleared in
+ * the `emitted` mask of both the outgoing and the incoming state.  The
+ * pointer argument is parenthesised so any expression may be passed. */
+#define SET_STATE( i915, STATE )			\
+do {						\
+   (i915)->current->emitted &= ~ACTIVE;		\
+   (i915)->current = &(i915)->STATE;		\
+   (i915)->current->emitted &= ~ACTIVE;		\
+} while (0)
+
+/* Meta operations: the 3D engine is temporarily decoupled from the
+ * current GL state and driven from i915->meta instead.  This helper
+ * resets meta to a copy of the initial state with nothing emitted.
+ */
+static void set_initial_state( i915ContextPtr i915 )
+{
+   memcpy( &i915->meta, &i915->initial, sizeof(i915->meta) );
+   i915->meta.emitted = 0;
+   i915->meta.active = ACTIVE;
+}
+
+/* Turn off depth and stencil testing and writes in the meta state. */
+static void set_no_depth_stencil_write( i915ContextPtr i915 )
+{
+   const GLuint stencil_bits = S5_STENCIL_TEST_ENABLE | S5_STENCIL_WRITE_ENABLE;
+   const GLuint depth_bits = S6_DEPTH_TEST_ENABLE | S6_DEPTH_WRITE_ENABLE;
+
+   /* Equivalent of ctx->Driver.Enable( ctx, GL_STENCIL_TEST, GL_FALSE ) */
+   i915->meta.Ctx[I915_CTXREG_LIS5] &= ~stencil_bits;
+
+   /* Equivalent of ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_FALSE ) */
+   i915->meta.Ctx[I915_CTXREG_LIS6] &= ~depth_bits;
+
+   /* Context registers changed: clear their "emitted" flag. */
+   i915->meta.emitted &= ~I915_UPLOAD_CTX;
+}
+
+/* Configure the meta stencil state so that drawing always replaces
+ * the stencil value with the reference value s_clear, subject to the
+ * write mask s_mask.  Depth test and depth write are switched off.
+ * LIS5 and STATE4 are accumulated in locals and stored once.
+ */
+static void set_stencil_replace( i915ContextPtr i915,
+                                 GLuint s_mask,
+                                 GLuint s_clear)
+{
+   const GLuint replace = STENCILOP_REPLACE;
+   const GLuint always = COMPAREFUNC_ALWAYS;
+   GLuint lis5 = i915->meta.Ctx[I915_CTXREG_LIS5];
+   GLuint state4 = i915->meta.Ctx[I915_CTXREG_STATE4];
+
+   /* ctx->Driver.Enable( ctx, GL_STENCIL_TEST, GL_TRUE )
+    */
+   lis5 |= (S5_STENCIL_TEST_ENABLE | S5_STENCIL_WRITE_ENABLE);
+
+   /* ctx->Driver.StencilMask( ctx, s_mask )
+    */
+   state4 &= ~MODE4_ENABLE_STENCIL_WRITE_MASK;
+   state4 |= (ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(s_mask));
+
+   /* ctx->Driver.StencilOp( ctx, GL_REPLACE, GL_REPLACE, GL_REPLACE )
+    */
+   lis5 &= ~(S5_STENCIL_FAIL_MASK |
+             S5_STENCIL_PASS_Z_FAIL_MASK |
+             S5_STENCIL_PASS_Z_PASS_MASK);
+   lis5 |= ((replace << S5_STENCIL_FAIL_SHIFT) |
+            (replace << S5_STENCIL_PASS_Z_FAIL_SHIFT) |
+            (replace << S5_STENCIL_PASS_Z_PASS_SHIFT));
+
+   /* ctx->Driver.StencilFunc( ctx, GL_ALWAYS, s_ref, ~0 )
+    */
+   state4 &= ~MODE4_ENABLE_STENCIL_TEST_MASK;
+   state4 |= (ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff));
+
+   lis5 &= ~(S5_STENCIL_REF_MASK | S5_STENCIL_TEST_FUNC_MASK);
+   lis5 |= ((s_clear << S5_STENCIL_REF_SHIFT) |
+            (always << S5_STENCIL_TEST_FUNC_SHIFT));
+
+   /* Store the accumulated register values.
+    */
+   i915->meta.Ctx[I915_CTXREG_LIS5] = lis5;
+   i915->meta.Ctx[I915_CTXREG_STATE4] = state4;
+
+   /* ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_FALSE )
+    */
+   i915->meta.Ctx[I915_CTXREG_LIS6] &= ~(S6_DEPTH_TEST_ENABLE |
+                                         S6_DEPTH_WRITE_ENABLE);
+
+   /* Context registers changed: clear their "emitted" flag.
+    */
+   i915->meta.emitted &= ~I915_UPLOAD_CTX;
+}
+
+/* Colour write control for meta ops: follow the GL state or disable all. */
+static void set_color_mask( i915ContextPtr i915, GLboolean state )
+{
+   const GLuint write_disable = (S5_WRITEDISABLE_RED |
+                                 S5_WRITEDISABLE_GREEN |
+                                 S5_WRITEDISABLE_BLUE |
+                                 S5_WRITEDISABLE_ALPHA);
+   GLuint lis5 = i915->meta.Ctx[I915_CTXREG_LIS5];
+
+   if (!state) {
+      lis5 |= write_disable;
+   }
+   else {
+      /* Copy colormask state from the "regular" hw context. */
+      lis5 = (lis5 & ~write_disable) |
+             (i915->state.Ctx[I915_CTXREG_LIS5] & write_disable);
+   }
+   i915->meta.Ctx[I915_CTXREG_LIS5] = lis5;
+   i915->meta.emitted &= ~I915_UPLOAD_CTX;
+}
+
+
+
+/* Pack a register type and number into a single register id. */
+#define REG( type, nr ) (((type)<<5)|(nr))
+/* Shorthands for each register type. */
+#define REG_R(x)       REG(REG_TYPE_R, x)
+#define REG_T(x)       REG(REG_TYPE_T, x)
+#define REG_CONST(x)   REG(REG_TYPE_CONST, x)
+#define REG_S(x)       REG(REG_TYPE_S, x)
+#define REG_OC         REG(REG_TYPE_OC, 0)
+#define REG_OD	       REG(REG_TYPE_OD, 0)
+#define REG_U(x)       REG(REG_TYPE_U, x)
+/* Named registers of type T. */
+#define REG_T_DIFFUSE  REG(REG_TYPE_T, T_DIFFUSE)
+#define REG_T_SPECULAR REG(REG_TYPE_T, T_SPECULAR)
+#define REG_T_FOG_W    REG(REG_TYPE_T, T_FOG_W)
+#define REG_T_TEX(x)   REG(REG_TYPE_T, x)
+
+/* Shift a packed register id to the matching *_NR_SHIFT position. */
+#define A0_DEST_REG( reg ) ( (reg) << A0_DEST_NR_SHIFT )
+#define A0_SRC0_REG( reg ) ( (reg) << A0_SRC0_NR_SHIFT )
+#define A1_SRC1_REG( reg ) ( (reg) << A1_SRC1_NR_SHIFT )
+#define A1_SRC2_REG( reg ) ( (reg) << A1_SRC2_NR_SHIFT )
+#define A2_SRC2_REG( reg ) ( (reg) << A2_SRC2_NR_SHIFT )
+#define D0_DECL_REG( reg ) ( (reg) << D0_NR_SHIFT )
+#define T0_DEST_REG( reg ) ( (reg) << T0_DEST_NR_SHIFT )
+/* Sampler number, shifted by T0_SAMPLER_NR_SHIFT. */
+#define T0_SAMPLER( unit )     ((unit)<<T0_SAMPLER_NR_SHIFT)
+/* Address register type and number, shifted to their T1 positions. */
+#define T1_ADDRESS_REG( type, nr ) (((type)<<T1_ADDRESS_REG_TYPE_SHIFT)| \
+				    ((nr)<<T1_ADDRESS_REG_NR_SHIFT))
+
+/* Identity channel selects; SRC1 is split: XY uses A1 shifts, ZW uses A2 shifts. */
+#define A1_SRC0_XYZW ((SRC_X << A1_SRC0_CHANNEL_X_SHIFT) |	\
+		      (SRC_Y << A1_SRC0_CHANNEL_Y_SHIFT) |	\
+		      (SRC_Z << A1_SRC0_CHANNEL_Z_SHIFT) |	\
+		      (SRC_W << A1_SRC0_CHANNEL_W_SHIFT))
+
+#define A1_SRC1_XY   ((SRC_X << A1_SRC1_CHANNEL_X_SHIFT) |	\
+		      (SRC_Y << A1_SRC1_CHANNEL_Y_SHIFT))
+
+#define A2_SRC1_ZW   ((SRC_Z << A2_SRC1_CHANNEL_Z_SHIFT) |	\
+		      (SRC_W << A2_SRC1_CHANNEL_W_SHIFT))
+
+#define A2_SRC2_XYZW ((SRC_X << A2_SRC2_CHANNEL_X_SHIFT) |	\
+		      (SRC_Y << A2_SRC2_CHANNEL_Y_SHIFT) |	\
+		      (SRC_Z << A2_SRC2_CHANNEL_Z_SHIFT) |	\
+		      (SRC_W << A2_SRC2_CHANNEL_W_SHIFT))
+
+
+
+
+/* Install a minimal pixel shader in the meta state that passes the
+ * incoming diffuse colour straight to the colour output; no texture
+ * is sampled. */
+static void set_no_texture( i915ContextPtr i915 )
+{
+   static const GLuint prog[] = {
+      _3DSTATE_PIXEL_SHADER_PROGRAM,
+
+      /* dcl t_diffuse (all channels)
+       */
+      (D0_DCL | D0_DECL_REG( REG_T_DIFFUSE ) | D0_CHANNEL_ALL),
+      D1_MBZ,
+      D2_MBZ,
+
+      /* mov output-color, t_diffuse
+       */
+      (A0_MOV | A0_DEST_REG( REG_OC ) | A0_DEST_CHANNEL_ALL |
+       A0_SRC0_REG( REG_T_DIFFUSE )),
+      (A1_SRC0_XYZW),
+      0,
+   };
+   const GLuint dwords = sizeof(prog) / sizeof(prog[0]);
+
+   memcpy( i915->meta.Program, prog, sizeof(prog) );
+   i915->meta.ProgramSize = dwords;
+
+   /* OR (dword count - 2) into the leading command dword. */
+   i915->meta.Program[0] |= dwords - 2;
+   i915->meta.emitted &= ~I915_UPLOAD_PROGRAM;
+}
+
+/* Install a pixel shader in the meta state whose output colour is
+ * the result of sampling 2D sampler 0 at texture coordinate 0, with
+ * no further blending.
+ */
+static void enable_texture_blend_replace( i915ContextPtr i915 )
+{
+   static const GLuint prog[] = {
+      _3DSTATE_PIXEL_SHADER_PROGRAM,
+
+      /* dcl sampler 0 as a 2D sampler
+       */
+      (D0_DCL | D0_DECL_REG( REG_S(0) ) |
+       D0_SAMPLE_TYPE_2D | D0_CHANNEL_NONE),
+      D1_MBZ,
+      D2_MBZ,
+
+      /* dcl texcoord 0 (all channels)
+       */
+      (D0_DCL | D0_DECL_REG( REG_T_TEX(0) ) | D0_CHANNEL_ALL),
+      D1_MBZ,
+      D2_MBZ,
+
+      /* output-color = texld(sample0, texcoord0)
+       */
+      (T0_TEXLD | T0_DEST_REG( REG_OC ) | T0_SAMPLER( 0 )),
+      T1_ADDRESS_REG(REG_TYPE_T, 0),
+      T2_MBZ
+   };
+   const GLuint dwords = sizeof(prog) / sizeof(prog[0]);
+
+   memcpy( i915->meta.Program, prog, sizeof(prog) );
+   i915->meta.ProgramSize = dwords;
+
+   /* OR (dword count - 2) into the leading command dword. */
+   i915->meta.Program[0] |= dwords - 2;
+   i915->meta.emitted &= ~I915_UPLOAD_PROGRAM;
+}
+
+
+
+
+
+/* Describe an arbitrary piece of memory (the front or back buffer
+ * included) as a single-level rectangular texture on meta unit 0.
+ */
+static void set_tex_rect_source( i915ContextPtr i915,
+                                 GLuint offset,
+                                 GLuint width,
+                                 GLuint height,
+                                 GLuint pitch, /* in bytes! */
+                                 GLuint textureFormat )
+{
+   const GLuint unit = 0;
+   const GLint numLevels = 1;
+   GLuint *tex = i915->meta.Tex[0];
+   GLuint ms3, ms4;
+
+#if 0
+   printf("TexRect source offset 0x%x  pitch %d\n", offset, pitch);
+#endif
+   tex[I915_TEXREG_MS2] = offset;   /* source offset */
+
+   /* Width and height are programmed minus one. */
+   ms3 = textureFormat | MS3_USE_FENCE_REGS;
+   ms3 |= (height - 1) << MS3_HEIGHT_SHIFT;
+   ms3 |= (width - 1) << MS3_WIDTH_SHIFT;
+   tex[I915_TEXREG_MS3] = ms3;
+
+   /* Pitch goes in as dwords minus one; the LOD term is 0 for one level. */
+   ms4 = ((pitch / 4) - 1) << MS4_PITCH_SHIFT;
+   ms4 |= ((numLevels - 1) * 4) << MS4_MAX_LOD_SHIFT;
+   tex[I915_TEXREG_MS4] = ms4;
+
+   /* Nearest filtering without mipmaps, wrapping on all coordinates. */
+   tex[I915_TEXREG_SS2] = ((FILTER_NEAREST << SS2_MAG_FILTER_SHIFT) |
+                           (FILTER_NEAREST << SS2_MIN_FILTER_SHIFT) |
+                           (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT));
+   tex[I915_TEXREG_SS3] = ((unit << SS3_TEXTUREMAP_INDEX_SHIFT) |
+                           (TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT) |
+                           (TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT) |
+                           (TEXCOORDMODE_WRAP << SS3_TCZ_ADDR_MODE_SHIFT));
+   tex[I915_TEXREG_SS4] = 0;
+   i915->meta.emitted &= ~I915_UPLOAD_TEX(0);
+}
+
+
+/* Aim the meta colour buffer at `region` (e.g. the front or back
+ * buffer). */
+static void set_draw_region( i915ContextPtr i915, const intelRegion *region )
+{
+#if 0
+   printf("Rotate into region: offset 0x%x  pitch %d\n",
+          region->offset, region->pitch);
+#endif
+   i915->meta.Buffer[I915_DESTREG_CBUFADDR2] = region->offset;
+   i915->meta.Buffer[I915_DESTREG_CBUFADDR1] = (BUF_3D_USE_FENCE |
+      BUF_3D_PITCH(region->pitch) | BUF_3D_ID_COLOR_BACK);
+   i915->meta.emitted &= ~I915_UPLOAD_BUFFERS;
+}
+
+
+#if 0
+/* Set up an arbitrary draw format, useful for targeting texture or agp
+ * memory.  Compiled out by the enclosing #if 0.
+ */
+static void set_draw_format( i915ContextPtr i915,
+			     GLuint format,
+			     GLuint depth_format)
+{
+   i915->meta.Buffer[I915_DESTREG_DV1] = (DSTORG_HORT_BIAS(0x8) | /* .5 */
+					DSTORG_VERT_BIAS(0x8) | /* .5 */
+					format |
+					LOD_PRECLAMP_OGL |
+					TEX_DEFAULT_COLOR_OGL |
+					depth_format);
+
+   i915->meta.emitted &= ~I915_UPLOAD_BUFFERS;
+/*    fprintf(stderr, "%s: DV1: %x\n",  */
+/* 	   __FUNCTION__, i915->meta.Buffer[I915_DESTREG_DV1]); */
+}
+#endif
+
+static void set_vertex_format( i915ContextPtr i915 )
+{
+   GLuint lis2, lis4;
+   int unit;
+
+   /* Texcoord set 0 is 2D; sets 1..7 are marked not present. */
+   lis2 = S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D);
+   for (unit = 1; unit < 8; unit++) {
+      lis2 |= S2_TEXCOORD_FMT(unit, TEXCOORDFMT_NOT_PRESENT);
+   }
+   i915->meta.Ctx[I915_CTXREG_LIS2] = lis2;
+
+   /* Vertex format bits: XYZW position, colour and spec/fog.  The
+    * previous S4_VFMT_MASK bits are cleared first.
+    */
+   lis4 = i915->meta.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
+   lis4 |= (S4_VFMT_XYZW | S4_VFMT_COLOR | S4_VFMT_SPEC_FOG);
+   i915->meta.Ctx[I915_CTXREG_LIS4] = lis4;
+
+   /* Context registers changed: clear their "emitted" flag. */
+   i915->meta.emitted &= ~I915_UPLOAD_CTX;
+}
+
+/* Emit one four-vertex triangle fan into the inline primitive buffer.
+ *
+ * x0,y0 / x1,y1   opposite corners of the rectangle
+ * red..alpha      colour stored in every vertex; specular is zero
+ * s0,t0 / s1,t1   texture coordinates for the matching corners
+ *
+ * z and w are fixed at 1.0.  Eight dwords are copied per vertex,
+ * covering the fields assigned below (layout defined by intelVertex).
+ * NOTE(review): the "Locked" emit helper suggests the caller holds the lock.
+ */
+static void draw_quad(i915ContextPtr i915,
+                      GLfloat x0, GLfloat x1,
+                      GLfloat y0, GLfloat y1,
+                      GLubyte red, GLubyte green,
+                      GLubyte blue, GLubyte alpha,
+                      GLfloat s0, GLfloat s1,
+                      GLfloat t0, GLfloat t1 )
+{
+   GLuint vertex_size = 8;
+   GLuint *vb = intelEmitInlinePrimitiveLocked( &i915->intel,
+                                                PRIM3D_TRIFAN,
+                                                4 * vertex_size,
+                                                vertex_size );
+   intelVertex tmp;
+   int corner, dw;
+
+   if (0)
+      fprintf(stderr, "%s: %f,%f-%f,%f 0x%x%x%x%x %f,%f-%f,%f\n",
+              __FUNCTION__,
+              x0,y0,x1,y1,red,green,blue,alpha,s0,t0,s1,t1);
+
+   /* Fields that are identical in all four vertices. */
+   tmp.v.z = 1.0;
+   tmp.v.w = 1.0;
+   tmp.v.color.red = red;
+   tmp.v.color.green = green;
+   tmp.v.color.blue = blue;
+   tmp.v.color.alpha = alpha;
+   tmp.v.specular.red = 0;
+   tmp.v.specular.green = 0;
+   tmp.v.specular.blue = 0;
+   tmp.v.specular.alpha = 0;
+
+   /* Fan order: left-bottom, right-bottom, right-top, left-top.
+    * Corners 1 and 2 lie on the x1/s1 side, corners 2 and 3 on the
+    * y1/t1 side.
+    */
+   for (corner = 0; corner < 4; corner++) {
+      const GLboolean at_x1 = (corner == 1 || corner == 2);
+      const GLboolean at_y1 = (corner >= 2);
+
+      tmp.v.x = at_x1 ? x1 : x0;
+      tmp.v.u0 = at_x1 ? s1 : s0;
+      tmp.v.y = at_y1 ? y1 : y0;
+      tmp.v.v0 = at_y1 ? t1 : t0;
+
+      /* Copy the vertex out one dword at a time. */
+      for (dw = 0 ; dw < vertex_size ; dw++)
+         vb[dw] = tmp.ui[dw];
+
+      vb += vertex_size;
+   }
+}
+
+/* Emit a triangle fan of numVerts vertices from verts[] and texcoords[]. */
+static void draw_poly(i915ContextPtr i915,
+                      GLubyte red, GLubyte green, GLubyte blue, GLubyte alpha,
+                      GLuint numVerts,
+                      /*const*/ GLfloat verts[][2],
+                      /*const*/ GLfloat texcoords[][2])
+{
+   GLuint vertex_size = 8;
+   GLuint *vb = intelEmitInlinePrimitiveLocked( &i915->intel,
+                                                PRIM3D_TRIFAN,
+                                                numVerts * vertex_size,
+                                                vertex_size );
+   intelVertex tmp;
+   int dw, n;
+
+   /* Fields shared by every vertex. */
+   tmp.v.z = 1.0;
+   tmp.v.w = 1.0;
+   tmp.v.color.red = red;
+   tmp.v.color.green = green;
+   tmp.v.color.blue = blue;
+   tmp.v.color.alpha = alpha;
+   tmp.v.specular.red = 0;
+   tmp.v.specular.green = 0;
+   tmp.v.specular.blue = 0;
+   tmp.v.specular.alpha = 0;
+
+   for (n = 0; n < numVerts; n++, vb += vertex_size) {
+      const GLfloat *pos = verts[n];
+      const GLfloat *tc = texcoords[n];
+
+      tmp.v.x = pos[0];
+      tmp.v.y = pos[1];
+      tmp.v.u0 = tc[0];
+      tmp.v.v0 = tc[1];
+      for (dw = 0 ; dw < vertex_size ; dw++)
+         vb[dw] = tmp.ui[dw];
+   }
+}
+
+/* Clear the selected buffers by drawing quads with the meta state. */
+void 
+i915ClearWithTris(intelContextPtr intel, GLbitfield buffers,
+                  GLboolean allFoo,
+                  GLint cxFoo, GLint cyFoo, GLint cwFoo, GLint chFoo)
+{
+   i915ContextPtr i915 = I915_CONTEXT( intel );
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;
+   intelScreenPrivate *screen = intel->intelScreen;
+   GLint clip_x, clip_y, clip_w, clip_h;
+   int left, bottom, right, top;
+   GLboolean whole;
+
+   /* Enter the meta state: initial values, no texture, meta vertex layout. */
+   SET_STATE( i915, meta );
+   set_initial_state( i915 );
+   set_no_texture( i915 );
+   set_vertex_format( i915 );
+
+   LOCK_HARDWARE(intel);
+
+   /* Clear bounds are read after locking; allFoo/c*Foo are not used. */
+   clip_x = intel->ctx.DrawBuffer->_Xmin;
+   clip_y = intel->ctx.DrawBuffer->_Ymin;
+   clip_w = intel->ctx.DrawBuffer->_Xmax - clip_x;
+   clip_h = intel->ctx.DrawBuffer->_Ymax - clip_y;
+   whole = (clip_w == intel->ctx.DrawBuffer->Width &&
+            clip_h == intel->ctx.DrawBuffer->Height);
+
+   if (whole) {
+      left = 0;
+      bottom = 0;
+      right = left + dPriv->w;
+      top = bottom + dPriv->h;
+   }
+   else {
+      left = clip_x;
+      bottom = clip_y;
+      right = left + clip_w;
+      top = bottom + clip_h;
+   }
+
+   /* Don't do any clipping to screen - these are window coordinates.
+    * The active cliprects will be applied as for any other geometry.
+    */
+
+   if (buffers & BUFFER_BIT_FRONT_LEFT) {
+      set_no_depth_stencil_write( i915 );
+      set_color_mask( i915, GL_TRUE );
+      set_draw_region( i915, &screen->front );
+
+      draw_quad( i915, left, right, bottom, top,
+                 intel->clear_red, intel->clear_green,
+                 intel->clear_blue, intel->clear_alpha,
+                 0, 0, 0, 0 );
+   }
+
+   if (buffers & BUFFER_BIT_BACK_LEFT) {
+      set_no_depth_stencil_write( i915 );
+      set_color_mask( i915, GL_TRUE );
+      set_draw_region( i915, &screen->back );
+
+      draw_quad( i915, left, right, bottom, top,
+                 intel->clear_red, intel->clear_green,
+                 intel->clear_blue, intel->clear_alpha,
+                 0, 0, 0, 0 );
+   }
+
+   if (buffers & BUFFER_BIT_STENCIL) {
+      set_stencil_replace( i915,
+                           intel->ctx.Stencil.WriteMask[0],
+                           intel->ctx.Stencil.Clear );
+      set_color_mask( i915, GL_FALSE );
+      set_draw_region( i915, &screen->front ); /* could be either? */
+      draw_quad( i915, left, right, bottom, top, 0, 0, 0, 0, 0, 0, 0, 0 );
+   }
+
+   UNLOCK_HARDWARE(intel);
+
+   SET_STATE( i915, state );
+}
+
+
+/**
+ * Copy the window contents named by dPriv to the rotated (or reflected)
+ * color buffer.
+ * srcBuf is BUFFER_BIT_FRONT_LEFT or BUFFER_BIT_BACK_LEFT to indicate the source.
+ */
+void
+i915RotateWindow(intelContextPtr intel, __DRIdrawablePrivate *dPriv,
+                 GLuint srcBuf)
+{
+   i915ContextPtr i915 = I915_CONTEXT( intel );
+   intelScreenPrivate *screen = intel->intelScreen;
+   const GLuint cpp = screen->cpp;
+   drm_clip_rect_t fullRect;
+   GLuint textureFormat, srcOffset, srcPitch;
+   const drm_clip_rect_t *clipRects;
+   int numClipRects;
+   int i;
+   /* draw origin and cliprects in effect on entry; put back at 'done' */
+   int xOrig, yOrig;
+   int origNumClipRects;
+   drm_clip_rect_t *origRects;
+
+   /*
+    * set up hardware state
+    */
+   intelFlush( &intel->ctx );
+
+   SET_STATE( i915, meta ); 
+   set_initial_state( i915 ); 
+   set_no_texture( i915 ); 
+   set_vertex_format( i915 ); 
+   set_no_depth_stencil_write( i915 );
+   set_color_mask( i915, GL_TRUE );
+   /* the lock is held until just before the regular state is re-selected */
+   LOCK_HARDWARE(intel);
+
+   /* save current drawing origin and cliprects (restored at end) */
+   xOrig = intel->drawX;
+   yOrig = intel->drawY;
+   origNumClipRects = intel->numClipRects;
+   origRects = intel->pClipRects;
+   /* no cliprects: nothing is drawn, but 'done' still restores and unlocks */
+   if (!intel->numClipRects)
+      goto done;
+
+   /*
+    * set drawing origin, cliprects for full-screen access to rotated screen
+    */
+   fullRect.x1 = 0;
+   fullRect.y1 = 0;
+   fullRect.x2 = screen->rotatedWidth;
+   fullRect.y2 = screen->rotatedHeight;
+   intel->drawX = 0;
+   intel->drawY = 0;
+   intel->numClipRects = 1;
+   intel->pClipRects = &fullRect;   /* stack local; replaced again at 'done' */
+   /* destination is the rotated buffer */
+   set_draw_region( i915, &screen->rotated );
+   /* source texel format follows the screen's bytes per pixel */
+   if (cpp == 4)
+      textureFormat = MAPSURF_32BIT | MT_32BIT_ARGB8888;
+   else
+      textureFormat = MAPSURF_16BIT | MT_16BIT_RGB565;
+   /* source surface and cliprect list: front buffer if requested, otherwise back */
+   if (srcBuf == BUFFER_BIT_FRONT_LEFT) {
+      srcPitch = screen->front.pitch;   /* in bytes */
+      srcOffset = screen->front.offset; /* bytes */
+      clipRects = dPriv->pClipRects;
+      numClipRects = dPriv->numClipRects;
+   }
+   else {
+      srcPitch = screen->back.pitch;   /* in bytes */
+      srcOffset = screen->back.offset; /* bytes */
+      clipRects = dPriv->pBackClipRects;
+      numClipRects = dPriv->numBackClipRects;
+   }
+
+   /* set the whole screen up as a texture to avoid alignment issues */
+   set_tex_rect_source(i915,
+                       srcOffset,
+                       screen->width,
+		       screen->height,
+                       srcPitch,
+                       textureFormat);
+
+   enable_texture_blend_replace(i915);
+
+   /*
+    * loop over the source window's cliprects
+    */
+   for (i = 0; i < numClipRects; i++) {
+      int srcX0 = clipRects[i].x1;
+      int srcY0 = clipRects[i].y1;
+      int srcX1 = clipRects[i].x2;
+      int srcY1 = clipRects[i].y2;
+      GLfloat verts[4][2], tex[4][2];
+      int j;
+
+      /* build vertices for four corners of clip rect */
+      verts[0][0] = srcX0;  verts[0][1] = srcY0;
+      verts[1][0] = srcX1;  verts[1][1] = srcY0;
+      verts[2][0] = srcX1;  verts[2][1] = srcY1;
+      verts[3][0] = srcX0;  verts[3][1] = srcY1;
+
+      /* .. and texcoords */
+      tex[0][0] = srcX0;  tex[0][1] = srcY0;
+      tex[1][0] = srcX1;  tex[1][1] = srcY0;
+      tex[2][0] = srcX1;  tex[2][1] = srcY1;
+      tex[3][0] = srcX0;  tex[3][1] = srcY1;
+
+      /* transform only the vertex positions; texcoords stay in source coords */
+      for (j = 0; j < 4; j++) {
+         matrix23TransformCoordf(&screen->rotMatrix,
+                                 &verts[j][0], &verts[j][1]);
+      }
+
+      /* draw polygon to map source image to dest region */
+      draw_poly(i915, 255, 255, 255, 255, 4, verts, tex);
+
+   } /* cliprect loop */
+   /* flush while the lock is still held (presumably submits the queued polygons) */
+   intelFlushBatchLocked( intel, GL_FALSE, GL_FALSE, GL_FALSE );
+
+ done:
+   /* restore original drawing origin and cliprects */
+   intel->drawX = xOrig;
+   intel->drawY = yOrig;
+   intel->numClipRects = origNumClipRects;
+   intel->pClipRects = origRects;
+
+   UNLOCK_HARDWARE(intel);
+
+   SET_STATE( i915, state );
+}
+
diff --git a/i915/i915_program.c b/i915/i915_program.c
new file mode 100644
index 0000000..6849112
--- /dev/null
+++ b/i915/i915_program.c
@@ -0,0 +1,499 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include <strings.h>
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+
+#include "tnl/t_context.h"
+#include "intel_batchbuffer.h"
+
+#include "i915_reg.h"
+#include "i915_context.h"
+#include "i915_program.h"
+
+
+#define A0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)
+#define D0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)
+#define T0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)
+#define A0_SRC0( reg ) (((reg)&UREG_MASK)>>UREG_A0_SRC0_SHIFT_LEFT)
+#define A1_SRC0( reg ) (((reg)&UREG_MASK)<<UREG_A1_SRC0_SHIFT_RIGHT)
+#define A1_SRC1( reg ) (((reg)&UREG_MASK)>>UREG_A1_SRC1_SHIFT_LEFT)
+#define A2_SRC1( reg ) (((reg)&UREG_MASK)<<UREG_A2_SRC1_SHIFT_RIGHT)
+#define A2_SRC2( reg ) (((reg)&UREG_MASK)>>UREG_A2_SRC2_SHIFT_LEFT)
+/* D0_DEST and T0_DEST reuse the A0 shift (presumably the same field position; confirm in i915_reg.h). */
+/* These are special, and don't have swizzle/negate bits.
+ */
+#define T0_SAMPLER( reg )     (GET_UREG_NR(reg)<<T0_SAMPLER_NR_SHIFT)
+#define T1_ADDRESS_REG( reg ) ((GET_UREG_NR(reg)<<T1_ADDRESS_REG_NR_SHIFT) | \
+			       (GET_UREG_TYPE(reg)<<T1_ADDRESS_REG_TYPE_SHIFT))
+
+/* NOTE: the *_SHIFT_LEFT amounts below are applied with >>, the *_SHIFT_RIGHT ones with <<. */
+/* Macros for translating UREG's into the various register fields used
+ * by the I915 programmable unit.
+ */
+#define UREG_A0_DEST_SHIFT_LEFT  (UREG_TYPE_SHIFT - A0_DEST_TYPE_SHIFT)
+#define UREG_A0_SRC0_SHIFT_LEFT  (UREG_TYPE_SHIFT - A0_SRC0_TYPE_SHIFT)
+#define UREG_A1_SRC0_SHIFT_RIGHT (A1_SRC0_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT)
+#define UREG_A1_SRC1_SHIFT_LEFT  (UREG_TYPE_SHIFT - A1_SRC1_TYPE_SHIFT)
+#define UREG_A2_SRC1_SHIFT_RIGHT (A2_SRC1_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT)
+#define UREG_A2_SRC2_SHIFT_LEFT  (UREG_TYPE_SHIFT - A2_SRC2_TYPE_SHIFT)
+/* UREG_MASK keeps bits 8..31 (drops the low byte); UREG_TYPE_NR_MASK keeps only type and number. */
+#define UREG_MASK         0xffffff00
+#define UREG_TYPE_NR_MASK ((REG_TYPE_MASK << UREG_TYPE_SHIFT) | \
+  			   (REG_NR_MASK << UREG_NR_SHIFT))
+
+/* constant_flags value marking a slot taken by i915_emit_param4fv(); bits 0..3 are per-channel use bits. */
+#define I915_CONSTFLAG_PARAM 0x1f
+
+GLuint i915_get_temp( struct i915_fragment_program *p )
+{
+   int bit = ffs( ~p->temp_flag );   /* lowest clear bit, 1-based; 0 if none */
+   if (!bit) {
+      fprintf(stderr, "%s: out of temporaries\n", __FILE__);
+      exit(1);
+   }
+   /* Mark it used; shift as unsigned since bit may be 32 (1<<31 overflows int). */
+   p->temp_flag |= 1u<<(bit-1);
+   return UREG(REG_TYPE_R, (bit-1));
+}
+
+
+GLuint i915_get_utemp( struct i915_fragment_program *p )
+{
+   /* ffs() is 1-based and returns 0 when utemp_flag has no clear bit left */
+   const int slot = ffs( ~p->utemp_flag ) - 1;
+   if (slot < 0) {
+      fprintf(stderr, "%s: out of temporaries\n", __FILE__);
+      exit(1);
+   }
+   p->utemp_flag |= 1<<slot;
+   return UREG(REG_TYPE_U, slot);
+}
+
+void i915_release_utemps( struct i915_fragment_program *p )
+{
+   p->utemp_flag = ~0x7;   /* bits 0..2 clear again: three utemps can be handed out */
+}
+
+
+GLuint i915_emit_decl( struct i915_fragment_program *p,
+		      GLuint type, GLuint nr, GLuint d0_flags )
+{
+   const GLuint reg = UREG(type, nr);
+   /* T and S registers get one declaration each, tracked per register
+    * number in decl_t / decl_s; any other type emits nothing. */
+   switch (type) {
+   case REG_TYPE_T:
+      if (p->decl_t & (1<<nr)) return reg;
+      p->decl_t |= (1<<nr);
+      break;
+   case REG_TYPE_S:
+      if (p->decl_s & (1<<nr)) return reg;
+      p->decl_s |= (1<<nr);
+      break;
+   default:
+      return reg;
+   }
+
+   /* A declaration is three dwords: D0 (opcode, dest, flags), D1, D2. */
+   p->decl[0] = (D0_DCL | D0_DEST( reg ) | d0_flags);
+   p->decl[1] = D1_MBZ;
+   p->decl[2] = D2_MBZ;
+   p->decl += 3;
+   p->nr_decl_insn++;
+   return reg;
+}
+
+GLuint i915_emit_arith( struct i915_fragment_program *p,
+		       GLuint op,
+		       GLuint dest,
+		       GLuint mask,
+		       GLuint saturate,
+		       GLuint src0,
+		       GLuint src1,
+		       GLuint src2 )
+{
+   GLuint c[3];
+   GLuint nr_const = 0;
+   /* Emits one three-dword ALU instruction at p->csr and returns dest. */
+   assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);
+   /* Drop swizzle/negate bits from dest.  This used to live inside assert(),
+    * so NDEBUG builds skipped it and returned the caller's raw value. */
+   dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest));
+   assert(dest);
+   /* c[] collects the indices (0..2) of the sources that are constants */
+   if (GET_UREG_TYPE(src0) == REG_TYPE_CONST) c[nr_const++] = 0;
+   if (GET_UREG_TYPE(src1) == REG_TYPE_CONST) c[nr_const++] = 1;
+   if (GET_UREG_TYPE(src2) == REG_TYPE_CONST) c[nr_const++] = 2;
+   /* At most one distinct constant register is left in the sources:
+    * every other constant source is first MOVed into a utemp by a
+    * recursive call.  Currently shouldn't be possible to run out of
+    * utemps, but keep an eye on this.
+    */
+   if (nr_const > 1) {
+      GLuint s[3], first, i, old_utemp_flag;
+      s[0] = src0;
+      s[1] = src1;
+      s[2] = src2;
+      old_utemp_flag = p->utemp_flag;
+      first = GET_UREG_NR(s[c[0]]);
+      for (i = 1 ; i < nr_const ; i++) {
+	 if (GET_UREG_NR(s[c[i]]) != first) {
+	    GLuint tmp = i915_get_utemp(p);
+
+	    i915_emit_arith( p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0,
+			    s[c[i]], 0, 0 );
+	    s[c[i]] = tmp;
+	 }
+      }
+
+      src0 = s[0];
+      src1 = s[1];
+      src2 = s[2];
+      p->utemp_flag = old_utemp_flag; /* restore */
+   }
+
+   *(p->csr++) = (op | 
+		  A0_DEST( dest ) |
+		  mask | 
+		  saturate |
+		  A0_SRC0( src0 ));
+   *(p->csr++) = (A1_SRC0( src0 ) |
+		  A1_SRC1( src1 ));
+   *(p->csr++) = (A2_SRC1( src1 ) |
+		  A2_SRC2( src2 ));
+
+   p->nr_alu_insn++;
+   return dest;
+}
+
+GLuint i915_emit_texld( struct i915_fragment_program *p,
+			GLuint dest,
+			GLuint destmask,
+			GLuint sampler,
+			GLuint coord,
+			GLuint op )
+{
+   if (coord != UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord))) {
+      /* No real way to work around this in the general case - need to
+       * allocate and declare a new temporary register (a utemp won't
+       * do).  Will fallback for now.
+       */
+      i915_program_error(p, "Can't (yet) swizzle TEX arguments");
+      return 0;
+   }
+   /* Emits a three-dword texture instruction at p->csr and returns dest. */
+   /* A partial write mask is handled by sampling all channels into a utemp
+    * and MOVing the selected ones to dest (the utemp is not released here). */
+   if (destmask != A0_DEST_CHANNEL_ALL) {
+      GLuint tmp = i915_get_utemp(p);
+      i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, op );
+      i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 );
+      return dest;
+   }
+   else {
+      assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);
+      /* Drop swizzle/negate bits.  Kept outside assert() so NDEBUG builds
+       * return the same canonical dest as debug builds. */
+      dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest));
+      assert(dest);
+      /* a coordinate not read straight from a T register counts as an indirection */
+      if (GET_UREG_TYPE(coord) != REG_TYPE_T) {
+	 p->nr_tex_indirect++;
+      }
+      *(p->csr++) = (op | 
+		     T0_DEST( dest ) |
+		     T0_SAMPLER( sampler ));
+      *(p->csr++) = T1_ADDRESS_REG( coord );
+      *(p->csr++) = T2_MBZ;
+      p->nr_tex_insn++;
+      return dest;
+   }
+}
+
+
+GLuint i915_emit_const1f( struct i915_fragment_program *p, GLfloat c0 )
+{
+   GLint slot, chan;
+   /* 0 and 1 need no storage: they come from the ZERO / ONE swizzle selectors. */
+   if (c0 == 0.0) return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO);
+   if (c0 == 1.0) return swizzle(UREG(REG_TYPE_R, 0), ONE,  ONE,  ONE,  ONE );
+   /* Otherwise take the first channel that is unused or already holds c0. */
+   for (slot = 0; slot < I915_MAX_CONSTANT; slot++) {
+      if (p->constant_flags[slot] != I915_CONSTFLAG_PARAM) {
+         for (chan = 0; chan < 4; chan++) {
+            const int chan_bit = 1<<chan;
+            if ((p->constant_flags[slot] & chan_bit) &&
+                p->constant[slot][chan] != c0)
+               continue;   /* channel already holds a different value */
+            p->constant[slot][chan] = c0;
+            p->constant_flags[slot] |= chan_bit;
+            if (p->nr_constants < slot+1) p->nr_constants = slot+1;
+            return swizzle(UREG(REG_TYPE_CONST, slot),chan,ZERO,ZERO,ONE);
+         }
+      }
+   }
+   fprintf(stderr, "%s: out of constants\n", __FUNCTION__);
+   p->error = 1;
+   return 0;
+}
+
+GLuint i915_emit_const2f( struct i915_fragment_program *p, 
+			 GLfloat c0, GLfloat c1 )
+{
+   GLint slot, chan;
+   /* If either value is exactly 0 or 1, store only the other one and
+    * synthesize the trivial value with a ZERO / ONE swizzle selector. */
+   if (c0 == 0.0) return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W);
+   if (c0 == 1.0) return swizzle(i915_emit_const1f(p, c1), ONE,  X, Z, W); 
+   if (c1 == 0.0) return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W);
+   if (c1 == 1.0) return swizzle(i915_emit_const1f(p, c0), X, ONE,  Z, W);
+
+   /* Otherwise claim the first two adjacent unused channels of a slot. */
+   for (slot = 0; slot < I915_MAX_CONSTANT; slot++) {
+      if (p->constant_flags[slot] != 0xf &&
+          p->constant_flags[slot] != I915_CONSTFLAG_PARAM) {
+         for (chan = 0; chan < 3; chan++) {
+            if (p->constant_flags[slot] & (3<<chan))
+               continue;   /* at least one channel of the pair is taken */
+            p->constant[slot][chan] = c0;
+            p->constant[slot][chan+1] = c1;
+            p->constant_flags[slot] |= 3<<chan;
+            if (p->nr_constants < slot+1) p->nr_constants = slot+1;
+            return swizzle(UREG(REG_TYPE_CONST, slot),chan,chan+1,ZERO,ONE);
+         }
+      }
+   }
+   fprintf(stderr, "%s: out of constants\n", __FUNCTION__);
+   p->error = 1;
+   return 0;
+}
+
+
+
+GLuint i915_emit_const4f( struct i915_fragment_program *p, 
+			 GLfloat c0, GLfloat c1, GLfloat c2, GLfloat c3 )
+{
+   GLint slot;
+   /* Reuse a fully populated slot holding the same four values, or fill
+    * the first completely unused slot met on the way. */
+   for (slot = 0; slot < I915_MAX_CONSTANT; slot++) {
+      if (p->constant_flags[slot] == 0) {
+         p->constant[slot][0] = c0;
+         p->constant[slot][1] = c1;
+         p->constant[slot][2] = c2;
+         p->constant[slot][3] = c3;
+         p->constant_flags[slot] = 0xf;
+         if (p->nr_constants < slot+1) p->nr_constants = slot+1;
+         return UREG(REG_TYPE_CONST, slot);
+      }
+      if (p->constant_flags[slot] == 0xf &&
+          p->constant[slot][0] == c0 &&
+          p->constant[slot][1] == c1 &&
+          p->constant[slot][2] == c2 &&
+          p->constant[slot][3] == c3)
+         return UREG(REG_TYPE_CONST, slot);
+   }
+
+   fprintf(stderr, "%s: out of constants\n", __FUNCTION__);
+   p->error = 1;
+   return 0;
+}
+
+
+GLuint i915_emit_const4fv( struct i915_fragment_program *p, const GLfloat *c )
+{
+   return i915_emit_const4f( p, c[0], c[1], c[2], c[3] );   /* reads exactly c[0..3] */
+}
+
+
+GLuint i915_emit_param4fv( struct i915_fragment_program *p, 
+			  const GLfloat *values )
+{
+   GLint slot, i;
+
+   /* Parameters are identified by the address of their value array, so
+    * a pointer that is already registered returns its existing slot. */
+   for (i = 0; i < p->nr_params; i++)
+      if (p->param[i].values == values)
+         return UREG(REG_TYPE_CONST, p->param[i].reg);
+
+   /* Otherwise bind the pointer to the first completely unused slot. */
+   for (slot = 0; slot < I915_MAX_CONSTANT; slot++) {
+      if (p->constant_flags[slot] != 0)
+         continue;
+
+      p->constant_flags[slot] = I915_CONSTFLAG_PARAM;
+      p->param[p->nr_params].values = values;
+      p->param[p->nr_params].reg = slot;
+      p->nr_params++;
+      p->params_uptodate = 0;   /* presumably makes a later pass refresh the values */
+      if (p->nr_constants < slot+1) p->nr_constants = slot+1;
+      return UREG(REG_TYPE_CONST, slot);
+   }
+
+   fprintf(stderr, "%s: out of constants\n", __FUNCTION__);
+   p->error = 1;
+   return 0;
+}
+
+
+
+
+void i915_program_error( struct i915_fragment_program *p, const char *msg )
+{
+   p->error = 1;   /* sticky: read by i915_fini_program() and i915_upload_program() */
+   _mesa_problem(NULL, "i915_program_error: %s", msg);
+}
+
+void i915_init_program( i915ContextPtr i915, struct i915_fragment_program *p )
+{
+   GLcontext *ctx = &i915->intel.ctx;
+   TNLcontext *tnl = TNL_CONTEXT( ctx );
+   /* status flags: nothing translated, uploaded or failed yet */
+   p->translated = 0;
+   p->params_uptodate = 0;
+   p->on_hardware = 0;
+   p->error = 0;
+   /* instruction counters, checked against the limits in i915_fini_program() */
+   p->nr_tex_indirect = 1;	/* correct? */
+   p->nr_tex_insn = 0;
+   p->nr_alu_insn = 0;
+   p->nr_decl_insn = 0;
+
+   p->ctx = ctx;  
+   memset( p->constant_flags, 0, sizeof(p->constant_flags) );
+   /* rewind the output cursors and clear the allocation bitmasks */
+   p->nr_constants = 0;
+   p->csr = p->program;
+   p->decl = p->declarations;
+   p->decl_s = 0;
+   p->decl_t = 0;
+   p->temp_flag = 0xffff000;   /* NOTE(review): 0x0ffff000 leaves bits 0-11 and 28-31 free; was 0xffff0000 meant? */
+   p->utemp_flag = ~0x7;   /* bits 0..2 free */
+   p->wpos_tex = -1;
+   p->depth_written = 0;
+   p->nr_params = 0;
+
+   p->src_texture = UREG_BAD;
+   p->src_previous = UREG(REG_TYPE_T, T_DIFFUSE);
+   p->last_tex_stage = 0;
+   p->VB = &tnl->vb;
+   /* first declaration word is the packet header; i915_fini_program() ORs the length in */
+   *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;
+}
+
+
+void i915_fini_program( struct i915_fragment_program *p )
+{
+   const GLuint program_size = p->csr - p->program;
+   const GLuint decl_size = p->decl - p->declarations;
+
+   /* Each exceeded limit is reported and marks the program as failed. */
+   if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT)
+      i915_program_error(p, "Exceeded max nr indirect texture lookups");
+   if (p->nr_tex_insn > I915_MAX_TEX_INSN)
+      i915_program_error(p, "Exceeded max TEX instructions");
+   if (p->nr_alu_insn > I915_MAX_ALU_INSN)
+      i915_program_error(p, "Exceeded max ALU instructions");
+   if (p->nr_decl_insn > I915_MAX_DECL_INSN)
+      i915_program_error(p, "Exceeded max DECL instructions");
+
+   /* Publish the native instruction counts: all zero for a failed program. */
+   if (!p->error) {
+      p->FragProg.Base.NumNativeInstructions = (p->nr_alu_insn +
+                                                p->nr_tex_insn +
+                                                p->nr_decl_insn);
+      p->FragProg.Base.NumNativeAluInstructions = p->nr_alu_insn;
+      p->FragProg.Base.NumNativeTexInstructions = p->nr_tex_insn;
+      p->FragProg.Base.NumNativeTexIndirections = p->nr_tex_indirect;
+   }
+   else {
+      p->FragProg.Base.NumNativeInstructions = 0;
+      p->FragProg.Base.NumNativeAluInstructions = 0;
+      p->FragProg.Base.NumNativeTexInstructions = 0;
+      p->FragProg.Base.NumNativeTexIndirections = 0;
+   }
+
+   /* Combined element count minus two is ORed into the first declaration word. */
+   p->declarations[0] |= program_size + decl_size - 2;
+}
+
+void i915_upload_program( i915ContextPtr i915, struct i915_fragment_program *p )
+{
+   GLuint program_size = p->csr - p->program;
+   GLuint decl_size = p->decl - p->declarations;
+   /* Copies the declarations, instructions and constants of p into i915->state. */
+   FALLBACK( &i915->intel, I915_FALLBACK_PROGRAM, p->error );
+   /* Re-upload when the size or ANY word differs.  The declarations are
+    * compared too: they can change while the instructions stay the same.
+    * (Could just go straight to the batchbuffer from here.) */
+   if (i915->state.ProgramSize != (program_size + decl_size) ||
+       memcmp(i915->state.Program, p->declarations,
+	      decl_size*sizeof(int)) != 0 ||
+       memcmp(i915->state.Program + decl_size, p->program, 
+	      program_size*sizeof(int)) != 0) {
+      I915_STATECHANGE( i915, I915_UPLOAD_PROGRAM );
+      memcpy(i915->state.Program, p->declarations, decl_size*sizeof(int));
+      memcpy(i915->state.Program + decl_size, p->program,
+	     program_size*sizeof(int));
+      i915->state.ProgramSize = decl_size + program_size;
+   }
+
+   /* Always seemed to get a failure if I used memcmp() to
+    * shortcircuit this state upload.  Needs further investigation?
+    */
+   if (p->nr_constants) {
+      GLuint nr = p->nr_constants;
+      I915_ACTIVESTATE( i915, I915_UPLOAD_CONSTANTS, 1 );
+      I915_STATECHANGE( i915, I915_UPLOAD_CONSTANTS );
+
+      i915->state.Constant[0] = _3DSTATE_PIXEL_SHADER_CONSTANTS | ((nr) * 4);
+      /* low nr bits set; unsigned so that a shift into bit 31 is well defined */
+      i915->state.Constant[1] = (1u<<(nr-1)) | ((1u<<(nr-1))-1);
+      memcpy(&i915->state.Constant[2], p->constant, 4*sizeof(int)*(nr));
+      i915->state.ConstantSize = 2 + (nr) * 4;
+
+      if (0) {
+	 GLuint i;
+	 for (i = 0; i < nr; i++) {
+	    fprintf(stderr, "const[%d]: %f %f %f %f\n", i, 
+		    p->constant[i][0],
+		    p->constant[i][1],
+		    p->constant[i][2],
+		    p->constant[i][3]);
+	 }
+      }
+   }
+   else {
+      I915_ACTIVESTATE( i915, I915_UPLOAD_CONSTANTS, 0 );
+   }  
+   p->on_hardware = 1;
+}
diff --git a/i915/i915_program.h b/i915/i915_program.h
new file mode 100644
index 0000000..8891a17
--- /dev/null
+++ b/i915/i915_program.h
@@ -0,0 +1,163 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#ifndef I915_PROGRAM_H
+#define I915_PROGRAM_H
+
+#include "i915_context.h"
+#include "i915_reg.h"
+
+
+
+/* Bit layout of a ureg: type, register number, then one 4-bit nibble per
+ * channel.  Having zero and one in here makes the definition of swizzle a lot easier.
+ */
+#define UREG_TYPE_SHIFT               29
+#define UREG_NR_SHIFT                 24
+#define UREG_CHANNEL_X_NEGATE_SHIFT   23
+#define UREG_CHANNEL_X_SHIFT          20
+#define UREG_CHANNEL_Y_NEGATE_SHIFT   19
+#define UREG_CHANNEL_Y_SHIFT          16
+#define UREG_CHANNEL_Z_NEGATE_SHIFT   15
+#define UREG_CHANNEL_Z_SHIFT          12
+#define UREG_CHANNEL_W_NEGATE_SHIFT   11
+#define UREG_CHANNEL_W_SHIFT          8
+#define UREG_CHANNEL_ZERO_NEGATE_MBZ  5
+#define UREG_CHANNEL_ZERO_SHIFT       4      
+#define UREG_CHANNEL_ONE_NEGATE_MBZ   1
+#define UREG_CHANNEL_ONE_SHIFT        0      
+
+#define UREG_BAD          0xffffffff /* not a valid ureg */
+/* Short aliases for the SRC_* channel selectors (SRC_* are not defined in this header). */
+#define X    SRC_X
+#define Y    SRC_Y
+#define Z    SRC_Z
+#define W    SRC_W
+#define ZERO SRC_ZERO
+#define ONE  SRC_ONE
+
+/* Construct a ureg: the given type and number with every nibble holding its own selector.
+ */
+#define UREG( type, nr ) (((type)<< UREG_TYPE_SHIFT) |		\
+			  ((nr)  << UREG_NR_SHIFT) |		\
+			  (X     << UREG_CHANNEL_X_SHIFT) |	\
+			  (Y     << UREG_CHANNEL_Y_SHIFT) |	\
+			  (Z     << UREG_CHANNEL_Z_SHIFT) |	\
+			  (W     << UREG_CHANNEL_W_SHIFT) |	\
+			  (ZERO  << UREG_CHANNEL_ZERO_SHIFT) |	\
+			  (ONE   << UREG_CHANNEL_ONE_SHIFT))
+/* GET_CHANNEL_SRC moves nibble 'channel' up into bits 20..23; CHANNEL_SRC moves it down by 4*channel.  Arguments are not parenthesized. */
+#define GET_CHANNEL_SRC( reg, channel ) ((reg<<(channel*4)) & (0xf<<20))
+#define CHANNEL_SRC( src, channel ) (src>>(channel*4))
+
+#define GET_UREG_TYPE(reg) (((reg)>>UREG_TYPE_SHIFT)&REG_TYPE_MASK)
+#define GET_UREG_NR(reg)   (((reg)>>UREG_NR_SHIFT)&REG_NR_MASK)
+
+
+/* Bits 8..23: the four X/Y/Z/W nibbles that swizzle() replaces. */
+#define UREG_XYZW_CHANNEL_MASK 0x00ffff00
+
+/* Return reg with channels re-selected: x,y,z,w each name a source nibble (X..W, ZERO, ONE). */
+static __inline int swizzle( int reg, int x, int y, int z, int w )
+{
+   const unsigned ureg = (unsigned) reg;   /* unsigned: left-shifting the int form is signed-overflow UB */
+   return (int) ((ureg & ~(unsigned) UREG_XYZW_CHANNEL_MASK) |
+		 CHANNEL_SRC( GET_CHANNEL_SRC( ureg, x ), 0 ) |
+		 CHANNEL_SRC( GET_CHANNEL_SRC( ureg, y ), 1 ) |
+		 CHANNEL_SRC( GET_CHANNEL_SRC( ureg, z ), 2 ) |
+		 CHANNEL_SRC( GET_CHANNEL_SRC( ureg, w ), 3 ));
+}
+
+/* XOR-toggle the negate bit of each channel whose flag (x,y,z,w) has bit 0 set. */
+static __inline int negate( int reg, int x, int y, int z, int w )
+{
+   int flip = (x&1) << UREG_CHANNEL_X_NEGATE_SHIFT;
+   flip |= (y&1) << UREG_CHANNEL_Y_NEGATE_SHIFT;
+   flip |= (z&1) << UREG_CHANNEL_Z_NEGATE_SHIFT;
+   flip |= (w&1) << UREG_CHANNEL_W_NEGATE_SHIFT;
+   return reg ^ flip;
+}
+
+
+extern GLuint i915_get_temp( struct i915_fragment_program *p );   /* R register; exit(1) when none is free */
+extern GLuint i915_get_utemp( struct i915_fragment_program *p );   /* U register; exit(1) when none is free */
+extern void i915_release_utemps( struct i915_fragment_program *p );
+
+/* Emit a texture load; a partial destmask goes through a utemp plus a MOV. */
+extern GLuint i915_emit_texld( struct i915_fragment_program *p,
+			      GLuint dest,
+			      GLuint destmask,
+			      GLuint sampler,
+			      GLuint coord,
+			      GLuint op );
+/* Emit one ALU instruction; extra constant sources are first MOVed into utemps. */
+extern GLuint i915_emit_arith( struct i915_fragment_program *p,
+			      GLuint op,
+			      GLuint dest,
+			      GLuint mask,
+			      GLuint saturate,
+			      GLuint src0,
+			      GLuint src1,
+			      GLuint src2 );
+/* Declare a T or S register once; other register types emit nothing. */
+extern GLuint i915_emit_decl( struct i915_fragment_program *p,
+			     GLuint type, GLuint nr, GLuint d0_flags );
+
+/* Constant allocators: return a ureg for the value(s), or 0 with p->error set when full. */
+extern GLuint i915_emit_const1f( struct i915_fragment_program *p, 
+				GLfloat c0 );
+
+extern GLuint i915_emit_const2f( struct i915_fragment_program *p, 
+				GLfloat c0, GLfloat c1 );
+
+extern GLuint i915_emit_const4fv( struct i915_fragment_program *p,
+				 const GLfloat *c );
+
+extern GLuint i915_emit_const4f( struct i915_fragment_program *p, 
+				GLfloat c0, GLfloat c1, 
+				GLfloat c2, GLfloat c3 );
+
+/* Bind a value array, identified by its address, to a constant slot. */
+extern GLuint i915_emit_param4fv( struct i915_fragment_program *p, 
+				 const GLfloat *values );
+/* Report msg through _mesa_problem() and set p->error. */
+extern void i915_program_error( struct i915_fragment_program *p,
+                                const char *msg );
+/* Reset p for a new translation and emit the packet header word. */
+extern void i915_init_program( i915ContextPtr i915,
+			      struct i915_fragment_program *p );
+/* Copy program and constants into i915->state, flagging what changed. */
+extern void i915_upload_program( i915ContextPtr i915, 
+				struct i915_fragment_program *p );
+/* Check instruction limits, publish native counts, OR the length into declarations[0]. */
+extern void i915_fini_program( struct i915_fragment_program *p );
+
+
+
+
+#endif
diff --git a/i915/i915_reg.h b/i915/i915_reg.h
new file mode 100644
index 0000000..694cd4c
--- /dev/null
+++ b/i915/i915_reg.h
@@ -0,0 +1,835 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#ifndef _I915_REG_H_
+#define _I915_REG_H_
+
+
+#include "intel_reg.h"
+
+#define I915_SET_FIELD( var, mask, value ) ((var) &= ~(mask), (var) |= (value)) /* NB: evaluates var twice */
+
+#define CMD_3D (0x3<<29)
+
+#define PRIM3D_INLINE		(CMD_3D | (0x1f<<24))
+#define PRIM3D_TRILIST		(0x0<<18)
+#define PRIM3D_TRISTRIP 	(0x1<<18)
+#define PRIM3D_TRISTRIP_RVRSE	(0x2<<18)
+#define PRIM3D_TRIFAN		(0x3<<18)
+#define PRIM3D_POLY		(0x4<<18)
+#define PRIM3D_LINELIST 	(0x5<<18)
+#define PRIM3D_LINESTRIP	(0x6<<18)
+#define PRIM3D_RECTLIST 	(0x7<<18)
+#define PRIM3D_POINTLIST	(0x8<<18)
+#define PRIM3D_DIB		(0x9<<18)
+#define PRIM3D_CLEAR_RECT	(0xa<<18)
+#define PRIM3D_ZONE_INIT	(0xd<<18)
+#define PRIM3D_MASK		(0x1f<<18)
+
+/* p137 */
+#define _3DSTATE_AA_CMD			(CMD_3D | (0x06<<24))
+#define AA_LINE_ECAAR_WIDTH_ENABLE	(1<<16)
+#define AA_LINE_ECAAR_WIDTH_0_5 	0
+#define AA_LINE_ECAAR_WIDTH_1_0		(1<<14)
+#define AA_LINE_ECAAR_WIDTH_2_0 	(2<<14)
+#define AA_LINE_ECAAR_WIDTH_4_0 	(3<<14)
+#define AA_LINE_REGION_WIDTH_ENABLE	(1<<8)
+#define AA_LINE_REGION_WIDTH_0_5	0
+#define AA_LINE_REGION_WIDTH_1_0	(1<<6)
+#define AA_LINE_REGION_WIDTH_2_0	(2<<6)
+#define AA_LINE_REGION_WIDTH_4_0	(3<<6)
+
+/* 3DSTATE_BACKFACE_STENCIL_OPS, p138*/
+#define _3DSTATE_BACKFACE_STENCIL_OPS    (CMD_3D | (0x8<<24))
+#define BFO_ENABLE_STENCIL_REF          (1<<23)
+#define BFO_STENCIL_REF_SHIFT           15
+#define BFO_STENCIL_REF_MASK            (0xff<<15)
+#define BFO_ENABLE_STENCIL_FUNCS        (1<<14)
+#define BFO_STENCIL_TEST_SHIFT          11
+#define BFO_STENCIL_TEST_MASK           (0x7<<11)
+#define BFO_STENCIL_FAIL_SHIFT          8
+#define BFO_STENCIL_FAIL_MASK           (0x7<<8)
+#define BFO_STENCIL_PASS_Z_FAIL_SHIFT   5
+#define BFO_STENCIL_PASS_Z_FAIL_MASK    (0x7<<5)
+#define BFO_STENCIL_PASS_Z_PASS_SHIFT   2
+#define BFO_STENCIL_PASS_Z_PASS_MASK    (0x7<<2)
+#define BFO_ENABLE_STENCIL_TWO_SIDE     (1<<1)
+#define BFO_STENCIL_TWO_SIDE            (1<<0)
+
+
+/* 3DSTATE_BACKFACE_STENCIL_MASKS, p140 */
+#define _3DSTATE_BACKFACE_STENCIL_MASKS    (CMD_3D | (0x9<<24))
+#define BFM_ENABLE_STENCIL_TEST_MASK      (1<<17)
+#define BFM_ENABLE_STENCIL_WRITE_MASK     (1<<16)
+#define BFM_STENCIL_TEST_MASK_SHIFT       8
+#define BFM_STENCIL_TEST_MASK_MASK        (0xff<<8)
+#define BFM_STENCIL_WRITE_MASK_SHIFT      0
+#define BFM_STENCIL_WRITE_MASK_MASK       (0xff<<0)
+
+
+
+/* 3DSTATE_BIN_CONTROL p141 */
+
+/* p143 */
+#define _3DSTATE_BUF_INFO_CMD	(CMD_3D | (0x1d<<24) | (0x8e<<16) | 1)
+/* Dword 1 */
+#define BUF_3D_ID_COLOR_BACK	(0x3<<24)
+#define BUF_3D_ID_DEPTH 	(0x7<<24)
+#define BUF_3D_USE_FENCE	(1<<23)
+#define BUF_3D_TILED_SURFACE	(1<<22)
+#define BUF_3D_TILE_WALK_X	0
+#define BUF_3D_TILE_WALK_Y	(1<<21)
+#define BUF_3D_PITCH(x)         (((x)/4)<<2)
+/* Dword 2 */
+#define BUF_3D_ADDR(x)		((x) & ~0x3)
+
+
+/* 3DSTATE_CHROMA_KEY */
+
+/* 3DSTATE_CLEAR_PARAMETERS, p150 */
+
+/* 3DSTATE_CONSTANT_BLEND_COLOR, p153 */
+#define _3DSTATE_CONST_BLEND_COLOR_CMD	(CMD_3D | (0x1d<<24) | (0x88<<16))
+
+
+
+/* 3DSTATE_COORD_SET_BINDINGS, p154 */
+#define _3DSTATE_COORD_SET_BINDINGS      (CMD_3D | (0x16<<24))
+#define CSB_TCB(iunit, eunit)           ((eunit)<<((iunit)*3)) /* eunit goes at bit offset 3*iunit */
+
+/* p156 */
+#define _3DSTATE_DFLT_DIFFUSE_CMD	(CMD_3D | (0x1d<<24) | (0x99<<16))
+
+/* p157 */
+#define _3DSTATE_DFLT_SPEC_CMD		(CMD_3D | (0x1d<<24) | (0x9a<<16))
+
+/* p158 */
+#define _3DSTATE_DFLT_Z_CMD		(CMD_3D | (0x1d<<24) | (0x98<<16))
+
+
+/* 3DSTATE_DEPTH_OFFSET_SCALE, p159 */
+#define _3DSTATE_DEPTH_OFFSET_SCALE       (CMD_3D | (0x1d<<24) | (0x97<<16))
+/* scale in dword 1 */
+
+
+/* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */
+#define _3DSTATE_DEPTH_SUBRECT_DISABLE    (CMD_3D | (0x1c<<24) | (0x11<<19) | 0x2)
+
+/* p161 */
+#define _3DSTATE_DST_BUF_VARS_CMD	(CMD_3D | (0x1d<<24) | (0x85<<16))
+/* Dword 1 */
+#define TEX_DEFAULT_COLOR_OGL           (0<<30)
+#define TEX_DEFAULT_COLOR_D3D           (1<<30)
+#define ZR_EARLY_DEPTH                  (1<<29)
+#define LOD_PRECLAMP_OGL                (1<<28)
+#define LOD_PRECLAMP_D3D                (0<<28)
+#define DITHER_FULL_ALWAYS              (0<<26)
+#define DITHER_FULL_ON_FB_BLEND         (1<<26)
+#define DITHER_CLAMPED_ALWAYS           (2<<26)
+#define LINEAR_GAMMA_BLEND_32BPP        (1<<25)
+#define DEBUG_DISABLE_ENH_DITHER        (1<<24)
+#define DSTORG_HORT_BIAS(x)		((x)<<20)
+#define DSTORG_VERT_BIAS(x)		((x)<<16)
+#define COLOR_4_2_2_CHNL_WRT_ALL	0
+#define COLOR_4_2_2_CHNL_WRT_Y		(1<<12)
+#define COLOR_4_2_2_CHNL_WRT_CR		(2<<12)
+#define COLOR_4_2_2_CHNL_WRT_CB		(3<<12)
+#define COLOR_4_2_2_CHNL_WRT_CRCB	(4<<12)
+#define COLR_BUF_8BIT			0
+#define COLR_BUF_RGB555 		(1<<8)
+#define COLR_BUF_RGB565 		(2<<8)
+#define COLR_BUF_ARGB8888		(3<<8)
+#define DEPTH_FRMT_16_FIXED		0
+#define DEPTH_FRMT_16_FLOAT		(1<<2)
+#define DEPTH_FRMT_24_FIXED_8_OTHER	(2<<2)
+#define VERT_LINE_STRIDE_1		(1<<1)
+#define VERT_LINE_STRIDE_0		(0<<1)
+#define VERT_LINE_STRIDE_OFS_1		1
+#define VERT_LINE_STRIDE_OFS_0		0
+
+/* p166 */
+#define _3DSTATE_DRAW_RECT_CMD		(CMD_3D|(0x1d<<24)|(0x80<<16)|3)
+/* Dword 1 */
+#define DRAW_RECT_DIS_DEPTH_OFS 	(1<<30)
+#define DRAW_DITHER_OFS_X(x)		((x)<<26)
+#define DRAW_DITHER_OFS_Y(x)		((x)<<24)
+/* Dword 2 */
+#define DRAW_YMIN(x)			((x)<<16)
+#define DRAW_XMIN(x)			(x)
+/* Dword 3 */
+#define DRAW_YMAX(x)			((x)<<16)
+#define DRAW_XMAX(x)			(x)
+/* Dword 4 */
+#define DRAW_YORG(x)			((x)<<16)
+#define DRAW_XORG(x)			(x)
+
+
+/* 3DSTATE_FILTER_COEFFICIENTS_4X4, p170 */
+
+/* 3DSTATE_FILTER_COEFFICIENTS_6X5, p172 */
+
+
+/* _3DSTATE_FOG_COLOR, p173 */
+#define _3DSTATE_FOG_COLOR_CMD		(CMD_3D|(0x15<<24))
+#define FOG_COLOR_RED(x)		((x)<<16)
+#define FOG_COLOR_GREEN(x)		((x)<<8)
+#define FOG_COLOR_BLUE(x)		(x)
+
+/* _3DSTATE_FOG_MODE, p174 */
+#define _3DSTATE_FOG_MODE_CMD		(CMD_3D|(0x1d<<24)|(0x89<<16)|2)
+/* Dword 1 */
+#define FMC1_FOGFUNC_MODIFY_ENABLE	(1u<<31) /* unsigned: bit 31 does not fit in int */
+#define FMC1_FOGFUNC_VERTEX		(0<<28)
+#define FMC1_FOGFUNC_PIXEL_EXP		(1<<28)
+#define FMC1_FOGFUNC_PIXEL_EXP2		(2<<28)
+#define FMC1_FOGFUNC_PIXEL_LINEAR	(3<<28)
+#define FMC1_FOGFUNC_MASK		(3<<28)
+#define FMC1_FOGINDEX_MODIFY_ENABLE     (1<<27)
+#define FMC1_FOGINDEX_Z		        (0<<25)
+#define FMC1_FOGINDEX_W   		(1<<25)
+#define FMC1_C1_C2_MODIFY_ENABLE	(1<<24)
+#define FMC1_DENSITY_MODIFY_ENABLE	(1<<23)
+#define FMC1_C1_ONE      	        (1<<13)
+#define FMC1_C1_MASK		        (0xffff<<4)
+/* Dword 2 */
+#define FMC2_C2_ONE		        (1<<16)
+/* Dword 3 */
+#define FMC3_D_ONE      		(1<<16)
+
+
+
+/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p177 */
+#define _3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD	(CMD_3D|(0x0b<<24))
+#define IAB_MODIFY_ENABLE	        (1<<23)
+#define IAB_ENABLE       	        (1<<22)
+#define IAB_MODIFY_FUNC         	(1<<21)
+#define IAB_FUNC_SHIFT          	16
+#define IAB_MODIFY_SRC_FACTOR   	(1<<11)
+#define IAB_SRC_FACTOR_SHIFT		6
+#define IAB_SRC_FACTOR_MASK		(BLENDFACT_MASK<<6)
+#define IAB_MODIFY_DST_FACTOR	        (1<<5)
+#define IAB_DST_FACTOR_SHIFT		0
+#define IAB_DST_FACTOR_MASK		(BLENDFACT_MASK<<0)
+
+
+#define BLENDFUNC_ADD			0x0
+#define BLENDFUNC_SUBTRACT		0x1
+#define BLENDFUNC_REVERSE_SUBTRACT	0x2
+#define BLENDFUNC_MIN			0x3
+#define BLENDFUNC_MAX			0x4
+#define BLENDFUNC_MASK			0x7
+
+/* 3DSTATE_LOAD_INDIRECT, p180 */
+
+#define _3DSTATE_LOAD_INDIRECT	        (CMD_3D|(0x1d<<24)|(0x7<<16))
+#define LI0_STATE_STATIC_INDIRECT       (0x01<<8)
+#define LI0_STATE_DYNAMIC_INDIRECT      (0x02<<8)
+#define LI0_STATE_SAMPLER               (0x04<<8)
+#define LI0_STATE_MAP                   (0x08<<8)
+#define LI0_STATE_PROGRAM               (0x10<<8)
+#define LI0_STATE_CONSTANTS             (0x20<<8)
+
+#define SIS0_BUFFER_ADDRESS(x)          ((x)&~0x3)
+#define SIS0_FORCE_LOAD                 (1<<1)
+#define SIS0_BUFFER_VALID               (1<<0)
+#define SIS1_BUFFER_LENGTH(x)           ((x)&0xff)
+
+#define DIS0_BUFFER_ADDRESS(x)          ((x)&~0x3)
+#define DIS0_BUFFER_RESET               (1<<1)
+#define DIS0_BUFFER_VALID               (1<<0)
+
+#define SSB0_BUFFER_ADDRESS(x)          ((x)&~0x3)
+#define SSB0_FORCE_LOAD                 (1<<1)
+#define SSB0_BUFFER_VALID               (1<<0)
+#define SSB1_BUFFER_LENGTH(x)           ((x)&0xff)
+
+#define MSB0_BUFFER_ADDRESS(x)          ((x)&~0x3)
+#define MSB0_FORCE_LOAD                 (1<<1)
+#define MSB0_BUFFER_VALID               (1<<0)
+#define MSB1_BUFFER_LENGTH(x)           ((x)&0xff)
+
+#define PSP0_BUFFER_ADDRESS(x)          ((x)&~0x3)
+#define PSP0_FORCE_LOAD                 (1<<1)
+#define PSP0_BUFFER_VALID               (1<<0)
+#define PSP1_BUFFER_LENGTH(x)           ((x)&0xff)
+
+#define PSC0_BUFFER_ADDRESS(x)          ((x)&~0x3)
+#define PSC0_FORCE_LOAD                 (1<<1)
+#define PSC0_BUFFER_VALID               (1<<0)
+#define PSC1_BUFFER_LENGTH(x)           ((x)&0xff)
+
+
+
+
+
+/* _3DSTATE_RASTERIZATION_RULES */
+#define _3DSTATE_RASTER_RULES_CMD	(CMD_3D|(0x07<<24))
+#define ENABLE_POINT_RASTER_RULE	(1<<15)
+#define OGL_POINT_RASTER_RULE		(1<<13)
+#define ENABLE_TEXKILL_3D_4D            (1<<10)
+#define TEXKILL_3D                      (0<<9)
+#define TEXKILL_4D                      (1<<9)
+#define ENABLE_LINE_STRIP_PROVOKE_VRTX	(1<<8)
+#define ENABLE_TRI_FAN_PROVOKE_VRTX	(1<<5)
+#define LINE_STRIP_PROVOKE_VRTX(x)	((x)<<6)
+#define TRI_FAN_PROVOKE_VRTX(x) 	((x)<<3)
+
+/* _3DSTATE_SCISSOR_ENABLE, p256 */
+#define _3DSTATE_SCISSOR_ENABLE_CMD	(CMD_3D|(0x1c<<24)|(0x10<<19))
+#define ENABLE_SCISSOR_RECT		((1<<1) | 1)
+#define DISABLE_SCISSOR_RECT		(1<<1)
+
+/* _3DSTATE_SCISSOR_RECTANGLE_0, p257 */
+#define _3DSTATE_SCISSOR_RECT_0_CMD	(CMD_3D|(0x1d<<24)|(0x81<<16)|1)
+/* Dword 1 */
+#define SCISSOR_RECT_0_YMIN(x)		((x)<<16)
+#define SCISSOR_RECT_0_XMIN(x)		(x)
+/* Dword 2 */
+#define SCISSOR_RECT_0_YMAX(x)		((x)<<16)
+#define SCISSOR_RECT_0_XMAX(x)		(x)
+
+/* p189 */
+#define _3DSTATE_LOAD_STATE_IMMEDIATE_1   ((0x3<<29)|(0x1d<<24)|(0x04<<16))
+#define I1_LOAD_S(n)                      (1<<(4+(n))) /* load flag for S<n> sits at bit 4+n */
+
+#define S0_VB_OFFSET_MASK              0xffffffc
+#define S0_AUTO_CACHE_INV_DISABLE      (1<<0)
+
+#define S1_VERTEX_WIDTH_SHIFT          24
+#define S1_VERTEX_WIDTH_MASK           (0x3f<<24)
+#define S1_VERTEX_PITCH_SHIFT          16
+#define S1_VERTEX_PITCH_MASK           (0x3f<<16)
+
+#define TEXCOORDFMT_2D                 0x0
+#define TEXCOORDFMT_3D                 0x1
+#define TEXCOORDFMT_4D                 0x2
+#define TEXCOORDFMT_1D                 0x3
+#define TEXCOORDFMT_2D_16              0x4
+#define TEXCOORDFMT_4D_16              0x5
+#define TEXCOORDFMT_NOT_PRESENT        0xf
+#define S2_TEXCOORD_FMT0_MASK            0xf
+#define S2_TEXCOORD_FMT1_SHIFT           4
+#define S2_TEXCOORD_FMT(unit, type)    ((type)<<((unit)*4)) /* format placed at bit offset 4*unit */
+#define S2_TEXCOORD_NONE               (~0)
+
+/* S3 not interesting */
+
+#define S4_POINT_WIDTH_SHIFT           23
+#define S4_POINT_WIDTH_MASK            (0x1ffu<<23) /* unsigned: field reaches bit 31 */
+#define S4_LINE_WIDTH_SHIFT            19
+#define S4_LINE_WIDTH_ONE              (0x2<<19)
+#define S4_LINE_WIDTH_MASK             (0xf<<19)
+#define S4_FLATSHADE_ALPHA             (1<<18)
+#define S4_FLATSHADE_FOG               (1<<17)
+#define S4_FLATSHADE_SPECULAR          (1<<16)
+#define S4_FLATSHADE_COLOR             (1<<15)
+#define S4_CULLMODE_BOTH	       (0<<13)
+#define S4_CULLMODE_NONE	       (1<<13)
+#define S4_CULLMODE_CW		       (2<<13)
+#define S4_CULLMODE_CCW		       (3<<13)
+#define S4_CULLMODE_MASK	       (3<<13)
+#define S4_VFMT_POINT_WIDTH            (1<<12)
+#define S4_VFMT_SPEC_FOG               (1<<11)
+#define S4_VFMT_COLOR                  (1<<10)
+#define S4_VFMT_DEPTH_OFFSET           (1<<9)
+#define S4_VFMT_XYZ     	       (1<<6)
+#define S4_VFMT_XYZW     	       (2<<6)
+#define S4_VFMT_XY     		       (3<<6)
+#define S4_VFMT_XYW     	       (4<<6)
+#define S4_VFMT_XYZW_MASK              (7<<6)
+#define S4_FORCE_DEFAULT_DIFFUSE       (1<<5)
+#define S4_FORCE_DEFAULT_SPECULAR      (1<<4)
+#define S4_LOCAL_DEPTH_OFFSET_ENABLE   (1<<3)
+#define S4_VFMT_FOG_PARAM              (1<<2)
+#define S4_SPRITE_POINT_ENABLE         (1<<1)
+#define S4_LINE_ANTIALIAS_ENABLE       (1<<0)
+
+#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH   | 	\
+		      S4_VFMT_SPEC_FOG      |	\
+		      S4_VFMT_COLOR         |	\
+		      S4_VFMT_DEPTH_OFFSET  |	\
+		      S4_VFMT_XYZW_MASK     |	\
+		      S4_VFMT_FOG_PARAM)
+
+
+#define S5_WRITEDISABLE_ALPHA          (1u<<31) /* unsigned: bit 31 does not fit in int */
+#define S5_WRITEDISABLE_RED            (1<<30)
+#define S5_WRITEDISABLE_GREEN          (1<<29)
+#define S5_WRITEDISABLE_BLUE           (1<<28)
+#define S5_WRITEDISABLE_MASK           (0xfu<<28) /* the four write-disable bits, 31:28 */
+#define S5_FORCE_DEFAULT_POINT_SIZE    (1<<27)
+#define S5_LAST_PIXEL_ENABLE           (1<<26)
+#define S5_GLOBAL_DEPTH_OFFSET_ENABLE  (1<<25)
+#define S5_FOG_ENABLE                  (1<<24)
+#define S5_STENCIL_REF_SHIFT           16
+#define S5_STENCIL_REF_MASK            (0xff<<16)
+#define S5_STENCIL_TEST_FUNC_SHIFT     13
+#define S5_STENCIL_TEST_FUNC_MASK      (0x7<<13)
+#define S5_STENCIL_FAIL_SHIFT          10
+#define S5_STENCIL_FAIL_MASK           (0x7<<10)
+#define S5_STENCIL_PASS_Z_FAIL_SHIFT   7
+#define S5_STENCIL_PASS_Z_FAIL_MASK    (0x7<<7)
+#define S5_STENCIL_PASS_Z_PASS_SHIFT   4
+#define S5_STENCIL_PASS_Z_PASS_MASK    (0x7<<4)
+#define S5_STENCIL_WRITE_ENABLE        (1<<3)
+#define S5_STENCIL_TEST_ENABLE         (1<<2)
+#define S5_COLOR_DITHER_ENABLE         (1<<1)
+#define S5_LOGICOP_ENABLE              (1<<0)
+
+
+#define S6_ALPHA_TEST_ENABLE           (1u<<31) /* unsigned: bit 31 does not fit in int */
+#define S6_ALPHA_TEST_FUNC_SHIFT       28
+#define S6_ALPHA_TEST_FUNC_MASK        (0x7<<28)
+#define S6_ALPHA_REF_SHIFT             20
+#define S6_ALPHA_REF_MASK              (0xff<<20)
+#define S6_DEPTH_TEST_ENABLE           (1<<19)
+#define S6_DEPTH_TEST_FUNC_SHIFT       16
+#define S6_DEPTH_TEST_FUNC_MASK        (0x7<<16)
+#define S6_CBUF_BLEND_ENABLE           (1<<15)
+#define S6_CBUF_BLEND_FUNC_SHIFT       12
+#define S6_CBUF_BLEND_FUNC_MASK        (0x7<<12)
+#define S6_CBUF_SRC_BLEND_FACT_SHIFT   8
+#define S6_CBUF_SRC_BLEND_FACT_MASK    (0xf<<8)
+#define S6_CBUF_DST_BLEND_FACT_SHIFT   4
+#define S6_CBUF_DST_BLEND_FACT_MASK    (0xf<<4)
+#define S6_DEPTH_WRITE_ENABLE          (1<<3)
+#define S6_COLOR_WRITE_ENABLE          (1<<2)
+#define S6_TRISTRIP_PV_SHIFT           0
+#define S6_TRISTRIP_PV_MASK            (0x3<<0)
+
+#define S7_DEPTH_OFFSET_CONST_MASK     ~0
+
+/* 3DSTATE_MAP_DEINTERLACER_PARAMETERS */
+/* 3DSTATE_MAP_PALETTE_LOAD_32, p206 */
+
+
+/* _3DSTATE_MODES_4, p218 */
+#define _3DSTATE_MODES_4_CMD		(CMD_3D|(0x0d<<24))
+#define ENABLE_LOGIC_OP_FUNC		(1<<23)
+#define LOGIC_OP_FUNC(x)		((x)<<18)
+#define LOGICOP_MASK			(0xf<<18)
+#define MODE4_ENABLE_STENCIL_TEST_MASK	((1<<17)|(0xff00))
+#define ENABLE_STENCIL_TEST_MASK	(1<<17)
+#define STENCIL_TEST_MASK(x)		((x)<<8)
+#define MODE4_ENABLE_STENCIL_WRITE_MASK	((1<<16)|(0x00ff))
+#define ENABLE_STENCIL_WRITE_MASK	(1<<16)
+#define STENCIL_WRITE_MASK(x)		((x)&0xff)
+
+/* _3DSTATE_MODES_5, p220 */
+#define _3DSTATE_MODES_5_CMD		(CMD_3D|(0x0c<<24))
+#define PIPELINE_FLUSH_RENDER_CACHE	(1<<18)
+#define PIPELINE_FLUSH_TEXTURE_CACHE	(1<<16)
+
+
+/* p221 */
+#define _3DSTATE_PIXEL_SHADER_CONSTANTS  (CMD_3D|(0x1d<<24)|(0x6<<16))
+#define PS1_REG(n)                      (1<<(n))
+#define PS2_CONST_X(n)                  (n)
+#define PS3_CONST_Y(n)                  (n)
+#define PS4_CONST_Z(n)                  (n)
+#define PS5_CONST_W(n)                  (n)
+
+/* p222 */
+
+
+#define I915_MAX_TEX_INDIRECT 4
+#define I915_MAX_TEX_INSN     32     
+#define I915_MAX_ALU_INSN     64
+#define I915_MAX_DECL_INSN    27
+#define I915_MAX_TEMPORARY    16
+
+
+/* Each instruction is 3 dwords long, though most don't require all
+ * this space.  Maximum of 123 instructions.  Smaller maxes per insn
+ * type.
+ */
+#define _3DSTATE_PIXEL_SHADER_PROGRAM    (CMD_3D|(0x1d<<24)|(0x5<<16))
+
+#define REG_TYPE_R                 0 /* temporary regs, no need to
+				      * dcl, must be written before
+				      * read -- Preserved between
+				      * phases. 
+				      */
+#define REG_TYPE_T                 1 /* Interpolated values, must be
+				      * dcl'ed before use.
+				      *
+				      * 0..7: texture coord,
+				      * 8: diffuse color,
+				      * 9: specular color,
+				      * 10: fog parameter in w.
+				      */
+#define REG_TYPE_CONST             2 /* Restriction: only one const
+				      * can be referenced per
+				      * instruction, though it may be
+				      * selected for multiple inputs.
+				      * Constants not initialized
+				      * default to zero.
+				      */
+#define REG_TYPE_S                 3 /* sampler */
+#define REG_TYPE_OC                4 /* output color (rgba) */
+#define REG_TYPE_OD                5 /* output depth (w), xyz are
+				      * temporaries.  If not written,
+				      * interpolated depth is used?
+				      */
+#define REG_TYPE_U                 6 /* unpreserved temporaries */
+#define REG_TYPE_MASK              0x7
+#define REG_NR_MASK                0xf
+
+
+/* REG_TYPE_T:
+ */
+#define T_TEX0     0
+#define T_TEX1     1
+#define T_TEX2     2
+#define T_TEX3     3
+#define T_TEX4     4
+#define T_TEX5     5
+#define T_TEX6     6
+#define T_TEX7     7
+#define T_DIFFUSE  8
+#define T_SPECULAR 9
+#define T_FOG_W    10		/* interpolated fog is in W coord */
+
+/* Arithmetic instructions */
+
+/* .replicate_swizzle == selection and replication of a particular
+ * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww 
+ */
+#define A0_NOP    (0x0<<24)		/* no operation */
+#define A0_ADD    (0x1<<24)		/* dst = src0 + src1 */
+#define A0_MOV    (0x2<<24)		/* dst = src0 */
+#define A0_MUL    (0x3<<24)		/* dst = src0 * src1 */
+#define A0_MAD    (0x4<<24)		/* dst = src0 * src1 + src2 */
+#define A0_DP2ADD (0x5<<24)		/* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */
+#define A0_DP3    (0x6<<24)		/* dst.xyzw = src0.xyz dot src1.xyz */
+#define A0_DP4    (0x7<<24)		/* dst.xyzw = src0.xyzw dot src1.xyzw */
+#define A0_FRC    (0x8<<24)		/* dst = src0 - floor(src0) */
+#define A0_RCP    (0x9<<24)		/* dst.xyzw = 1/(src0.replicate_swizzle) */
+#define A0_RSQ    (0xa<<24)		/* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */
+#define A0_EXP    (0xb<<24)		/* dst.xyzw = exp2(src0.replicate_swizzle) */
+#define A0_LOG    (0xc<<24)		/* dst.xyzw = log2(abs(src0.replicate_swizzle)) */
+#define A0_CMP    (0xd<<24)		/* dst = (src0 >= 0.0) ? src1 : src2 */
+#define A0_MIN    (0xe<<24)		/* dst = (src0 < src1) ? src0 : src1 */
+#define A0_MAX    (0xf<<24)		/* dst = (src0 >= src1) ? src0 : src1 */
+#define A0_FLR    (0x10<<24)		/* dst = floor(src0) */
+#define A0_MOD    (0x11<<24)		/* dst = src0 fmod 1.0 */
+#define A0_TRC    (0x12<<24)		/* dst = int(src0) */
+#define A0_SGE    (0x13<<24)		/* dst = src0 >= src1 ? 1.0 : 0.0 */
+#define A0_SLT    (0x14<<24)		/* dst = src0 < src1 ? 1.0 : 0.0 */
+#define A0_DEST_SATURATE                 (1<<22)
+#define A0_DEST_TYPE_SHIFT                19
+/* Allow: R, OC, OD, U */
+#define A0_DEST_NR_SHIFT                 14
+/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
+#define A0_DEST_CHANNEL_X                (1<<10)
+#define A0_DEST_CHANNEL_Y                (2<<10)
+#define A0_DEST_CHANNEL_Z                (4<<10)
+#define A0_DEST_CHANNEL_W                (8<<10)
+#define A0_DEST_CHANNEL_ALL              (0xf<<10)
+#define A0_DEST_CHANNEL_SHIFT            10
+#define A0_SRC0_TYPE_SHIFT               7
+#define A0_SRC0_NR_SHIFT                 2
+
+#define A0_DEST_CHANNEL_XY              (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y)
+#define A0_DEST_CHANNEL_XYZ             (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z)
+
+
+#define SRC_X        0
+#define SRC_Y        1
+#define SRC_Z        2
+#define SRC_W        3
+#define SRC_ZERO     4
+#define SRC_ONE      5
+
+#define A1_SRC0_CHANNEL_X_NEGATE         (1u<<31) /* unsigned: bit 31 does not fit in int */
+#define A1_SRC0_CHANNEL_X_SHIFT          28
+#define A1_SRC0_CHANNEL_Y_NEGATE         (1<<27)
+#define A1_SRC0_CHANNEL_Y_SHIFT          24
+#define A1_SRC0_CHANNEL_Z_NEGATE         (1<<23)
+#define A1_SRC0_CHANNEL_Z_SHIFT          20
+#define A1_SRC0_CHANNEL_W_NEGATE         (1<<19)
+#define A1_SRC0_CHANNEL_W_SHIFT          16
+#define A1_SRC1_TYPE_SHIFT               13
+#define A1_SRC1_NR_SHIFT                 8
+#define A1_SRC1_CHANNEL_X_NEGATE         (1<<7)
+#define A1_SRC1_CHANNEL_X_SHIFT          4
+#define A1_SRC1_CHANNEL_Y_NEGATE         (1<<3)
+#define A1_SRC1_CHANNEL_Y_SHIFT          0
+
+#define A2_SRC1_CHANNEL_Z_NEGATE         (1u<<31) /* unsigned: bit 31 does not fit in int */
+#define A2_SRC1_CHANNEL_Z_SHIFT          28
+#define A2_SRC1_CHANNEL_W_NEGATE         (1<<27)
+#define A2_SRC1_CHANNEL_W_SHIFT          24
+#define A2_SRC2_TYPE_SHIFT               21
+#define A2_SRC2_NR_SHIFT                 16
+#define A2_SRC2_CHANNEL_X_NEGATE         (1<<15)
+#define A2_SRC2_CHANNEL_X_SHIFT          12
+#define A2_SRC2_CHANNEL_Y_NEGATE         (1<<11)
+#define A2_SRC2_CHANNEL_Y_SHIFT          8
+#define A2_SRC2_CHANNEL_Z_NEGATE         (1<<7)
+#define A2_SRC2_CHANNEL_Z_SHIFT          4
+#define A2_SRC2_CHANNEL_W_NEGATE         (1<<3)
+#define A2_SRC2_CHANNEL_W_SHIFT          0
+
+
+
+/* Texture instructions */
+#define T0_TEXLD     (0x15<<24)	/* Sample texture using predeclared
+				 * sampler and address, and output
+				 * filtered texel data to destination
+				 * register */
+#define T0_TEXLDP    (0x16<<24)	/* Same as texld but performs a
+				 * perspective divide of the texture
+				 * coordinate .xyz values by .w before
+				 * sampling. */
+#define T0_TEXLDB    (0x17<<24)	/* Same as texld but biases the
+				 * computed LOD by w.  Only S4.6 two's
+				 * comp is used.  This implies that a
+				 * float to fixed conversion is
+				 * done. */
+#define T0_TEXKILL   (0x18<<24)	/* Does not perform a sampling
+				 * operation.  Simply kills the pixel
+				 * if any channel of the address
+				 * register is < 0.0. */
+#define T0_DEST_TYPE_SHIFT                19
+/* Allow: R, OC, OD, U */
+/* Note: U (unpreserved) regs do not retain their values between
+ * phases (cannot be used for feedback) 
+ *
+ * Note: oC and OD registers can only be used as the destination of a
+ * texture instruction once per phase (this is an implementation
+ * restriction). 
+ */
+#define T0_DEST_NR_SHIFT                 14
+/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
+#define T0_SAMPLER_NR_SHIFT              0 /* This field ignored for TEXKILL */
+#define T0_SAMPLER_NR_MASK               (0xf<<0)
+
+#define T1_ADDRESS_REG_TYPE_SHIFT        24 /* Reg to use as texture coord */
+/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */
+#define T1_ADDRESS_REG_NR_SHIFT          17
+#define T2_MBZ                           0
+
+/* Declaration instructions */
+#define D0_DCL       (0x19<<24)	/* Declare a t (interpolated attrib)
+				 * register or an s (sampler)
+				 * register. */
+#define D0_SAMPLE_TYPE_SHIFT              22
+#define D0_SAMPLE_TYPE_2D                 (0x0<<22)
+#define D0_SAMPLE_TYPE_CUBE               (0x1<<22)
+#define D0_SAMPLE_TYPE_VOLUME             (0x2<<22)
+#define D0_SAMPLE_TYPE_MASK               (0x3<<22)
+
+#define D0_TYPE_SHIFT                19
+/* Allow: T, S */
+#define D0_NR_SHIFT                  14
+/* Allow T: 0..10, S: 0..15 */
+#define D0_CHANNEL_X                (1<<10)
+#define D0_CHANNEL_Y                (2<<10)
+#define D0_CHANNEL_Z                (4<<10)
+#define D0_CHANNEL_W                (8<<10)
+#define D0_CHANNEL_ALL              (0xf<<10)
+#define D0_CHANNEL_NONE             (0<<10)
+
+#define D0_CHANNEL_XY               (D0_CHANNEL_X|D0_CHANNEL_Y)
+#define D0_CHANNEL_XYZ              (D0_CHANNEL_XY|D0_CHANNEL_Z)
+
+/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse
+ * or specular declarations. 
+ *
+ * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw) 
+ *
+ * Must be zero for S (sampler) dcls
+ */
+#define D1_MBZ                          0
+#define D2_MBZ                          0
+
+
+
+/* p207 */
+#define _3DSTATE_MAP_STATE               (CMD_3D|(0x1d<<24)|(0x0<<16))
+
+#define MS1_MAPMASK_SHIFT               0
+#define MS1_MAPMASK_MASK                (0x8fff<<0) /* NOTE(review): 0x8fff leaves bits 14:12 clear -- possibly a typo for 0xffff; confirm against the PRM */
+
+#define MS2_UNTRUSTED_SURFACE           (1u<<31) /* unsigned: bit 31 does not fit in int */
+#define MS2_ADDRESS_MASK                0xfffffffc
+#define MS2_VERTICAL_LINE_STRIDE        (1<<1)
+#define MS2_VERTICAL_OFFSET             (1<<1) /* NOTE(review): same bit as MS2_VERTICAL_LINE_STRIDE -- confirm against the PRM */
+
+#define MS3_HEIGHT_SHIFT              21
+#define MS3_WIDTH_SHIFT               10
+#define MS3_PALETTE_SELECT            (1<<9)
+#define MS3_MAPSURF_FORMAT_SHIFT      7
+#define MS3_MAPSURF_FORMAT_MASK       (0x7<<7)
+#define    MAPSURF_8BIT		 	   (1<<7)
+#define    MAPSURF_16BIT		   (2<<7)
+#define    MAPSURF_32BIT		   (3<<7)
+#define    MAPSURF_422			   (5<<7)
+#define    MAPSURF_COMPRESSED		   (6<<7)
+#define    MAPSURF_4BIT_INDEXED		   (7<<7)
+#define MS3_MT_FORMAT_MASK         (0x7 << 3)
+#define MS3_MT_FORMAT_SHIFT        3
+#define    MT_4BIT_IDX_ARGB8888	           (7<<3) /* SURFACE_4BIT_INDEXED */
+#define    MT_8BIT_I8		           (0<<3) /* SURFACE_8BIT */
+#define    MT_8BIT_L8		           (1<<3)
+#define    MT_8BIT_A8		           (4<<3)
+#define    MT_8BIT_MONO8	           (5<<3)
+#define    MT_16BIT_RGB565 		   (0<<3) /* SURFACE_16BIT */
+#define    MT_16BIT_ARGB1555		   (1<<3)
+#define    MT_16BIT_ARGB4444		   (2<<3)
+#define    MT_16BIT_AY88		   (3<<3)
+#define    MT_16BIT_88DVDU	           (5<<3)
+#define    MT_16BIT_BUMP_655LDVDU	   (6<<3)
+#define    MT_16BIT_I16	                   (7<<3)
+#define    MT_16BIT_L16	                   (8<<3)
+#define    MT_16BIT_A16	                   (9<<3)
+#define    MT_32BIT_ARGB8888		   (0<<3) /* SURFACE_32BIT */
+#define    MT_32BIT_ABGR8888		   (1<<3)
+#define    MT_32BIT_XRGB8888		   (2<<3)
+#define    MT_32BIT_XBGR8888		   (3<<3)
+#define    MT_32BIT_QWVU8888		   (4<<3)
+#define    MT_32BIT_AXVU8888		   (5<<3)
+#define    MT_32BIT_LXVU8888	           (6<<3)
+#define    MT_32BIT_XLVU8888	           (7<<3)
+#define    MT_32BIT_ARGB2101010	           (8<<3)
+#define    MT_32BIT_ABGR2101010	           (9<<3)
+#define    MT_32BIT_AWVU2101010	           (0xA<<3)
+#define    MT_32BIT_GR1616	           (0xB<<3)
+#define    MT_32BIT_VU1616	           (0xC<<3)
+#define    MT_32BIT_xI824	           (0xD<<3)
+#define    MT_32BIT_xA824	           (0xE<<3)
+#define    MT_32BIT_xL824	           (0xF<<3)
+#define    MT_422_YCRCB_SWAPY	           (0<<3) /* SURFACE_422 */
+#define    MT_422_YCRCB_NORMAL	           (1<<3)
+#define    MT_422_YCRCB_SWAPUV	           (2<<3)
+#define    MT_422_YCRCB_SWAPUVY	           (3<<3)
+#define    MT_COMPRESS_DXT1		   (0<<3) /* SURFACE_COMPRESSED */
+#define    MT_COMPRESS_DXT2_3	           (1<<3)
+#define    MT_COMPRESS_DXT4_5	           (2<<3)
+#define    MT_COMPRESS_FXT1		   (3<<3)
+#define    MT_COMPRESS_DXT1_RGB		   (4<<3)
+#define MS3_USE_FENCE_REGS              (1<<2)
+#define MS3_TILED_SURFACE             (1<<1)
+#define MS3_TILE_WALK                 (1<<0)
+
+#define MS4_PITCH_SHIFT                 21
+#define MS4_CUBE_FACE_ENA_NEGX          (1<<20)
+#define MS4_CUBE_FACE_ENA_POSX          (1<<19)
+#define MS4_CUBE_FACE_ENA_NEGY          (1<<18)
+#define MS4_CUBE_FACE_ENA_POSY          (1<<17)
+#define MS4_CUBE_FACE_ENA_NEGZ          (1<<16)
+#define MS4_CUBE_FACE_ENA_POSZ          (1<<15)
+#define MS4_CUBE_FACE_ENA_MASK          (0x3f<<15)
+#define MS4_MAX_LOD_SHIFT		9
+#define MS4_MAX_LOD_MASK		(0x3f<<9)
+#define MS4_MIP_LAYOUT_LEGACY           (0<<8)
+#define MS4_MIP_LAYOUT_BELOW_LPT        (0<<8)
+#define MS4_MIP_LAYOUT_RIGHT_LPT        (1<<8)
+#define MS4_VOLUME_DEPTH_SHIFT          0    
+#define MS4_VOLUME_DEPTH_MASK           (0xff<<0)
+
+/* p244 */
+#define _3DSTATE_SAMPLER_STATE         (CMD_3D|(0x1d<<24)|(0x1<<16))
+
+#define SS1_MAPMASK_SHIFT               0
+#define SS1_MAPMASK_MASK                (0x8fff<<0) /* NOTE(review): 0x8fff leaves bits 14:12 clear -- possibly a typo for 0xffff; confirm against the PRM */
+
+#define SS2_REVERSE_GAMMA_ENABLE        (1u<<31) /* unsigned: bit 31 does not fit in int */
+#define SS2_PACKED_TO_PLANAR_ENABLE     (1<<30)
+#define SS2_COLORSPACE_CONVERSION       (1<<29)
+#define SS2_CHROMAKEY_SHIFT             27
+#define SS2_BASE_MIP_LEVEL_SHIFT        22
+#define SS2_BASE_MIP_LEVEL_MASK         (0x1f<<22)
+#define SS2_MIP_FILTER_SHIFT            20
+#define SS2_MIP_FILTER_MASK             (0x3<<20)
+#define   MIPFILTER_NONE       	0
+#define   MIPFILTER_NEAREST	1
+#define   MIPFILTER_LINEAR	3
+#define SS2_MAG_FILTER_SHIFT          17
+#define SS2_MAG_FILTER_MASK           (0x7<<17)
+#define   FILTER_NEAREST	0
+#define   FILTER_LINEAR		1
+#define   FILTER_ANISOTROPIC	2
+#define   FILTER_4X4_1    	3
+#define   FILTER_4X4_2    	4
+#define   FILTER_4X4_FLAT 	5
+#define   FILTER_6X5_MONO   	6 /* XXX - check */
+#define SS2_MIN_FILTER_SHIFT          14
+#define SS2_MIN_FILTER_MASK           (0x7<<14)
+#define SS2_LOD_BIAS_SHIFT            5
+#define SS2_LOD_BIAS_ONE              (0x10<<5)
+#define SS2_LOD_BIAS_MASK             (0x1ff<<5)
+/* Shadow requires:
+ *  MT_32BIT_x{I,L,A}824 or MT_16BIT_{I,L,A}16 texture format
+ *  FILTER_4X4_x  MIN and MAG filters
+ */
+#define SS2_SHADOW_ENABLE             (1<<4)
+#define SS2_MAX_ANISO_MASK            (1<<3)
+#define SS2_MAX_ANISO_2               (0<<3)
+#define SS2_MAX_ANISO_4               (1<<3)
+#define SS2_SHADOW_FUNC_SHIFT         0
+#define SS2_SHADOW_FUNC_MASK          (0x7<<0)
+/* SS2_SHADOW_FUNC values: see COMPAREFUNC_* */
+
+#define SS3_MIN_LOD_SHIFT            24
+#define SS3_MIN_LOD_ONE              (0x10<<24)
+#define SS3_MIN_LOD_MASK             (0xffu<<24) /* unsigned: field reaches bit 31 */
+#define SS3_KILL_PIXEL_ENABLE        (1<<17)
+#define SS3_TCX_ADDR_MODE_SHIFT      12
+#define SS3_TCX_ADDR_MODE_MASK       (0x7<<12)
+#define   TEXCOORDMODE_WRAP		0
+#define   TEXCOORDMODE_MIRROR		1
+#define   TEXCOORDMODE_CLAMP_EDGE	2
+#define   TEXCOORDMODE_CUBE       	3
+#define   TEXCOORDMODE_CLAMP_BORDER	4
+#define   TEXCOORDMODE_MIRROR_ONCE      5
+#define SS3_TCY_ADDR_MODE_SHIFT      9
+#define SS3_TCY_ADDR_MODE_MASK       (0x7<<9)
+#define SS3_TCZ_ADDR_MODE_SHIFT      6
+#define SS3_TCZ_ADDR_MODE_MASK       (0x7<<6)
+#define SS3_NORMALIZED_COORDS        (1<<5)
+#define SS3_TEXTUREMAP_INDEX_SHIFT   1
+#define SS3_TEXTUREMAP_INDEX_MASK    (0xf<<1)
+#define SS3_DEINTERLACER_ENABLE      (1<<0)
+
+#define SS4_BORDER_COLOR_MASK        (~0)
+
+/* 3DSTATE_SPAN_STIPPLE, p258
+ */
+#define _3DSTATE_STIPPLE           ((0x3<<29)|(0x1d<<24)|(0x83<<16))
+#define ST1_ENABLE               (1<<16)
+#define ST1_MASK                 (0xffff)
+
+
+#define MI_FLUSH           ((0<<29)|(4<<23))
+#define FLUSH_MAP_CACHE    (1<<0)
+#define FLUSH_RENDER_CACHE (1<<1)
+
+
+#endif
diff --git a/i915/i915_state.c b/i915/i915_state.c
new file mode 100644
index 0000000..0d5ca32
--- /dev/null
+++ b/i915/i915_state.c
@@ -0,0 +1,975 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "glheader.h"
+#include "context.h"
+#include "macros.h"
+#include "enums.h"
+#include "dd.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+
+#include "texmem.h"
+
+#include "intel_screen.h"
+#include "intel_batchbuffer.h"
+
+#include "i915_context.h"
+#include "i915_reg.h"
+
+
+
+/* Set the stencil compare function, reference value and test mask. */
+static void
+i915StencilFuncSeparate(GLcontext *ctx, GLenum face, GLenum func, GLint ref,
+                        GLuint mask)
+{
+   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   int hw_func = intel_translate_compare_func(func);
+
+   mask &= 0xff;
+   if (INTEL_DEBUG & DEBUG_DRI) {
+      fprintf(stderr, "%s : func: %s, ref : 0x%x, mask: 0x%x\n", __FUNCTION__,
+              _mesa_lookup_enum_by_nr(func), ref, mask);
+   }
+
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   /* MODES4 word: replace the stencil test mask. */
+   i915->state.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_TEST_MASK;
+   i915->state.Ctx[I915_CTXREG_STATE4] |= ENABLE_STENCIL_TEST_MASK;
+   i915->state.Ctx[I915_CTXREG_STATE4] |= STENCIL_TEST_MASK(mask);
+
+   /* LIS5: replace the reference value and the compare function. */
+   i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_REF_MASK | S5_STENCIL_TEST_FUNC_MASK);
+   i915->state.Ctx[I915_CTXREG_LIS5] |= (ref << S5_STENCIL_REF_SHIFT);
+   i915->state.Ctx[I915_CTXREG_LIS5] |= (hw_func << S5_STENCIL_TEST_FUNC_SHIFT);
+}
+
+/* Set the stencil write mask (low eight bits); 'face' is not consulted. */
+static void
+i915StencilMaskSeparate(GLcontext *ctx, GLenum face, GLuint mask)
+{
+   i915ContextPtr i915 = I915_CONTEXT(ctx);
+
+   /* The debug line shows the mask as passed in, before truncation. */
+   if (INTEL_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "%s : mask 0x%x\n", __FUNCTION__, mask);
+   mask &= 0xff;
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   i915->state.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_WRITE_MASK;
+   i915->state.Ctx[I915_CTXREG_STATE4] |= ENABLE_STENCIL_WRITE_MASK;
+   i915->state.Ctx[I915_CTXREG_STATE4] |= STENCIL_WRITE_MASK(mask);
+}
+
+
+/* Set the stencil-fail, depth-fail and depth-pass operations in LIS5.
+ * 'face' is not consulted. */
+static void
+i915StencilOpSeparate(GLcontext *ctx, GLenum face, GLenum fail, GLenum zfail,
+                      GLenum zpass)
+{
+   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   int op_fail = intel_translate_stencil_op(fail);
+   int op_zfail = intel_translate_stencil_op(zfail);
+   int op_zpass = intel_translate_stencil_op(zpass);
+
+   if (INTEL_DEBUG & DEBUG_DRI) {
+      fprintf(stderr, "%s: fail : %s, zfail: %s, zpass : %s\n", __FUNCTION__,
+              _mesa_lookup_enum_by_nr(fail),
+              _mesa_lookup_enum_by_nr(zfail),
+              _mesa_lookup_enum_by_nr(zpass));
+   }
+
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_FAIL_MASK |
+                                          S5_STENCIL_PASS_Z_FAIL_MASK |
+                                          S5_STENCIL_PASS_Z_PASS_MASK);
+   i915->state.Ctx[I915_CTXREG_LIS5] |= (op_fail << S5_STENCIL_FAIL_SHIFT);
+   i915->state.Ctx[I915_CTXREG_LIS5] |= (op_zfail << S5_STENCIL_PASS_Z_FAIL_SHIFT);
+   i915->state.Ctx[I915_CTXREG_LIS5] |= (op_zpass << S5_STENCIL_PASS_Z_PASS_SHIFT);
+}
+
+/* Set the alpha test function and its reference value (as a ubyte) in LIS6. */
+static void i915AlphaFunc(GLcontext *ctx, GLenum func, GLfloat ref)
+{
+   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   int hw_func = intel_translate_compare_func(func);
+   GLubyte ref_ub;
+
+   UNCLAMPED_FLOAT_TO_UBYTE(ref_ub, ref);
+
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   i915->state.Ctx[I915_CTXREG_LIS6] &= ~(S6_ALPHA_TEST_FUNC_MASK | S6_ALPHA_REF_MASK);
+   i915->state.Ctx[I915_CTXREG_LIS6] |= (hw_func << S6_ALPHA_TEST_FUNC_SHIFT);
+   i915->state.Ctx[I915_CTXREG_LIS6] |= (((GLuint) ref_ub) << S6_ALPHA_REF_SHIFT);
+}
+
+/* Reconcile the logic-op and colour-blend enable bits with GL state.
+ *
+ * When RGBA logic op is enabled it wins and blending is switched off;
+ * otherwise blending follows ctx->Color.BlendEnabled.  Called from the
+ * enable and blend-state paths, since either can change the outcome.
+ */
+static void i915EvalLogicOpBlendState(GLcontext *ctx)
+{
+   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   int logicop, blend;
+
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   logicop = RGBA_LOGICOP_ENABLED(ctx) ? 1 : 0;
+   blend = (!logicop && ctx->Color.BlendEnabled) ? 1 : 0;
+
+   if (logicop)
+      i915->state.Ctx[I915_CTXREG_LIS5] |= S5_LOGICOP_ENABLE;
+   else
+      i915->state.Ctx[I915_CTXREG_LIS5] &= ~S5_LOGICOP_ENABLE;
+
+   if (blend)
+      i915->state.Ctx[I915_CTXREG_LIS6] |= S6_CBUF_BLEND_ENABLE;
+   else
+      i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_CBUF_BLEND_ENABLE;
+}
+
+/* Store the constant blend colour as A8R8G8B8 (GLuint casts avoid int overflow). */
+static void i915BlendColor(GLcontext *ctx, const GLfloat color[4])
+{
+   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   GLubyte r, g, b, a;
+
+   if (INTEL_DEBUG&DEBUG_DRI)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   UNCLAMPED_FLOAT_TO_UBYTE(r, color[RCOMP]);
+   UNCLAMPED_FLOAT_TO_UBYTE(g, color[GCOMP]);
+   UNCLAMPED_FLOAT_TO_UBYTE(b, color[BCOMP]);
+   UNCLAMPED_FLOAT_TO_UBYTE(a, color[ACOMP]);
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   i915->state.Ctx[I915_CTXREG_BLENDCOLOR1] =
+      ((GLuint) a << 24) | ((GLuint) r << 16) | ((GLuint) g << 8) | (GLuint) b;
+}
+
+
+#define DST_BLND_FACT(f) ((f)<<S6_CBUF_DST_BLEND_FACT_SHIFT)   /* colour dst factor, ORed into the LIS6 word */
+#define SRC_BLND_FACT(f) ((f)<<S6_CBUF_SRC_BLEND_FACT_SHIFT)   /* colour src factor, ORed into the LIS6 word */
+#define DST_ABLND_FACT(f) ((f)<<IAB_DST_FACTOR_SHIFT)   /* alpha dst factor, ORed into the IAB word */
+#define SRC_ABLND_FACT(f) ((f)<<IAB_SRC_FACTOR_SHIFT)   /* alpha src factor, ORed into the IAB word */
+
+
+
+/* Map a GL blend equation enum to the hardware BLENDFUNC_* code (0 if unknown). */
+static GLuint translate_blend_equation( GLenum mode )
+{
+   if (mode == GL_FUNC_ADD)              return BLENDFUNC_ADD;
+   if (mode == GL_MIN)                   return BLENDFUNC_MIN;
+   if (mode == GL_MAX)                   return BLENDFUNC_MAX;
+   if (mode == GL_FUNC_SUBTRACT)         return BLENDFUNC_SUBTRACT;
+   if (mode == GL_FUNC_REVERSE_SUBTRACT) return BLENDFUNC_REVERSE_SUBTRACT;
+
+   return 0;
+}
+
+/* Rebuild the blend factors and equations (LIS6 for colour, the IAB word
+ * for alpha) from ctx->Color; the new words are stored only if they differ.
+ */
+static void i915UpdateBlendState( GLcontext *ctx )
+{
+   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   GLuint rgb_eq = ctx->Color.BlendEquationRGB;
+   GLuint alpha_eq = ctx->Color.BlendEquationA;
+   GLuint rgb_src = ctx->Color.BlendSrcRGB;
+   GLuint rgb_dst = ctx->Color.BlendDstRGB;
+   GLuint alpha_src = ctx->Color.BlendSrcA;
+   GLuint alpha_dst = ctx->Color.BlendDstA;
+   GLuint iab, lis6;
+
+   /* Start from the current words with the fields owned here cleared. */
+   iab = i915->state.Ctx[I915_CTXREG_IAB];
+   iab &= ~(IAB_SRC_FACTOR_MASK | IAB_DST_FACTOR_MASK |
+            (BLENDFUNC_MASK << IAB_FUNC_SHIFT) | IAB_ENABLE);
+   lis6 = i915->state.Ctx[I915_CTXREG_LIS6];
+   lis6 &= ~(S6_CBUF_SRC_BLEND_FACT_MASK | S6_CBUF_DST_BLEND_FACT_MASK |
+             S6_CBUF_BLEND_FUNC_MASK);
+
+   /* MIN and MAX are programmed with both factors forced to GL_ONE. */
+   if (rgb_eq == GL_MIN || rgb_eq == GL_MAX)
+      rgb_src = rgb_dst = GL_ONE;
+   if (alpha_eq == GL_MIN || alpha_eq == GL_MAX)
+      alpha_src = alpha_dst = GL_ONE;
+
+   lis6 |= SRC_BLND_FACT(intel_translate_blend_factor(rgb_src));
+   lis6 |= DST_BLND_FACT(intel_translate_blend_factor(rgb_dst));
+   lis6 |= translate_blend_equation(rgb_eq) << S6_CBUF_BLEND_FUNC_SHIFT;
+
+   iab |= SRC_ABLND_FACT(intel_translate_blend_factor(alpha_src));
+   iab |= DST_ABLND_FACT(intel_translate_blend_factor(alpha_dst));
+   iab |= translate_blend_equation(alpha_eq) << IAB_FUNC_SHIFT;
+
+   /* Independent alpha blend is switched on only when alpha differs from RGB. */
+   if (alpha_src != rgb_src || alpha_dst != rgb_dst || alpha_eq != rgb_eq)
+      iab |= IAB_ENABLE;
+
+   if (iab != i915->state.Ctx[I915_CTXREG_IAB] ||
+       lis6 != i915->state.Ctx[I915_CTXREG_LIS6]) {
+      I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+      i915->state.Ctx[I915_CTXREG_IAB] = iab;
+      i915->state.Ctx[I915_CTXREG_LIS6] = lis6;
+   }
+
+   /* This will catch a logicop blend equation */
+   i915EvalLogicOpBlendState(ctx);
+}
+
+
+static void i915BlendFuncSeparate(GLcontext *ctx, GLenum srcRGB, GLenum dstRGB,
+                                  GLenum srcA, GLenum dstA)
+{
+   /* The factors are re-read from ctx->Color by the shared updater. */
+   i915UpdateBlendState(ctx);
+}
+
+
+static void i915BlendEquationSeparate(GLcontext *ctx, GLenum eqRGB, GLenum eqA)
+{
+   /* The equations are re-read from ctx->Color by the shared updater. */
+   i915UpdateBlendState(ctx);
+}
+
+
+/* Set the depth comparison function field of LIS6. */
+static void i915DepthFunc(GLcontext *ctx, GLenum func)
+{
+   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   int hw_func = intel_translate_compare_func(func);
+
+   if (INTEL_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_DEPTH_TEST_FUNC_MASK;
+   i915->state.Ctx[I915_CTXREG_LIS6] |= (hw_func << S6_DEPTH_TEST_FUNC_SHIFT);
+}
+
+/* Depth writes are enabled only when both the mask flag and depth test are on. */
+static void i915DepthMask(GLcontext *ctx, GLboolean flag)
+{
+   i915ContextPtr i915 = I915_CONTEXT(ctx);
+
+   if (INTEL_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "%s flag (%d)\n", __FUNCTION__, flag);
+
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   if (!flag || !ctx->Depth.Test)
+      i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_DEPTH_WRITE_ENABLE;
+   else
+      i915->state.Ctx[I915_CTXREG_LIS6] |= S6_DEPTH_WRITE_ENABLE;
+}
+
+/* =============================================================
+ * Polygon stipple
+ *
+ * The i915 supports a 4x4 stipple natively, GL wants 32x32.  The GL mask is
+ * accepted only if it is one 4x4 tile repeated; otherwise hw_stipple = 0.
+ */
+static void i915PolygonStipple( GLcontext *ctx, const GLubyte *mask )
+{
+   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   const GLubyte *m = mask;
+   GLubyte p[4];
+   int i,j,k;
+   int active = (ctx->Polygon.StippleFlag &&
+		 i915->intel.reduced_primitive == GL_TRIANGLES); /* stipple on and rendering triangles */
+   GLuint newMask;
+
+   if (active) {
+      I915_STATECHANGE(i915, I915_UPLOAD_STIPPLE);
+      i915->state.Stipple[I915_STPREG_ST1] &= ~ST1_ENABLE; /* set again at the end if the mask is usable */
+   }
+
+   p[0] = mask[12] & 0xf; p[0] |= p[0] << 4; /* low nibble duplicated into both halves of the byte */
+   p[1] = mask[8] & 0xf; p[1] |= p[1] << 4;
+   p[2] = mask[4] & 0xf; p[2] |= p[2] << 4;
+   p[3] = mask[0] & 0xf; p[3] |= p[3] << 4;
+
+   for (k = 0 ; k < 8 ; k++)
+      for (j = 3 ; j >= 0; j--)
+	 for (i = 0 ; i < 4 ; i++, m++)
+	    if (*m != p[j]) { /* one of the 128 mask bytes differs: not a repeated 4x4 tile */
+	       i915->intel.hw_stipple = 0;
+	       return;
+	    }
+
+   newMask = (((p[0] & 0xf) << 0) |
+	      ((p[1] & 0xf) << 4) |
+	      ((p[2] & 0xf) << 8) |
+	      ((p[3] & 0xf) << 12));
+
+
+   if (newMask == 0xffff || newMask == 0x0) {
+      /* this is needed to make conform pass */
+      i915->intel.hw_stipple = 0;
+      return;
+   }
+
+   i915->state.Stipple[I915_STPREG_ST1] &= ~0xffff; /* clears the low 16 bits (same value as ST1_MASK) */
+   i915->state.Stipple[I915_STPREG_ST1] |= newMask;
+   i915->intel.hw_stipple = 1;
+
+   if (active)
+      i915->state.Stipple[I915_STPREG_ST1] |= ST1_ENABLE;
+}
+
+
+/* =============================================================
+ * Hardware clipping
+ */
+static void i915Scissor(GLcontext *ctx, GLint x, GLint y,
+                        GLsizei w, GLsizei h)
+{
+   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   intelScreenPrivate *screen = i915->intel.intelScreen;
+   int left, top, right, bottom;
+
+   if (!i915->intel.driDrawable)
+      return;
+
+   /* Flip vertically against the drawable height; corners are inclusive. */
+   left = x;
+   top = i915->intel.driDrawable->h - (y + h);
+   right = x + w - 1;
+   bottom = top + h - 1;
+
+   if (INTEL_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "[%s] x(%d) y(%d) w(%d) h(%d)\n", __FUNCTION__,
+              x, y, w, h);
+
+   /* Clamp each coordinate: floor at 0 first, then cap at the last pixel. */
+   if (left < 0) left = 0;
+   if (left >= screen->width) left = screen->width-1;
+   if (top < 0) top = 0;
+   if (top >= screen->height) top = screen->height-1;
+   if (right < 0) right = 0;
+   if (right >= screen->width) right = screen->width-1;
+   if (bottom < 0) bottom = 0;
+   if (bottom >= screen->height) bottom = screen->height-1;
+
+   I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS);
+   i915->state.Buffer[I915_DESTREG_SR1] = (top << 16) | (left & 0xffff);
+   i915->state.Buffer[I915_DESTREG_SR2] = (bottom << 16) | (right & 0xffff);
+}
+
+/* Set the logic-op function field of the MODES4 word. */
+static void i915LogicOp(GLcontext *ctx, GLenum opcode)
+{
+   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   int hw_op = intel_translate_logic_op(opcode);
+
+   if (INTEL_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   i915->state.Ctx[I915_CTXREG_STATE4] &= ~LOGICOP_MASK;
+   i915->state.Ctx[I915_CTXREG_STATE4] |= LOGIC_OP_FUNC(hw_op);
+}
+
+
+
+/* Shared handler for the CullFace and FrontFace hooks ('unused' is ignored):
+ * derive the LIS4 cull mode from the cull flag, cull face and winding. */
+static void i915CullFaceFrontFace(GLcontext *ctx, GLenum unused)
+{
+   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   GLuint mode;
+
+   if (INTEL_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   if (!ctx->Polygon.CullFlag) {
+      mode = S4_CULLMODE_NONE;
+   }
+   else if (ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) {
+      mode = S4_CULLMODE_BOTH;
+   }
+   else {
+      /* CW by default; culling front faces and a non-CCW front face each
+       * reverse the choice, so both together leave it at CW. */
+      int cull_front = (ctx->Polygon.CullFaceMode == GL_FRONT);
+      int front_not_ccw = (ctx->Polygon.FrontFace != GL_CCW);
+      mode = (cull_front != front_not_ccw) ? S4_CULLMODE_CCW : S4_CULLMODE_CW;
+   }
+
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   i915->state.Ctx[I915_CTXREG_LIS4] &= ~S4_CULLMODE_MASK;
+   i915->state.Ctx[I915_CTXREG_LIS4] |= mode;
+}
+
+/* Encode the line width in LIS4 as (int)(2 * width), clamped to 1..15. */
+static void i915LineWidth( GLcontext *ctx, GLfloat widthf )
+{
+   i915ContextPtr i915 = I915_CONTEXT( ctx );
+   int hw_width = (int)(widthf * 2);
+   int new_lis4;
+
+   if (INTEL_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   CLAMP_SELF(hw_width, 1, 0xf);
+   new_lis4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_LINE_WIDTH_MASK;
+   new_lis4 |= hw_width << S4_LINE_WIDTH_SHIFT;
+   if (new_lis4 == i915->state.Ctx[I915_CTXREG_LIS4])
+      return;
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   i915->state.Ctx[I915_CTXREG_LIS4] = new_lis4;
+}
+
+/* Store the point size, truncated to an integer and clamped to 1..255, in LIS4. */
+static void i915PointSize(GLcontext *ctx, GLfloat size)
+{
+   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   GLint hw_size = (int)size;
+   int new_lis4;
+
+   if (INTEL_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   CLAMP_SELF(hw_size, 1, 255);
+   new_lis4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_POINT_WIDTH_MASK;
+   new_lis4 |= hw_size << S4_POINT_WIDTH_SHIFT;
+   if (new_lis4 == i915->state.Ctx[I915_CTXREG_LIS4])
+      return;
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   i915->state.Ctx[I915_CTXREG_LIS4] = new_lis4;
+}
+
+
+/* =============================================================
+ * Color masks
+ *
+ * LIS5 holds per-channel write *disable* bits, so a false mask sets a bit.
+ */
+static void i915ColorMask(GLcontext *ctx, GLboolean r, GLboolean g,
+                          GLboolean b, GLboolean a)
+{
+   i915ContextPtr i915 = I915_CONTEXT( ctx );
+   GLuint lis5 = i915->state.Ctx[I915_CTXREG_LIS5] & ~S5_WRITEDISABLE_MASK;
+
+   if (INTEL_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "%s r(%d) g(%d) b(%d) a(%d)\n", __FUNCTION__, r, g, b, a);
+
+   lis5 |= r ? 0 : S5_WRITEDISABLE_RED;
+   lis5 |= g ? 0 : S5_WRITEDISABLE_GREEN;
+   lis5 |= b ? 0 : S5_WRITEDISABLE_BLUE;
+   lis5 |= a ? 0 : S5_WRITEDISABLE_ALPHA;
+
+   if (lis5 == i915->state.Ctx[I915_CTXREG_LIS5])
+      return;
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   i915->state.Ctx[I915_CTXREG_LIS5] = lis5;
+}
+
+/* Force the fragment program to be rebuilt (a hack): flag texture state as
+ * new and mark the texture program as not yet translated. */
+static void update_specular( GLcontext *ctx )
+{
+   I915_CONTEXT(ctx)->tex_program.translated = 0;
+   INTEL_CONTEXT(ctx)->NewGLState |= _NEW_TEXTURE;
+}
+
+/* Only GL_LIGHT_MODEL_COLOR_CONTROL matters here; 'param' is not read. */
+static void i915LightModelfv(GLcontext *ctx, GLenum pname, const GLfloat *param)
+{
+   if (INTEL_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (pname != GL_LIGHT_MODEL_COLOR_CONTROL)
+      return;
+   update_specular(ctx);
+}
+
+/* Smooth shading clears the LIS4 flat-shade bits; any other mode sets them. */
+static void i915ShadeModel(GLcontext *ctx, GLenum mode)
+{
+   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   const GLuint flat_bits = (S4_FLATSHADE_ALPHA |
+                             S4_FLATSHADE_COLOR |
+                             S4_FLATSHADE_SPECULAR);
+
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+
+   if (mode == GL_SMOOTH)
+      i915->state.Ctx[I915_CTXREG_LIS4] &= ~flat_bits;
+   else
+      i915->state.Ctx[I915_CTXREG_LIS4] |= flat_bits;
+}
+
+/* =============================================================
+ * Fog: derive fog mode/enable from the fragment program or GL fog state.
+ */
+void i915_update_fog( GLcontext *ctx )
+{
+   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   GLenum mode;
+   GLboolean enabled;
+   GLboolean try_pixel_fog;
+   
+   if (ctx->FragmentProgram._Active) {
+      /* Pull in static fog state from program */
+      
+      mode = ctx->FragmentProgram._Current->FogOption;
+      enabled = (mode != GL_NONE);
+      try_pixel_fog = 0;
+   }
+   else {
+      enabled = ctx->Fog.Enabled;
+      mode = ctx->Fog.Mode;
+#if 0
+      /* XXX - DISABLED -- Need ortho fallback */
+      try_pixel_fog = (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT
+                       &&ctx->Hint.Fog == GL_NICEST);
+#else
+      try_pixel_fog = 0;
+#endif
+   }
+
+   if (!enabled) {
+      i915->vertex_fog = I915_FOG_NONE;
+   }
+   else if (try_pixel_fog) { /* not reached as built: try_pixel_fog is 0 on both paths above */
+      I915_STATECHANGE(i915, I915_UPLOAD_FOG);
+      i915->state.Fog[I915_FOGREG_MODE1] &= ~FMC1_FOGFUNC_MASK;
+      i915->vertex_fog = I915_FOG_PIXEL;
+	 
+      switch (mode) {
+      case GL_LINEAR:
+	 if (ctx->Fog.End <= ctx->Fog.Start) {
+	    /* XXX - this won't work with fragment programs.  Need to
+	     * either fallback or append fog instructions to end of
+	     * program in the case of linear fog.
+	     */
+	    i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_VERTEX;
+	    i915->vertex_fog = I915_FOG_VERTEX;
+	 }
+	 else {
+            GLfloat c2 = 1.0 / (ctx->Fog.End - ctx->Fog.Start);
+            GLfloat c1 = ctx->Fog.End * c2;
+
+	    i915->state.Fog[I915_FOGREG_MODE1] &= ~FMC1_C1_MASK;
+	    i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_PIXEL_LINEAR;
+	    i915->state.Fog[I915_FOGREG_MODE1] |= 
+	       ((GLuint)(c1 * FMC1_C1_ONE)) & FMC1_C1_MASK;
+
+	    if (i915->state.Fog[I915_FOGREG_MODE1] & FMC1_FOGINDEX_Z) {
+	       i915->state.Fog[I915_FOGREG_MODE2]
+                  = (GLuint)(c2 * FMC2_C2_ONE); /* c2 scaled by FMC2_C2_ONE */
+	    }
+	    else {
+	       fi_type fi; /* stores the raw bits of the float c2 */
+	       fi.f = c2; 
+	       i915->state.Fog[I915_FOGREG_MODE2] = fi.i; 
+	    }
+	 }
+	 break;
+      case GL_EXP:
+	 i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_PIXEL_EXP;
+	 break;
+      case GL_EXP2:
+	 i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_PIXEL_EXP2;
+	 break;
+      default:
+	 break;
+      }
+   }
+   else /* if (i915->vertex_fog != I915_FOG_VERTEX) */ {      
+      I915_STATECHANGE(i915, I915_UPLOAD_FOG);
+      i915->state.Fog[I915_FOGREG_MODE1] &= ~FMC1_FOGFUNC_MASK;
+      i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_VERTEX;
+      i915->vertex_fog = I915_FOG_VERTEX;
+   }
+
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   I915_ACTIVESTATE(i915, I915_UPLOAD_FOG, enabled);
+   if (enabled)
+      i915->state.Ctx[I915_CTXREG_LIS5] |= S5_FOG_ENABLE;
+   else
+      i915->state.Ctx[I915_CTXREG_LIS5] &= ~S5_FOG_ENABLE;
+
+   /* Always enable pixel fog.  Vertex fog using fog coord will conflict
+    * with fog code appended onto fragment program.
+    */
+    _tnl_allow_vertex_fog( ctx, 0 );
+    _tnl_allow_pixel_fog( ctx, 1 );
+}
+
+/* React to fog parameter changes.  Only density and colour are stored in
+ * the fog packet here; every other pname is ignored by this hook. */
+static void
+i915Fogfv(GLcontext *ctx, GLenum pname, const GLfloat *param)
+{
+   i915ContextPtr i915 = I915_CONTEXT(ctx);
+
+   if (pname == GL_FOG_DENSITY) {
+      I915_STATECHANGE(i915, I915_UPLOAD_FOG);
+
+      if (!(i915->state.Fog[I915_FOGREG_MODE1] & FMC1_FOGINDEX_Z)) {
+         /* Store the raw bit pattern of the float density. */
+         union { float f; int i; } bits;
+         bits.f = ctx->Fog.Density;
+         i915->state.Fog[I915_FOGREG_MODE3] = bits.i;
+      }
+      else {
+         /* Store the density scaled by FMC3_D_ONE. */
+         i915->state.Fog[I915_FOGREG_MODE3]
+            = (GLuint)(ctx->Fog.Density * FMC3_D_ONE);
+      }
+   }
+   else if (pname == GL_FOG_COLOR) {
+      GLubyte red, green, blue;
+
+      I915_STATECHANGE(i915, I915_UPLOAD_FOG);
+
+      /* Channels are truncated (not rounded) to 8 bits. */
+      red = (GLubyte)(ctx->Fog.Color[0]*255.0F);
+      green = (GLubyte)(ctx->Fog.Color[1]*255.0F);
+      blue = (GLubyte)(ctx->Fog.Color[2]*255.0F);
+
+      i915->state.Fog[I915_FOGREG_COLOR] = (_3DSTATE_FOG_COLOR_CMD |
+                                            (red << 16) |
+                                            (green << 8) |
+                                            (blue << 0));
+   }
+   /* All other pnames: nothing to do here. */
+}
+
+/* Hint hook.  GL_FOG_HINT was the only target singled out, and it too is a
+ * no-op, so no hint changes any state here. */
+static void i915Hint(GLcontext *ctx, GLenum target, GLenum state)
+{
+   /* Intentionally empty. */
+   (void) ctx;
+   (void) target;
+   (void) state;
+}
+
+/* =============================================================
+ * dd_function_table Enable hook: apply one capability change to hw state. */
+
+static void i915Enable(GLcontext *ctx, GLenum cap, GLboolean state)
+{
+   i915ContextPtr i915 = I915_CONTEXT(ctx);
+
+   switch(cap) {
+   case GL_TEXTURE_2D:
+      break;
+
+   case GL_LIGHTING:
+   case GL_COLOR_SUM:
+      update_specular( ctx ); /* triggers a fragment program rebuild */
+      break;
+
+   case GL_ALPHA_TEST:
+      I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+      if (state)
+	 i915->state.Ctx[I915_CTXREG_LIS6] |= S6_ALPHA_TEST_ENABLE;
+      else
+	 i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_ALPHA_TEST_ENABLE;
+      break;
+
+   case GL_BLEND:
+      i915EvalLogicOpBlendState(ctx); /* blend enable also depends on logic-op state */
+      break;
+
+   case GL_COLOR_LOGIC_OP:
+      i915EvalLogicOpBlendState(ctx);
+
+      /* Logicop doesn't seem to work at 16bpp:
+       */
+      if (i915->intel.intelScreen->cpp == 2)
+	 FALLBACK( &i915->intel, I915_FALLBACK_LOGICOP, state );
+      break;
+
+   case GL_FRAGMENT_PROGRAM_ARB:
+      break;
+
+   case GL_DITHER:
+      I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+      if (state)
+	 i915->state.Ctx[I915_CTXREG_LIS5] |= S5_COLOR_DITHER_ENABLE;
+      else
+	 i915->state.Ctx[I915_CTXREG_LIS5] &= ~S5_COLOR_DITHER_ENABLE;
+      break;
+
+   case GL_DEPTH_TEST:
+      I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+      if (state)
+	 i915->state.Ctx[I915_CTXREG_LIS6] |= S6_DEPTH_TEST_ENABLE;
+      else
+	 i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_DEPTH_TEST_ENABLE;
+
+      i915DepthMask( ctx, ctx->Depth.Mask ); /* depth write enable is gated on ctx->Depth.Test */
+      break;
+
+   case GL_SCISSOR_TEST:
+      I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS);
+      if (state)
+	 i915->state.Buffer[I915_DESTREG_SENABLE] = (_3DSTATE_SCISSOR_ENABLE_CMD |
+						   ENABLE_SCISSOR_RECT);
+      else
+	 i915->state.Buffer[I915_DESTREG_SENABLE] = (_3DSTATE_SCISSOR_ENABLE_CMD |
+						   DISABLE_SCISSOR_RECT);
+      break;
+
+   case GL_LINE_SMOOTH:
+      I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+      if (state)
+	 i915->state.Ctx[I915_CTXREG_LIS4] |= S4_LINE_ANTIALIAS_ENABLE;
+      else
+	 i915->state.Ctx[I915_CTXREG_LIS4] &= ~S4_LINE_ANTIALIAS_ENABLE;
+      break;
+
+   case GL_FOG:
+      break;
+
+   case GL_CULL_FACE:
+      i915CullFaceFrontFace(ctx, 0); /* second argument is ignored by the callee */
+      break;
+
+   case GL_STENCIL_TEST:
+      if (i915->intel.hw_stencil) {
+	 I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+	 if (state)
+	    i915->state.Ctx[I915_CTXREG_LIS5] |= (S5_STENCIL_TEST_ENABLE |
+						S5_STENCIL_WRITE_ENABLE);
+	 else
+	    i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_TEST_ENABLE | 
+						 S5_STENCIL_WRITE_ENABLE);
+      } else {
+	 FALLBACK( &i915->intel, I915_FALLBACK_STENCIL, state );
+      }
+      break;
+
+   case GL_POLYGON_STIPPLE:
+      /* The stipple command worked on my 855GM box, but not my 845G.
+       * NOTE(review): the old "Disabled for now" remark looks stale -- the
+       * code below does toggle ST1_ENABLE when hw_stipple is set.  Confirm.
+       */
+      if (i915->intel.hw_stipple && 
+	  i915->intel.reduced_primitive == GL_TRIANGLES)
+      {
+	 I915_STATECHANGE(i915, I915_UPLOAD_STIPPLE);
+	 if (state)
+	    i915->state.Stipple[I915_STPREG_ST1] |= ST1_ENABLE;
+	 else
+	    i915->state.Stipple[I915_STPREG_ST1] &= ~ST1_ENABLE;
+      }
+      break;
+
+   case GL_POLYGON_SMOOTH:
+      FALLBACK( &i915->intel, I915_FALLBACK_POLYGON_SMOOTH, state );
+      break;
+
+   case GL_POINT_SMOOTH:
+      FALLBACK( &i915->intel, I915_FALLBACK_POINT_SMOOTH, state );
+      break;
+
+   default:
+      ;
+   }
+}
+
+
+static void i915_init_packets( i915ContextPtr i915 )
+{
+   intelScreenPrivate *screen = i915->intel.intelScreen;
+
+   /* Zero all state; each group below then fills in its packet defaults */
+   memset(&i915->state, 0, sizeof(i915->state));
+
+
+   {
+      I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+      /* Probably don't want to upload all this stuff every time one 
+       * piece changes.
+       */
+      i915->state.Ctx[I915_CTXREG_LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 
+				       I1_LOAD_S(2) |
+				       I1_LOAD_S(4) |
+				       I1_LOAD_S(5) |
+				       I1_LOAD_S(6) | 
+				       (3)); /* NOTE(review): presumably the packet length field (4 words - 1) -- confirm */
+      i915->state.Ctx[I915_CTXREG_LIS2] = 0;
+      i915->state.Ctx[I915_CTXREG_LIS4] = 0;
+      i915->state.Ctx[I915_CTXREG_LIS5] = 0;
+
+      if (screen->cpp == 2)
+	 i915->state.Ctx[I915_CTXREG_LIS5] |= S5_COLOR_DITHER_ENABLE;
+
+
+      i915->state.Ctx[I915_CTXREG_LIS6] = (S6_COLOR_WRITE_ENABLE |
+					 (2 << S6_TRISTRIP_PV_SHIFT));
+
+      i915->state.Ctx[I915_CTXREG_STATE4] = (_3DSTATE_MODES_4_CMD |
+					   ENABLE_LOGIC_OP_FUNC |
+					   LOGIC_OP_FUNC(LOGICOP_COPY) |
+					   ENABLE_STENCIL_TEST_MASK |
+					   STENCIL_TEST_MASK(0xff) |
+					   ENABLE_STENCIL_WRITE_MASK |
+					   STENCIL_WRITE_MASK(0xff));
+
+
+      i915->state.Ctx[I915_CTXREG_IAB] = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD |
+					IAB_MODIFY_ENABLE |
+					IAB_MODIFY_FUNC |
+					IAB_MODIFY_SRC_FACTOR |
+					IAB_MODIFY_DST_FACTOR);
+
+      i915->state.Ctx[I915_CTXREG_BLENDCOLOR0] = _3DSTATE_CONST_BLEND_COLOR_CMD;
+      i915->state.Ctx[I915_CTXREG_BLENDCOLOR1] = 0;
+
+   }
+
+   {
+      I915_STATECHANGE(i915, I915_UPLOAD_STIPPLE);
+      i915->state.Stipple[I915_STPREG_ST0] = _3DSTATE_STIPPLE;
+   }
+
+
+   {
+      I915_STATECHANGE(i915, I915_UPLOAD_FOG);
+      i915->state.Fog[I915_FOGREG_MODE0] = _3DSTATE_FOG_MODE_CMD;
+      i915->state.Fog[I915_FOGREG_MODE1] = (FMC1_FOGFUNC_MODIFY_ENABLE |
+					  FMC1_FOGFUNC_VERTEX |
+					  FMC1_FOGINDEX_MODIFY_ENABLE |
+					  FMC1_FOGINDEX_W |
+					  FMC1_C1_C2_MODIFY_ENABLE |
+					  FMC1_DENSITY_MODIFY_ENABLE);
+      i915->state.Fog[I915_FOGREG_COLOR] = _3DSTATE_FOG_COLOR_CMD;
+   }
+
+
+   {
+      I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS);
+      /* color buffer offset/stride */
+      i915->state.Buffer[I915_DESTREG_CBUFADDR0] = _3DSTATE_BUF_INFO_CMD;
+      i915->state.Buffer[I915_DESTREG_CBUFADDR1] = 
+	 (BUF_3D_ID_COLOR_BACK | 
+	  BUF_3D_PITCH(screen->front.pitch) |  /* pitch in bytes; NOTE(review): front pitch used with the back-buffer id -- confirm */
+	  BUF_3D_USE_FENCE);
+      /*i915->state.Buffer[I915_DESTREG_CBUFADDR2] is the offset */
+
+
+      /* depth/Z buffer offset/stride */
+      i915->state.Buffer[I915_DESTREG_DBUFADDR0] = _3DSTATE_BUF_INFO_CMD;
+      i915->state.Buffer[I915_DESTREG_DBUFADDR1] = 
+	 (BUF_3D_ID_DEPTH |
+	  BUF_3D_PITCH(screen->depth.pitch) |  /* pitch in bytes */
+	  BUF_3D_USE_FENCE);
+      i915->state.Buffer[I915_DESTREG_DBUFADDR2] = screen->depth.offset;
+
+
+      i915->state.Buffer[I915_DESTREG_DV0] = _3DSTATE_DST_BUF_VARS_CMD;
+
+      /* color/depth pixel format */
+      switch (screen->fbFormat) {
+      case DV_PF_555:
+      case DV_PF_565:
+	 i915->state.Buffer[I915_DESTREG_DV1] = (DSTORG_HORT_BIAS(0x8) | /* .5 */
+					       DSTORG_VERT_BIAS(0x8) | /* .5 */
+					       LOD_PRECLAMP_OGL |
+					       TEX_DEFAULT_COLOR_OGL |
+					       DITHER_FULL_ALWAYS |
+					       screen->fbFormat |
+					       DEPTH_FRMT_16_FIXED);
+	 break;
+      case DV_PF_8888:
+	 i915->state.Buffer[I915_DESTREG_DV1] = (DSTORG_HORT_BIAS(0x8) | /* .5 */
+					       DSTORG_VERT_BIAS(0x8) | /* .5 */
+					       LOD_PRECLAMP_OGL |
+					       TEX_DEFAULT_COLOR_OGL |
+					       screen->fbFormat |
+					       DEPTH_FRMT_24_FIXED_8_OTHER);
+	 break;
+      } /* no default case: any other fbFormat leaves DV1 at 0 from the memset above */
+
+      /* scissor */
+      i915->state.Buffer[I915_DESTREG_SENABLE] = (_3DSTATE_SCISSOR_ENABLE_CMD |
+						DISABLE_SCISSOR_RECT);
+      i915->state.Buffer[I915_DESTREG_SR0] = _3DSTATE_SCISSOR_RECT_0_CMD;
+      i915->state.Buffer[I915_DESTREG_SR1] = 0;
+      i915->state.Buffer[I915_DESTREG_SR2] = 0;
+   }
+
+
+   /* These will be emitted at the head of every buffer, unless
+    * we get hardware contexts working.
+    */
+   i915->state.active = (I915_UPLOAD_PROGRAM | 
+			 I915_UPLOAD_STIPPLE | 
+			 I915_UPLOAD_CTX | 
+			 I915_UPLOAD_BUFFERS | 
+			 I915_UPLOAD_INVARIENT);
+}
+
+/* Plug this file's state handlers into the driver function table. */
+void i915InitStateFunctions( struct dd_function_table *functions )
+{
+   functions->Enable = i915Enable;
+   functions->Hint = i915Hint;
+   functions->AlphaFunc = i915AlphaFunc;
+   functions->BlendColor = i915BlendColor;
+   functions->BlendEquationSeparate = i915BlendEquationSeparate;
+   functions->BlendFuncSeparate = i915BlendFuncSeparate;
+   functions->LogicOpcode = i915LogicOp;
+   functions->ColorMask = i915ColorMask;
+   functions->DepthFunc = i915DepthFunc;
+   functions->DepthMask = i915DepthMask;
+   functions->StencilFuncSeparate = i915StencilFuncSeparate;
+   functions->StencilMaskSeparate = i915StencilMaskSeparate;
+   functions->StencilOpSeparate = i915StencilOpSeparate;
+   functions->CullFace = functions->FrontFace = i915CullFaceFrontFace;
+   functions->LineWidth = i915LineWidth;
+   functions->PointSize = i915PointSize;
+   functions->PolygonStipple = i915PolygonStipple;
+   functions->Scissor = i915Scissor;
+   functions->ShadeModel = i915ShadeModel;
+   functions->LightModelfv = i915LightModelfv;
+   functions->Fogfv = i915Fogfv;
+}
+
+
+/* Build the default packets, run the shared intel state init, then keep a
+ * snapshot of the result in 'initial' and make 'state' the current set. */
+void i915InitState( i915ContextPtr i915 )
+{
+   GLcontext *ctx = &i915->intel.ctx;
+
+   i915_init_packets(i915);
+   intelInitState(ctx);
+   memcpy(&i915->initial, &i915->state, sizeof(i915->state));
+   i915->current = &i915->state;
+}
+
+
+
+
+
+
+
diff --git a/i915/i915_tex.c b/i915/i915_tex.c
new file mode 100644
index 0000000..d9609d3
--- /dev/null
+++ b/i915/i915_tex.c
@@ -0,0 +1,187 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "mtypes.h"
+#include "imports.h"
+#include "simple_list.h"
+#include "enums.h"
+#include "image.h"
+#include "texstore.h"
+#include "texformat.h"
+#include "texmem.h"
+#include "swrast/swrast.h"
+
+#include "mm.h"
+
+#include "intel_ioctl.h"
+
+#include "i915_context.h"
+#include "i915_reg.h"
+
+
+
+
+
+
+/**
+ * Allocate an i915 texture object and attach it to texObj as its
+ * DriverData.  The new object is marked I915_UPLOAD_TEX_ALL dirty and
+ * its list node is initialised empty.  Returns the embedded intel
+ * texture object, or NULL if the allocation fails.
+ */
+intelTextureObjectPtr i915AllocTexObj( struct gl_texture_object *texObj )
+{
+   i915TextureObjectPtr obj = CALLOC_STRUCT( i915_texture_object );
+   if ( obj == NULL )
+      return NULL;
+
+   obj->intel.dirty = I915_UPLOAD_TEX_ALL;
+   obj->intel.base.tObj = texObj;
+   make_empty_list( &obj->intel.base );
+   texObj->DriverData = obj;
+   return &obj->intel;
+}
+
+
+/* glTexParameter hook.  Every pname handled here marks all texture state
+ * dirty; the level/LOD-range ones first call intelFlush() and
+ * driSwapOutTextureObject().  Any other pname is ignored.
+ */
+static void i915TexParameter( GLcontext *ctx, GLenum target,
+			     struct gl_texture_object *tObj,
+			     GLenum pname, const GLfloat *params )
+{
+   i915TextureObjectPtr tex = (i915TextureObjectPtr) tObj->DriverData;
+
+   switch (pname) {
+   case GL_TEXTURE_BASE_LEVEL:
+   case GL_TEXTURE_MAX_LEVEL:
+   case GL_TEXTURE_MIN_LOD:
+   case GL_TEXTURE_MAX_LOD:
+      /* The i915 and its successors can do a lot of this without
+       * reloading the textures.  A project for someone?
+       */
+      intelFlush( ctx );
+      driSwapOutTextureObject( (driTextureObject *) tex );
+      break;
+
+   case GL_TEXTURE_MIN_FILTER:
+   case GL_TEXTURE_MAG_FILTER:
+   case GL_TEXTURE_MAX_ANISOTROPY_EXT:
+   case GL_TEXTURE_WRAP_S:
+   case GL_TEXTURE_WRAP_T:
+   case GL_TEXTURE_WRAP_R:
+   case GL_TEXTURE_BORDER_COLOR:
+   case GL_TEXTURE_COMPARE_MODE:
+   case GL_TEXTURE_COMPARE_FUNC:
+      break;
+
+   default:
+      return;
+   }
+
+   /* Reached only for the parameters listed above. */
+   tex->intel.dirty = I915_UPLOAD_TEX_ALL;
+}
+
+
+/* glTexEnv hook: invalidates the texenv program or updates the LOD bias. */
+static void i915TexEnv( GLcontext *ctx, GLenum target, 
+			GLenum pname, const GLfloat *param )
+{
+   i915ContextPtr i915 = I915_CONTEXT( ctx );
+   GLuint unit = ctx->Texture.CurrentUnit;
+
+   switch (pname) {
+   case GL_TEXTURE_ENV_COLOR: 	/* Should be a tracked param */
+   case GL_TEXTURE_ENV_MODE:
+   case GL_COMBINE_RGB:
+   case GL_COMBINE_ALPHA:
+   case GL_SOURCE0_RGB:
+   case GL_SOURCE1_RGB:
+   case GL_SOURCE2_RGB:
+   case GL_SOURCE0_ALPHA:
+   case GL_SOURCE1_ALPHA:
+   case GL_SOURCE2_ALPHA:
+   case GL_OPERAND0_RGB:
+   case GL_OPERAND1_RGB:
+   case GL_OPERAND2_RGB:
+   case GL_OPERAND0_ALPHA:
+   case GL_OPERAND1_ALPHA:
+   case GL_OPERAND2_ALPHA:
+   case GL_RGB_SCALE:
+   case GL_ALPHA_SCALE:
+      i915->tex_program.translated = 0; 
+      break;
+
+   case GL_TEXTURE_LOD_BIAS: {
+      const double scaled = (*param) * 16.0;
+      /* Clamp before converting (out-of-range float->int is undefined). */
+      const int b = !(scaled > -256.0) ? -256 : (scaled >= 255.0) ? 255 : (int) scaled;
+      I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit));
+      i915->state.Tex[unit][I915_TEXREG_SS2] &= ~SS2_LOD_BIAS_MASK;
+      /* Shift as unsigned: left-shifting a negative int is undefined. */
+      i915->state.Tex[unit][I915_TEXREG_SS2] |= 
+	 (((GLuint) b << SS2_LOD_BIAS_SHIFT) & SS2_LOD_BIAS_MASK);
+      break;
+   }
+   default:
+      break;
+   }
+}
+
+/* glBindTexture hook: create the driver texture object on first bind, mark
+ * it dirty when its target changed, and invalidate the texenv program. */
+static void i915BindTexture( GLcontext *ctx, GLenum target,
+			    struct gl_texture_object *texObj )
+{
+   i915TextureObjectPtr tex;
+
+   /* i915AllocTexObj() returns NULL on failure; don't dereference it. */
+   if (!texObj->DriverData && !i915AllocTexObj( texObj ))
+      return;
+
+   tex = (i915TextureObjectPtr)texObj->DriverData;
+   if (tex->lastTarget != texObj->Target) {
+      tex->intel.dirty = I915_UPLOAD_TEX_ALL;
+      tex->lastTarget = texObj->Target;
+   }
+
+   /* Need this if image format changes between bound textures.  Could
+    * shortcircuit by comparing incoming and outgoing texture state.
+    */
+   I915_CONTEXT(ctx)->tex_program.translated = 0; 
+}
+
+/* Install the texture callbacks defined in this file. */
+void i915InitTextureFuncs( struct dd_function_table *functions )
+{
+   functions->TexParameter = i915TexParameter;
+   functions->TexEnv = i915TexEnv;
+   functions->BindTexture = i915BindTexture;
+}
diff --git a/i915/i915_texprog.c b/i915/i915_texprog.c
new file mode 100644
index 0000000..f6a8b02
--- /dev/null
+++ b/i915/i915_texprog.c
@@ -0,0 +1,676 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include <strings.h>
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+
+#include "tnl/t_context.h"
+#include "intel_batchbuffer.h"
+
+#include "i915_reg.h"
+#include "i915_context.h"
+#include "i915_program.h"
+
+/* Map one TEXTURE_*_BIT to a D0 sampler type; 1D, 2D and RECT all use 2D. */
+static GLuint translate_tex_src_bit( struct i915_fragment_program *p,
+				     GLubyte bit )
+{
+   if (bit == TEXTURE_1D_BIT || bit == TEXTURE_2D_BIT || bit == TEXTURE_RECT_BIT)
+      return D0_SAMPLE_TYPE_2D;
+   if (bit == TEXTURE_3D_BIT)   return D0_SAMPLE_TYPE_VOLUME;
+   if (bit == TEXTURE_CUBE_BIT) return D0_SAMPLE_TYPE_CUBE;
+
+   i915_program_error(p, "TexSrcBit");	/* any other value is an error */
+   return 0;
+}
+
+/* Resolve a texenv source enum to a ureg, emitting a load/decl if needed. */
+static GLuint get_source( struct i915_fragment_program *p, 
+			  GLenum src, GLuint unit )
+{
+   switch (src) {
+   case GL_TEXTURE: 
+      if (p->src_texture == UREG_BAD) {
+	 /* No cached result: emit the sampler/texcoord declarations and
+	  * the load, and remember the result in p->src_texture.
+	  * TODO: Use D0_CHANNEL_XY where possible. */
+	 GLuint dim = translate_tex_src_bit( p, p->ctx->Texture.Unit[unit]._ReallyEnabled);
+	 GLuint sampler = i915_emit_decl(p, REG_TYPE_S, unit, dim);
+	 GLuint texcoord = i915_emit_decl(p, REG_TYPE_T, unit, D0_CHANNEL_ALL);
+	 GLuint tmp = i915_get_temp( p );
+	 GLuint op = T0_TEXLD;
+
+	 if (p->VB->TexCoordPtr[unit]->size == 4)
+	    op = T0_TEXLDP;
+
+	 p->src_texture = i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, 
+					  sampler, texcoord, op );
+      }
+
+      return p->src_texture;
+
+      /* Crossbar (GL_TEXTUREn sources) is not handled; returns UREG_BAD: */
+   case GL_TEXTURE0:
+   case GL_TEXTURE1:
+   case GL_TEXTURE2:
+   case GL_TEXTURE3:
+   case GL_TEXTURE4:
+   case GL_TEXTURE5:
+   case GL_TEXTURE6:
+   case GL_TEXTURE7: {
+      return UREG_BAD;
+   }
+
+   case GL_CONSTANT:
+      return i915_emit_const4fv( p, p->ctx->Texture.Unit[unit].EnvColor );
+   case GL_PRIMARY_COLOR:
+      return i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
+   case GL_PREVIOUS:
+   default: 
+      i915_emit_decl(p, 
+		GET_UREG_TYPE(p->src_previous),
+		GET_UREG_NR(p->src_previous), D0_CHANNEL_ALL); 
+      return p->src_previous;
+   }
+}
+
+/* Fetch combiner source 'source' for 'unit' and apply GL operand
+ * 'operand' to it.  The ONE_MINUS variants emit "1 + (-x)" into a new
+ * temporary; anything unrecognised behaves like GL_SRC_COLOR.
+ */
+static GLuint emit_combine_source( struct i915_fragment_program *p, 
+				   GLuint mask,
+				   GLuint unit,
+				   GLenum source, 
+				   GLenum operand )
+{
+   const GLuint src = get_source(p, source, unit);
+
+   if (operand == GL_SRC_ALPHA) {
+      /* With a W-only destination mask the source is used unswizzled. */
+      if (mask == A0_DEST_CHANNEL_W)
+	 return src;
+      return swizzle( src, W, W, W, W );
+   }
+
+   if (operand == GL_ONE_MINUS_SRC_COLOR) {
+      /* tmp = 1.0 + src.-x-y-z-w */
+      const GLuint tmp = i915_get_temp( p );
+      return i915_emit_arith( p, A0_ADD, tmp, mask, 0,
+			      swizzle(src, ONE, ONE, ONE, ONE ),
+			      negate(src, 1,1,1,1), 0);
+   }
+
+   if (operand == GL_ONE_MINUS_SRC_ALPHA) {
+      /* tmp = 1.0 + src.-w-w-w-w */
+      const GLuint tmp = i915_get_temp( p );
+      return i915_emit_arith( p, A0_ADD, tmp, mask, 0,
+			      swizzle(src, ONE, ONE, ONE, ONE ),
+			      negate( swizzle(src,W,W,W,W), 1,1,1,1), 0);
+   }
+
+   /* GL_SRC_COLOR and anything else: use the source unmodified. */
+   return src;
+}
+
+
+
+/* Number of arguments a GL combine mode consumes (0 for a mode not
+ * listed here).
+ */
+static int nr_args( GLenum mode )
+{
+   switch (mode) {
+   case GL_REPLACE: return 1; 
+   case GL_INTERPOLATE: return 3;
+   case GL_MODULATE: case GL_ADD: case GL_ADD_SIGNED: case GL_SUBTRACT:
+   case GL_DOT3_RGB: case GL_DOT3_RGBA:
+   case GL_DOT3_RGB_EXT: case GL_DOT3_RGBA_EXT:
+      return 2;
+   default:
+      return 0;
+   }
+}
+
+
+/* Decide whether the alpha combiner arguments mirror the RGB ones, in
+ * which case the caller emits one instruction for all four channels.
+ * Only the first nr_args(ModeRGB) arguments are compared.
+ */
+static GLboolean args_match( struct gl_texture_unit *texUnit )
+{
+   const int count = nr_args(texUnit->Combine.ModeRGB);
+   int arg;
+
+   for (arg = 0 ; arg < count ; arg++) {
+      const GLenum opRGB = texUnit->Combine.OperandRGB[arg];
+      const GLenum opA = texUnit->Combine.OperandA[arg];
+
+      if (texUnit->Combine.SourceA[arg] != texUnit->Combine.SourceRGB[arg])
+	 return GL_FALSE;
+
+      /* The alpha operand must pair with an RGB operand of the same
+       * sense (plain or ONE_MINUS), in either its COLOR or ALPHA form.
+       */
+      if (opA == GL_SRC_ALPHA) {
+	 if (opRGB != GL_SRC_COLOR && opRGB != GL_SRC_ALPHA)
+	    return GL_FALSE;
+      }
+      else if (opA == GL_ONE_MINUS_SRC_ALPHA) {
+	 if (opRGB != GL_ONE_MINUS_SRC_COLOR && opRGB != GL_ONE_MINUS_SRC_ALPHA)
+	    return GL_FALSE;
+      }
+      else {
+	 return GL_FALSE;	/* impossible */
+      }
+   }
+
+   return GL_TRUE;
+}
+
+
+/* Emit the instruction(s) for one GL combine 'mode' into 'dest' under
+ * write 'mask' and return the ureg holding the result.  A mode unknown
+ * to nr_args() returns the previous stage's source unchanged.
+ */
+static GLuint emit_combine( struct i915_fragment_program *p,
+			    GLuint dest,
+			    GLuint mask,
+			    GLuint saturate,
+			    GLuint unit,
+			    GLenum mode,
+			    const GLenum *source,
+			    const GLenum *operand)
+{
+   GLuint tmp, src[3];
+   const int nr = nr_args(mode);
+   int i;
+
+   for (i = 0; i < nr; i++)
+      src[i] = emit_combine_source( p, mask, unit, source[i], operand[i] );
+
+   switch (mode) {
+   case GL_REPLACE: 
+      if (mask == A0_DEST_CHANNEL_ALL && !saturate)
+	 return src[0];
+      else
+	 return i915_emit_arith( p, A0_MOV, dest, mask, saturate, src[0], 0, 0 );
+   case GL_MODULATE: 
+      return i915_emit_arith( p, A0_MUL, dest, mask, saturate,
+			     src[0], src[1], 0 );
+   case GL_ADD: 
+      return i915_emit_arith( p, A0_ADD, dest, mask, saturate, 
+			     src[0], src[1], 0 );
+   case GL_ADD_SIGNED:
+      /* dest = arg0 + arg1, then dest = dest + -.5 */
+      tmp = i915_emit_const1f(p, .5);
+      tmp = negate(swizzle(tmp,X,X,X,X),1,1,1,1);
+      i915_emit_arith( p, A0_ADD, dest, mask, 0, src[0], src[1], 0 );
+      i915_emit_arith( p, A0_ADD, dest, mask, saturate, dest, tmp, 0 );
+      return dest;
+   case GL_INTERPOLATE:		/* TWO INSTRUCTIONS */
+      /* Arg0*Arg2 + Arg1*(1-Arg2) = Arg0*Arg2 + Arg1 - Arg1*Arg2:
+       * tmp = Arg0*Arg2 + Arg1,  result = (-Arg1)*Arg2 + tmp
+       */
+      tmp = i915_get_temp( p );
+      i915_emit_arith( p, A0_MAD, tmp, mask, 0, src[0], src[2], src[1] );
+      i915_emit_arith( p, A0_MAD, dest, mask, saturate, 
+		      negate(src[1], 1,1,1,1), src[2], tmp );
+      return dest;
+   case GL_SUBTRACT: 
+      /* negate src[1] */
+      return i915_emit_arith( p, A0_ADD, dest, mask, saturate, src[0],
+			 negate(src[1],1,1,1,1), 0 );
+
+   case GL_DOT3_RGBA:
+   case GL_DOT3_RGBA_EXT: 
+   case GL_DOT3_RGB_EXT:
+   case GL_DOT3_RGB: {
+      GLuint tmp0 = i915_get_temp( p );
+      GLuint tmp1 = i915_get_temp( p );
+      GLuint neg1 = negate(swizzle(i915_emit_const1f(p, 1),X,X,X,X), 1,1,1,1);
+      GLuint two = swizzle(i915_emit_const1f(p, 2),X,X,X,X);
+      i915_emit_arith( p, A0_MAD, tmp0, A0_DEST_CHANNEL_ALL, 0, 
+		      two, src[0], neg1);
+      if (src[0] == src[1])
+	 tmp1 = tmp0;
+      else
+	 i915_emit_arith( p, A0_MAD, tmp1, A0_DEST_CHANNEL_ALL, 0, 
+			 two, src[1], neg1);
+      i915_emit_arith( p, A0_DP3, dest, mask, saturate, tmp0, tmp1, 0);
+      return dest;
+   }
+
+   default: 
+      /* nr == 0 here, so src[] was never filled in; don't read it. */
+      return get_source( p, GL_PREVIOUS, unit );
+   }
+}
+
+/* Choose where stage 'unit' writes its result: the output colour register
+ * when this is the last texture stage and DD_SEPARATE_SPECULAR is not set,
+ * otherwise a fresh temporary. */
+static GLuint get_dest( struct i915_fragment_program *p, int unit )
+{
+   if (!(p->ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) &&
+       unit == p->last_tex_stage)
+      return UREG(REG_TYPE_OC, 0);
+
+   return i915_get_temp( p );
+}
+
+/* Emit the instructions for texture unit 'unit' according to its texenv
+ * mode and return the ureg holding the stage's result (the caller stores
+ * it in p->src_previous).
+ */
+static GLuint emit_texenv( struct i915_fragment_program *p, int unit )
+{
+   struct gl_texture_unit *texUnit = &p->ctx->Texture.Unit[unit];
+   GLenum envMode = texUnit->EnvMode;
+   struct gl_texture_object *tObj = texUnit->_Current;
+   GLenum format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat;
+   GLuint saturate = unit < p->last_tex_stage ? A0_DEST_SATURATE : 0;
+
+   switch(envMode) {
+   case GL_BLEND: {
+      const int cf = get_source(p, GL_PREVIOUS, unit);
+      const int cc = get_source(p, GL_CONSTANT, unit);
+      const int cs = get_source(p, GL_TEXTURE, unit);
+      const int out = get_dest(p, unit);
+
+      if (format == GL_INTENSITY) {
+	 /* cv = cf(1 - cs) + cc.cs
+	  * cv = cf - cf.cs + cc.cs
+	  */
+	 /* u[2] = MAD( -cf * cs + cf )
+	  * cv   = MAD( cc * cs + u[2] )
+	  */
+	 i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, 0, 
+			 negate(cf,1,1,1,1), cs, cf );
+
+	 i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, saturate, 
+			 cc, cs, out );
+
+	 return out;
+      } else {
+	 /* cv = cf(1 - cs) + cc.cs
+	  * cv = cf - cf.cs + cc.cs
+	  * av =      af.as
+	  */
+	 /* u[2] = MAD( cf.-x-y-zw * cs.xyzw + cf.xyz0 )
+	  * oC   = MAD( cc.xyz0 * cs.xyz0 + u[2].xyzw )
+	  */
+	 i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, 0,
+			 negate(cf,1,1,1,0),  
+			 cs,
+			 swizzle(cf,X,Y,Z,ZERO) );
+
+	 i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, saturate,
+			 swizzle(cc,X,Y,Z,ZERO),  
+			 swizzle(cs,X,Y,Z,ZERO),
+			 out );
+
+	 return out;
+      }
+   }
+
+   case GL_DECAL: {
+      if (format == GL_RGB ||
+	  format == GL_RGBA) {
+	 int cf = get_source( p, GL_PREVIOUS, unit );
+	 int cs = get_source( p, GL_TEXTURE, unit );
+	 int out = get_dest(p, unit);
+	 
+	 /* cv = cf(1-as) + cs.as
+	  * cv = cf.(-as) + cf + cs.as
+	  * av = af
+	  */ 
+	 /* u[2] = mad( cf.xyzw * cs.-w-w-w1 + cf.xyz0 )
+	  * oc = mad( cs.xyz0 * cs.www0 + u[2].xyzw )
+	  */
+	 i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, 0,
+			 cf,  
+			 negate(swizzle(cs,W,W,W,ONE),1,1,1,0),
+			 swizzle(cf,X,Y,Z,ZERO) );
+	 
+	 i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, saturate,
+			 swizzle(cs,X,Y,Z,ZERO),  
+			 swizzle(cs,W,W,W,ZERO),
+			 out );
+	 return out;
+      }
+      else {
+	 return get_source( p, GL_PREVIOUS, unit );
+      }
+   }
+
+   case GL_REPLACE: {
+      const int cs = get_source( p, GL_TEXTURE, unit );	/* saturated */
+      switch (format) {
+      case GL_ALPHA: {
+	 const int cf = get_source( p, GL_PREVIOUS, unit ); /* saturated */
+	 i915_emit_arith( p, A0_MOV, cs, A0_DEST_CHANNEL_XYZ, 0, cf, 0, 0 );
+	 return cs;
+      }
+      case GL_RGB:
+      case GL_LUMINANCE: {
+	 const int cf = get_source( p, GL_PREVIOUS, unit ); /* saturated */
+	 i915_emit_arith( p, A0_MOV, cs, A0_DEST_CHANNEL_W, 0, cf, 0, 0 );
+	 return cs;
+      }
+      default:
+	 return cs;
+      }
+   }
+
+   case GL_MODULATE: {
+      const int cf = get_source( p, GL_PREVIOUS, unit );
+      const int cs = get_source( p, GL_TEXTURE, unit );
+      const int out = get_dest(p, unit);
+      switch (format) {
+      case GL_ALPHA: 
+	 i915_emit_arith( p, A0_MUL, out, A0_DEST_CHANNEL_ALL, saturate,
+			 swizzle(cs, ONE, ONE, ONE, W), cf, 0 );
+	 break;
+      default:
+	 i915_emit_arith( p, A0_MUL, out, A0_DEST_CHANNEL_ALL, saturate, 
+			 cs, cf, 0 );
+	 break;
+      }
+      return out;
+   }
+   case GL_ADD: {
+      int cf = get_source( p, GL_PREVIOUS, unit );
+      int cs = get_source( p, GL_TEXTURE, unit );
+      const int out = get_dest( p, unit );
+
+      if (format == GL_INTENSITY) {
+	 /* output-color.rgba = add( incoming, u[1] )
+	  */
+	 i915_emit_arith( p, A0_ADD, out, A0_DEST_CHANNEL_ALL, saturate, 
+			 cs, cf, 0 );
+	 return out;
+      }
+      else {
+	 /* cv.xyz = cf.xyz + cs.xyz
+	  * cv.w   = cf.w * cs.w
+	  *
+	  * cv.xyzw = MAD( cf.111w * cs.xyzw + cf.xyz0 )
+	  */
+ 	 i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, saturate,
+			 swizzle(cf,ONE,ONE,ONE,W), 
+			 cs,  
+			 swizzle(cf,X,Y,Z,ZERO) ); 
+	 return out;
+      }
+      break;	/* not reached: both branches above return */
+   }
+   case GL_COMBINE: {
+      GLuint rgb_shift, alpha_shift, out, shift;
+      GLuint dest = get_dest(p, unit);
+
+      /* The EXT version of the DOT3 extension does not support the
+       * scale factor, but the ARB version (and the version in OpenGL
+       * 1.3) does.
+       */
+      switch (texUnit->Combine.ModeRGB) {
+      case GL_DOT3_RGB_EXT:
+	 alpha_shift = texUnit->Combine.ScaleShiftA;
+	 rgb_shift = 0;
+	 break;
+
+      case GL_DOT3_RGBA_EXT:
+	 alpha_shift = 0;
+	 rgb_shift = 0;
+	 break;
+
+      default:
+	 rgb_shift = texUnit->Combine.ScaleShiftRGB;
+	 alpha_shift = texUnit->Combine.ScaleShiftA;
+	 break;
+      }
+
+      /* Emit the RGB and A combine ops
+       */
+      if (texUnit->Combine.ModeRGB == texUnit->Combine.ModeA && 
+	  args_match( texUnit )) {
+	 out = emit_combine( p, dest, A0_DEST_CHANNEL_ALL, saturate,
+			     unit,
+			     texUnit->Combine.ModeRGB,
+			     texUnit->Combine.SourceRGB,
+			     texUnit->Combine.OperandRGB );
+      }
+      else if (texUnit->Combine.ModeRGB == GL_DOT3_RGBA_EXT ||
+	       texUnit->Combine.ModeRGB == GL_DOT3_RGBA) {
+
+	 out = emit_combine( p, dest, A0_DEST_CHANNEL_ALL, saturate,
+			     unit,
+			     texUnit->Combine.ModeRGB,
+			     texUnit->Combine.SourceRGB,
+			     texUnit->Combine.OperandRGB );
+      }
+      else {
+	 /* Need to do something to stop from re-emitting identical
+	  * argument calculations here:
+	  */
+	 out = emit_combine( p, dest, A0_DEST_CHANNEL_XYZ, saturate,
+			     unit,
+			     texUnit->Combine.ModeRGB,
+			     texUnit->Combine.SourceRGB,
+			     texUnit->Combine.OperandRGB );
+	 out = emit_combine( p, dest, A0_DEST_CHANNEL_W, saturate,
+			     unit,
+			     texUnit->Combine.ModeA,
+			     texUnit->Combine.SourceA,
+			     texUnit->Combine.OperandA );
+      }
+
+      /* Deal with the final shift:
+       */
+      if (alpha_shift || rgb_shift) {
+	 if (rgb_shift == alpha_shift) {
+	    shift = i915_emit_const1f(p, 1<<rgb_shift);
+	    shift = swizzle(shift,X,X,X,X);
+	 }
+	 else {
+	    shift = i915_emit_const2f(p, 1<<rgb_shift, 1<<alpha_shift);
+	    shift = swizzle(shift,X,X,X,Y);
+	 }
+	 return i915_emit_arith( p, A0_MUL, dest, A0_DEST_CHANNEL_ALL, 
+				saturate, out, shift, 0 );
+      }
+
+      return out;
+   }
+
+   default:
+      return get_source(p, GL_PREVIOUS, 0);	/* other env modes: previous source */
+   }
+}
+
+/* Finish the program: write the last stage's result to the output colour
+ * register, adding specular first when DD_SEPARATE_SPECULAR is set. */
+static void emit_program_fini( struct i915_fragment_program *p )
+{
+   const GLuint prev = get_source( p, GL_PREVIOUS, 0 );
+   const GLuint oc = UREG( REG_TYPE_OC, 0 );
+
+   if (p->ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) {
+      /* Emit specular add. */
+      const GLuint spec = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_ALL);
+      i915_emit_arith( p, A0_ADD, oc, A0_DEST_CHANNEL_ALL, 0, prev, 
+		       swizzle(spec, X,Y,Z,ZERO), 0 );
+      return;
+   }
+
+   /* Reached with no texture enabled and in a few other cases
+    * (GL_REPLACE for instance). */
+   if (prev != oc)
+      i915_emit_arith( p, A0_MOV, oc, A0_DEST_CHANNEL_ALL, 0, prev, 0, 0 );
+}
+
+/* Rebuild and upload the texenv fragment program: one emit_texenv() stage
+ * per enabled texture unit, followed by emit_program_fini(). */
+static void i915EmitTextureProgram( i915ContextPtr i915 )
+{
+   struct i915_fragment_program *p = &i915->tex_program;
+   GLcontext *ctx = &i915->intel.ctx;
+   GLuint u;
+
+   if (0) fprintf(stderr, "%s\n", __FUNCTION__);
+   i915_init_program( i915, p );
+
+   if (ctx->Texture._EnabledUnits) {
+      /* get_dest() compares against the last enabled stage, so find it first. */
+      for (u = 0 ; u < ctx->Const.MaxTextureUnits ; u++)
+	 if (ctx->Texture.Unit[u]._ReallyEnabled)
+	    p->last_tex_stage = u;
+
+      for (u = 0 ; u < ctx->Const.MaxTextureUnits ; u++) {
+	 if (!ctx->Texture.Unit[u]._ReallyEnabled)
+	    continue;
+	 p->src_previous = emit_texenv( p, u );
+	 p->src_texture = UREG_BAD;
+	 p->temp_flag = 0xffff000;
+	 p->temp_flag |= 1 << GET_UREG_NR(p->src_previous);
+      }
+   }
+
+   emit_program_fini( p );
+   i915_fini_program( p );
+   i915_upload_program( i915, p );
+
+   p->translated = 1;
+}
+
+/* Choose the hardware vertex layout for the current TNL render inputs,
+ * reinstall the vertex attributes when the LIS2/LIS4 format words change,
+ * and re-emit the texenv program when it is stale or the set of enabled
+ * texture units changed.
+ */
+void i915ValidateTextureProgram( i915ContextPtr i915 )
+{
+   intelContextPtr intel = &i915->intel;
+   GLcontext *ctx = &intel->ctx;
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   DECLARE_RENDERINPUTS(index_bitset);
+   int i, offset;
+   GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
+   GLuint s2 = S2_TEXCOORD_NONE;
+
+   RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset );
+
+   /* Important:
+    */
+   VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
+   intel->vertex_attr_count = 0;
+   intel->coloroffset = 0;
+   intel->specoffset = 0;
+   offset = 0;
+
+   if (i915->current_program) {
+      i915->current_program->on_hardware = 0;
+      i915->current_program->params_uptodate = 0;
+   }
+   /* NOTE(review): EMIT_ATTR/EMIT_PAD are defined elsewhere and appear to rely on the locals offset, s4 and intel - confirm. */
+   if (i915->vertex_fog == I915_FOG_PIXEL) {
+      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16 );
+      RENDERINPUTS_CLEAR( index_bitset, _TNL_ATTRIB_FOG );
+   }
+   else if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
+      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16 );
+   }
+   else {
+      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12 );
+   }
+
+   /* How undefined is undefined? */
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) {
+      EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F, S4_VFMT_POINT_WIDTH, 4 );
+   }
+      
+   intel->coloroffset = offset / 4;
+   EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4 );
+            
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 ) ||
+       RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) {
+      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
+	 intel->specoffset = offset / 4;
+	 EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, S4_VFMT_SPEC_FOG, 3 );
+      } else 
+	 EMIT_PAD( 3 );
+      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG ))
+	 EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, S4_VFMT_SPEC_FOG, 1 );
+      else
+	 EMIT_PAD( 1 );
+   }
+
+   if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
+      for (i = 0; i < 8; i++) {
+	 if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) {
+	    int sz = VB->TexCoordPtr[i]->size;
+	    s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
+	    s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
+
+	    EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_SZ(sz), 0, sz * 4 );
+	 }
+      }
+   }
+
+   /* Only need to change the vertex emit code if there has been a
+    * statechange to a new hardware vertex format:
+    */
+   if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] ||
+       s4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
+      I915_STATECHANGE( i915, I915_UPLOAD_CTX );
+
+      i915->tex_program.translated = 0;
+
+      /* Must do this *after* statechange, so as not to affect
+       * buffered vertices reliant on the old state:
+       */
+      intel->vertex_size = _tnl_install_attrs( ctx, 
+					       intel->vertex_attrs, 
+					       intel->vertex_attr_count,
+					       intel->ViewportMatrix.m, 0 ); 
+      intel->vertex_size >>= 2;
+      i915->state.Ctx[I915_CTXREG_LIS2] = s2;
+      i915->state.Ctx[I915_CTXREG_LIS4] = s4;
+      /* NOTE(review): this check lives inside assert() and is compiled out under NDEBUG. */
+      assert(intel->vtbl.check_vertex_size( intel, intel->vertex_size ));
+   }
+
+   if (!i915->tex_program.translated ||
+       i915->last_ReallyEnabled != ctx->Texture._EnabledUnits) {
+      i915EmitTextureProgram( i915 );      
+      i915->last_ReallyEnabled = ctx->Texture._EnabledUnits;
+   }
+}
diff --git a/i915/i915_texstate.c b/i915/i915_texstate.c
new file mode 100644
index 0000000..a19d4b6
--- /dev/null
+++ b/i915/i915_texstate.c
@@ -0,0 +1,926 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "macros.h"
+#include "mtypes.h"
+#include "simple_list.h"
+#include "enums.h"
+#include "texformat.h"
+#include "texstore.h"
+
+#include "mm.h"
+
+#include "intel_screen.h"
+#include "intel_ioctl.h"
+#include "intel_tex.h"
+
+#include "i915_context.h"
+#include "i915_reg.h"
+
+static GLint initial_offsets[6][2] = { {0,0},	/* indexed by cube face: {x,y} of the  */
+				       {0,2},	/* first level, in units of the base   */
+				       {1,0},	/* face dimension (scaled by 'dim' in  */
+				       {1,2},	/* the cube-map layout loops)          */
+				       {1,1},
+				       {1,3} };
+
+/* Indexed by cube face: {x,y} step added after a level, scaled by the next level's size. */
+static GLint step_offsets[6][2] = { {0,2},
+				    {0,2},
+				    {-1,2},
+				    {-1,2},
+				    {-1,1},
+				    {-1,1} };
+
+/* Single-bit mask for texture unit 'unit'; the argument is parenthesized so expressions expand safely. */
+#define I915_TEX_UNIT_ENABLED(unit)		(1<<(unit))
+
+static void i915LayoutTextureImages( i915ContextPtr i915,
+				     struct gl_texture_object *tObj )
+{
+   const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
+   i915TextureObjectPtr t = (i915TextureObjectPtr) tObj->DriverData;
+   GLint firstLevel, lastLevel, numLevels;
+   GLint i, total_height, pitch;
+
+   /* Compute which mipmap levels we really want to send to the hardware.
+    */
+   driCalculateTextureFirstLastLevel( (driTextureObject *) t );
+
+   /* Figure out the amount of memory required to hold all the mipmap
+    * levels.  Choose the smallest pitch to accommodate the largest
+    * mipmap:
+    */
+   firstLevel = t->intel.base.firstLevel;
+   lastLevel = t->intel.base.lastLevel;
+   numLevels = lastLevel - firstLevel + 1;
+   /* Each case below records t->intel.image[face][level].offset and leaves
+    * 'pitch' and 'total_height' set; both are stored in 't' at the end. */
+
+   /* All images must be loaded at this pitch.  Count the number of
+    * lines required:
+    */
+   switch (tObj->Target) {
+   case GL_TEXTURE_CUBE_MAP: {
+      const GLuint dim = tObj->Image[0][firstLevel]->Width;
+      GLuint face;
+
+      pitch = dim * t->intel.texelBytes;
+      pitch *= 2;		/* double pitch for cube layouts */
+      pitch = (pitch + 3) & ~3;
+      /* Faces are placed on a grid 2 base faces wide and 4 base faces tall. */
+      total_height = dim * 4;
+
+      for ( face = 0 ; face < 6 ; face++) {
+	 GLuint x = initial_offsets[face][0] * dim;
+	 GLuint y = initial_offsets[face][1] * dim;
+	 GLuint d = dim;
+	 
+	 t->intel.base.dirty_images[face] = ~0;
+
+	 assert(tObj->Image[face][firstLevel]->Width == dim);
+	 assert(tObj->Image[face][firstLevel]->Height == dim);
+
+	 for (i = 0; i < numLevels; i++) {
+	    t->intel.image[face][i].image = tObj->Image[face][firstLevel + i];
+	    if (!t->intel.image[face][i].image) {
+	       fprintf(stderr, "no image %d %d\n", face, i);
+	       break;		/* can't happen */
+	    }
+	 
+	    t->intel.image[face][i].offset = 
+	       y * pitch + x * t->intel.texelBytes;
+	    t->intel.image[face][i].internalFormat = baseImage->_BaseFormat;
+
+	    d >>= 1;
+	    x += step_offsets[face][0] * d;
+	    y += step_offsets[face][1] * d;
+	 }
+      }
+      break;
+   }
+   case GL_TEXTURE_3D: {
+      GLuint virtual_height;
+      GLuint tmp_numLevels = numLevels;
+      pitch = tObj->Image[0][firstLevel]->Width * t->intel.texelBytes;
+      pitch = (pitch + 3) & ~3;
+      t->intel.base.dirty_images[0] = ~0;
+
+      /* Calculate the size of a single slice.  Hardware demands a
+       * minimum of 8 mipmaps, some of which might ultimately not be
+       * used.  NOTE(review): the code below pads to 9 levels, not 8,
+       * and indexes Image[0][firstLevel + i] up to i == 8 -- confirm. */
+      if (tmp_numLevels < 9)
+	 tmp_numLevels = 9;
+
+      virtual_height = tObj->Image[0][firstLevel]->Height;
+
+      for ( total_height = i = 0 ; i < tmp_numLevels ; i++ ) {
+	 t->intel.image[0][i].image = tObj->Image[0][firstLevel + i];
+	 if (t->intel.image[0][i].image) {
+	    t->intel.image[0][i].offset = total_height * pitch;
+	    t->intel.image[0][i].internalFormat = baseImage->_BaseFormat;
+	 }
+
+	 total_height += MAX2(2, virtual_height);
+	 virtual_height >>= 1;
+      }
+
+      t->intel.depth_pitch = total_height * pitch;
+
+      /* Multiply slice size by texture depth for total size.  It's
+       * remarkable how wasteful of memory all the i8x0 texture
+       * layouts are.
+       */
+      total_height *= t->intel.image[0][0].image->Depth;
+      break;
+   }
+   default:
+      pitch = tObj->Image[0][firstLevel]->Width * t->intel.texelBytes;
+      pitch = (pitch + 3) & ~3;
+      t->intel.base.dirty_images[0] = ~0;
+
+      for ( total_height = i = 0 ; i < numLevels ; i++ ) {
+	 t->intel.image[0][i].image = tObj->Image[0][firstLevel + i];
+	 if (!t->intel.image[0][i].image) 
+	    break;
+	 /* Levels are stacked one below the other at the common pitch. */
+	 t->intel.image[0][i].offset = total_height * pitch;
+	 t->intel.image[0][i].internalFormat = baseImage->_BaseFormat;
+	 if (t->intel.image[0][i].image->IsCompressed) {
+	    total_height += (t->intel.image[0][i].image->Height + 3) / 4;
+	 }
+	 else
+	   total_height += MAX2(2, t->intel.image[0][i].image->Height);
+      }
+      break;
+   }
+
+   t->intel.Pitch = pitch;
+   t->intel.base.totalSize = total_height*pitch;
+   t->intel.max_level = numLevels-1;
+}
+
+
+/* i945-style mipmap layout: records t->intel.image[face][level].offset for
+ * every level, then stores the resulting pitch, total size and max level
+ * in 't'.  Cube maps keep their smallest levels in an extra 4-row strip. */
+static void i945LayoutTextureImages( i915ContextPtr i915,
+				    struct gl_texture_object *tObj )
+{
+   const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
+   i915TextureObjectPtr t = (i915TextureObjectPtr) tObj->DriverData;
+   GLint firstLevel, lastLevel, numLevels;
+   GLint i, total_height, pitch, sz, max_offset = 0, offset;
+
+   /* Compute which mipmap levels we really want to send to the hardware.
+    */
+   driCalculateTextureFirstLastLevel( (driTextureObject *) t );
+
+   /* Figure out the amount of memory required to hold all the mipmap
+    * levels.  Choose the smallest pitch to accommodate the largest
+    * mipmap:
+    */
+   firstLevel = t->intel.base.firstLevel;
+   lastLevel = t->intel.base.lastLevel;
+   numLevels = lastLevel - firstLevel + 1;
+
+   /* All images must be loaded at this pitch.  Count the number of
+    * lines required:
+    */
+   switch (tObj->Target) {
+   case GL_TEXTURE_CUBE_MAP: {
+      const GLuint dim = tObj->Image[0][firstLevel]->Width;
+      GLuint face;
+
+      /* Depending on the size of the largest images, pitch can be
+       * determined either by the old-style packing of cubemap faces,
+       * or the final row of 4x4, 2x2 and 1x1 faces below this. 
+       */
+      if (dim > 32) {
+	 pitch = dim * t->intel.texelBytes;
+	 pitch *= 2;		/* double pitch for cube layouts */
+	 pitch = (pitch + 3) & ~3;
+      }
+      else {
+	 pitch = 14 * 8 * t->intel.texelBytes; /* determined by row of
+						* little maps at
+						* bottom */
+      }
+      
+      total_height = dim * 4 + 4;
+
+      for ( face = 0 ; face < 6 ; face++) {
+	 GLuint x = initial_offsets[face][0] * dim;
+	 GLuint y = initial_offsets[face][1] * dim;
+	 GLuint d = dim;
+	 
+	 if (dim == 4 && face >= 4) {
+	    y = total_height - 4;
+	    x = (face - 4) * 8;
+	 }
+	 else if (dim < 4) {
+	    y = total_height - 4;
+	    x = face * 8;
+	 }
+
+	 t->intel.base.dirty_images[face] = ~0;
+
+	 assert(tObj->Image[face][firstLevel]->Width == dim);
+	 assert(tObj->Image[face][firstLevel]->Height == dim);
+
+	 for (i = 0; i < numLevels; i++) {
+
+	    t->intel.image[face][i].image = tObj->Image[face][firstLevel + i];
+	    assert(t->intel.image[face][i].image);
+	 
+	    t->intel.image[face][i].offset = 
+	       y * pitch + x * t->intel.texelBytes;
+	    t->intel.image[face][i].internalFormat = baseImage->_BaseFormat;
+
+	    d >>= 1;
+	    
+	    switch (d) {
+	    case 4:
+	       switch (face) {
+	       case FACE_POS_X:
+	       case FACE_NEG_X:
+		  x += step_offsets[face][0] * d;
+		  y += step_offsets[face][1] * d;
+		  break;
+	       case FACE_POS_Y:
+	       case FACE_NEG_Y:
+		  y += 12;
+		  x -= 8;
+		  break;
+	       case FACE_POS_Z:
+	       case FACE_NEG_Z:
+		  y = total_height - 4;
+		  x = (face - 4) * 8;
+		  break;
+	       }
+	       break;		/* keep the 4x4 placement; don't fall into the 2x2 case */
+	    case 2:
+	       y = total_height - 4;
+	       x = 16 + face * 8;
+	       break;
+
+	    case 1:
+	       x += 48;
+	       break;
+	       
+	    default:
+	       x += step_offsets[face][0] * d;
+	       y += step_offsets[face][1] * d;
+	       break;
+	    }
+	 }
+      }
+      max_offset = total_height * pitch;
+      break;
+   }
+   case GL_TEXTURE_3D: {
+      GLuint depth_packing = 0, depth_pack_pitch;
+      GLuint tmp_numLevels = numLevels;
+      pitch = tObj->Image[0][firstLevel]->Width * t->intel.texelBytes;
+      pitch = (pitch + 3) & ~3;
+      depth_pack_pitch = pitch;
+      
+      t->intel.base.dirty_images[0] = ~0;
+
+
+      for ( total_height = i = 0 ; i < tmp_numLevels ; i++ ) {
+	 t->intel.image[0][i].image = tObj->Image[0][firstLevel + i];
+	 if (!t->intel.image[0][i].image) 
+	    break;
+
+	 
+	 t->intel.image[0][i].offset = total_height * pitch;
+	 t->intel.image[0][i].internalFormat = baseImage->_BaseFormat;
+	 
+
+
+	 total_height += MAX2(2, t->intel.image[0][i].image->Height) * 
+	    MAX2((t->intel.image[0][i].image->Depth >> depth_packing), 1);
+
+	 /* When alignment dominates, can't increase depth packing?
+	  * Or does pitch grow???  What are the alignment constraints,
+	  * anyway?
+	  */
+	 if (depth_pack_pitch > 4) {
+	    depth_packing++;
+	    depth_pack_pitch <<= 2;
+	 }
+      }
+
+      max_offset = total_height * pitch;
+      break;
+   }
+   default:
+      pitch = tObj->Image[0][firstLevel]->Width * t->intel.texelBytes;
+      pitch = (pitch + 3) & ~3;
+      t->intel.base.dirty_images[0] = ~0;
+      max_offset = 0;
+
+      for ( offset = i = 0 ; i < numLevels ; i++ ) {
+	 t->intel.image[0][i].image = tObj->Image[0][firstLevel + i];
+	 if (!t->intel.image[0][i].image) 
+	    break;
+	 
+	 t->intel.image[0][i].offset = offset;
+	 t->intel.image[0][i].internalFormat = baseImage->_BaseFormat;
+
+	 if (t->intel.image[0][i].image->IsCompressed)
+	    sz = MAX2(1, t->intel.image[0][i].image->Height/4) * pitch;
+	 else
+	    sz = MAX2(2, t->intel.image[0][i].image->Height) * pitch;
+	 
+	 /* Because the images are packed better, the final offset
+	  * might not be the maximal one:
+	  */
+	 max_offset = MAX2(max_offset, offset + sz);
+
+	 /* LPT change: step right after second mipmap.
+	  */
+	 if (i == 1) 
+	    offset += pitch / 2;
+	 else 
+	    offset += sz;
+
+      }
+      break;
+   }
+
+   t->intel.Pitch = pitch;
+   t->intel.base.totalSize = max_offset;
+   t->intel.max_level = numLevels-1;
+}
+
+/* Pick the hardware surface format and bytes-per-texel for the base image,
+ * run the mipmap layout matching the screen's deviceID, and precompute the
+ * MS3/MS4/SS2 words in t->Setup[].  Calls abort() on an unhandled MesaFormat. */
+static void i915SetTexImages( i915ContextPtr i915, 
+			     struct gl_texture_object *tObj )
+{
+   GLuint textureFormat;
+   i915TextureObjectPtr t = (i915TextureObjectPtr) tObj->DriverData;
+   const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
+   GLint ss2 = 0;
+
+   switch( baseImage->TexFormat->MesaFormat ) {
+   case MESA_FORMAT_L8:
+      t->intel.texelBytes = 1;
+      textureFormat = MAPSURF_8BIT | MT_8BIT_L8;
+      break;
+
+   case MESA_FORMAT_I8:
+      t->intel.texelBytes = 1;
+      textureFormat = MAPSURF_8BIT | MT_8BIT_I8;
+      break;
+
+   case MESA_FORMAT_A8:
+      t->intel.texelBytes = 1;
+      textureFormat = MAPSURF_8BIT | MT_8BIT_A8; 
+      break;
+
+   case MESA_FORMAT_AL88:
+      t->intel.texelBytes = 2;
+      textureFormat = MAPSURF_16BIT | MT_16BIT_AY88;
+      break;
+
+   case MESA_FORMAT_RGB565:
+      t->intel.texelBytes = 2;
+      textureFormat = MAPSURF_16BIT | MT_16BIT_RGB565;
+      break;
+
+   case MESA_FORMAT_ARGB1555:
+      t->intel.texelBytes = 2;
+      textureFormat = MAPSURF_16BIT | MT_16BIT_ARGB1555;
+      break;
+
+   case MESA_FORMAT_ARGB4444:
+      t->intel.texelBytes = 2;
+      textureFormat = MAPSURF_16BIT | MT_16BIT_ARGB4444;
+      break;
+
+   case MESA_FORMAT_ARGB8888:
+      t->intel.texelBytes = 4;
+      textureFormat = MAPSURF_32BIT | MT_32BIT_ARGB8888;
+      break;
+
+   case MESA_FORMAT_YCBCR_REV:
+      t->intel.texelBytes = 2;
+      textureFormat = (MAPSURF_422 | MT_422_YCRCB_NORMAL);
+      ss2 |= SS2_COLORSPACE_CONVERSION;
+      break;
+
+   case MESA_FORMAT_YCBCR:
+      t->intel.texelBytes = 2;
+      textureFormat = (MAPSURF_422 | MT_422_YCRCB_SWAPY);
+      ss2 |= SS2_COLORSPACE_CONVERSION;
+      break;
+
+   case MESA_FORMAT_RGB_FXT1:
+   case MESA_FORMAT_RGBA_FXT1:
+     t->intel.texelBytes = 2;
+     textureFormat = (MAPSURF_COMPRESSED | MT_COMPRESS_FXT1);
+     break;
+
+   case MESA_FORMAT_Z16:
+      t->intel.texelBytes = 2;
+      textureFormat = (MAPSURF_16BIT | MT_16BIT_L16);
+      break;
+
+   case MESA_FORMAT_RGBA_DXT1:
+   case MESA_FORMAT_RGB_DXT1:
+     /* 
+      * DXTn pitches are Width/4 * blocksize in bytes 
+      * for DXT1: blocksize=8 so Width/4*8 = Width * 2 
+      * for DXT3/5: blocksize=16 so Width/4*16 = Width * 4
+      */
+     t->intel.texelBytes = 2;
+     textureFormat = (MAPSURF_COMPRESSED | MT_COMPRESS_DXT1);
+     break;
+
+   case MESA_FORMAT_RGBA_DXT3:
+     t->intel.texelBytes = 4;
+     textureFormat = (MAPSURF_COMPRESSED | MT_COMPRESS_DXT2_3);
+     break;
+
+   case MESA_FORMAT_RGBA_DXT5:
+     t->intel.texelBytes = 4;
+     textureFormat = (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5);
+     break;
+
+#if 0
+   case MESA_FORMAT_Z24_S8:
+      t->intel.texelBytes = 4;
+      textureFormat = (MAPSURF_32BIT | MT_32BIT_xL824);
+      break;
+#endif
+
+   default:
+      fprintf(stderr, "%s: bad image format %x\n", __FUNCTION__,
+	      baseImage->TexFormat->MesaFormat);
+      abort();
+   }
+   /* The listed device IDs use the i945 layout; all others the i915 one. */
+   switch (i915->intel.intelScreen->deviceID) {
+   case PCI_CHIP_I945_G:
+   case PCI_CHIP_I945_GM:
+   case PCI_CHIP_I945_GME:
+   case PCI_CHIP_G33_G:
+   case PCI_CHIP_Q33_G:
+   case PCI_CHIP_Q35_G:
+       i945LayoutTextureImages( i915, tObj );
+       break;
+   default:
+       i915LayoutTextureImages( i915, tObj );
+       break;
+   }
+   /* MS3: (height - 1) and (width - 1) of the first level, plus format bits. */
+   t->Setup[I915_TEXREG_MS3] = 
+      (((tObj->Image[0][t->intel.base.firstLevel]->Height - 1) << MS3_HEIGHT_SHIFT) |
+       ((tObj->Image[0][t->intel.base.firstLevel]->Width - 1) << MS3_WIDTH_SHIFT) |
+       textureFormat |
+       MS3_USE_FENCE_REGS);
+   /* MS4: (Pitch / 4) - 1, max_level * 4 as the max LOD, and (depth - 1). */
+   t->Setup[I915_TEXREG_MS4] = 
+      ((((t->intel.Pitch / 4) - 1) << MS4_PITCH_SHIFT) | 
+       MS4_CUBE_FACE_ENA_MASK |
+       (((t->intel.max_level * 4)) << MS4_MAX_LOD_SHIFT) |
+       ((tObj->Image[0][t->intel.base.firstLevel]->Depth - 1) << MS4_VOLUME_DEPTH_SHIFT));
+   /* Of SS2, only the colorspace-conversion bit is updated here. */
+   t->Setup[I915_TEXREG_SS2] &= ~(SS2_COLORSPACE_CONVERSION);
+   t->Setup[I915_TEXREG_SS2] |= ss2;
+
+   t->intel.dirty = I915_UPLOAD_TEX_ALL;
+
+}
+
+
+/* The i915 (and related graphics cores) do not support GL_CLAMP.  The
+ * Intel drivers for "other operating systems" implement GL_CLAMP as
+ * GL_CLAMP_TO_EDGE, so the same is done here.
+ */
+static GLuint translate_wrap_mode( GLenum wrap )
+{
+   /* GL_CLAMP is approximated by CLAMP_EDGE; unknown modes act as GL_REPEAT. */
+   if (wrap == GL_CLAMP || wrap == GL_CLAMP_TO_EDGE)
+      return TEXCOORDMODE_CLAMP_EDGE;
+   if (wrap == GL_CLAMP_TO_BORDER)
+      return TEXCOORDMODE_CLAMP_BORDER;
+   if (wrap == GL_MIRRORED_REPEAT)
+      return TEXCOORDMODE_MIRROR;
+   return TEXCOORDMODE_WRAP;
+}
+
+
+/** Recompute the cached sampler words t->Setup[SS2/SS3/SS4] from the GL
+ * texture object's filter, anisotropy, shadow, wrap and border settings. */
+static void i915ImportTexObjState( struct gl_texture_object *texObj )
+{   
+   i915TextureObjectPtr t = (i915TextureObjectPtr)texObj->DriverData;
+   int minFilt = 0, mipFilt = 0, magFilt = 0, shadow = 0;
+
+   if(INTEL_DEBUG&DEBUG_DRI)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   switch (texObj->MinFilter) {
+   case GL_NEAREST:
+      minFilt = FILTER_NEAREST;
+      mipFilt = MIPFILTER_NONE;
+      break;
+   case GL_LINEAR:
+      minFilt = FILTER_LINEAR;
+      mipFilt = MIPFILTER_NONE;
+      break;
+   case GL_NEAREST_MIPMAP_NEAREST:
+      minFilt = FILTER_NEAREST;
+      mipFilt = MIPFILTER_NEAREST;
+      break;
+   case GL_LINEAR_MIPMAP_NEAREST:
+      minFilt = FILTER_LINEAR;
+      mipFilt = MIPFILTER_NEAREST;
+      break;
+   case GL_NEAREST_MIPMAP_LINEAR:
+      minFilt = FILTER_NEAREST;
+      mipFilt = MIPFILTER_LINEAR;
+      break;
+   case GL_LINEAR_MIPMAP_LINEAR:
+      minFilt = FILTER_LINEAR;
+      mipFilt = MIPFILTER_LINEAR;
+      break;
+   default:
+      break;
+   }
+   /* Anisotropy overrides min and mag filters; else translate MagFilter. */
+   if ( texObj->MaxAnisotropy > 1.0 ) {
+      minFilt = FILTER_ANISOTROPIC; 
+      magFilt = FILTER_ANISOTROPIC;
+   }
+   else {
+      switch (texObj->MagFilter) {
+      case GL_NEAREST:
+	 magFilt = FILTER_NEAREST;
+	 break;
+      case GL_LINEAR:
+	 magFilt = FILTER_LINEAR;
+	 break;
+      default:
+	 break;
+      }  
+   }
+   /* Shadow compare (non-3D targets only) forces the 4x4 flat filter. */
+   if (texObj->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB && 
+       texObj->Target != GL_TEXTURE_3D) {
+
+      shadow = SS2_SHADOW_ENABLE;
+      shadow |= intel_translate_compare_func( texObj->CompareFunc );
+      
+      minFilt = FILTER_4X4_FLAT;
+      magFilt = FILTER_4X4_FLAT;
+   }
+
+   /* Replace only the filter and shadow fields of SS2; other bits are kept. */
+   t->Setup[I915_TEXREG_SS2] &= ~(SS2_MIN_FILTER_MASK |
+				 SS2_MIP_FILTER_MASK |
+				 SS2_MAG_FILTER_MASK |
+				 SS2_SHADOW_ENABLE |
+				 SS2_SHADOW_FUNC_MASK);
+   t->Setup[I915_TEXREG_SS2] |= ((minFilt << SS2_MIN_FILTER_SHIFT) |
+				(mipFilt << SS2_MIP_FILTER_SHIFT) |
+				(magFilt << SS2_MAG_FILTER_SHIFT) |
+				shadow);
+
+   {
+      GLuint ss3 = t->Setup[I915_TEXREG_SS3] & ~(SS3_TCX_ADDR_MODE_MASK |
+						SS3_TCY_ADDR_MODE_MASK |
+						SS3_TCZ_ADDR_MODE_MASK);
+      GLenum ws = texObj->WrapS;
+      GLenum wt = texObj->WrapT;
+      GLenum wr = texObj->WrapR;
+      
+      t->refs_border_color = 0;
+
+      if (texObj->Target == GL_TEXTURE_3D &&
+	  (texObj->MinFilter != GL_NEAREST ||
+	   texObj->MagFilter != GL_NEAREST)) {
+	 
+	 /* Try to mimic GL_CLAMP functionality a little better -
+	  * switch to CLAMP_TO_BORDER whenever a non-NEAREST filter is
+	  * in use.  Only do this for 3D textures at the moment --
+	  * doing it universally would fix the conform texbc.c
+	  * failure, though.
+	  */
+	 if (ws == GL_CLAMP) ws = GL_CLAMP_TO_BORDER;
+	 if (wt == GL_CLAMP) wt = GL_CLAMP_TO_BORDER;
+	 if (wr == GL_CLAMP) wr = GL_CLAMP_TO_BORDER;
+
+	 /* 3D textures don't seem to respect the border color.
+	  * Fallback if there's ever a danger that they might refer to
+	  * it.
+	  */
+	 if (ws == GL_CLAMP_TO_BORDER) t->refs_border_color = 1;
+	 if (wt == GL_CLAMP_TO_BORDER) t->refs_border_color = 1;
+	 if (wr == GL_CLAMP_TO_BORDER) t->refs_border_color = 1;
+      }
+      /* The (possibly substituted) wrap modes become the SS3 address modes. */
+      ss3 |= translate_wrap_mode(ws) << SS3_TCX_ADDR_MODE_SHIFT;
+      ss3 |= translate_wrap_mode(wt) << SS3_TCY_ADDR_MODE_SHIFT;
+      ss3 |= translate_wrap_mode(wr) << SS3_TCZ_ADDR_MODE_SHIFT;
+   
+      if (ss3 != t->Setup[I915_TEXREG_SS3]) {
+	 t->intel.dirty = I915_UPLOAD_TEX_ALL;
+	 t->Setup[I915_TEXREG_SS3] = ss3;
+      }
+   }
+   /* SS4 is the border color packed from texObj->_BorderChan[0..3]. */
+   {   
+      const GLubyte *color = texObj->_BorderChan;
+
+      t->Setup[I915_TEXREG_SS4] = INTEL_PACKCOLOR8888(color[0],color[1],
+						     color[2],color[3]);
+   }
+}
+
+
+
+/* Bind 't' to texture unit 'unit', re-importing its sampler setup if dirty,
+ * and update the unit's MS2-MS4 / SS2-SS4 words when they have changed. */
+static void i915_import_tex_unit( i915ContextPtr i915, i915TextureObjectPtr t,
+				 GLuint unit )
+{
+   intelTextureObjectPtr prev = i915->intel.CurrentTexObj[unit];
+   GLuint state[I915_TEX_SETUP_SIZE];
+   GLuint ss2, ss3;
+
+   if (INTEL_DEBUG & DEBUG_TEXTURE)
+      fprintf(stderr, "%s unit(%d)\n", __FUNCTION__, unit);
+
+   /* The previously bound object (if any) loses this unit's bound bit. */
+   if (prev)
+      prev->base.bound &= ~(1U << unit);
+   i915->intel.CurrentTexObj[unit] = (intelTextureObjectPtr)t;
+   t->intel.base.bound |= (1 << unit);
+
+   if (t->intel.dirty & I915_UPLOAD_TEX(unit)) {
+      i915ImportTexObjState( t->intel.base.tObj );
+      t->intel.dirty &= ~I915_UPLOAD_TEX(unit);
+   }
+
+   /* SS2: LOD bias bits come from the unit, all other bits from the object. */
+   ss2 = i915->state.Tex[unit][I915_TEXREG_SS2] & SS2_LOD_BIAS_MASK;
+   ss2 |= t->Setup[I915_TEXREG_SS2] & ~SS2_LOD_BIAS_MASK;
+
+   /* SS3: the unit keeps NORMALIZED_COORDS and supplies the map index. */
+   ss3 = i915->state.Tex[unit][I915_TEXREG_SS3] & SS3_NORMALIZED_COORDS;
+   ss3 |= t->Setup[I915_TEXREG_SS3] &
+	  ~(SS3_NORMALIZED_COORDS | SS3_TEXTUREMAP_INDEX_MASK);
+   ss3 |= unit << SS3_TEXTUREMAP_INDEX_SHIFT;
+
+   state[I915_TEXREG_MS2] = t->intel.TextureOffset;
+   state[I915_TEXREG_MS3] = t->Setup[I915_TEXREG_MS3];
+   state[I915_TEXREG_MS4] = t->Setup[I915_TEXREG_MS4];
+   state[I915_TEXREG_SS2] = ss2;
+   state[I915_TEXREG_SS3] = ss3;
+   state[I915_TEXREG_SS4] = t->Setup[I915_TEXREG_SS4];
+   if (memcmp(state, i915->state.Tex[unit], sizeof(state)) != 0) {
+      I915_STATECHANGE( i915, I915_UPLOAD_TEX(unit) );
+      memcpy(i915->state.Tex[unit], state, sizeof(state));
+   }
+}
+
+
+
+/* Work shared by every enabled target: activate the unit's texture state,
+ * reject bordered textures, and (re)import the bound object when needed.
+ * Returns GL_FALSE when the texture has a border, GL_TRUE otherwise. */
+static GLboolean enable_tex_common( GLcontext *ctx, GLuint unit )
+{
+   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
+   i915TextureObjectPtr t = (i915TextureObjectPtr)tObj->DriverData;
+   const struct gl_texture_image *base = tObj->Image[0][tObj->BaseLevel];
+
+   if (0) fprintf(stderr, "%s %d\n", __FUNCTION__, unit);
+
+   if ((i915->state.active & I915_UPLOAD_TEX(unit)) == 0) {
+      I915_ACTIVESTATE(i915, I915_UPLOAD_TEX(unit), GL_TRUE);
+   }
+
+   /* Fallback if there's a texture border */
+   if (base->Border > 0)
+      return GL_FALSE;
+
+   /* Import when a different object was bound last time, or when this
+    * object is flagged dirty for the unit. */
+   if (i915->intel.CurrentTexObj[unit] != &t->intel ||
+       (t->intel.dirty & I915_UPLOAD_TEX(unit)) != 0) {
+      i915_import_tex_unit( i915, t, unit);
+      i915->tex_program.translated = 0;
+   }
+
+   return GL_TRUE;
+}
+
+/* Rectangle textures: clear the unit's SS3_NORMALIZED_COORDS bit and upload
+ * the images if dirty_images[0] is set.  Returns GL_FALSE if the upload fails. */
+static GLboolean enable_tex_rect( GLcontext *ctx, GLuint unit )
+{
+   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
+   i915TextureObjectPtr t = (i915TextureObjectPtr)tObj->DriverData;
+   const GLuint cur = i915->state.Tex[unit][I915_TEXREG_SS3];
+   const GLuint want = cur & ~SS3_NORMALIZED_COORDS;
+
+   /* Only raise a state change when the bit actually flips. */
+   if (want != cur) {
+      I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit));
+      i915->state.Tex[unit][I915_TEXREG_SS3] = want;
+   }
+
+   /* Nothing dirty means nothing to upload (uploads are not pipelined). */
+   if (!t->intel.base.dirty_images[0])
+      return GL_TRUE;
+
+   i915SetTexImages( i915, tObj );
+   if (!intelUploadTexImages( &i915->intel, &t->intel, 0 ))
+      return GL_FALSE;
+
+   return GL_TRUE;
+}
+
+
+/* Targets using normalized coordinates (1D/2D, and 3D via
+ * i915UpdateTexUnit): set SS3_NORMALIZED_COORDS for the unit and, when
+ * dirty_images[0] is set, redo the layout and upload.  GL_FALSE = failed. */
+static GLboolean enable_tex_2d( GLcontext *ctx, GLuint unit )
+{
+   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
+   i915TextureObjectPtr t = (i915TextureObjectPtr)tObj->DriverData;
+   const GLuint before = i915->state.Tex[unit][I915_TEXREG_SS3];
+   const GLuint after = before | SS3_NORMALIZED_COORDS;
+   GLboolean ok = GL_TRUE;
+
+   if (after != before) {
+      I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit));
+      i915->state.Tex[unit][I915_TEXREG_SS3] = after;
+   }
+
+   /* Upload teximages (not pipelined) */
+   if (t->intel.base.dirty_images[0]) {
+      i915SetTexImages( i915, tObj );
+      if (!intelUploadTexImages( &i915->intel, &t->intel, 0 ))
+	 ok = GL_FALSE;
+   }
+
+   return ok;
+}
+
+/* Cube maps: set SS3_NORMALIZED_COORDS for the unit, redo the image setup
+ * if any face is dirty, then upload each dirty face.  Returns GL_FALSE as
+ * soon as one face fails to upload. */
+static GLboolean enable_tex_cube( GLcontext *ctx, GLuint unit )
+{
+   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
+   i915TextureObjectPtr t = (i915TextureObjectPtr)tObj->DriverData;
+   const GLuint old_ss3 = i915->state.Tex[unit][I915_TEXREG_SS3];
+   const GLuint new_ss3 = old_ss3 | SS3_NORMALIZED_COORDS;
+   GLboolean any_dirty = GL_FALSE;
+   GLuint face;
+
+   if (new_ss3 != old_ss3) {
+      I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit));
+      i915->state.Tex[unit][I915_TEXREG_SS3] = new_ss3;
+   }
+
+   /* Is at least one of the six faces flagged dirty? */
+   for (face = 0; face < 6 && !any_dirty; face++) {
+      if (t->intel.base.dirty_images[face])
+	 any_dirty = GL_TRUE;
+   }
+
+   if (any_dirty)
+      i915SetTexImages( i915, tObj );
+
+   /* upload (per face, not pipelined) */
+   for (face = 0; face < 6; face++) {
+      if (t->intel.base.dirty_images[face] &&
+	  !intelUploadTexImages( &i915->intel, &t->intel, face ))
+	 return GL_FALSE;
+   }
+
+   return GL_TRUE;
+}
+
+/* 3D textures on I915 seem to get bogus border colors, so report GL_FALSE
+ * (fallback) whenever the object has been flagged as referencing its border. */
+static GLboolean enable_tex_3d( GLcontext *ctx, GLuint unit )
+{
+   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
+   i915TextureObjectPtr t = (i915TextureObjectPtr)tObj->DriverData;
+
+   /* refs_border_color is maintained by i915ImportTexObjState for 3D
+    * textures whose wrap modes resolve to GL_CLAMP_TO_BORDER. */
+   if (!t->refs_border_color)
+      return GL_TRUE;
+   return GL_FALSE;
+}
+
+
+
+ 
+/* Deactivate texture state for 'unit' and unbind its current texture object. */
+static GLboolean disable_tex( GLcontext *ctx, GLuint unit )
+{
+   i915ContextPtr i915 = I915_CONTEXT(ctx);
+
+   if (i915->state.active & I915_UPLOAD_TEX(unit)) {
+      I915_ACTIVESTATE(i915, I915_UPLOAD_TEX(unit), GL_FALSE);
+   }
+
+   /* The old texture is no longer bound to this texture unit: clear this
+    * unit's bit (not bit 0) so bindings on other units stay recorded. */
+   if ( i915->intel.CurrentTexObj[unit] != NULL ) {
+      i915->intel.CurrentTexObj[unit]->base.bound &= ~(1U << unit);
+      i915->intel.CurrentTexObj[unit] = NULL;
+   }
+
+   return GL_TRUE;
+}
+
+/* Validate one texture unit by dispatching on its _ReallyEnabled value.
+ * Returns GL_FALSE when the unit cannot be set up (the caller falls back). */
+static GLboolean i915UpdateTexUnit( GLcontext *ctx, GLuint unit )
+{
+   const GLuint enabled = ctx->Texture.Unit[unit]._ReallyEnabled;
+   GLboolean ok;
+
+   if (enabled == 0)
+      return disable_tex( ctx, unit );
+
+   /* Refuse when intelScreen->tex.size is below 2MB. */
+   if (INTEL_CONTEXT(ctx)->intelScreen->tex.size < 2048 * 1024)
+      return GL_FALSE;
+
+   if (enabled == TEXTURE_1D_BIT || enabled == TEXTURE_2D_BIT ||
+       enabled == TEXTURE_3D_BIT)
+      ok = enable_tex_2d( ctx, unit );
+   else if (enabled == TEXTURE_RECT_BIT)
+      ok = enable_tex_rect( ctx, unit );
+   else if (enabled == TEXTURE_CUBE_BIT)
+      ok = enable_tex_cube( ctx, unit );
+   else
+      return GL_FALSE;		/* any other value is rejected */
+
+   ok = ok && enable_tex_common( ctx, unit );
+   if (ok && enabled == TEXTURE_3D_BIT)
+      ok = enable_tex_3d( ctx, unit );
+   return ok;
+}
+
+
+/* Validate the texture units in order, stopping at the first failure, and
+ * hand the outcome to FALLBACK() for I915_FALLBACK_TEXTURE. */
+void i915UpdateTextureState( intelContextPtr intel )
+{
+   GLcontext *ctx = &intel->ctx;
+   GLboolean failed = GL_FALSE;
+   GLuint unit;
+
+   for (unit = 0; !failed && unit < I915_TEX_UNITS; unit++)
+      failed = !i915UpdateTexUnit( ctx, unit );
+   FALLBACK( intel, I915_FALLBACK_TEXTURE, failed );
+}
+
+
+
diff --git a/i915/i915_vtbl.c b/i915/i915_vtbl.c
new file mode 100644
index 0000000..cc8a605
--- /dev/null
+++ b/i915/i915_vtbl.c
@@ -0,0 +1,463 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+
+#include "glheader.h"
+#include "mtypes.h"
+#include "imports.h"
+#include "macros.h"
+#include "colormac.h"
+
+#include "tnl/t_context.h"
+#include "tnl/t_vertex.h"
+
+#include "intel_batchbuffer.h"
+
+#include "i915_reg.h"
+#include "i915_context.h"
+
+static void i915_render_start( intelContextPtr intel )
+{
+   i915ContextPtr i915 = I915_CONTEXT(intel);
+
+   /* An active fragment program is validated as such; otherwise the
+    * texture program path is validated instead. */
+   if (intel->ctx.FragmentProgram._Active) {
+      i915ValidateFragmentProgram( i915 );
+      return;
+   }
+   assert(!intel->ctx.FragmentProgram._MaintainTexEnvProgram);
+   i915ValidateTextureProgram( i915 );
+}
+
+
+static void i915_reduced_primitive_state( intelContextPtr intel,
+					  GLenum rprim )
+{
+    i915ContextPtr i915 = I915_CONTEXT(intel);
+    const GLuint old_st1 = i915->state.Stipple[I915_STPREG_ST1];
+    GLuint new_st1 = old_st1 & ~ST1_ENABLE;
+
+    /* The stipple enable bit is set only for GL_QUADS and GL_TRIANGLES
+     * (GL_QUADS arrives here from RASTERIZE(GL_QUADS) in
+     * t_dd_tritemp.h), and only when polygon stipple is on and
+     * hw_stipple is set.  Lines, points and anything else leave the
+     * bit cleared.
+     */
+    if (rprim == GL_QUADS || rprim == GL_TRIANGLES) {
+       if (intel->ctx.Polygon.StippleFlag && intel->hw_stipple)
+          new_st1 |= ST1_ENABLE;
+    }
+
+    i915->intel.reduced_primitive = rprim;
+
+    /* Touch the stipple state only when the register value changes. */
+    if (new_st1 == old_st1)
+       return;
+
+    I915_STATECHANGE(i915, I915_UPLOAD_STIPPLE);
+    i915->state.Stipple[I915_STPREG_ST1] = new_st1;
+}
+
+
+
+/* Decode the vertex format held in the LIS2/LIS4 context registers, add
+ * up the vertex size it describes and compare that with 'expected'.
+ */
+static GLboolean i915_check_vertex_size( intelContextPtr intel,
+					 GLuint expected )
+{
+   i915ContextPtr i915 = I915_CONTEXT(intel);
+   int lis2 = i915->current->Ctx[I915_CTXREG_LIS2];
+   const int lis4 = i915->current->Ctx[I915_CTXREG_LIS4];
+   int unit, sz;
+
+   switch (lis4 & S4_VFMT_XYZW_MASK) {
+   case S4_VFMT_XY: sz = 2; break;
+   case S4_VFMT_XYZ:
+   case S4_VFMT_XYW: sz = 3; break;
+   case S4_VFMT_XYZW: sz = 4; break;
+   default:
+      fprintf(stderr, "no xyzw specified\n");
+      return 0;
+   }
+
+   /* Each optional attribute flagged in LIS4 adds one to the size. */
+   sz += (lis4 & S4_VFMT_SPEC_FOG) ? 1 : 0;
+   sz += (lis4 & S4_VFMT_COLOR) ? 1 : 0;
+   sz += (lis4 & S4_VFMT_DEPTH_OFFSET) ? 1 : 0;
+   sz += (lis4 & S4_VFMT_POINT_WIDTH) ? 1 : 0;
+   sz += (lis4 & S4_VFMT_FOG_PARAM) ? 1 : 0;
+
+   for (unit = 0; unit < 8; unit++, lis2 >>= S2_TEXCOORD_FMT1_SHIFT) {
+      switch (lis2 & S2_TEXCOORD_FMT0_MASK) {
+      case TEXCOORDFMT_NOT_PRESENT: break;
+      case TEXCOORDFMT_1D:
+      case TEXCOORDFMT_2D_16: sz += 1; break;
+      case TEXCOORDFMT_2D:
+      case TEXCOORDFMT_4D_16: sz += 2; break;
+      case TEXCOORDFMT_3D: sz += 3; break;
+      case TEXCOORDFMT_4D: sz += 4; break;
+      default:
+         fprintf(stderr, "bad texcoord fmt %d\n", unit);
+         return GL_FALSE;
+      }
+   }
+
+   if (sz == expected)
+      return GL_TRUE;
+   fprintf(stderr, "vertex size mismatch %d/%d\n", sz, expected);
+   return GL_FALSE;
+}
+
+/* Emit the fixed ("invarient") 3D state packets into the batch buffer. */
+static void i915_emit_invarient_state( intelContextPtr intel )
+{
+   BATCH_LOCALS;
+
+   BEGIN_BATCH( 20 ); /* NOTE(review): only 19 dwords are written below */
+
+   OUT_BATCH(_3DSTATE_AA_CMD |
+	     AA_LINE_ECAAR_WIDTH_ENABLE |
+	     AA_LINE_ECAAR_WIDTH_1_0 |
+	     AA_LINE_REGION_WIDTH_ENABLE |
+	     AA_LINE_REGION_WIDTH_1_0);
+
+   OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
+   OUT_BATCH(0);
+
+   OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
+   OUT_BATCH(0);
+
+   OUT_BATCH(_3DSTATE_DFLT_Z_CMD);
+   OUT_BATCH(0);
+
+   /* Don't support texture crossbar yet: coord set N is bound to N */
+   OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
+	     CSB_TCB(0, 0) |
+	     CSB_TCB(1, 1) |
+	     CSB_TCB(2, 2) |
+	     CSB_TCB(3, 3) |
+	     CSB_TCB(4, 4) |
+	     CSB_TCB(5, 5) |
+	     CSB_TCB(6, 6) |
+	     CSB_TCB(7, 7));
+
+   OUT_BATCH(_3DSTATE_RASTER_RULES_CMD |
+	     ENABLE_POINT_RASTER_RULE |
+	     OGL_POINT_RASTER_RULE |
+	     ENABLE_LINE_STRIP_PROVOKE_VRTX |
+	     ENABLE_TRI_FAN_PROVOKE_VRTX |
+	     LINE_STRIP_PROVOKE_VRTX(1) |
+	     TRI_FAN_PROVOKE_VRTX(2) | 
+	     ENABLE_TEXKILL_3D_4D |
+	     TEXKILL_4D);
+
+   /* Need to initialize this to zero.
+    */
+   OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 
+	     I1_LOAD_S(3) |
+	     (0));
+   OUT_BATCH(0);
+ 
+   /* XXX: Use this */
+   OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | 
+	     DISABLE_SCISSOR_RECT);
+
+   OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+
+   OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
+
+   OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */
+   OUT_BATCH(0);
+
+
+   /* Don't support twosided stencil yet */
+   OUT_BATCH(_3DSTATE_BACKFACE_STENCIL_OPS |
+	     BFO_ENABLE_STENCIL_TWO_SIDE |
+	     0 );
+   
+   ADVANCE_BATCH();
+}
+/* Copy 'size' bytes of the dword array 'state' into the batch.  The 'intel'
+ * argument is unused: the BEGIN_BATCH family uses the 'intel' in scope. */
+#define emit( intel, state, size )			\
+do {							\
+   GLuint k;						\
+   BEGIN_BATCH( (size) / sizeof(GLuint));		\
+   for (k = 0 ; k < (size) / sizeof(GLuint) ; k++)	\
+      OUT_BATCH((state)[k]);				\
+   ADVANCE_BATCH();					\
+} while (0)
+
+static GLuint get_dirty( struct i915_hw_state *state )
+{
+   const GLuint pending = state->active & ~state->emitted;
+
+   /* Workaround for the multitex hang: when any texture unit has
+    * pending state, clear the emitted flag of every texture unit so
+    * that all active units are reported dirty together.
+    */
+   if ((pending & I915_UPLOAD_TEX_ALL) == 0)
+      return pending;
+
+   state->emitted &= ~I915_UPLOAD_TEX_ALL;
+   return state->active & ~state->emitted;
+}
+
+
+static GLuint get_state_size( struct i915_hw_state *state )
+{
+   const GLuint dirty = get_dirty(state);
+   GLuint bytes = 0;
+   GLuint unit;
+
+   /* Fixed-size groups: the invariant packet is budgeted at 20 dwords,
+    * the others at the size of their backing arrays.
+    */
+   if (dirty & I915_UPLOAD_INVARIENT)
+      bytes += 20 * sizeof(int);
+   if (dirty & I915_UPLOAD_CTX)
+      bytes += sizeof(state->Ctx);
+   if (dirty & I915_UPLOAD_BUFFERS)
+      bytes += sizeof(state->Buffer);
+   if (dirty & I915_UPLOAD_STIPPLE)
+      bytes += sizeof(state->Stipple);
+   if (dirty & I915_UPLOAD_FOG)
+      bytes += sizeof(state->Fog);
+
+   /* Textures: (2 + 3 per dirty unit) dwords, counted twice. */
+   if (dirty & I915_UPLOAD_TEX_ALL) {
+      GLuint nr_units = 0;
+
+      for (unit = 0; unit < I915_TEX_UNITS; unit++)
+         nr_units += (dirty & I915_UPLOAD_TEX(unit)) ? 1 : 0;
+
+      bytes += 2 * (2 + 3 * nr_units) * sizeof(GLuint);
+   }
+
+   /* Variable-size groups; their sizes are kept in dwords. */
+   if (dirty & I915_UPLOAD_CONSTANTS)
+      bytes += state->ConstantSize * sizeof(GLuint);
+   if (dirty & I915_UPLOAD_PROGRAM)
+      bytes += state->ProgramSize * sizeof(GLuint);
+
+   return bytes;
+}
+
+
+/* Write every dirty state group into the batch buffer, flushing the
+ * batch first when get_state_size() says the groups will not fit. */
+static void i915_emit_state( intelContextPtr intel )
+{
+   i915ContextPtr i915 = I915_CONTEXT(intel);
+   struct i915_hw_state *state = i915->current;
+   int i;
+   GLuint dirty = get_dirty(state);
+   GLuint counter = intel->batch.counter;
+   BATCH_LOCALS;
+
+   if (intel->batch.space < get_state_size(state)) {
+      intelFlushBatch(intel, GL_TRUE);
+      dirty = get_dirty(state); /* re-read both values after the flush */
+      counter = intel->batch.counter;
+   }
+
+   if (VERBOSE) 
+      fprintf(stderr, "%s dirty: %x\n", __FUNCTION__, dirty);
+
+   if (dirty & I915_UPLOAD_INVARIENT) {
+      if (VERBOSE) fprintf(stderr, "I915_UPLOAD_INVARIENT:\n"); 
+      i915_emit_invarient_state( intel );
+   }
+
+   if (dirty & I915_UPLOAD_CTX) {
+      if (VERBOSE) fprintf(stderr, "I915_UPLOAD_CTX:\n"); 
+      emit( i915, state->Ctx, sizeof(state->Ctx) );
+   }
+
+   if (dirty & I915_UPLOAD_BUFFERS) {
+      if (VERBOSE) fprintf(stderr, "I915_UPLOAD_BUFFERS:\n"); 
+      emit( i915, state->Buffer, sizeof(state->Buffer) );
+   }
+
+   if (dirty & I915_UPLOAD_STIPPLE) {
+      if (VERBOSE) fprintf(stderr, "I915_UPLOAD_STIPPLE:\n"); 
+      emit( i915, state->Stipple, sizeof(state->Stipple) );
+   }
+
+   if (dirty & I915_UPLOAD_FOG) {
+      if (VERBOSE) fprintf(stderr, "I915_UPLOAD_FOG:\n"); 
+      emit( i915, state->Fog, sizeof(state->Fog) );
+   }
+
+   /* Combine all the dirty texture state into a single command to
+    * avoid lockups on I915 hardware. 
+    */
+   if (dirty & I915_UPLOAD_TEX_ALL) {
+      int nr = 0; /* number of dirty texture units */
+
+      for (i = 0; i < I915_TEX_UNITS; i++) 
+	 if (dirty & I915_UPLOAD_TEX(i)) 
+	    nr++;
+
+      BEGIN_BATCH(2+nr*3);
+      OUT_BATCH(_3DSTATE_MAP_STATE | (3*nr));
+      OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
+      for (i = 0 ; i < I915_TEX_UNITS ; i++)
+	 if (dirty & I915_UPLOAD_TEX(i)) {
+	    OUT_BATCH(state->Tex[i][I915_TEXREG_MS2]);
+	    OUT_BATCH(state->Tex[i][I915_TEXREG_MS3]);
+	    OUT_BATCH(state->Tex[i][I915_TEXREG_MS4]);
+	 }
+      ADVANCE_BATCH();
+
+      BEGIN_BATCH(2+nr*3);
+      OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3*nr));
+      OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
+      for (i = 0 ; i < I915_TEX_UNITS ; i++)
+	 if (dirty & I915_UPLOAD_TEX(i)) {
+	    OUT_BATCH(state->Tex[i][I915_TEXREG_SS2]);
+	    OUT_BATCH(state->Tex[i][I915_TEXREG_SS3]);
+	    OUT_BATCH(state->Tex[i][I915_TEXREG_SS4]);
+	 }
+      ADVANCE_BATCH();
+   }
+
+   if (dirty & I915_UPLOAD_CONSTANTS) {
+      if (VERBOSE) fprintf(stderr, "I915_UPLOAD_CONSTANTS:\n"); 
+      emit( i915, state->Constant, state->ConstantSize * sizeof(GLuint) );
+   }
+
+   if (dirty & I915_UPLOAD_PROGRAM) {
+      if (VERBOSE) fprintf(stderr, "I915_UPLOAD_PROGRAM:\n"); 
+
+      assert((state->Program[0] & 0x1ff)+2 == state->ProgramSize);
+      
+      emit( i915, state->Program, state->ProgramSize * sizeof(GLuint) );
+      if (VERBOSE)
+	 i915_disassemble_program( state->Program, state->ProgramSize );
+   }
+
+   state->emitted |= dirty; /* everything written above is now current */
+   intel->batch.last_emit_state = counter;
+   assert(counter == intel->batch.counter); /* no flush during emission */
+}
+/* vtbl.destroy hook: calls _tnl_free_vertices() on the GL context. */
+static void i915_destroy_context( intelContextPtr intel )
+{
+   _tnl_free_vertices(&intel->ctx);
+}
+
+
+/**
+ * Point the colour drawing buffer at 'region' and flag the change. */
+static void
+i915_set_color_region( intelContextPtr intel, const intelRegion *region)
+{
+   i915ContextPtr i915 = I915_CONTEXT(intel);
+   const GLuint cbuf_info = (BUF_3D_ID_COLOR_BACK |
+                             BUF_3D_PITCH(region->pitch) | BUF_3D_USE_FENCE);
+   I915_STATECHANGE( i915, I915_UPLOAD_BUFFERS );
+   i915->state.Buffer[I915_DESTREG_CBUFADDR1] = cbuf_info;
+   i915->state.Buffer[I915_DESTREG_CBUFADDR2] = region->offset;
+}
+
+
+/**
+ * Point the z-buffer/stencil buffer at 'region' and flag the change. */
+static void
+i915_set_z_region( intelContextPtr intel, const intelRegion *region)
+{
+   i915ContextPtr i915 = I915_CONTEXT(intel);
+   const GLuint dbuf_info = (BUF_3D_ID_DEPTH |
+                             BUF_3D_PITCH(region->pitch) | BUF_3D_USE_FENCE);
+   I915_STATECHANGE( i915, I915_UPLOAD_BUFFERS );
+   i915->state.Buffer[I915_DESTREG_DBUFADDR1] = dbuf_info;
+   i915->state.Buffer[I915_DESTREG_DBUFADDR2] = region->offset;
+}
+
+
+/**
+ * Store the colour and the Z/stencil drawing regions in one call.
+ * The buffer words are written directly, without I915_STATECHANGE().
+ */
+static void
+i915_update_color_z_regions(intelContextPtr intel,
+                            const intelRegion *colorRegion,
+                            const intelRegion *depthRegion)
+{
+   i915ContextPtr i915 = I915_CONTEXT(intel);
+   const GLuint color_info = (BUF_3D_ID_COLOR_BACK |
+                              BUF_3D_PITCH(colorRegion->pitch) | BUF_3D_USE_FENCE);
+   const GLuint depth_info = (BUF_3D_ID_DEPTH |
+                              BUF_3D_PITCH(depthRegion->pitch) | BUF_3D_USE_FENCE);
+
+   i915->state.Buffer[I915_DESTREG_CBUFADDR1] = color_info;
+   i915->state.Buffer[I915_DESTREG_CBUFADDR2] = colorRegion->offset;
+
+   i915->state.Buffer[I915_DESTREG_DBUFADDR1] = depth_info;
+   i915->state.Buffer[I915_DESTREG_DBUFADDR2] = depthRegion->offset;
+}
+
+/* Zero the emitted mask so that every active state group is dirty again. */
+static void i915_lost_hardware( intelContextPtr intel )
+{
+   I915_CONTEXT(intel)->state.emitted = 0;
+}
+/* Emit an MI_FLUSH with the map-cache and render-cache flags set. */
+static void i915_emit_flush( intelContextPtr intel )
+{
+   BATCH_LOCALS;
+
+   BEGIN_BATCH(2);
+   OUT_BATCH( MI_FLUSH | FLUSH_MAP_CACHE | FLUSH_RENDER_CACHE ); 
+   OUT_BATCH( 0 );
+   ADVANCE_BATCH();
+}
+
+/* Fill in the intel vtbl with the i915 implementation of each hook. */
+void i915InitVtbl( i915ContextPtr i915 )
+{
+   i915->intel.vtbl.destroy = i915_destroy_context;
+   i915->intel.vtbl.lost_hardware = i915_lost_hardware;
+   i915->intel.vtbl.alloc_tex_obj = i915AllocTexObj;
+   i915->intel.vtbl.update_texture_state = i915UpdateTextureState;
+   i915->intel.vtbl.render_start = i915_render_start;
+   i915->intel.vtbl.reduced_primitive_state = i915_reduced_primitive_state;
+   i915->intel.vtbl.check_vertex_size = i915_check_vertex_size;
+   i915->intel.vtbl.emit_state = i915_emit_state;
+   i915->intel.vtbl.emit_flush = i915_emit_flush;
+   i915->intel.vtbl.set_color_region = i915_set_color_region;
+   i915->intel.vtbl.set_z_region = i915_set_z_region;
+   i915->intel.vtbl.update_color_z_regions = i915_update_color_z_regions;
+   i915->intel.vtbl.clear_with_tris = i915ClearWithTris;
+   i915->intel.vtbl.rotate_window = i915RotateWindow;
+}
+
diff --git a/i915/intel_batchbuffer.c b/i915/intel_batchbuffer.c
new file mode 100644
index 0000000..803b41b
--- /dev/null
+++ b/i915/intel_batchbuffer.c
@@ -0,0 +1,829 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include <stdio.h>
+#include <errno.h>
+
+#include "mtypes.h"
+#include "context.h"
+#include "enums.h"
+#include "vblank.h"
+
+#include "intel_reg.h"
+#include "intel_batchbuffer.h"
+#include "intel_context.h"
+
+
+
+
+/* ================================================================
+ * Performance monitoring functions
+ */
+
+static void intel_fill_box( intelContextPtr intel,
+			    GLshort x, GLshort y,
+			    GLshort w, GLshort h,
+			    GLubyte r, GLubyte g, GLubyte b )
+{
+   const intelScreenPrivate *screen = intel->intelScreen;
+
+   x += intel->drawX;
+   y += intel->drawY;
+
+   /* Boxes that are not strictly inside the screen are skipped. */
+   if (x < 0 || y < 0)
+      return;
+   if (x + w >= screen->width || y + h >= screen->height)
+      return;
+   intelEmitFillBlitLocked( intel, screen->cpp, screen->back.pitch,
+                            screen->back.offset, x, y, w, h,
+                            INTEL_PACKCOLOR(screen->fbFormat, r,g,b,0xff));
+}
+/* Draw one coloured box per perf_boxes condition, then clear perf_boxes. */
+static void intel_draw_performance_boxes( intelContextPtr intel )
+{
+   /* Purple box for page flipping
+    */
+   if ( intel->perf_boxes & I830_BOX_FLIP ) 
+      intel_fill_box( intel, 4, 4, 8, 8, 255, 0, 255 );
+
+   /* Red box if we have to wait for idle at any point
+    */
+   if ( intel->perf_boxes & I830_BOX_WAIT ) 
+      intel_fill_box( intel, 16, 4, 8, 8, 255, 0, 0 );
+
+   /* Blue box: lost context?
+    */
+   if ( intel->perf_boxes & I830_BOX_LOST_CONTEXT ) 
+      intel_fill_box( intel, 28, 4, 8, 8, 0, 0, 255 );
+
+   /* Yellow box for texture swaps
+    */
+   if ( intel->perf_boxes & I830_BOX_TEXTURE_LOAD ) 
+      intel_fill_box( intel, 40, 4, 8, 8, 255, 255, 0 );
+
+   /* Green box if hardware never idles (as far as we can tell):
+    * drawn when the RING_EMPTY flag is NOT set. */
+   if ( !(intel->perf_boxes & I830_BOX_RING_EMPTY) ) 
+      intel_fill_box( intel, 64, 4, 8, 8, 0, 255, 0 );
+
+
+   /* Draw bars indicating number of buffers allocated 
+    * (not a great measure, easily confused).  Currently compiled out.
+    */
+#if 0
+   if (intel->dma_used) {
+      int bar = intel->dma_used / 10240;
+      if (bar > 100) bar = 100;
+      if (bar < 1) bar = 1;
+      intel_fill_box( intel, 4, 16, bar, 4, 196, 128, 128 );
+      intel->dma_used = 0;
+   }
+#endif
+
+   intel->perf_boxes = 0;
+}
+
+
+
+
+
+
+static int bad_prim_vertex_nr( int primitive, int nr )
+{
+   /* Returns non-zero when 'nr' is not an acceptable vertex count
+    * for the primitive type encoded in 'primitive'. */
+   const int type = primitive & PRIM3D_MASK;
+
+   if (type == PRIM3D_POINTLIST)
+      return nr < 1;
+   if (type == PRIM3D_LINELIST)
+      return nr == 0 || (nr & 1) != 0;
+   if (type == PRIM3D_LINESTRIP)
+      return nr < 2;
+   if (type == PRIM3D_TRILIST || type == PRIM3D_RECTLIST)
+      return nr == 0 || nr % 3 != 0;
+   if (type == PRIM3D_POLY || type == PRIM3D_TRIFAN ||
+       type == PRIM3D_TRISTRIP || type == PRIM3D_TRISTRIP_RVRSE)
+      return nr < 3;
+
+   /* Unrecognised primitive type: always reported as bad. */
+   return 1;
+}
+
+static void intel_flush_inline_primitive( GLcontext *ctx )
+{
+   intelContextPtr intel = INTEL_CONTEXT( ctx );
+   GLuint used = intel->batch.ptr - intel->prim.start_ptr;
+   GLuint vertcount;
+
+   assert(intel->prim.primitive != ~0);
+
+   /* Check the vertex size against the vertex format we are giving
+    * to the hardware.  If it's wrong, ditch the primitive.
+    */
+   if (!intel->vtbl.check_vertex_size( intel, intel->vertex_size ))
+      goto do_discard;
+
+   /* 'used' is the 4-byte command slot plus the vertex data. */
+   vertcount = (used - 4) / (intel->vertex_size * 4);
+   if (vertcount == 0)
+      goto do_discard;
+
+   if (used - 4 != vertcount * intel->vertex_size * 4) {
+      fprintf(stderr, "vertex size confusion %d %d\n", used,
+              intel->vertex_size * vertcount * 4);
+      goto do_discard;
+   }
+
+   if (bad_prim_vertex_nr( intel->prim.primitive, vertcount )) {
+      fprintf(stderr, "bad_prim_vertex_nr %x %d\n", intel->prim.primitive,
+              vertcount);
+      goto do_discard;
+   }
+
+   if (used < 8)
+      goto do_discard;
+
+   /* Fill the reserved slot with the real primitive command. */
+   *(int *)intel->prim.start_ptr = (_3DPRIMITIVE |
+                                    intel->prim.primitive |
+                                    (used/4-2));
+
+   goto finished;
+
+ do_discard:
+   /* Rewind the batch to prim.start_ptr, dropping the primitive. */
+   intel->batch.ptr -= used;
+   intel->batch.space += used;
+   assert(intel->batch.space >= 0);
+
+ finished:
+   intel->prim.primitive = ~0;
+   intel->prim.start_ptr = 0;
+   intel->prim.flush = 0;
+}
+
+
+/* Begin an inline primitive: emit pending state, then reserve the dword
+ * that intel_flush_inline_primitive() later fills with the command. */
+void intelStartInlinePrimitive( intelContextPtr intel, GLuint prim )
+{
+   BATCH_LOCALS;
+
+   if (0)
+      fprintf(stderr, "%s %x\n", __FUNCTION__, prim);
+
+
+   /* Finish any in-progress primitive:
+    */
+   INTEL_FIREVERTICES( intel );
+   
+   /* Emit outstanding state:
+    */
+   intel->vtbl.emit_state( intel );
+   
+   /* Make sure there is some space in this buffer (room for at least
+    * ten vertices), flushing and re-emitting state otherwise: */
+   if (intel->vertex_size * 10 * sizeof(GLuint) >= intel->batch.space) {
+      intelFlushBatch(intel, GL_TRUE); 
+      intel->vtbl.emit_state( intel );
+   }
+
+#if 1
+   if (((unsigned long)intel->batch.ptr) & 0x4) { /* pad with a zero dword */
+      BEGIN_BATCH(1);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+#endif
+
+   /* Emit a slot which will be filled with the inline primitive
+    * command later.
+    */
+   BEGIN_BATCH(2);
+   OUT_BATCH( 0 );
+
+   intel->prim.start_ptr = batch_ptr; /* address of the second dword */
+   intel->prim.primitive = prim;
+   intel->prim.flush = intel_flush_inline_primitive;
+   intel->batch.contains_geometry = 1;
+
+   OUT_BATCH( 0 );
+   ADVANCE_BATCH();
+}
+
+/* Flush the open inline primitive and the batch, then reopen it. */
+void intelRestartInlinePrimitive( intelContextPtr intel )
+{
+   const GLuint saved_prim = intel->prim.primitive;
+
+   intel_flush_inline_primitive( &intel->ctx );
+   intelFlushBatch(intel, GL_TRUE); /* GL_TRUE - is critical */
+   intelStartInlinePrimitive( intel, saved_prim );
+}
+
+
+/* Close the open inline primitive, flush the batch, reopen the same type. */
+void intelWrapInlinePrimitive( intelContextPtr intel )
+{
+   GLuint prim = intel->prim.primitive;
+
+   if (0)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   intel_flush_inline_primitive( &intel->ctx );
+   intelFlushBatch(intel, GL_TRUE);
+   intelStartInlinePrimitive( intel, prim );
+}
+
+
+/* Emit a primitive command followed by 'dwords' dwords of space for inline
+ * vertices.  Returns the start of that space, or 0 if validation fails. */
+GLuint *intelEmitInlinePrimitiveLocked(intelContextPtr intel, 
+				       int primitive,
+				       int dwords,
+				       int vertex_size )
+{
+   GLuint *tmp = 0;
+   BATCH_LOCALS;
+
+   if (0)
+      fprintf(stderr, "%s 0x%x %d\n", __FUNCTION__, primitive, dwords);
+
+   /* Emit outstanding state:
+    */
+   intel->vtbl.emit_state( intel );
+
+   if ((1+dwords)*4 >= intel->batch.space) {
+      intelFlushBatch(intel, GL_TRUE); 
+      intel->vtbl.emit_state( intel );
+   }
+
+
+   if (1) {
+      int used = dwords * 4; /* size of the vertex data in bytes */
+      int vertcount;
+
+      /* Check vertex size against the vertex we're specifying to
+       * hardware.  If it's wrong, ditch the primitive.
+       */ 
+      if (!intel->vtbl.check_vertex_size( intel, vertex_size )) 
+	 goto do_discard;
+
+      vertcount = dwords / vertex_size;
+      
+      if (dwords % vertex_size) {
+	 fprintf(stderr, "did not request a whole number of vertices\n");
+	 goto do_discard;
+      }
+
+      if (bad_prim_vertex_nr( primitive, vertcount )) {
+	 fprintf(stderr, "bad_prim_vertex_nr %x %d\n", primitive, vertcount);
+	 goto do_discard;
+      }
+
+      if (used < 8)
+	 goto do_discard;
+   }
+
+   /* Emit 3D_PRIMITIVE commands:
+    */
+   BEGIN_BATCH(1 + dwords);
+   OUT_BATCH( _3DPRIMITIVE | 
+	      primitive |
+	      (dwords-1) );
+
+   tmp = (GLuint *)batch_ptr; /* the caller writes the vertices here */
+   batch_ptr += dwords * 4;
+
+   ADVANCE_BATCH();
+
+   intel->batch.contains_geometry = 1;
+
+ do_discard:
+   return tmp;
+}
+
+/* Wait (spin, sleep or IRQ) while intelGetLastFrame() lags last_dispatch. */
+static void intelWaitForFrameCompletion( intelContextPtr intel )
+{
+  drm_i915_sarea_t *sarea = (drm_i915_sarea_t *)intel->sarea;
+
+   if (intel->do_irqs) {
+      if (intelGetLastFrame(intel) < sarea->last_dispatch) {
+	 if (!intel->irqsEmitted) {
+	    while (intelGetLastFrame (intel) < sarea->last_dispatch)
+	       ; /* busy-wait: no IRQ has been emitted to wait on */
+	 }
+	 else {
+	    intelWaitIrq( intel, intel->alloc.irq_emitted );	
+	 }
+	 intel->irqsEmitted = 10; /* emit an IRQ on each of the next 10 calls */
+      }
+
+      if (intel->irqsEmitted) {
+	 LOCK_HARDWARE( intel ); 
+	 intelEmitIrqLocked( intel );
+	 intel->irqsEmitted--;
+	 UNLOCK_HARDWARE( intel ); 
+      }
+   } 
+   else {
+      while (intelGetLastFrame (intel) < sarea->last_dispatch) {
+	 if (intel->do_usleeps) 
+	    DO_USLEEP( 1 );
+      }
+   }
+}
+
+/*
+ * Copy the back buffer to the front buffer (clipped to 'rect' if given).
+ */
+void intelCopyBuffer( const __DRIdrawablePrivate *dPriv,
+		      const drm_clip_rect_t	 *rect)
+{
+   intelContextPtr intel;
+   const intelScreenPrivate *intelScreen;
+   GLboolean   missed_target = GL_FALSE; /* defined on every path to the read below */
+   int64_t ust;
+
+   if (0)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   assert(dPriv);
+   assert(dPriv->driContextPriv);
+   assert(dPriv->driContextPriv->driverPrivate);
+
+   intel = (intelContextPtr) dPriv->driContextPriv->driverPrivate;
+
+   intelFlush( &intel->ctx );
+   
+   intelScreen = intel->intelScreen;
+
+   /* Full-buffer swaps may be handed to the DRM_I915_VBLANK_SWAP ioctl. */
+   if (!rect && !intel->swap_scheduled && intelScreen->drmMinor >= 6 &&
+       !(intel->vblank_flags & VBLANK_FLAG_NO_IRQ) &&
+       intelScreen->current_rotation == 0) {
+      unsigned int interval = driGetVBlankInterval(dPriv, intel->vblank_flags);
+      unsigned int target;
+      drm_i915_vblank_swap_t swap;
+
+      swap.drawable = dPriv->hHWDrawable;
+      swap.seqtype = DRM_VBLANK_ABSOLUTE;
+      target = swap.sequence = intel->vbl_seq + interval;
+
+      if (intel->vblank_flags & VBLANK_FLAG_SYNC) {
+	 swap.seqtype |= DRM_VBLANK_NEXTONMISS;
+      } else if (interval == 0) {
+	 goto noschedule;
+      }
+
+      if ( intel->vblank_flags & VBLANK_FLAG_SECONDARY ) {
+	 swap.seqtype |= DRM_VBLANK_SECONDARY;
+      }
+
+      if (!drmCommandWriteRead(intel->driFd, DRM_I915_VBLANK_SWAP, &swap,
+                              sizeof(swap))) {
+        intel->swap_scheduled = 1;
+        intel->vbl_seq = swap.sequence;
+        swap.sequence -= target;
+        missed_target = swap.sequence > 0 && swap.sequence <= (1 << 23);
+      }
+   } else {
+      intel->swap_scheduled = 0;
+   }
+noschedule:
+
+   /* No swap was scheduled: wait, then blit here. */
+   if (!intel->swap_scheduled) {
+      intelWaitForFrameCompletion( intel );
+      LOCK_HARDWARE( intel );
+
+      if (!rect)
+      {
+	 UNLOCK_HARDWARE( intel );
+	 driWaitForVBlank( dPriv, &intel->vbl_seq, intel->vblank_flags, & missed_target );
+	 LOCK_HARDWARE( intel );
+      }
+      {
+	 const intelScreenPrivate *intelScreen = intel->intelScreen; /* shadows the outer variable */
+	 const __DRIdrawablePrivate *dPriv = intel->driDrawable; /* NOTE(review): shadows the parameter */
+	 const int nbox = dPriv->numClipRects;
+	 const drm_clip_rect_t *pbox = dPriv->pClipRects;
+	 drm_clip_rect_t box;
+	 const int cpp = intelScreen->cpp;
+	 const int pitch = intelScreen->front.pitch; /* in bytes */
+	 int i;
+	 GLuint CMD, BR13;
+	 BATCH_LOCALS;
+
+	 switch(cpp) {
+	 case 2: 
+	    BR13 = (pitch) | (0xCC << 16) | (1<<24);
+	    CMD = XY_SRC_COPY_BLT_CMD;
+	    break;
+	 case 4:
+	    BR13 = (pitch) | (0xCC << 16) | (1<<24) | (1<<25);
+	    CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA |
+		   XY_SRC_COPY_BLT_WRITE_RGB);
+	    break;
+	 default:
+	    BR13 = (pitch) | (0xCC << 16) | (1<<24);
+	    CMD = XY_SRC_COPY_BLT_CMD;
+	    break;
+	 }
+   
+	 if (0) 
+	    intel_draw_performance_boxes( intel );
+
+	 for (i = 0 ; i < nbox; i++, pbox++) 
+	 {
+	    if (pbox->x1 > pbox->x2 ||
+		pbox->y1 > pbox->y2 ||
+		pbox->x2 > intelScreen->width ||
+		pbox->y2 > intelScreen->height) {
+	       _mesa_warning(&intel->ctx, "Bad cliprect in intelCopyBuffer()");
+	       continue;
+	    }
+
+	    box = *pbox;
+
+	    if (rect) /* intersect the cliprect with the requested rectangle */
+	    {
+	       if (rect->x1 > box.x1)
+		  box.x1 = rect->x1;
+	       if (rect->y1 > box.y1)
+		  box.y1 = rect->y1;
+	       if (rect->x2 < box.x2)
+		  box.x2 = rect->x2;
+	       if (rect->y2 < box.y2)
+		  box.y2 = rect->y2;
+
+	       if (box.x1 > box.x2 || box.y1 > box.y2)
+		  continue;
+	    }
+
+	    BEGIN_BATCH( 8);
+	    OUT_BATCH( CMD );
+	    OUT_BATCH( BR13 );
+	    OUT_BATCH( (box.y1 << 16) | box.x1 );
+	    OUT_BATCH( (box.y2 << 16) | box.x2 );
+
+	    if (intel->sarea->pf_current_page == 0) 
+	       OUT_BATCH( intelScreen->front.offset );
+	    else
+	       OUT_BATCH( intelScreen->back.offset );			
+
+	    OUT_BATCH( (box.y1 << 16) | box.x1 );
+	    OUT_BATCH( BR13 & 0xffff );
+
+	    if (intel->sarea->pf_current_page == 0) 
+	       OUT_BATCH( intelScreen->back.offset );			
+	    else
+	       OUT_BATCH( intelScreen->front.offset );
+
+	    ADVANCE_BATCH();
+	 }
+      }
+      intelFlushBatchLocked( intel, GL_TRUE, GL_TRUE, GL_TRUE );
+      UNLOCK_HARDWARE( intel );
+   }
+
+   if (!rect) /* swap statistics are kept for full-buffer swaps only */
+   {
+       intel->swap_count++;
+       (*dri_interface->getUST)(&ust);
+       if (missed_target) {
+	   intel->swap_missed_count++;
+	   intel->swap_missed_ust = ust -  intel->swap_ust;
+       }
+   
+       intel->swap_ust = ust;
+   }
+}
+
+
+
+
+void intelEmitFillBlitLocked( intelContextPtr intel,
+			      GLuint cpp,
+			      GLshort dst_pitch,  /* in bytes */
+			      GLuint dst_offset,
+			      GLshort x, GLshort y,
+			      GLshort w, GLshort h,
+			      GLuint color )
+{
+   GLuint BR13, CMD;
+   BATCH_LOCALS;
+
+   /* Only 1 to 4 bytes per pixel are handled; for any other cpp
+    * nothing is emitted.
+    */
+   if (cpp < 1 || cpp > 4)
+      return;
+
+   /* Settings shared by every supported depth. */
+   BR13 = dst_pitch | (0xF0 << 16) | (1<<24);
+   CMD = XY_COLOR_BLT_CMD;
+
+   /* 4 bytes per pixel: one more BR13 bit plus alpha/RGB writes. */
+   if (cpp == 4) {
+      BR13 |= (1<<25);
+      CMD |= XY_COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB;
+   }
+
+   BEGIN_BATCH( 6);
+   OUT_BATCH( CMD );
+   OUT_BATCH( BR13 );
+   OUT_BATCH( (y << 16) | x );
+   OUT_BATCH( ((y+h) << 16) | (x+w) );
+   OUT_BATCH( dst_offset );
+   OUT_BATCH( color );
+   ADVANCE_BATCH();
+}
+
+
+/* Emit a source-copy blit of a w x h rectangle.  Both pitches are scaled by
+ * cpp; a cpp outside 1..4 or a negative extent emits nothing. */
+void intelEmitCopyBlitLocked( intelContextPtr intel,
+			      GLuint cpp,
+			      GLshort src_pitch,
+			      GLuint  src_offset,
+			      GLshort dst_pitch,
+			      GLuint  dst_offset,
+			      GLshort src_x, GLshort src_y,
+			      GLshort dst_x, GLshort dst_y,
+			      GLshort w, GLshort h )
+{
+   GLuint CMD, BR13;
+   const int dst_x2 = dst_x + w;
+   const int dst_y2 = dst_y + h;
+   BATCH_LOCALS;
+
+   /* Reject unsupported depths and negative extents before touching
+    * the batch.
+    */
+   if (cpp < 1 || cpp > 4)
+      return;
+   if (dst_y2 < dst_y || dst_x2 < dst_x)
+      return;
+
+   /* Scale both pitches by cpp (the results are stored back into
+    * the GLshort parameters).
+    */
+   src_pitch *= cpp;
+   dst_pitch *= cpp;
+
+   BR13 = dst_pitch | (0xCC << 16) | (1<<24);
+   CMD = XY_SRC_COPY_BLT_CMD;
+   if (cpp == 4) {
+      /* 4 bytes per pixel: extra BR13 bit plus alpha/RGB writes. */
+      BR13 |= (1<<25);
+      CMD |= XY_SRC_COPY_BLT_WRITE_ALPHA | XY_SRC_COPY_BLT_WRITE_RGB;
+   }
+
+   /* NOTE(review): 12 dwords are requested here but 8 are written. */
+   BEGIN_BATCH( 12);
+   OUT_BATCH( CMD );
+   OUT_BATCH( BR13 );
+   OUT_BATCH( (dst_y << 16) | dst_x );
+   OUT_BATCH( (dst_y2 << 16) | dst_x2 );
+   OUT_BATCH( dst_offset );
+   OUT_BATCH( (src_y << 16) | src_x );
+   OUT_BATCH( src_pitch );
+   OUT_BATCH( src_offset );
+   ADVANCE_BATCH();
+}
+
+
+/* Clear the requested colour/depth/stencil buffers with fill blits, one set per cliprect. */
+void intelClearWithBlit(GLcontext *ctx, GLbitfield buffers, GLboolean allFoo,
+                        GLint cx1Foo, GLint cy1Foo, GLint cwFoo, GLint chFoo) /* the *Foo arguments are not read */
+{
+   intelContextPtr intel = INTEL_CONTEXT( ctx );
+   intelScreenPrivate *intelScreen = intel->intelScreen;
+   GLuint clear_depth, clear_color;
+   GLint cx, cy, cw, ch;
+   GLboolean all;
+   GLint pitch;
+   GLint cpp = intelScreen->cpp;
+   GLint i;
+   GLuint BR13, CMD, D_CMD;
+   BATCH_LOCALS;
+
+   intelFlush( &intel->ctx );
+   LOCK_HARDWARE( intel );
+
+   /* get clear bounds after locking */
+   cx = intel->ctx.DrawBuffer->_Xmin;
+   cy = intel->ctx.DrawBuffer->_Ymin;
+   cw = intel->ctx.DrawBuffer->_Xmax - cx;
+   ch = intel->ctx.DrawBuffer->_Ymax - cy;
+   all = (cw == intel->ctx.DrawBuffer->Width &&
+          ch == intel->ctx.DrawBuffer->Height);
+
+   pitch = intelScreen->front.pitch;
+
+   clear_color = intel->ClearColor;
+   clear_depth = 0;
+
+   if (buffers & BUFFER_BIT_DEPTH) {
+      clear_depth = (GLuint)(ctx->Depth.Clear * intel->ClearDepth);
+   }
+
+   if (buffers & BUFFER_BIT_STENCIL) {
+      clear_depth |= (ctx->Stencil.Clear & 0xff) << 24; /* stencil value goes in the top byte */
+   }
+
+   switch(cpp) {
+   case 2: 
+      BR13 = (0xF0 << 16) | (pitch) | (1<<24);
+      D_CMD = CMD = XY_COLOR_BLT_CMD;
+      break;
+   case 4:
+      BR13 = (0xF0 << 16) | (pitch) | (1<<24) | (1<<25);
+      CMD = (XY_COLOR_BLT_CMD |
+	     XY_COLOR_BLT_WRITE_ALPHA | 
+	     XY_COLOR_BLT_WRITE_RGB);
+      D_CMD = XY_COLOR_BLT_CMD;
+      if (buffers & BUFFER_BIT_DEPTH) D_CMD |= XY_COLOR_BLT_WRITE_RGB;
+      if (buffers & BUFFER_BIT_STENCIL) D_CMD |= XY_COLOR_BLT_WRITE_ALPHA;
+      break;
+   default:
+      BR13 = (0xF0 << 16) | (pitch) | (1<<24);
+      D_CMD = CMD = XY_COLOR_BLT_CMD;
+      break;
+   }
+
+   {
+      /* flip top to bottom */
+      cy = intel->driDrawable->h - cy - ch;
+      cx = cx + intel->drawX;
+      cy += intel->drawY;
+
+      /* adjust for page flipping: swap the front and back buffer bits */
+      if ( intel->sarea->pf_current_page == 1 ) {
+	 GLuint tmp = buffers;
+
+	 buffers &= ~(BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT);
+	 if ( tmp & BUFFER_BIT_FRONT_LEFT ) buffers |= BUFFER_BIT_BACK_LEFT;
+	 if ( tmp & BUFFER_BIT_BACK_LEFT )  buffers |= BUFFER_BIT_FRONT_LEFT;
+      }
+
+      for (i = 0 ; i < intel->numClipRects ; i++) 
+      { 	 
+	 drm_clip_rect_t *box = &intel->pClipRects[i];	 
+	 drm_clip_rect_t b;
+
+	 if (!all) { /* partial clear: intersect the cliprect with the clear bounds */
+	    GLint x = box->x1;
+	    GLint y = box->y1;
+	    GLint w = box->x2 - x;
+	    GLint h = box->y2 - y;
+
+	    if (x < cx) w -= cx - x, x = cx; 
+	    if (y < cy) h -= cy - y, y = cy;
+	    if (x + w > cx + cw) w = cx + cw - x;
+	    if (y + h > cy + ch) h = cy + ch - y;
+	    if (w <= 0) continue;
+	    if (h <= 0) continue;
+
+	    b.x1 = x;
+	    b.y1 = y;
+	    b.x2 = x + w;
+	    b.y2 = y + h;      
+	 } else {
+	    b = *box;
+	 }
+
+
+	 if (b.x1 > b.x2 ||
+	     b.y1 > b.y2 ||
+	     b.x2 > intelScreen->width ||
+	     b.y2 > intelScreen->height)
+	    continue;
+
+	 if ( buffers & BUFFER_BIT_FRONT_LEFT ) {	    
+	    BEGIN_BATCH( 6);	    
+	    OUT_BATCH( CMD );
+	    OUT_BATCH( BR13 );
+	    OUT_BATCH( (b.y1 << 16) | b.x1 );
+	    OUT_BATCH( (b.y2 << 16) | b.x2 );
+	    OUT_BATCH( intelScreen->front.offset );
+	    OUT_BATCH( clear_color );
+	    ADVANCE_BATCH();
+	 }
+
+	 if ( buffers & BUFFER_BIT_BACK_LEFT ) {
+	    BEGIN_BATCH( 6); 
+	    OUT_BATCH( CMD );
+	    OUT_BATCH( BR13 );
+	    OUT_BATCH( (b.y1 << 16) | b.x1 );
+	    OUT_BATCH( (b.y2 << 16) | b.x2 );
+	    OUT_BATCH( intelScreen->back.offset );
+	    OUT_BATCH( clear_color );
+	    ADVANCE_BATCH();
+	 }
+
+	 if ( buffers & (BUFFER_BIT_STENCIL | BUFFER_BIT_DEPTH) ) {
+	    BEGIN_BATCH( 6);
+	    OUT_BATCH( D_CMD );
+	    OUT_BATCH( BR13 );
+	    OUT_BATCH( (b.y1 << 16) | b.x1 );
+	    OUT_BATCH( (b.y2 << 16) | b.x2 );
+	    OUT_BATCH( intelScreen->depth.offset );
+	    OUT_BATCH( clear_depth );
+	    ADVANCE_BATCH();
+	 }      
+      }
+   }
+   intelFlushBatchLocked( intel, GL_TRUE, GL_FALSE, GL_TRUE );
+   UNLOCK_HARDWARE( intel );
+}
+
+
+
+
+void intelDestroyBatchBuffer( GLcontext *ctx )
+{
+   intelContextPtr intel = INTEL_CONTEXT(ctx);
+
+   /* A buffer with a non-zero offset is released with intelFreeAGP();
+    * otherwise the pointer goes to free(), which ignores NULL.  Both
+    * fields end up cleared in either case.
+    */
+   if (intel->alloc.offset)
+      intelFreeAGP( intel, intel->alloc.ptr );
+   else
+      free(intel->alloc.ptr);
+   intel->alloc.ptr = NULL;
+   intel->alloc.offset = 0;
+   memset(&intel->batch, 0, sizeof(intel->batch));
+}
+
+
+void intelInitBatchBuffer( GLcontext *ctx )
+{
+   intelContextPtr intel = INTEL_CONTEXT(ctx);
+
+   /* The AGP path is only tried when the INTEL_BATCH environment
+    * variable is set and the screen allows batch buffers.  This path
+    * isn't really safe with rotate.
+    */
+   if (getenv("INTEL_BATCH") && intel->intelScreen->allow_batchbuffer) {
+      if (intel->intelScreen->deviceID == PCI_CHIP_I865_G) {
+         /* HW bug?  Seems to crash if batchbuffer crosses 4k boundary.
+          */
+         intel->alloc.size = 8 * 1024;
+      }
+      else {
+         /* This is the smallest amount of memory the kernel deals
+          * with.  We'd ideally like to make this smaller.
+          */
+         intel->alloc.size = 1 << intel->intelScreen->logTextureGranularity;
+      }
+
+      intel->alloc.ptr = intelAllocateAGP( intel, intel->alloc.size );
+      intel->alloc.offset = intel->alloc.ptr
+         ? intelAgpOffsetFromVirtual( intel, intel->alloc.ptr )
+         : 0; /* OK? */
+   }
+
+   /* The default is now to use a local buffer and pass that to the
+    * kernel.  It is also the fallback when the allocation on the
+    * path above fails.
+    */
+   if (intel->alloc.ptr == NULL) {
+      intel->alloc.size = 8 * 1024;
+      intel->alloc.ptr = malloc( intel->alloc.size );
+      intel->alloc.offset = 0;
+   }
+
+   /* There is no recovery path if even malloc() failed. */
+
+   assert(intel->alloc.ptr);
+}
diff --git a/i915/intel_batchbuffer.h b/i915/intel_batchbuffer.h
new file mode 100644
index 0000000..577d071
--- /dev/null
+++ b/i915/intel_batchbuffer.h
@@ -0,0 +1,126 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_BATCHBUFFER_H
+#define INTEL_BATCHBUFFER_H
+
+#include "intel_context.h"
+#include "intel_ioctl.h"
+
+
+#define BATCH_LOCALS	GLubyte *batch_ptr;
+
+/* #define VERBOSE 0 */
+#ifndef VERBOSE
+extern int VERBOSE;
+#endif
+/* Open a batch of n dwords: flush first if fewer than n*4 bytes are free, */
+/* then point the local batch_ptr (see BATCH_LOCALS) at the write position. */
+#define BEGIN_BATCH(n)							\
+do {									\
+   if (VERBOSE) fprintf(stderr, 					\
+			"BEGIN_BATCH(%lu) in %s, %d dwords free\n",	\
+			(unsigned long)(n), __FUNCTION__,		\
+			intel->batch.space/4);				\
+   if (intel->batch.space < (n)*4)					\
+      intelFlushBatch(intel, GL_TRUE);					\
+   if (intel->batch.space == intel->batch.size) intel->batch.func = __FUNCTION__;	\
+   batch_ptr = intel->batch.ptr;					\
+} while (0)
+/* Store one dword at batch_ptr and advance it; (n) is evaluated exactly once. */
+#define OUT_BATCH(n)					\
+do {							\
+   *(GLuint *)batch_ptr = (n);				\
+   if (VERBOSE) fprintf(stderr, " -- %08x at %s/%d\n", *(GLuint *)batch_ptr, __FILE__, __LINE__);	\
+   batch_ptr += 4;					\
+} while (0)
+/* Commit what was written through batch_ptr: shrink batch.space, advance batch.ptr. */
+#define ADVANCE_BATCH()						\
+do {								\
+   if (VERBOSE) fprintf(stderr, "ADVANCE_BATCH()\n");		\
+   intel->batch.space -= (batch_ptr - intel->batch.ptr);	\
+   intel->batch.ptr = batch_ptr;				\
+   assert(intel->batch.space >= 0);				\
+} while(0)
+
+extern void intelInitBatchBuffer( GLcontext *ctx );
+extern void intelDestroyBatchBuffer( GLcontext *ctx );
+
+extern void intelStartInlinePrimitive( intelContextPtr intel, GLuint prim );
+extern void intelWrapInlinePrimitive( intelContextPtr intel );
+extern void intelRestartInlinePrimitive( intelContextPtr intel );
+extern GLuint *intelEmitInlinePrimitiveLocked(intelContextPtr intel, 
+					      int primitive, int dwords,
+					      int vertex_size);
+extern void intelCopyBuffer( const __DRIdrawablePrivate *dpriv,
+			     const drm_clip_rect_t	*rect);
+extern void intelClearWithBlit(GLcontext *ctx, GLbitfield mask, GLboolean all,
+			     GLint cx1, GLint cy1, GLint cw, GLint ch);
+
+extern void intelEmitCopyBlitLocked( intelContextPtr intel,
+				     GLuint cpp,
+				     GLshort src_pitch,
+				     GLuint  src_offset,
+				     GLshort dst_pitch,
+				     GLuint  dst_offset,
+				     GLshort srcx, GLshort srcy,
+				     GLshort dstx, GLshort dsty,
+				     GLshort w, GLshort h );
+
+extern void intelEmitFillBlitLocked( intelContextPtr intel,
+				     GLuint cpp,
+				     GLshort dst_pitch,
+				     GLuint dst_offset,
+				     GLshort x, GLshort y, 
+				     GLshort w, GLshort h,
+				     GLuint color );
+
+/* Reserve 'dwords' dwords at the current batch position and return a pointer */
+/* to them.  If too little space is left, intelWrapInlinePrimitive() is called */
+/* first; the space is not re-checked afterwards (that assert is disabled).    */
+static __inline GLuint *intelExtendInlinePrimitive( intelContextPtr intel, 
+						GLuint dwords )
+{
+   GLuint sz = dwords * sizeof(GLuint);
+   GLuint *ptr;
+
+   if (intel->batch.space < sz) {
+      intelWrapInlinePrimitive( intel );
+/*       assert(intel->batch.space >= sz); */
+   }
+
+/*    assert(intel->prim.primitive != ~0); */
+   ptr = (GLuint *)intel->batch.ptr;
+   intel->batch.ptr += sz;
+   intel->batch.space -= sz;
+
+   return ptr;
+}
+
+
+
+#endif
diff --git a/i915/intel_context.c b/i915/intel_context.c
new file mode 100644
index 0000000..bb5ce64
--- /dev/null
+++ b/i915/intel_context.c
@@ -0,0 +1,871 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "glheader.h"
+#include "context.h"
+#include "matrix.h"
+#include "simple_list.h"
+#include "extensions.h"
+#include "framebuffer.h"
+#include "imports.h"
+#include "points.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+#include "vbo/vbo.h"
+
+#include "tnl/t_pipeline.h"
+#include "tnl/t_vertex.h"
+
+#include "drivers/common/driverfuncs.h"
+
+#include "intel_screen.h"
+
+#include "i830_dri.h"
+#include "i830_common.h"
+
+#include "intel_tex.h"
+#include "intel_span.h"
+#include "intel_tris.h"
+#include "intel_ioctl.h"
+#include "intel_batchbuffer.h"
+
+#include "vblank.h"
+#include "utils.h"
+#include "xmlpool.h" /* for symbolic values of enum-type options */
+#ifndef INTEL_DEBUG
+int INTEL_DEBUG = (0);
+#endif
+
+#define need_GL_ARB_multisample
+#define need_GL_ARB_point_parameters
+#define need_GL_ARB_texture_compression
+#define need_GL_ARB_vertex_buffer_object
+#define need_GL_ARB_vertex_program
+#define need_GL_ARB_window_pos
+#define need_GL_EXT_blend_color
+#define need_GL_EXT_blend_equation_separate
+#define need_GL_EXT_blend_func_separate
+#define need_GL_EXT_blend_minmax
+#define need_GL_EXT_cull_vertex
+#define need_GL_EXT_fog_coord
+#define need_GL_EXT_multi_draw_arrays
+#define need_GL_EXT_secondary_color
+#define need_GL_NV_vertex_program
+#include "extension_helper.h"
+
+#ifndef VERBOSE
+int VERBOSE = 0;
+#endif
+
+#if DEBUG_LOCKING
+char *prevLockFile;
+int prevLockLine;
+#endif
+
+/***************************************
+ * Mesa's Driver Functions
+ ***************************************/
+
+#define DRIVER_DATE "20061017"
+
+const GLubyte *intelGetString( GLcontext *ctx, GLenum name )
+{
+   static char renderer[128];
+   const char *chip_name;
+
+   /* Only GL_VENDOR and GL_RENDERER are answered here. */
+   if (name == GL_VENDOR)
+      return (GLubyte *)"Tungsten Graphics, Inc";
+
+   if (name != GL_RENDERER)
+      return NULL;
+
+   /* Pick a human-readable name for the PCI device id. */
+   switch (INTEL_CONTEXT(ctx)->intelScreen->deviceID) {
+   case PCI_CHIP_845_G:
+      chip_name = "Intel(R) 845G"; break;
+   case PCI_CHIP_I830_M:
+      chip_name = "Intel(R) 830M"; break;
+   case PCI_CHIP_I855_GM:
+      chip_name = "Intel(R) 852GM/855GM"; break;
+   case PCI_CHIP_I865_G:
+      chip_name = "Intel(R) 865G"; break;
+   case PCI_CHIP_I915_G:
+      chip_name = "Intel(R) 915G"; break;
+   case PCI_CHIP_I915_GM:
+      chip_name = "Intel(R) 915GM"; break;
+   case PCI_CHIP_I945_G:
+      chip_name = "Intel(R) 945G"; break;
+   case PCI_CHIP_I945_GM:
+      chip_name = "Intel(R) 945GM"; break;
+   case PCI_CHIP_I945_GME:
+      chip_name = "Intel(R) 945GME"; break;
+   case PCI_CHIP_G33_G:
+      chip_name = "Intel(R) G33"; break;
+   case PCI_CHIP_Q35_G:
+      chip_name = "Intel(R) Q35"; break;
+   case PCI_CHIP_Q33_G:
+      chip_name = "Intel(R) Q33"; break;
+   default:
+      chip_name = "Unknown Intel Chipset"; break;
+   }
+
+   /* Let the common DRI helper format the string; the static buffer is
+    * overwritten on every GL_RENDERER query. */
+   (void) driGetRendererString( renderer, chip_name, DRIVER_DATE, 0 );
+   return (GLubyte *) renderer;
+}
+
+
+/**
+ * Extension strings exported by the intel driver.
+ *
+ * \note
+ * It appears that ARB_texture_env_crossbar has "disappeared" compared to the
+ * old i830-specific driver.
+ */
+const struct dri_extension card_extensions[] =
+{
+    { "GL_ARB_multisample",                GL_ARB_multisample_functions },
+    { "GL_ARB_multitexture",               NULL },
+    { "GL_ARB_point_parameters",           GL_ARB_point_parameters_functions },
+    { "GL_ARB_texture_border_clamp",       NULL },
+    { "GL_ARB_texture_compression",        GL_ARB_texture_compression_functions },
+    { "GL_ARB_texture_cube_map",           NULL },
+    { "GL_ARB_texture_env_add",            NULL },
+    { "GL_ARB_texture_env_combine",        NULL },
+    { "GL_ARB_texture_env_dot3",           NULL },
+    { "GL_ARB_texture_mirrored_repeat",    NULL },
+    { "GL_ARB_texture_rectangle",          NULL },
+    { "GL_ARB_vertex_buffer_object",       GL_ARB_vertex_buffer_object_functions },
+    { "GL_ARB_vertex_program",             GL_ARB_vertex_program_functions },
+    { "GL_ARB_window_pos",                 GL_ARB_window_pos_functions },
+    { "GL_EXT_blend_color",                GL_EXT_blend_color_functions },
+    { "GL_EXT_blend_equation_separate",    GL_EXT_blend_equation_separate_functions },
+    { "GL_EXT_blend_func_separate",        GL_EXT_blend_func_separate_functions },
+    { "GL_EXT_blend_minmax",               GL_EXT_blend_minmax_functions },
+    { "GL_EXT_blend_subtract",             NULL },
+    { "GL_EXT_cull_vertex",                GL_EXT_cull_vertex_functions },
+    { "GL_EXT_fog_coord",                  GL_EXT_fog_coord_functions },
+    { "GL_EXT_multi_draw_arrays",          GL_EXT_multi_draw_arrays_functions },
+    { "GL_EXT_secondary_color",            GL_EXT_secondary_color_functions },
+    { "GL_EXT_stencil_wrap",               NULL },
+    { "GL_EXT_texture_edge_clamp",         NULL },
+    { "GL_EXT_texture_env_combine",        NULL },
+    { "GL_EXT_texture_env_dot3",           NULL },
+    { "GL_EXT_texture_filter_anisotropic", NULL },
+    { "GL_EXT_texture_lod_bias",           NULL },
+    { "GL_3DFX_texture_compression_FXT1",  NULL },
+    { "GL_APPLE_client_storage",           NULL },
+    { "GL_MESA_pack_invert",               NULL },
+    { "GL_MESA_ycbcr_texture",             NULL },
+    { "GL_NV_blend_square",                NULL },
+    { "GL_NV_vertex_program",              GL_NV_vertex_program_functions },
+    { "GL_NV_vertex_program1_1",           NULL },
+    { "GL_SGIS_generate_mipmap",           NULL },
+    { NULL,                                NULL }
+};
+
+extern const struct tnl_pipeline_stage _intel_render_stage;
+/* TNL stage list installed by intelInitContext(); terminated by a 0 entry. */
+static const struct tnl_pipeline_stage *intel_pipeline[] = {
+   &_tnl_vertex_transform_stage,
+   &_tnl_vertex_cull_stage,
+   &_tnl_normal_transform_stage,
+   &_tnl_lighting_stage,
+   &_tnl_fog_coordinate_stage,
+   &_tnl_texgen_stage,
+   &_tnl_texture_transform_stage,
+   &_tnl_point_attenuation_stage,
+   &_tnl_vertex_program_stage,
+#if 1
+   &_intel_render_stage,     /* ADD: unclipped rastersetup-to-dma */
+#endif
+   &_tnl_render_stage,
+   0,
+};
+/* Keyword -> DEBUG_* bit table handed to driParseDebugString() when the   */
+/* INTEL_DEBUG environment variable is parsed; ends with a NULL keyword.   */
+static const struct dri_debug_control debug_control[] =
+{
+    { "fall",  DEBUG_FALLBACKS },
+    { "tex",   DEBUG_TEXTURE },
+    { "ioctl", DEBUG_IOCTL },
+    { "prim",  DEBUG_PRIMS },
+    { "vert",  DEBUG_VERTS },
+    { "state", DEBUG_STATE },
+    { "verb",  DEBUG_VERBOSE },
+    { "dri",   DEBUG_DRI },
+    { "dma",   DEBUG_DMA },
+    { "san",   DEBUG_SANITY },
+    { "sync",  DEBUG_SYNC },
+    { "sleep", DEBUG_SLEEP },
+    { "pix",   DEBUG_PIXEL },
+    { NULL,    0 }
+};
+/* Forward the dirty-state bits to swrast, swsetup, vbo and tnl, then      */
+/* accumulate them in the intel context's NewGLState.                      */
+static void intelInvalidateState( GLcontext *ctx, GLuint new_state )
+{
+   _swrast_InvalidateState( ctx, new_state );
+   _swsetup_InvalidateState( ctx, new_state );
+   _vbo_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+   _tnl_invalidate_vertex_state( ctx, new_state );
+   INTEL_CONTEXT(ctx)->NewGLState |= new_state;
+}
+/* Fill 'functions' with Mesa's defaults, override the hooks implemented   */
+/* in this driver, then let the texture/pixel/state modules add theirs.    */
+void intelInitDriverFunctions( struct dd_function_table *functions )
+{
+   _mesa_init_driver_functions( functions );
+
+   functions->Clear = intelClear;
+   functions->Flush = intelglFlush;
+   functions->Finish = intelFinish;
+   functions->GetString = intelGetString;
+   functions->UpdateState = intelInvalidateState;
+
+   intelInitTextureFuncs( functions );
+   intelInitPixelFuncs( functions );
+   intelInitStateFuncs( functions );
+}
+/* No-op callback; this file installs it as intel->prim.flush. */
+static void intel_emit_invarient_state( GLcontext *ctx )
+{
+}
+/* Set up an intel context: the Mesa context, driver limits, the swrast/vbo/ */
+/* tnl/swsetup modules, DRI handles, depth parameters, throttle and vblank   */
+/* options, extensions, batch buffer.  GL_FALSE if Mesa context init fails.  */
+GLboolean intelInitContext( intelContextPtr intel,
+			    const __GLcontextModes *mesaVis,
+			    __DRIcontextPrivate *driContextPriv,
+			    void *sharedContextPrivate,
+			    struct dd_function_table *functions )
+{
+   GLcontext *ctx = &intel->ctx;
+   GLcontext *shareCtx = (GLcontext *) sharedContextPrivate;
+   __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
+   intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private;
+   drmI830Sarea *saPriv = (drmI830Sarea *)
+      (((GLubyte *)sPriv->pSAREA)+intelScreen->sarea_priv_offset);
+   int fthrottle_mode;
+
+   if (!_mesa_initialize_context(&intel->ctx,
+				 mesaVis, shareCtx, 
+				 functions,
+				 (void*) intel))
+      return GL_FALSE;
+
+   driContextPriv->driverPrivate = intel;
+   intel->intelScreen = intelScreen;
+   intel->driScreen = sPriv;
+   intel->sarea = saPriv;
+
+
+   (void) memset( intel->texture_heaps, 0, sizeof( intel->texture_heaps ) );
+   make_empty_list( & intel->swapped );
+
+   driParseConfigFiles (&intel->optionCache, &intelScreen->optionCache,
+			intel->driScreen->myNum, "i915");
+
+   ctx->Const.MaxTextureMaxAnisotropy = 2.0;
+
+   ctx->Const.MinLineWidth = 1.0;
+   ctx->Const.MinLineWidthAA = 1.0;
+   ctx->Const.MaxLineWidth = 3.0;
+   ctx->Const.MaxLineWidthAA = 3.0;
+   ctx->Const.LineWidthGranularity = 1.0;
+
+   ctx->Const.MinPointSize = 1.0;
+   ctx->Const.MinPointSizeAA = 1.0;
+   ctx->Const.MaxPointSize = 255.0;
+   ctx->Const.MaxPointSizeAA = 3.0;
+   ctx->Const.PointSizeGranularity = 1.0;
+
+   /* Reinitialize the context point state.
+    * It depends on constants in __GLcontextRec::Const
+    */
+   _mesa_init_point(ctx);
+
+   /* Initialize the software rasterizer and helper modules. */
+   _swrast_CreateContext( ctx );
+   _vbo_CreateContext( ctx );
+   _tnl_CreateContext( ctx );
+   _swsetup_CreateContext( ctx );
+
+   /* Install the customized pipeline: */
+   _tnl_destroy_pipeline( ctx );
+   _tnl_install_pipeline( ctx, intel_pipeline );
+
+   /* Configure swrast to match hardware characteristics: */
+   _swrast_allow_pixel_fog( ctx, GL_FALSE );
+   _swrast_allow_vertex_fog( ctx, GL_TRUE );
+
+   /* Dri stuff */
+   intel->hHWContext = driContextPriv->hHWContext;
+   intel->driFd = sPriv->fd;
+   intel->driHwLock = (drmLock *) &sPriv->pSAREA->lock;
+
+   intel->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24;
+   intel->hw_stipple = 1;
+
+   switch(mesaVis->depthBits) {
+   case 0:			/* what to do in this case? */
+   case 16:
+      intel->depth_scale = 1.0/0xffff;
+      intel->polygon_offset_scale = 1.0/0xffff;
+      intel->depth_clear_mask = ~0;
+      intel->ClearDepth = 0xffff;
+      break;
+   case 24:
+      intel->depth_scale = 1.0/0xffffff;
+      intel->polygon_offset_scale = 2.0/0xffffff; /* req'd to pass glean */
+      intel->depth_clear_mask = 0x00ffffff;
+      intel->stencil_clear_mask = 0xff000000;
+      intel->ClearDepth = 0x00ffffff;
+      break;
+   default:
+      assert(0); 
+      break;
+   }
+
+   /* Initialize swrast, tnl driver tables: */
+   intelInitSpanFuncs( ctx );
+   intelInitTriFuncs( ctx );
+
+
+   intel->RenderIndex = ~0;
+
+   fthrottle_mode = driQueryOptioni(&intel->optionCache, "fthrottle_mode");
+   intel->iw.irq_seq = -1;
+   intel->irqsEmitted = 0;
+
+   intel->do_irqs = (intel->intelScreen->irq_active &&
+		     fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS);
+
+   intel->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
+
+   intel->vblank_flags = (intel->intelScreen->irq_active != 0)
+       ? driGetDefaultVBlankFlags(&intel->optionCache) : VBLANK_FLAG_NO_IRQ;
+
+   (*dri_interface->getUST)(&intel->swap_ust);
+   _math_matrix_ctr (&intel->ViewportMatrix);
+
+   driInitExtensions( ctx, card_extensions, GL_TRUE );
+
+   if (intel->ctx.Mesa_DXTn) {
+     _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
+     _mesa_enable_extension( ctx, "GL_S3_s3tc" );
+   }
+   else if (driQueryOptionb (&intel->optionCache, "force_s3tc_enable")) {
+     _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
+   }
+
+/*    driInitTextureObjects( ctx, & intel->swapped, */
+/* 			  DRI_TEXMGR_DO_TEXTURE_1D | */
+/* 			  DRI_TEXMGR_DO_TEXTURE_2D |  */
+/* 			  DRI_TEXMGR_DO_TEXTURE_RECT ); */
+
+
+   intelInitBatchBuffer(&intel->ctx);
+   intel->prim.flush = intel_emit_invarient_state;
+   intel->prim.primitive = ~0;
+
+
+#if DO_DEBUG
+   /* Translate the keywords in $INTEL_DEBUG into DEBUG_* bits using the
+    * debug_control table; one parse is sufficient.
+    */
+   INTEL_DEBUG = driParseDebugString( getenv( "INTEL_DEBUG" ), debug_control );
+#endif
+
+#ifndef VERBOSE
+   if (getenv("INTEL_VERBOSE"))
+      VERBOSE=1;
+#endif
+
+   /* $INTEL_NO_RAST forces the user fallback (no hardware rasterization). */
+   if (getenv("INTEL_NO_RAST")) {
+      fprintf(stderr, "disabling 3D rasterization\n");
+      FALLBACK(intel, INTEL_FALLBACK_USER, 1); 
+   }
+
+   return GL_TRUE;
+}
+/* Flush pending work and tear down everything intelInitContext() set up. */
+void intelDestroyContext(__DRIcontextPrivate *driContextPriv)
+{
+   intelContextPtr intel = (intelContextPtr) driContextPriv->driverPrivate;
+   GLboolean last_in_share_group;
+   int heap;
+
+   assert(intel); /* should never be null */
+   if (!intel)
+      return;
+
+   INTEL_FIREVERTICES( intel );
+
+   intel->vtbl.destroy( intel );
+
+   /* Sample the share-group refcount before the helper modules go away. */
+   last_in_share_group = (intel->ctx.Shared->RefCount == 1);
+   _swsetup_DestroyContext (&intel->ctx);
+   _tnl_DestroyContext (&intel->ctx);
+   _vbo_DestroyContext (&intel->ctx);
+   _swrast_DestroyContext (&intel->ctx);
+
+   intel->Fallback = 0;	/* don't call _swrast_Flush later */
+
+   intelDestroyBatchBuffer(&intel->ctx);
+
+   if ( last_in_share_group ) {
+      /* This share group is about to go away, free our private
+       * texture object data.
+       */
+      for ( heap = 0 ; heap < intel->nr_heaps ; heap++ ) {
+	 driDestroyTextureHeap( intel->texture_heaps[ heap ] );
+	 intel->texture_heaps[ heap ] = NULL;
+      }
+
+      assert( is_empty_list( & intel->swapped ) );
+   }
+
+
+   /* free the Mesa context */
+   _mesa_destroy_context(&intel->ctx);
+}
+/* Adopt the drawable's front cliprects and origin; no-op without a drawable. */
+void intelSetFrontClipRects( intelContextPtr intel )
+{
+   __DRIdrawablePrivate *drawable = intel->driDrawable;
+
+   if (drawable) {
+      intel->drawX = drawable->x;
+      intel->drawY = drawable->y;
+      intel->pClipRects = drawable->pClipRects;
+      intel->numClipRects = drawable->numClipRects;
+   }
+}
+/* Select cliprects/origin for back-buffer rendering: the front set when page */
+/* flipping is off and no back cliprects exist, otherwise the back set.       */
+void intelSetBackClipRects( intelContextPtr intel )
+{
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;
+
+   if (!dPriv) return;
+
+   if (intel->sarea->pf_enabled == 0 && dPriv->numBackClipRects == 0) {
+      intel->numClipRects = dPriv->numClipRects;
+      intel->pClipRects = dPriv->pClipRects;
+      intel->drawX = dPriv->x;
+      intel->drawY = dPriv->y;
+   } else {
+      intel->numClipRects = dPriv->numBackClipRects;
+      intel->pClipRects = dPriv->pBackClipRects;
+      intel->drawX = dPriv->backX;
+      intel->drawY = dPriv->backY;
+      
+      if (dPriv->numBackClipRects == 1 &&
+	  dPriv->x == dPriv->backX &&
+	  dPriv->y == dPriv->backY) {
+      
+	 /* Repeat the calculation of the back cliprect dimensions here
+	  * as early versions of dri.a in the Xserver are incorrect.  Try
+	  * very hard not to restrict future versions of dri.a which
+	  * might eg. allocate truly private back buffers.
+	  */
+	 int x1, y1;
+	 int x2, y2;
+	 
+	 x1 = dPriv->x;
+	 y1 = dPriv->y;      
+	 x2 = dPriv->x + dPriv->w;
+	 y2 = dPriv->y + dPriv->h;
+	 
+	 if (x1 < 0) x1 = 0;
+	 if (y1 < 0) y1 = 0;
+	 if (x2 > intel->intelScreen->width) x2 = intel->intelScreen->width;
+	 if (y2 > intel->intelScreen->height) y2 = intel->intelScreen->height;
+
+	 if (x1 == dPriv->pBackClipRects[0].x1 &&
+	     y1 == dPriv->pBackClipRects[0].y1) {
+
+	    dPriv->pBackClipRects[0].x2 = x2;	/* overwrite with the screen-clamped extent */
+	    dPriv->pBackClipRects[0].y2 = y2;
+	 }
+      }
+   }
+}
+/* Refresh everything derived from the drawable's position and size: clip   */
+/* rects, framebuffer size, vblank pipe selection, scissor and depth range. */
+void intelWindowMoved( intelContextPtr intel )
+{
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;
+   GLframebuffer *drawFb = (GLframebuffer *) dPriv->driverPrivate; /* NOTE(review): dPriv is not NULL-checked here, unlike in the cliprect helpers */
+
+   if (!intel->ctx.DrawBuffer) {
+      intelSetFrontClipRects( intel );
+   }
+   else {
+      driUpdateFramebufferSize(&intel->ctx, dPriv);
+      switch (drawFb->_ColorDrawBufferMask[0]) {
+      case BUFFER_BIT_FRONT_LEFT:
+	 intelSetFrontClipRects( intel );
+	 break;
+      case BUFFER_BIT_BACK_LEFT:
+	 intelSetBackClipRects( intel );
+	 break;
+      default:
+	 /* glDrawBuffer(GL_NONE or GL_FRONT_AND_BACK): software fallback */
+	 intelSetFrontClipRects( intel );
+      }
+   }
+
+   if (drawFb->Width != dPriv->w || drawFb->Height != dPriv->h) {
+      /* update Mesa's notion of framebuffer/window size */
+      _mesa_resize_framebuffer(&intel->ctx, drawFb, dPriv->w, dPriv->h);
+      drawFb->Initialized = GL_TRUE; /* XXX remove someday */
+   }
+
+   /* Set state we know depends on drawable parameters:
+    */
+   {
+      GLcontext *ctx = &intel->ctx;
+
+      if (intel->intelScreen->driScrnPriv->ddxMinor >= 7) {
+	 drmI830Sarea *sarea = intel->sarea;
+	 drm_clip_rect_t drw_rect = { .x1 = dPriv->x, .x2 = dPriv->x + dPriv->w,
+				      .y1 = dPriv->y, .y2 = dPriv->y + dPriv->h };
+	 drm_clip_rect_t pipeA_rect = { .x1 = sarea->pipeA_x,
+					.x2 = sarea->pipeA_x + sarea->pipeA_w,
+					.y1 = sarea->pipeA_y,
+					.y2 = sarea->pipeA_y + sarea->pipeA_h };
+	 drm_clip_rect_t pipeB_rect = { .x1 = sarea->pipeB_x,
+					.x2 = sarea->pipeB_x + sarea->pipeB_w,
+					.y1 = sarea->pipeB_y,
+					.y2 = sarea->pipeB_y + sarea->pipeB_h };
+	 GLint areaA = driIntersectArea( drw_rect, pipeA_rect );
+	 GLint areaB = driIntersectArea( drw_rect, pipeB_rect );
+	 GLuint flags = intel->vblank_flags;
+
+	 /* Use the secondary vblank when pipe B covers at least as much of the window (and some of it). */
+	 if (areaB > areaA || (areaA == areaB && areaB > 0)) {
+	    flags = intel->vblank_flags | VBLANK_FLAG_SECONDARY;
+	 } else {
+	    flags = intel->vblank_flags & ~VBLANK_FLAG_SECONDARY;
+	 }
+
+	 if (flags != intel->vblank_flags) {
+	    intel->vblank_flags = flags;
+	    driGetCurrentVBlank(dPriv, intel->vblank_flags, &intel->vbl_seq);
+	 }
+      } else {
+	 intel->vblank_flags &= ~VBLANK_FLAG_SECONDARY;
+      }
+
+      ctx->Driver.Scissor( ctx, ctx->Scissor.X, ctx->Scissor.Y,
+			   ctx->Scissor.Width, ctx->Scissor.Height );
+      
+      ctx->Driver.DepthRange( ctx, 
+			      ctx->Viewport.Near,
+			      ctx->Viewport.Far );
+   }
+}
+/* Nothing to undo when a context is unbound; always reports success. */
+GLboolean intelUnbindContext(__DRIcontextPrivate *driContextPriv)
+{
+   return GL_TRUE;
+}
+/* Make the context current for the given drawables, or with a NULL context make none current. */
+GLboolean intelMakeCurrent(__DRIcontextPrivate *driContextPriv,
+			  __DRIdrawablePrivate *driDrawPriv,
+			  __DRIdrawablePrivate *driReadPriv)
+{
+
+   if (driContextPriv) {
+      intelContextPtr intel = (intelContextPtr) driContextPriv->driverPrivate;
+
+      /* A new draw drawable needs vblank init and a cliprect/size refresh. */
+      if ( intel->driDrawable != driDrawPriv ) {
+	 /* Shouldn't the readbuffer be stored also? */
+	 driDrawableInitVBlank( driDrawPriv, intel->vblank_flags,
+				&intel->vbl_seq );
+
+	 intel->driDrawable = driDrawPriv;
+	 intelWindowMoved( intel );
+      }
+
+      _mesa_make_current(&intel->ctx,
+			 (GLframebuffer *) driDrawPriv->driverPrivate,
+			 (GLframebuffer *) driReadPriv->driverPrivate);
+
+      intel->ctx.Driver.DrawBuffer( &intel->ctx, intel->ctx.Color.DrawBuffer[0] );
+   } else {
+      _mesa_make_current(NULL, NULL, NULL);
+   }
+
+   return GL_TRUE;
+}
+
+/**
+ * Use the information in the sarea to update the screen parameters
+ * related to screen rotation.
+ */
+static void
+intelUpdateScreenRotation(intelContextPtr intel,
+                          __DRIscreenPrivate *sPriv,
+                          drmI830Sarea *sarea)
+{
+   intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private;
+   intelRegion *colorBuf;
+
+   intelUnmapScreenRegions(intelScreen);   /* mappings are re-created at the end */
+
+   intelUpdateScreenFromSAREA(intelScreen, sarea);
+
+   /* update the current hw offsets for the color and depth buffers */
+   if (intel->ctx.DrawBuffer->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT)
+      colorBuf = &intelScreen->back;
+   else
+      colorBuf = &intelScreen->front;
+   intel->vtbl.update_color_z_regions(intel, colorBuf, &intelScreen->depth);
+
+   if (!intelMapScreenRegions(sPriv)) {
+      fprintf(stderr, "ERROR Remapping screen regions!!!\n");   /* reported only; no recovery */
+   }
+}
+/* Take the DRM lock, then revalidate the drawable, screen geometry and texture heaps. */
+void intelGetLock( intelContextPtr intel, GLuint flags )
+{
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;
+   __DRIscreenPrivate *sPriv = intel->driScreen;
+   intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private;
+   drmI830Sarea * sarea = intel->sarea;
+   unsigned   i;
+
+   drmGetLock(intel->driFd, intel->hHWContext, flags);
+
+   /* If the window moved, may need to set a new cliprect now.
+    *
+    * NOTE: This releases and regains the hw lock, so all state
+    * checking must be done *after* this call:
+    */
+   if (dPriv)
+      DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv);
+
+   if (dPriv && intel->lastStamp != dPriv->lastStamp) {
+      intelWindowMoved( intel );
+      intel->lastStamp = dPriv->lastStamp;
+   }
+
+   /* If we lost context, need to dump all registers to hardware.
+    * Note that we don't care about 2d contexts, even if they perform
+    * accelerated commands, so the DRI locking in the X server is even
+    * more broken than usual.
+    */
+
+   if (sarea->width != intelScreen->width ||
+       sarea->height != intelScreen->height ||
+       sarea->rotation != intelScreen->current_rotation) {
+      intelUpdateScreenRotation(intel, sPriv, sarea);
+
+      /* This will drop the outstanding batchbuffer on the floor */
+      intel->batch.ptr -= (intel->batch.size - intel->batch.space);
+      intel->batch.space = intel->batch.size;
+      /* lose all primitives */
+      intel->prim.primitive = ~0;
+      intel->prim.start_ptr = 0;
+      intel->prim.flush = 0;
+      intel->vtbl.lost_hardware( intel ); 
+
+      intel->lastStamp = 0; /* force window update */
+
+      /* Release batch buffer
+       */
+      intelDestroyBatchBuffer(&intel->ctx);
+      intelInitBatchBuffer(&intel->ctx);
+      intel->prim.flush = intel_emit_invarient_state;
+
+      /* Still need to reset the global LRU?
+       */
+      intel_driReinitTextureHeap( intel->texture_heaps[0], intel->intelScreen->tex.size );
+   }
+
+   /* Shared texture management - if another client has played with
+    * texture space, figure out which if any of our textures have been
+    * ejected, and update our global LRU.
+    */
+   for ( i = 0 ; i < intel->nr_heaps ; i++ ) {
+      DRI_AGE_TEXTURES( intel->texture_heaps[ i ] );
+   }
+}
+/* Swap handler: for a double-buffered context, notify Mesa, copy the buffer */
+/* (page flipping is compiled out) and rotate the window if rotation is on.  */
+void intelSwapBuffers( __DRIdrawablePrivate *dPriv )
+{
+   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+      intelContextPtr intel;
+      GLcontext *ctx;
+      intel = (intelContextPtr) dPriv->driContextPriv->driverPrivate;
+      ctx = &intel->ctx;
+      if (ctx->Visual.doubleBufferMode) {
+         intelScreenPrivate *screen = intel->intelScreen;
+	 _mesa_notifySwapBuffers( ctx );  /* flush pending rendering commands */
+	 if ( 0 /*intel->doPageFlip*/ ) { /* doPageFlip is never set !!! */
+	    intelPageFlip( dPriv );
+	 } else {
+	     intelCopyBuffer( dPriv, NULL );
+	 }
+         if (screen->current_rotation != 0) {
+            intelRotateWindow(intel, dPriv, BUFFER_BIT_FRONT_LEFT);
+         }
+      }
+   } else {
+      /* XXX this shouldn't be an error but we can't handle it for now */
+      fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__);
+   }
+}
+/* Copy one window sub-rectangle via intelCopyBuffer(); y is flipped against the drawable height. */
+void intelCopySubBuffer( __DRIdrawablePrivate *dPriv,
+			 int x, int y, int w, int h )
+{
+   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+      intelContextPtr intel;
+      GLcontext *ctx;
+      intel = (intelContextPtr) dPriv->driContextPriv->driverPrivate;
+      ctx = &intel->ctx;
+      if (ctx->Visual.doubleBufferMode) {
+	 drm_clip_rect_t rect;
+	 rect.x1 = x + dPriv->x;
+	 rect.y1 = (dPriv->h - y - h) + dPriv->y;
+	 rect.x2 = rect.x1 + w;
+	 rect.y2 = rect.y1 + h;
+	 _mesa_notifySwapBuffers( ctx );  /* flush pending rendering commands */
+	 intelCopyBuffer( dPriv, &rect );
+      }
+   } else {
+      /* XXX this shouldn't be an error but we can't handle it for now */
+      fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__);
+   }
+}
+/* Push the context's current GL state through the ctx->Driver callbacks. */
+void intelInitState( GLcontext *ctx )
+{
+   /* Mesa should do this for us:
+    */
+   ctx->Driver.AlphaFunc( ctx, 
+			  ctx->Color.AlphaFunc,
+			  ctx->Color.AlphaRef);
+
+   ctx->Driver.BlendColor( ctx,
+			   ctx->Color.BlendColor );
+
+   ctx->Driver.BlendEquationSeparate( ctx, 
+				      ctx->Color.BlendEquationRGB,
+				      ctx->Color.BlendEquationA);
+
+   ctx->Driver.BlendFuncSeparate( ctx,
+				  ctx->Color.BlendSrcRGB,
+				  ctx->Color.BlendDstRGB,
+				  ctx->Color.BlendSrcA,
+				  ctx->Color.BlendDstA);
+
+   ctx->Driver.ColorMask( ctx, 
+			  ctx->Color.ColorMask[RCOMP],
+			  ctx->Color.ColorMask[GCOMP],
+			  ctx->Color.ColorMask[BCOMP],
+			  ctx->Color.ColorMask[ACOMP]);
+
+   ctx->Driver.CullFace( ctx, ctx->Polygon.CullFaceMode );
+   ctx->Driver.DepthFunc( ctx, ctx->Depth.Func );
+   ctx->Driver.DepthMask( ctx, ctx->Depth.Mask );
+
+   ctx->Driver.Enable( ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled );
+   ctx->Driver.Enable( ctx, GL_BLEND, ctx->Color.BlendEnabled );
+   ctx->Driver.Enable( ctx, GL_COLOR_LOGIC_OP, ctx->Color.ColorLogicOpEnabled );
+   ctx->Driver.Enable( ctx, GL_COLOR_SUM, ctx->Fog.ColorSumEnabled );
+   ctx->Driver.Enable( ctx, GL_CULL_FACE, ctx->Polygon.CullFlag );
+   ctx->Driver.Enable( ctx, GL_DEPTH_TEST, ctx->Depth.Test );
+   ctx->Driver.Enable( ctx, GL_DITHER, ctx->Color.DitherFlag );
+   ctx->Driver.Enable( ctx, GL_FOG, ctx->Fog.Enabled );
+   ctx->Driver.Enable( ctx, GL_LIGHTING, ctx->Light.Enabled );
+   ctx->Driver.Enable( ctx, GL_LINE_SMOOTH, ctx->Line.SmoothFlag );
+   ctx->Driver.Enable( ctx, GL_POLYGON_STIPPLE, ctx->Polygon.StippleFlag );
+   ctx->Driver.Enable( ctx, GL_SCISSOR_TEST, ctx->Scissor.Enabled );
+   ctx->Driver.Enable( ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled );
+   ctx->Driver.Enable( ctx, GL_TEXTURE_1D, GL_FALSE );	/* texture targets are reported as disabled */
+   ctx->Driver.Enable( ctx, GL_TEXTURE_2D, GL_FALSE );
+   ctx->Driver.Enable( ctx, GL_TEXTURE_RECTANGLE_NV, GL_FALSE );
+   ctx->Driver.Enable( ctx, GL_TEXTURE_3D, GL_FALSE );
+   ctx->Driver.Enable( ctx, GL_TEXTURE_CUBE_MAP, GL_FALSE );
+
+   ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color );
+   ctx->Driver.Fogfv( ctx, GL_FOG_MODE, 0 );	/* NOTE(review): null params; presumably the hook reads ctx->Fog itself - confirm */
+   ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density );
+   ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start );
+   ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End );
+
+   ctx->Driver.FrontFace( ctx, ctx->Polygon.FrontFace );
+
+   {
+      GLfloat f = (GLfloat)ctx->Light.Model.ColorControl;	/* the hook takes a float array */
+      ctx->Driver.LightModelfv( ctx, GL_LIGHT_MODEL_COLOR_CONTROL, &f );
+   }
+
+   ctx->Driver.LineWidth( ctx, ctx->Line.Width );
+   ctx->Driver.LogicOpcode( ctx, ctx->Color.LogicOp );
+   ctx->Driver.PointSize( ctx, ctx->Point.Size );
+   ctx->Driver.PolygonStipple( ctx, (const GLubyte *)ctx->PolygonStipple );
+   ctx->Driver.Scissor( ctx, ctx->Scissor.X, ctx->Scissor.Y,
+			ctx->Scissor.Width, ctx->Scissor.Height );
+   ctx->Driver.ShadeModel( ctx, ctx->Light.ShadeModel );
+   ctx->Driver.StencilFuncSeparate( ctx, GL_FRONT,
+                                    ctx->Stencil.Function[0],
+                                    ctx->Stencil.Ref[0],
+                                    ctx->Stencil.ValueMask[0] );
+   ctx->Driver.StencilFuncSeparate( ctx, GL_BACK,
+                                    ctx->Stencil.Function[1],
+                                    ctx->Stencil.Ref[1],
+                                    ctx->Stencil.ValueMask[1] );
+   ctx->Driver.StencilMaskSeparate( ctx, GL_FRONT, ctx->Stencil.WriteMask[0] );
+   ctx->Driver.StencilMaskSeparate( ctx, GL_BACK, ctx->Stencil.WriteMask[1] );
+   ctx->Driver.StencilOpSeparate( ctx, GL_FRONT,
+                                  ctx->Stencil.FailFunc[0],
+                                  ctx->Stencil.ZFailFunc[0],
+                                  ctx->Stencil.ZPassFunc[0]);
+   ctx->Driver.StencilOpSeparate( ctx, GL_BACK,
+                                  ctx->Stencil.FailFunc[1],
+                                  ctx->Stencil.ZFailFunc[1],
+                                  ctx->Stencil.ZPassFunc[1]);
+
+
+   ctx->Driver.DrawBuffer( ctx, ctx->Color.DrawBuffer[0] );
+}
+
+
diff --git a/i915/intel_context.h b/i915/intel_context.h
new file mode 100644
index 0000000..50e6178
--- /dev/null
+++ b/i915/intel_context.h
@@ -0,0 +1,564 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTELCONTEXT_INC
+#define INTELCONTEXT_INC
+
+
+
+#include "mtypes.h"
+#include "drm.h"
+#include "mm.h"
+#include "texmem.h"
+#include "vblank.h"
+
+#include "intel_screen.h"
+#include "i915_drm.h"
+#include "i830_common.h"
+#include "tnl/t_vertex.h"
+/* Instantiate the t_dd_vertex.h template with "intel" as the name prefix. */
+#define TAG(x) intel##x
+#include "tnl_dd/t_dd_vertex.h"
+#undef TAG
+/* Pixel-format selectors tested by INTEL_PACKCOLOR(). */
+#define DV_PF_555  (1<<8)
+#define DV_PF_565  (2<<8)
+#define DV_PF_8888 (3<<8)
+/* Cast a GLcontext pointer to the driver context that embeds it. */
+#define INTEL_CONTEXT(ctx)	((intelContextPtr)(ctx))
+
+typedef struct intel_context intelContext;
+typedef struct intel_context *intelContextPtr;
+typedef struct intel_texture_object *intelTextureObjectPtr;
+/* Signatures of the triangle, line and point drawing callbacks. */
+typedef void (*intel_tri_func)(intelContextPtr, intelVertex *, intelVertex *,
+							  intelVertex *);
+typedef void (*intel_line_func)(intelContextPtr, intelVertex *, intelVertex *);
+typedef void (*intel_point_func)(intelContextPtr, intelVertex *);
+/* One bit per fallback reason (presumably the bits kept in intel->Fallback). */
+#define INTEL_FALLBACK_DRAW_BUFFER	 0x1
+#define INTEL_FALLBACK_READ_BUFFER	 0x2
+#define INTEL_FALLBACK_USER		 0x4
+#define INTEL_FALLBACK_NO_BATCHBUFFER	 0x8
+#define INTEL_FALLBACK_NO_TEXMEM	 0x10
+#define INTEL_FALLBACK_RENDERMODE	 0x20
+/* FALLBACK() is shorthand that forwards all three arguments to intelFallback(). */
+extern void intelFallback( intelContextPtr intel, GLuint bit, GLboolean mode );
+#define FALLBACK( intel, bit, mode ) intelFallback( intel, bit, mode )
+
+
+#define INTEL_TEX_MAXLEVELS 10	/* second dimension of intel_texture_object.image */
+
+/* Driver texture object; 'base' embeds the shared driTextureObject. */
+struct intel_texture_object
+{
+   driTextureObject    base;	/* the parent class */
+
+   GLuint texelBytes;
+   GLuint age;		/* written by age_intel() in intel_ioctl.c after a batch is submitted */
+   GLuint Pitch;
+   GLuint Height;
+   GLuint TextureOffset;
+   GLubyte *BufAddr;   
+
+   GLuint min_level;
+   GLuint max_level;
+   GLuint depth_pitch;
+   /* Per-image records indexed [6][level]; 6 is presumably the cube-face count -- confirm. */
+   struct {
+      const struct gl_texture_image *image;
+      GLuint offset;       /* into BufAddr */
+      GLuint height;
+      GLuint internalFormat;
+   } image[6][INTEL_TEX_MAXLEVELS];
+
+   GLuint dirty;
+   GLuint firstLevel,lastLevel;
+};
+
+/* Driver context.  'ctx' is the first member; INTEL_CONTEXT() casts a GLcontext* to this. */
+struct intel_context
+{
+   GLcontext ctx;		/* the parent class */
+   /* Table of function-pointer hooks; rotate_window may be unset (intelRotateWindow checks). */
+   struct {
+      void (*destroy)( intelContextPtr intel ); 
+      void (*emit_state)( intelContextPtr intel );
+      void (*lost_hardware)( intelContextPtr intel );
+      void (*update_texture_state)( intelContextPtr intel );
+
+      void (*render_start)( intelContextPtr intel );
+      void (*set_color_region)( intelContextPtr intel, const intelRegion *reg );
+      void (*set_z_region)( intelContextPtr intel, const intelRegion *reg );
+      void (*update_color_z_regions)(intelContextPtr intel,
+                                     const intelRegion *colorRegion,
+                                     const intelRegion *depthRegion);
+      void (*emit_flush)( intelContextPtr intel );
+      void (*reduced_primitive_state)( intelContextPtr intel, GLenum rprim );
+
+      GLboolean (*check_vertex_size)( intelContextPtr intel, GLuint expected );
+
+      void (*clear_with_tris)( intelContextPtr intel, GLbitfield mask,
+			       GLboolean all, 
+			       GLint cx, GLint cy, GLint cw, GLint ch);
+
+      void (*rotate_window)( intelContextPtr intel,
+                             __DRIdrawablePrivate *dPriv, GLuint srcBuf);
+
+      intelTextureObjectPtr (*alloc_tex_obj)( struct gl_texture_object *tObj );
+
+   } vtbl;
+
+   GLint refcount;   
+   GLuint Fallback;
+   GLuint NewGLState;
+   /* Current batch: 'space' bytes remain free out of 'size' (bytes used = size - space). */
+   struct {
+      GLuint start_offset;
+      GLint size;
+      GLint space;
+      GLubyte *ptr;
+      GLuint counter;
+      GLuint last_emit_state;
+      GLboolean contains_geometry;
+      const char *func;
+      GLuint last_swap;
+   } batch;
+   /* Backing allocation for batches; intelRefillBatchLocked() alternates halves via active_buf. */
+   struct {
+      void *ptr;
+      GLint size;
+      GLuint offset;
+      GLuint active_buf;
+      GLuint irq_emitted;
+   } alloc;
+   /* prim.flush, when set, is what INTEL_FIREVERTICES() calls. */
+   struct {
+      GLuint primitive;
+      GLubyte *start_ptr;      
+      void (*flush)( GLcontext * );
+   } prim;
+   /* Set by LOCK_HARDWARE, cleared by UNLOCK_HARDWARE. */
+   GLboolean locked;
+
+   GLubyte clear_red;
+   GLubyte clear_green;
+   GLubyte clear_blue;
+   GLubyte clear_alpha;
+   GLuint ClearColor;
+   GLuint ClearDepth;
+
+   GLuint coloroffset;
+   GLuint specoffset;
+
+   /* Support for duplicating XYZW as WPOS parameter (crutch for I915).
+    */
+   GLuint wpos_offset;
+   GLuint wpos_size;
+
+   struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
+   GLuint vertex_attr_count;
+
+   GLfloat depth_scale;
+   GLfloat polygon_offset_scale; /* dependent on depth_scale, bpp */
+   GLuint depth_clear_mask;
+   GLuint stencil_clear_mask;
+
+   GLboolean hw_stencil;	/* when false, intelClear() sends stencil clears to swrast */
+   GLboolean hw_stipple;
+   
+   /* Texture object bookkeeping
+    */
+   GLuint                nr_heaps;
+   driTexHeap          * texture_heaps[1];
+   driTextureObject      swapped;
+   GLuint                lastStamp;
+
+   struct intel_texture_object *CurrentTexObj[MAX_TEXTURE_UNITS];
+
+   /* State for intelvb.c and inteltris.c.
+    */
+   GLuint RenderIndex;
+   GLmatrix ViewportMatrix;
+   GLenum render_primitive;
+   GLenum reduced_primitive;
+   GLuint vertex_size;
+   unsigned char *verts;	   /* points to tnl->clipspace.vertex_buf */
+
+
+   /* Fallback rasterization functions 
+    */
+   intel_point_func draw_point;
+   intel_line_func draw_line;
+   intel_tri_func draw_tri;
+
+   /* Drawing buffer state
+    */
+   intelRegion *drawRegion;  /* current drawing buffer */
+   intelRegion *readRegion;  /* current reading buffer */
+
+   int drawX;			/* origin of drawable in draw buffer */
+   int drawY;
+   GLuint numClipRects;		/* cliprects for that buffer */
+   drm_clip_rect_t *pClipRects;
+
+   int dirtyAge;
+   int perf_boxes;
+
+   GLuint do_usleeps;
+   int do_irqs;
+   GLuint irqsEmitted;
+   drm_i915_irq_wait_t iw;	/* argument block reused by intelWaitIrq() */
+
+   GLboolean scissor;
+   drm_clip_rect_t draw_rect;
+   drm_clip_rect_t scissor_rect;
+
+   drm_context_t hHWContext;
+   drmLock *driHwLock;
+   int driFd;
+
+   __DRIdrawablePrivate *driDrawable;
+   __DRIscreenPrivate *driScreen;
+   intelScreenPrivate *intelScreen; 
+   drmI830Sarea *sarea; 
+
+   /**
+    * Configuration cache
+    */
+   driOptionCache optionCache;
+
+   /* VBI
+    */
+   GLuint vbl_seq;
+   GLuint vblank_flags;
+
+   int64_t swap_ust;
+   int64_t swap_missed_ust;
+
+   GLuint swap_count;
+   GLuint swap_missed_count;
+   /* When nonzero, LOCK_HARDWARE first waits for vblank vbl_seq and then clears this. */
+   GLuint swap_scheduled;
+};
+
+
+#define DEBUG_LOCKING	1
+/* With DEBUG_LOCKING set, LOCK_HARDWARE records the file/line that took the lock. */
+#if DEBUG_LOCKING
+extern char *prevLockFile;
+extern int prevLockLine;
+
+#define DEBUG_LOCK()							\
+   do {									\
+      prevLockFile = (__FILE__);					\
+      prevLockLine = (__LINE__);					\
+   } while (0)
+/* Forget the recorded lock site; UNLOCK_HARDWARE invokes this. */
+#define DEBUG_RESET()							\
+   do {									\
+      prevLockFile = 0;							\
+      prevLockLine = 0;							\
+   } while (0)
+
+/* Slightly less broken way of detecting recursive locking in a
+ * threaded environment.  The right way to do this would be to make
+ * prevLockFile, prevLockLine thread-local.
+ *
+ * This technique instead checks to see if the same context is
+ * requesting the lock twice -- this will not catch application
+ * breakages where the same context is active in two different threads
+ * at once, but it will catch driver breakages (recursive locking) in
+ * threaded apps.
+ */
+#define DEBUG_CHECK_LOCK()						\
+   do {									\
+      if ( *((volatile int *)intel->driHwLock) == 			\
+	   (DRM_LOCK_HELD | intel->hHWContext) ) {			\
+	 fprintf( stderr,						\
+		  "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n",	\
+		  prevLockFile, prevLockLine, __FILE__, __LINE__ );	\
+	 abort();							\
+      }									\
+   } while (0)
+/* NOTE: DEBUG_CHECK_LOCK() names a variable 'intel' directly, so one must be in scope. */
+#else
+
+#define DEBUG_LOCK()
+#define DEBUG_RESET()
+#define DEBUG_CHECK_LOCK()
+
+#endif
+
+
+
+
+/* Lock the hardware and validate our state.  Waits for a scheduled swap's vblank
+ * first; intelGetLock() is the slow path, used when DRM_CAS sets __ret. */
+#define LOCK_HARDWARE( intel )				\
+do {							\
+    char __ret=0;					\
+    DEBUG_CHECK_LOCK();					\
+    assert(!(intel)->locked);				\
+    if ((intel)->swap_scheduled) {			\
+        drmVBlank vbl;					\
+        vbl.request.type = DRM_VBLANK_ABSOLUTE;		\
+        if ((intel)->vblank_flags &			\
+            VBLANK_FLAG_SECONDARY) {			\
+            vbl.request.type |= DRM_VBLANK_SECONDARY;	\
+        }						\
+        vbl.request.sequence = (intel)->vbl_seq;	\
+        drmWaitVBlank((intel)->driFd, &vbl);		\
+        (intel)->swap_scheduled = 0;			\
+    }							\
+    DRM_CAS((intel)->driHwLock, (intel)->hHWContext,	\
+        (DRM_LOCK_HELD|(intel)->hHWContext), __ret);	\
+    if (__ret)						\
+        intelGetLock( (intel), 0 );			\
+      DEBUG_LOCK();					\
+    (intel)->locked = 1;				\
+}while (0)
+ 
+  
+/* Release the hardware lock taken by LOCK_HARDWARE(intel).  Every use of the
+ * argument is parenthesized so any pointer expression can be passed. */
+#define UNLOCK_HARDWARE(intel)						\
+do {									\
+   (intel)->locked = 0;							\
+   if (0) { 								\
+      (intel)->perf_boxes |= (intel)->sarea->perf_boxes;  		\
+      (intel)->sarea->perf_boxes = 0;					\
+   }									\
+   DRM_UNLOCK((intel)->driFd, (intel)->driHwLock, (intel)->hHWContext);	\
+   DEBUG_RESET();							\
+} while (0)
+
+
+#define SUBPIXEL_X 0.125	/* NOTE(review): presumably a sub-pixel coordinate bias -- confirm at the use site */
+#define SUBPIXEL_Y 0.125
+/* Call the prim.flush hook with the embedded GLcontext, if a hook is set. */
+#define INTEL_FIREVERTICES(intel)		\
+do {						\
+   if ((intel)->prim.flush)			\
+      (intel)->prim.flush(&(intel)->ctx);		\
+} while (0)
+
+/* ================================================================
+ * Color packing: combine 8-bit r,g,b,a components into one pixel value.
+ */
+
+#define INTEL_PACKCOLOR4444(r,g,b,a) \
+  ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4))
+
+#define INTEL_PACKCOLOR1555(r,g,b,a) \
+  ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \
+    ((a) ? 0x8000 : 0))
+
+#define INTEL_PACKCOLOR565(r,g,b) \
+  ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3))
+/* Arguments are parenthesized so expressions such as (x | y) pack correctly. */
+#define INTEL_PACKCOLOR8888(r,g,b,a) \
+  (((a)<<24) | ((r)<<16) | ((g)<<8) | (b))
+
+/* Select the packer for 'format' (a DV_PF_* value); unknown formats give 0. */
+#define INTEL_PACKCOLOR(format, r,  g,  b, a)		\
+((format) == DV_PF_555 ? INTEL_PACKCOLOR1555(r,g,b,a) :	\
+ ((format) == DV_PF_565 ? INTEL_PACKCOLOR565(r,g,b) :	\
+  ((format) == DV_PF_8888 ? INTEL_PACKCOLOR8888(r,g,b,a) :	\
+   0)))
+
+
+
+/* ================================================================
+ * From linux kernel i386 header files, copes with odd sizes better
+ * than COPY_DWORDS would:
+ */
+#if defined(i386) || defined(__i386__)
+static __inline__ void * __memcpy(void * to, const void * from, size_t n)
+{
+   int d0, d1, d2;	/* dummy outputs for the count/destination/source registers */
+   __asm__ __volatile__(
+      "rep ; movsl\n\t"		/* copy n/4 dwords (count loaded from operand 0) */
+      "testb $2,%b4\n\t"	/* bit 1 of n set: one more 16-bit word follows */
+      "je 1f\n\t"
+      "movsw\n"
+      "1:\ttestb $1,%b4\n\t"	/* bit 0 of n set: one final byte follows */
+      "je 2f\n\t"
+      "movsb\n"
+      "2:"
+      : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+      :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
+      : "memory");
+   return (to);
+}
+#else
+#define __memcpy(a,b,c) memcpy(a,b,c)	/* other targets: plain memcpy */
+#endif
+
+
+
+/* ================================================================
+ * Debugging:
+ */
+#define DO_DEBUG		1
+#if DO_DEBUG
+extern int INTEL_DEBUG;	/* run-time mask of the DEBUG_* bits below */
+#else
+#define INTEL_DEBUG		0	/* constant 0: every INTEL_DEBUG & DEBUG_x test is false */
+#endif
+/* Bits tested against INTEL_DEBUG, e.g. INTEL_DEBUG & DEBUG_IOCTL. */
+#define DEBUG_TEXTURE	0x1
+#define DEBUG_STATE	0x2
+#define DEBUG_IOCTL	0x4
+#define DEBUG_PRIMS	0x8
+#define DEBUG_VERTS	0x10
+#define DEBUG_FALLBACKS	0x20
+#define DEBUG_VERBOSE	0x40
+#define DEBUG_DRI       0x80
+#define DEBUG_DMA       0x100
+#define DEBUG_SANITY    0x200
+#define DEBUG_SYNC      0x400
+#define DEBUG_SLEEP     0x800
+#define DEBUG_PIXEL     0x1000
+
+/* PCI device ids, one per chipset name. */
+#define PCI_CHIP_845_G			0x2562
+#define PCI_CHIP_I830_M			0x3577
+#define PCI_CHIP_I855_GM		0x3582
+#define PCI_CHIP_I865_G			0x2572
+#define PCI_CHIP_I915_G			0x2582
+#define PCI_CHIP_I915_GM		0x2592
+#define PCI_CHIP_I945_G			0x2772
+#define PCI_CHIP_I945_GM		0x27A2
+#define PCI_CHIP_I945_GME		0x27AE
+#define PCI_CHIP_G33_G			0x29C2
+#define PCI_CHIP_Q35_G			0x29B2
+#define PCI_CHIP_Q33_G			0x29D2
+
+
+/* ================================================================
+ * intel_context.c:
+ */
+
+extern void intelInitDriverFunctions( struct dd_function_table *functions );
+
+extern GLboolean intelInitContext( intelContextPtr intel, 
+				   const __GLcontextModes *mesaVis,
+				   __DRIcontextPrivate *driContextPriv,
+				   void *sharedContextPrivate,
+				   struct dd_function_table *functions );
+/* intelGetLock() is what LOCK_HARDWARE calls when its DRM_CAS attempt fails. */
+extern void intelGetLock(intelContextPtr intel, GLuint flags);
+extern void intelSetBackClipRects(intelContextPtr intel);
+extern void intelSetFrontClipRects(intelContextPtr intel);
+extern void intelWindowMoved( intelContextPtr intel );
+
+extern void intelInitState( GLcontext *ctx );
+extern const GLubyte *intelGetString( GLcontext *ctx, GLenum name );
+
+
+/* ================================================================
+ * intel_state.c:
+ */
+extern void intelInitStateFuncs( struct dd_function_table *functions );
+/* Numeric codes below are presumably the hardware encodings the translators return -- confirm. */
+#define COMPAREFUNC_ALWAYS		0
+#define COMPAREFUNC_NEVER		0x1
+#define COMPAREFUNC_LESS		0x2
+#define COMPAREFUNC_EQUAL		0x3
+#define COMPAREFUNC_LEQUAL		0x4
+#define COMPAREFUNC_GREATER		0x5
+#define COMPAREFUNC_NOTEQUAL		0x6
+#define COMPAREFUNC_GEQUAL		0x7
+
+#define STENCILOP_KEEP			0
+#define STENCILOP_ZERO			0x1
+#define STENCILOP_REPLACE		0x2
+#define STENCILOP_INCRSAT		0x3
+#define STENCILOP_DECRSAT		0x4
+#define STENCILOP_INCR			0x5
+#define STENCILOP_DECR			0x6
+#define STENCILOP_INVERT		0x7
+
+#define LOGICOP_CLEAR			0
+#define LOGICOP_NOR			0x1
+#define LOGICOP_AND_INV 		0x2
+#define LOGICOP_COPY_INV		0x3
+#define LOGICOP_AND_RVRSE		0x4
+#define LOGICOP_INV			0x5
+#define LOGICOP_XOR			0x6
+#define LOGICOP_NAND			0x7
+#define LOGICOP_AND			0x8
+#define LOGICOP_EQUIV			0x9
+#define LOGICOP_NOOP			0xa
+#define LOGICOP_OR_INV			0xb
+#define LOGICOP_COPY			0xc
+#define LOGICOP_OR_RVRSE		0xd
+#define LOGICOP_OR			0xe
+#define LOGICOP_SET			0xf
+
+#define BLENDFACT_ZERO			0x01
+#define BLENDFACT_ONE			0x02
+#define BLENDFACT_SRC_COLR		0x03
+#define BLENDFACT_INV_SRC_COLR 		0x04
+#define BLENDFACT_SRC_ALPHA		0x05
+#define BLENDFACT_INV_SRC_ALPHA 	0x06
+#define BLENDFACT_DST_ALPHA		0x07
+#define BLENDFACT_INV_DST_ALPHA 	0x08
+#define BLENDFACT_DST_COLR		0x09
+#define BLENDFACT_INV_DST_COLR		0x0a
+#define BLENDFACT_SRC_ALPHA_SATURATE	0x0b
+#define BLENDFACT_CONST_COLOR		0x0c
+#define BLENDFACT_INV_CONST_COLOR	0x0d
+#define BLENDFACT_CONST_ALPHA		0x0e
+#define BLENDFACT_INV_CONST_ALPHA	0x0f
+#define BLENDFACT_MASK          	0x0f
+
+/* Translators taking a GL enum and returning an int code. */
+extern int intel_translate_compare_func( GLenum func );
+extern int intel_translate_stencil_op( GLenum op );
+extern int intel_translate_blend_factor( GLenum factor );
+extern int intel_translate_logic_op( GLenum opcode );
+
+
+/* ================================================================
+ * intel_ioctl.c:
+ */
+extern void intel_dump_batchbuffer( long offset,
+				    int *ptr,
+				    int count );	/* the dump loop walks count/4 dwords */
+
+
+/* ================================================================
+ * intel_pixel.c:
+ */	
+extern void intelInitPixelFuncs( struct dd_function_table *functions );
+
+
+
+#endif
+
diff --git a/i915/intel_ioctl.c b/i915/intel_ioctl.c
new file mode 100644
index 0000000..ede3b63
--- /dev/null
+++ b/i915/intel_ioctl.c
@@ -0,0 +1,659 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sched.h>
+
+#include "mtypes.h"
+#include "context.h"
+#include "swrast/swrast.h"
+
+#include "intel_context.h"
+#include "intel_ioctl.h"
+#include "intel_batchbuffer.h"
+#include "drm.h"
+/* Query I915_PARAM_LAST_DISPATCH from the DRM.  Returns 0 if the ioctl fails. */
+u_int32_t intelGetLastFrame (intelContextPtr intel) 
+{
+   int ret;
+   u_int32_t frame = 0;
+   drm_i915_getparam_t gp;
+   /* gp.value points at 'frame', which is where the answer is read back from. */
+   gp.param = I915_PARAM_LAST_DISPATCH;
+   gp.value = (int *)&frame;
+   ret = drmCommandWriteRead( intel->driFd, DRM_I915_GETPARAM,
+			      &gp, sizeof(gp) );
+   return ret ? 0 : frame;
+}
+
+int intelEmitIrqLocked( intelContextPtr intel )
+{
+   drmI830IrqEmit ie;
+   int ret, seq;
+   /* Caller must hold the hardware lock; the assert below checks the lock word. */
+   assert(((*(int *)intel->driHwLock) & ~DRM_LOCK_CONT) == 
+	  (DRM_LOCK_HELD|intel->hHWContext));
+   /* The sequence number comes back through ie.irq_seq, i.e. into 'seq'. */
+   ie.irq_seq = &seq;
+	 
+   ret = drmCommandWriteRead( intel->driFd, DRM_I830_IRQ_EMIT, 
+			      &ie, sizeof(ie) );
+   if ( ret ) {
+      fprintf( stderr, "%s: drmI830IrqEmit: %d\n", __FUNCTION__, ret );
+      exit(1);
+   }
+   /* Disabled trace output. */
+   if (0)
+      fprintf(stderr, "%s -->  %d\n", __FUNCTION__, seq );
+
+   return seq;
+}
+
+void intelWaitIrq( intelContextPtr intel, int seq )
+{
+   int ret;
+   /* Wait (DRM_I830_IRQ_WAIT) for irq sequence 'seq'; exits the process on error. */
+   if (0)
+      fprintf(stderr, "%s %d\n", __FUNCTION__, seq );
+
+   intel->iw.irq_seq = seq;
+   /* Reissue the ioctl for as long as it returns -EAGAIN or -EINTR. */
+   do {
+     ret = drmCommandWrite( intel->driFd, DRM_I830_IRQ_WAIT, &intel->iw, sizeof(intel->iw) );
+   } while (ret == -EAGAIN || ret == -EINTR);
+
+   if ( ret ) {
+      fprintf( stderr, "%s: drmI830IrqWait: %d\n", __FUNCTION__, ret );
+      if (0)
+	 intel_dump_batchbuffer( intel->alloc.offset,
+				 intel->alloc.ptr,
+				 intel->alloc.size );
+      exit(1);
+   }
+}
+
+
+/* Store 'age' in the current texture object of every texture unit that has one. */
+static void age_intel( intelContextPtr intel, int age )
+{
+   GLuint unit = 0;
+   while (unit < MAX_TEXTURE_UNITS) {
+      struct intel_texture_object *tex = intel->CurrentTexObj[unit++];
+      if (tex) tex->age = age;
+   }
+}
+
+/* Dump 'count' bytes of batch to stderr, four dwords per row, address first. */
+void intel_dump_batchbuffer( long offset, int *ptr, int count )
+{
+   int i, n = count / 4;	/* whole dwords available; never read beyond them */
+   fprintf(stderr, "\n\n\nSTART BATCH (%d dwords):\n", n);
+   for (i = 0; i < n; i++) {
+      if ((i & 3) == 0) fprintf(stderr, "\t0x%x:", (unsigned int)offset + i*4);
+      fprintf(stderr, " 0x%08x%s", ptr[i], ((i & 3) == 3 || i == n-1) ? "\n" : "");
+   }
+   fprintf(stderr, "END BATCH\n\n\n");
+}
+
+void intelRefillBatchLocked( intelContextPtr intel, GLboolean allow_unlock )
+{
+   GLuint last_irq = intel->alloc.irq_emitted;
+   GLuint half = intel->alloc.size / 2;
+   GLuint buf = (intel->alloc.active_buf ^= 1);
+   /* The allocation is used as two halves; 'buf' is the half being switched to. */
+   intel->alloc.irq_emitted = intelEmitIrqLocked( intel );
+   /* Wait for the irq recorded by the previous refill, dropping the lock meanwhile if allowed. */
+   if (last_irq) {
+      if (allow_unlock) UNLOCK_HARDWARE( intel ); 
+      intelWaitIrq( intel, last_irq );
+      if (allow_unlock) LOCK_HARDWARE( intel ); 
+   }
+
+   if (0)
+      fprintf(stderr, "%s: now using half %d\n", __FUNCTION__, buf);
+   /* 8 bytes of each half are held back; intelFlushBatchLocked() appends up to 8 bytes itself. */
+   intel->batch.start_offset = intel->alloc.offset + buf * half;
+   intel->batch.ptr = (unsigned char *)intel->alloc.ptr + buf * half;
+   intel->batch.size = half - 8;
+   intel->batch.space = half - 8;
+   assert(intel->batch.space >= 0);
+}
+
+#define MI_BATCH_BUFFER_END 	(0xA<<23)	/* written as the final dword of a batch below */
+
+/* Submit what has accumulated in the current batch to the kernel; the lock must be held. */
+void intelFlushBatchLocked( intelContextPtr intel, 
+			    GLboolean ignore_cliprects,
+			    GLboolean refill,
+			    GLboolean allow_unlock)
+{
+   drmI830BatchBuffer batch;
+
+   assert(intel->locked);
+
+   if (0)
+      fprintf(stderr, "%s used %d of %d offset %x..%x refill %d (started in %s)\n",
+	      __FUNCTION__, 
+	      (intel->batch.size - intel->batch.space), 
+	      intel->batch.size,
+	      intel->batch.start_offset,
+	      intel->batch.start_offset + 
+	      (intel->batch.size - intel->batch.space), 
+	      refill,
+	      intel->batch.func);
+
+   /* Throw away non-effective packets.  Won't work once we have
+    * hardware contexts which would preserve statechanges beyond a
+    * single buffer.
+    */
+   if (intel->numClipRects == 0 && !ignore_cliprects) {
+      
+      /* Without this yield, an application with no cliprects can hog
+       * the hardware.  Without unlocking, the effect is much worse -
+       * effectively a lock-out of other contexts.
+       */
+      if (allow_unlock) {
+	 UNLOCK_HARDWARE( intel );
+	 sched_yield();
+	 LOCK_HARDWARE( intel );
+      }
+
+      /* Note that any state thought to have been emitted actually
+       * hasn't:
+       */
+      intel->batch.ptr -= (intel->batch.size - intel->batch.space);
+      intel->batch.space = intel->batch.size;
+      intel->vtbl.lost_hardware( intel ); 
+   }
+
+   if (intel->batch.space != intel->batch.size) {
+      /* Record in the SAREA that this context is now the owner. */
+      if (intel->sarea->ctxOwner != intel->hHWContext) {
+	 intel->perf_boxes |= I830_BOX_LOST_CONTEXT;
+	 intel->sarea->ctxOwner = intel->hHWContext;
+      }
+
+      batch.start = intel->batch.start_offset;
+      batch.used = intel->batch.size - intel->batch.space;
+      batch.cliprects = intel->pClipRects;
+      batch.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects;
+      batch.DR1 = 0;
+      batch.DR4 = ((((GLuint)intel->drawX) & 0xffff) | 
+		   (((GLuint)intel->drawY) << 16));
+      /* Append MI_BATCH_BUFFER_END, preceded by a zero dword when bit 2 of 'used' is clear. */
+      if (intel->alloc.offset) {
+	 if ((batch.used & 0x4) == 0) {
+	    ((int *)intel->batch.ptr)[0] = 0;
+	    ((int *)intel->batch.ptr)[1] = MI_BATCH_BUFFER_END;
+	    batch.used += 0x8;
+	    intel->batch.ptr += 0x8;
+	 }
+	 else {
+	    ((int *)intel->batch.ptr)[0] = MI_BATCH_BUFFER_END;
+	    batch.used += 0x4;
+	    intel->batch.ptr += 0x4;
+	 }      
+      }
+
+      if (0)
+ 	 intel_dump_batchbuffer( batch.start,
+				 (int *)(intel->batch.ptr - batch.used),
+				 batch.used );
+      /* Move the batch window past the bytes just consumed. */
+      intel->batch.start_offset += batch.used;
+      intel->batch.size -= batch.used;
+
+      if (intel->batch.size < 8) {
+	 refill = GL_TRUE;
+	 intel->batch.space = intel->batch.size = 0;
+      }
+      else {
+	 intel->batch.size -= 8;
+	 intel->batch.space = intel->batch.size;
+      }
+
+
+      assert(intel->batch.space >= 0);
+      assert(batch.start >= intel->alloc.offset);
+      assert(batch.start < intel->alloc.offset + intel->alloc.size);
+      assert(batch.start + batch.used > intel->alloc.offset);
+      assert(batch.start + batch.used <= 
+	     intel->alloc.offset + intel->alloc.size);
+
+      /* Nonzero alloc.offset: submit by offset (BATCHBUFFER); otherwise pass a pointer (CMDBUFFER). */
+      if (intel->alloc.offset) {
+	 if (drmCommandWrite (intel->driFd, DRM_I830_BATCHBUFFER, &batch, 
+			      sizeof(batch))) {
+	    fprintf(stderr, "DRM_I830_BATCHBUFFER: %d\n",  -errno);
+	    UNLOCK_HARDWARE(intel);
+	    exit(1);
+	 }
+      } else {
+	 drmI830CmdBuffer cmd;
+	 cmd.buf = (char *)intel->alloc.ptr + batch.start;
+	 cmd.sz = batch.used;
+	 cmd.DR1 = batch.DR1;
+	 cmd.DR4 = batch.DR4;
+	 cmd.num_cliprects = batch.num_cliprects;
+	 cmd.cliprects = batch.cliprects;
+	 
+	 if (drmCommandWrite (intel->driFd, DRM_I830_CMDBUFFER, &cmd, 
+			      sizeof(cmd))) {
+	    fprintf(stderr, "DRM_I830_CMDBUFFER: %d\n",  -errno);
+	    UNLOCK_HARDWARE(intel);
+	    exit(1);
+	 }
+      }	 
+
+      /* Stamp the current textures with the SAREA's last_enqueue value. */
+      age_intel(intel, intel->sarea->last_enqueue);
+
+      /* FIXME: use hardware contexts to avoid 'losing' hardware after
+       * each buffer flush.
+       */
+      if (intel->batch.contains_geometry) 
+	 assert(intel->batch.last_emit_state == intel->batch.counter);
+
+      intel->batch.counter++;
+      intel->batch.contains_geometry = 0;
+      intel->batch.func = 0;
+      intel->vtbl.lost_hardware( intel );
+   }
+
+   if (refill)
+      intelRefillBatchLocked( intel, allow_unlock );
+}
+
+void intelFlushBatch( intelContextPtr intel, GLboolean refill )
+{
+   /* Take the lock only when it is not held; only then is allow_unlock GL_TRUE. */
+   if (!intel->locked) {
+      LOCK_HARDWARE(intel);
+      intelFlushBatchLocked( intel, GL_FALSE, refill, GL_TRUE );
+      UNLOCK_HARDWARE(intel);
+      return;
+   }
+   intelFlushBatchLocked( intel, GL_FALSE, refill, GL_FALSE );
+}
+
+/* Emit a flush, submit the batch with a refill, then wait on the irq recorded in alloc. */
+void intelWaitForIdle( intelContextPtr intel )
+{   
+   if (0)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   /* refill == GL_TRUE: the flush ends in intelRefillBatchLocked(), which emits a fresh irq. */
+   intel->vtbl.emit_flush( intel );
+   intelFlushBatch( intel, GL_TRUE );
+
+   /* Use an irq to wait for dma idle -- Need to track lost contexts
+    * to shortcircuit consecutive calls to this function:
+    */
+   intelWaitIrq( intel, intel->alloc.irq_emitted );
+   intel->alloc.irq_emitted = 0;
+}
+
+
+/**
+ * Check if we need to rotate/warp the front color buffer to the
+ * rotated screen.  We generally need to do this when we get a glFlush
+ * or glFinish after drawing to the front color buffer.
+ */
+static void
+intelCheckFrontRotate(GLcontext *ctx)
+{
+   intelContextPtr intel = INTEL_CONTEXT( ctx );
+
+   /* Nothing to do unless drawing to front-left on a screen with nonzero rotation. */
+   if (intel->ctx.DrawBuffer->_ColorDrawBufferMask[0] != BUFFER_BIT_FRONT_LEFT)
+      return;
+   if (intel->intelScreen->current_rotation == 0)
+      return;
+   intelRotateWindow(intel, intel->driDrawable, BUFFER_BIT_FRONT_LEFT);
+}
+
+
+/**
+ * Internal flush.  NOT directly called via glFlush; see intelglFlush().
+ */
+void intelFlush( GLcontext *ctx )
+{
+   intelContextPtr intel = INTEL_CONTEXT( ctx );
+   /* Flush swrast first when any fallback bit is set. */
+   if (intel->Fallback)
+      _swrast_flush( ctx );
+   /* Run the prim.flush hook, if one is set. */
+   INTEL_FIREVERTICES( intel );
+   /* size != space means bytes have been written to the batch. */
+   if (intel->batch.size != intel->batch.space)
+      intelFlushBatch( intel, GL_FALSE );
+}
+
+
+/**
+ * Called via glFlush: flush, then run the front-buffer rotation check.
+ */
+void intelglFlush( GLcontext *ctx )
+{
+   intelFlush(ctx);
+   intelCheckFrontRotate(ctx);
+}
+
+/* Flush, wait for the hardware to go idle, then run the front-buffer rotation check. */
+void intelFinish( GLcontext *ctx  ) 
+{
+   intelContextPtr intel = INTEL_CONTEXT( ctx );
+   intelFlush( ctx );
+   intelWaitForIdle( intel );
+   intelCheckFrontRotate(ctx);
+}
+
+/* Clear the buffers named in 'mask', routing each to a blit, triangle or swrast clear. */
+void intelClear(GLcontext *ctx, GLbitfield mask)
+{
+   intelContextPtr intel = INTEL_CONTEXT( ctx );
+   const GLuint colorMask = *((GLuint *) &ctx->Color.ColorMask);
+   GLbitfield tri_mask = 0;
+   GLbitfield blit_mask = 0;
+   GLbitfield swrast_mask = 0;
+
+   if (0)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   /* Take care of cliprects, which are handled differently for
+    * clears, etc.
+    */
+   intelFlush( &intel->ctx );
+   /* Color buffers: blit when every ColorMask bit is set, otherwise clear with triangles. */
+   if (mask & BUFFER_BIT_FRONT_LEFT) {
+      if (colorMask == ~0) {
+	 blit_mask |= BUFFER_BIT_FRONT_LEFT;
+      } 
+      else {
+	 tri_mask |= BUFFER_BIT_FRONT_LEFT;
+      }
+   }
+
+   if (mask & BUFFER_BIT_BACK_LEFT) {
+      if (colorMask == ~0) {
+	 blit_mask |= BUFFER_BIT_BACK_LEFT;
+      } 
+      else {
+	 tri_mask |= BUFFER_BIT_BACK_LEFT;
+      }
+   }
+   /* Depth always goes to the blit path. */
+   if (mask & BUFFER_BIT_DEPTH) {
+      blit_mask |= BUFFER_BIT_DEPTH;
+   }
+   /* Stencil: swrast without hw_stencil, triangles for a partial write mask, else blit. */
+   if (mask & BUFFER_BIT_STENCIL) {
+      if (!intel->hw_stencil) {
+	 swrast_mask |= BUFFER_BIT_STENCIL;
+      }
+      else if ((ctx->Stencil.WriteMask[0] & 0xff) != 0xff) {
+	 tri_mask |= BUFFER_BIT_STENCIL;
+      } 
+      else {
+	 blit_mask |= BUFFER_BIT_STENCIL;
+      }
+   }
+   /* Accumulation-buffer clears always go to swrast. */
+   swrast_mask |= (mask & BUFFER_BIT_ACCUM);
+
+   if (blit_mask) 
+      intelClearWithBlit( ctx, blit_mask, 0, 0, 0, 0, 0);
+
+   if (tri_mask) 
+      intel->vtbl.clear_with_tris( intel, tri_mask, 0, 0, 0, 0, 0);
+
+   if (swrast_mask)
+      _swrast_Clear( ctx, swrast_mask );
+}
+
+/* Forward to the vtbl's rotate_window hook; does nothing when the hook is unset. */
+void
+intelRotateWindow(intelContextPtr intel, __DRIdrawablePrivate *dPriv,
+                  GLuint srcBuffer)
+{
+   if (!intel->vtbl.rotate_window)
+      return;
+   intel->vtbl.rotate_window(intel, dPriv, srcBuffer);
+}
+
+/* Allocate 'size' bytes via DRM_I830_ALLOC; returns an address inside tex.map, or NULL. */
+void *intelAllocateAGP( intelContextPtr intel, GLsizei size )
+{
+   int region_offset;
+   drmI830MemAlloc alloc;
+   int ret;
+
+   if (0)
+      fprintf(stderr, "%s: %d bytes\n", __FUNCTION__, size);
+
+   alloc.region = I830_MEM_REGION_AGP;
+   alloc.alignment = 0;
+   alloc.size = size;
+   alloc.region_offset = &region_offset;
+   /* The ioctl and both driAgeTextures() calls run with the hardware lock held. */
+   LOCK_HARDWARE(intel);
+
+   /* Make sure the global heap is initialized
+    */
+   if (intel->texture_heaps[0])
+      driAgeTextures( intel->texture_heaps[0] );
+
+
+   ret = drmCommandWriteRead( intel->driFd,
+			      DRM_I830_ALLOC,
+			      &alloc, sizeof(alloc));
+   
+   if (ret) {
+      fprintf(stderr, "%s: DRM_I830_ALLOC ret %d\n", __FUNCTION__, ret);
+      UNLOCK_HARDWARE(intel);
+      return NULL;
+   }
+   
+   if (0)
+      fprintf(stderr, "%s: allocated %d bytes\n", __FUNCTION__, size);
+
+   /* Need to propagate this information (agp memory in use) to our
+    * local texture lru.  The kernel has already updated the global
+    * lru.  An alternative would have been to allocate memory the
+    * usual way and then notify the kernel to pin the allocation.
+    */
+   if (intel->texture_heaps[0])
+      driAgeTextures( intel->texture_heaps[0] );
+
+   UNLOCK_HARDWARE(intel);   
+
+   return (void *)((char *)intel->intelScreen->tex.map + region_offset);
+}
+
+void intelFreeAGP( intelContextPtr intel, void *pointer )
+{
+   int region_offset;
+   drmI830MemFree memfree;
+   int ret;
+   /* Turn the pointer back into an offset from the start of the texture map. */
+   region_offset = (char *)pointer - (char *)intel->intelScreen->tex.map;
+
+   if (region_offset < 0 || 
+       region_offset > intel->intelScreen->tex.size) {
+      fprintf(stderr, "offset %d outside range 0..%d\n", region_offset,
+	      intel->intelScreen->tex.size);
+      return;
+   }
+   /* NOTE(review): the check above lets region_offset == tex.size through -- confirm intent. */
+   memfree.region = I830_MEM_REGION_AGP;
+   memfree.region_offset = region_offset;
+   /* NOTE(review): unlike intelAllocateAGP(), no hardware lock is taken around this ioctl. */
+   ret = drmCommandWrite( intel->driFd,
+			  DRM_I830_FREE,
+			  &memfree, sizeof(memfree));
+   
+   if (ret) 
+      fprintf(stderr, "%s: DRM_I830_FREE ret %d\n", __FUNCTION__, ret);
+}
+
+/* This version of AllocateMemoryMESA allocates only agp memory, and
+ * only does so after the point at which the driver has been
+ * initialized.
+ *
+ * Theoretically a valid context isn't required.  However, in this
+ * implementation, it is, as I'm using the hardware lock to protect
+ * the kernel data structures, and the current context to get the
+ * device fd.
+ */
+void *intelAllocateMemoryMESA(__DRInativeDisplay *dpy, int scrn,
+			      GLsizei size, GLfloat readfreq,
+			      GLfloat writefreq, GLfloat priority)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   /* 'intel' stays null when there is no current context. */
+   intelContextPtr intel = ctx ? INTEL_CONTEXT(ctx) : 0;
+
+   if (INTEL_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s sz %d %f/%f/%f\n", __FUNCTION__, size, readfreq, 
+	      writefreq, priority);
+
+   /* Setting INTEL_NO_ALLOC in the environment makes every request fail. */
+   if (getenv("INTEL_NO_ALLOC") || !intel)
+      return NULL;
+
+   return intelAllocateAGP( intel, size );
+}
+
+
+/* Called via glXFreeMemoryMESA(); reports and returns when no context is current. */
+void intelFreeMemoryMESA(__DRInativeDisplay *dpy, int scrn, GLvoid *pointer)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   intelContextPtr intel = ctx ? INTEL_CONTEXT(ctx) : 0;
+
+   if (INTEL_DEBUG & DEBUG_IOCTL) 
+      fprintf(stderr, "%s %p\n", __FUNCTION__, pointer);
+
+   if (intel)
+      intelFreeAGP( intel, pointer );
+   else
+      fprintf(stderr, "%s: no context\n", __FUNCTION__);
+}
+
+/* Called via glXGetMemoryOffsetMESA() 
+ *
+ * Returns offset of pointer from the start of agp aperture.
+ */
+GLuint intelGetMemoryOffsetMESA(__DRInativeDisplay *dpy, int scrn, 
+				const GLvoid *pointer)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   intelContextPtr intel = ctx ? INTEL_CONTEXT(ctx) : 0;
+
+   if (!intel) {
+      fprintf(stderr, "%s: no context\n", __FUNCTION__);
+      return ~0;
+   }
+
+   /* ~0 is returned both for "no context" and for a pointer that fails intelIsAgpMemory(). */
+   return intelIsAgpMemory( intel, pointer, 0 )
+      ? intelAgpOffsetFromVirtual( intel, pointer )
+      : ~0;
+}
+
+/* True when size >= 0, pointer is not below tex.map, and offset + size is below tex.size. */
+GLboolean intelIsAgpMemory( intelContextPtr intel, const GLvoid *pointer,
+			   GLint size )
+{
+   int offset = (char *)pointer - (char *)intel->intelScreen->tex.map;
+   int valid = 0;
+
+   if (size >= 0 && offset >= 0)
+      valid = (offset + size < intel->intelScreen->tex.size);
+   if (INTEL_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "intelIsAgpMemory( %p ) : %d\n", pointer, valid );
+   
+   return valid;
+}
+
+/* Convert a pointer inside tex.map to tex.offset + its distance from tex.map; ~0 if outside. */
+GLuint intelAgpOffsetFromVirtual( intelContextPtr intel, const GLvoid *pointer )
+{
+   int delta = (char *)pointer - (char *)intel->intelScreen->tex.map;
+   int in_range = !(delta < 0 || delta > intel->intelScreen->tex.size);
+   /* Note that delta == tex.size counts as in range here. */
+   if (in_range)
+      return intel->intelScreen->tex.offset + delta;
+   return ~0;
+}
+
+
+
+
+
+/* Flip the front & back buffers.  The whole body is under #if 0, so this is
+ * a no-op as built; the disabled code calls intelRefillBatchLocked() with one argument. */
+void intelPageFlip( const __DRIdrawablePrivate *dPriv )
+{
+#if 0
+   intelContextPtr intel;
+   int tmp, ret;
+
+   if (INTEL_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   assert(dPriv);
+   assert(dPriv->driContextPriv);
+   assert(dPriv->driContextPriv->driverPrivate);
+
+   intel = (intelContextPtr) dPriv->driContextPriv->driverPrivate;
+
+   intelFlush( &intel->ctx );
+   LOCK_HARDWARE( intel );
+
+   if (dPriv->pClipRects) {
+      *(drm_clip_rect_t *)intel->sarea->boxes = dPriv->pClipRects[0];
+      intel->sarea->nbox = 1;
+   }
+
+   ret = drmCommandNone(intel->driFd, DRM_I830_FLIP); 
+   if (ret) {
+      fprintf(stderr, "%s: %d\n", __FUNCTION__, ret);
+      UNLOCK_HARDWARE( intel );
+      exit(1);
+   }
+
+   tmp = intel->sarea->last_enqueue;
+   intelRefillBatchLocked( intel );
+   UNLOCK_HARDWARE( intel );
+
+
+   intelSetDrawBuffer( &intel->ctx, intel->ctx.Color.DriverDrawBuffer );
+#endif
+}
diff --git a/i915/intel_ioctl.h b/i915/intel_ioctl.h
new file mode 100644
index 0000000..6ea47e4
--- /dev/null
+++ b/i915/intel_ioctl.h
@@ -0,0 +1,72 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_IOCTL_H
+#define INTEL_IOCTL_H
+
+#include "intel_context.h"
+
+extern void intelWaitAgeLocked( intelContextPtr intel, int age, GLboolean unlock );
+
+extern void intelClear(GLcontext *ctx, GLbitfield mask);
+
+extern void intelPageFlip( const __DRIdrawablePrivate *dpriv );
+
+extern void intelRotateWindow(intelContextPtr intel,
+                              __DRIdrawablePrivate *dPriv, GLuint srcBuffer);
+
+extern void intelWaitForIdle( intelContextPtr intel );
+extern void intelFlushBatch( intelContextPtr intel, GLboolean refill );
+extern void intelFlushBatchLocked( intelContextPtr intel,
+				   GLboolean ignore_cliprects,
+				   GLboolean refill,
+				   GLboolean allow_unlock);
+extern void intelRefillBatchLocked( intelContextPtr intel, GLboolean allow_unlock );
+extern void intelFinish( GLcontext *ctx );
+extern void intelFlush( GLcontext *ctx );
+extern void intelglFlush( GLcontext *ctx );
+
+extern void *intelAllocateAGP( intelContextPtr intel, GLsizei size );
+extern void intelFreeAGP( intelContextPtr intel, void *pointer );
+
+extern void *intelAllocateMemoryMESA( __DRInativeDisplay *dpy, int scrn, 
+				      GLsizei size, GLfloat readfreq,
+				      GLfloat writefreq, GLfloat priority );
+
+extern void intelFreeMemoryMESA( __DRInativeDisplay *dpy, int scrn, 
+				 GLvoid *pointer );
+
+extern GLuint intelGetMemoryOffsetMESA( __DRInativeDisplay *dpy, int scrn, const GLvoid *pointer );
+extern GLboolean intelIsAgpMemory( intelContextPtr intel, const GLvoid *pointer,
+				  GLint size );
+
+extern GLuint intelAgpOffsetFromVirtual( intelContextPtr intel, const GLvoid *p );
+
+extern void intelWaitIrq( intelContextPtr intel, int seq );
+extern u_int32_t intelGetLastFrame (intelContextPtr intel);
+extern int intelEmitIrqLocked( intelContextPtr intel );
+#endif
diff --git a/i915/intel_pixel.c b/i915/intel_pixel.c
new file mode 100644
index 0000000..535cbfc
--- /dev/null
+++ b/i915/intel_pixel.c
@@ -0,0 +1,502 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "enums.h"
+#include "mtypes.h"
+#include "macros.h"
+#include "swrast/swrast.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_ioctl.h"
+#include "intel_batchbuffer.h"
+
+
+
+static GLboolean
+check_color( const GLcontext *ctx, GLenum type, GLenum format,
+	     const struct gl_pixelstore_attrib *packing,
+	     const void *pixels, GLint sz, GLint pitch )
+{
+   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   GLuint cpp = intel->intelScreen->cpp;
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (	(pitch & 63) ||
+	ctx->_ImageTransferState ||
+	packing->SwapBytes ||
+	packing->LsbFirst) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+	 fprintf(stderr, "%s: failed 1\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   if ( type == GL_UNSIGNED_INT_8_8_8_8_REV && 
+	cpp == 4 && 
+	format == GL_BGRA ) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+	 fprintf(stderr, "%s: passed 2\n", __FUNCTION__);
+      return GL_TRUE;
+   }
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s: failed\n", __FUNCTION__);
+
+   return GL_FALSE;
+}
+
+static GLboolean
+check_color_per_fragment_ops( const GLcontext *ctx )
+{
+   /* Returns true only when the raster position is valid and none of the
+    * per-fragment state tested below is active (every colour channel must
+    * be writable).
+    */
+   if (!ctx->Current.RasterPosValid)
+      return GL_FALSE;
+
+   if (ctx->Color.AlphaEnabled || ctx->Depth.Test || ctx->Fog.Enabled ||
+       ctx->Scissor.Enabled || ctx->Stencil.Enabled ||
+       ctx->Color.ColorLogicOpEnabled || ctx->Texture._EnabledUnits)
+      return GL_FALSE;
+
+   if (!ctx->Color.ColorMask[0] || !ctx->Color.ColorMask[1] ||
+       !ctx->Color.ColorMask[2] || !ctx->Color.ColorMask[3])
+      return GL_FALSE;
+   return GL_TRUE;
+}
+
+
+/**
+ * Clip the given rectangle against the buffer's bounds (including scissor).
+ * \param x,y,width,height  rectangle to clip, updated in place.
+ * \return GL_TRUE if any pixels remain, GL_FALSE if totally clipped.
+ *
+ * XXX Replace this with _mesa_clip_drawpixels() and _mesa_clip_readpixels()
+ * from Mesa 6.4.  We shouldn't apply scissor for ReadPixels.
+ */
+static GLboolean
+clip_pixelrect( const GLcontext *ctx,
+		const GLframebuffer *buffer,
+		GLint *x, GLint *y,
+		GLsizei *width, GLsizei *height)
+{
+   /* left clipping */
+   if (*x < buffer->_Xmin) {
+      *width -= (buffer->_Xmin - *x);
+      *x = buffer->_Xmin;
+   }
+
+   /* right clipping: trim so that x + width == _Xmax, as the test implies */
+   if (*x + *width > buffer->_Xmax)
+      *width -= (*x + *width - buffer->_Xmax);
+
+   if (*width <= 0)
+      return GL_FALSE;
+
+   /* bottom clipping */
+   if (*y < buffer->_Ymin) {
+      *height -= (buffer->_Ymin - *y);
+      *y = buffer->_Ymin;
+   }
+
+   /* top clipping: trim so that y + height == _Ymax, as the test implies */
+   if (*y + *height > buffer->_Ymax)
+      *height -= (*y + *height - buffer->_Ymax);
+
+   if (*height <= 0)
+      return GL_FALSE;
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Compute intersection of a clipping rectangle and pixel rectangle,
+ * returning results in x/y/w/hOut vars.
+ * \return GL_TRUE if there's intersection, GL_FALSE if disjoint.
+ */
+static INLINE GLboolean
+intersect_region(const drm_clip_rect_t *box,
+		 GLint x, GLint y, GLsizei width, GLsizei height,
+		 GLint *xOut, GLint *yOut, GLint *wOut, GLint *hOut)
+{
+   GLint bx = box->x1;
+   GLint by = box->y1;
+   GLint bw = box->x2 - bx;
+   GLint bh = box->y2 - by;
+
+   if (bx < x) bw -= x - bx, bx = x;
+   if (by < y) bh -= y - by, by = y;
+   if (bx + bw > x + width) bw = x + width - bx;
+   if (by + bh > y + height) bh = y + height - by;
+
+   *xOut = bx;
+   *yOut = by;
+   *wOut = bw;
+   *hOut = bh;
+
+   if (bw <= 0) return GL_FALSE;
+   if (bh <= 0) return GL_FALSE;
+
+   return GL_TRUE;
+}
+
+
+
+static GLboolean
+intelTryReadPixels( GLcontext *ctx,
+		  GLint x, GLint y, GLsizei width, GLsizei height,
+		  GLenum format, GLenum type,
+		  const struct gl_pixelstore_attrib *pack,
+		  GLvoid *pixels )
+{
+   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   GLint size = 0; /* not really used */
+   GLint pitch = pack->RowLength ? pack->RowLength : width;
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   /* Only accelerate reading to agp buffers.
+    */
+   if ( !intelIsAgpMemory(intel, pixels, 
+			pitch * height * intel->intelScreen->cpp ) ) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+	 fprintf(stderr, "%s: dest not agp\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   /* Need GL_PACK_INVERT_MESA to cope with upsidedown results from
+    * blitter:
+    */
+   if (!pack->Invert) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+	 fprintf(stderr, "%s: MESA_PACK_INVERT not set\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   if (!check_color(ctx, type, format, pack, pixels, size, pitch))
+      return GL_FALSE;
+
+   switch ( intel->intelScreen->cpp ) {
+   case 4:
+      break;
+   default:
+      return GL_FALSE;
+   }
+
+
+   /* Although the blits go on the command buffer, need to do this and
+    * fire with lock held to guarantee cliprects and drawing offset are
+    * correct.
+    *
+    * This is an unusual situation however, as the code which flushes
+    * a full command buffer expects to be called unlocked.  As a
+    * workaround, immediately flush the buffer on acquiring the lock.
+    */
+   intelFlush( &intel->ctx );
+   LOCK_HARDWARE( intel );
+   {
+      __DRIdrawablePrivate *dPriv = intel->driDrawable;
+      int nbox = dPriv->numClipRects;
+      int src_offset = intel->readRegion->offset;
+      int src_pitch = intel->intelScreen->front.pitch;
+      int dst_offset = intelAgpOffsetFromVirtual( intel, pixels);
+      drm_clip_rect_t *box = dPriv->pClipRects;
+      int i;
+
+      assert(dst_offset != ~0);  /* should have been caught above */
+
+      if (!clip_pixelrect(ctx, ctx->ReadBuffer, &x, &y, &width, &height)) {
+	 UNLOCK_HARDWARE( intel );
+	 if (INTEL_DEBUG & DEBUG_PIXEL)
+	    fprintf(stderr, "%s totally clipped -- nothing to do\n",
+		    __FUNCTION__);
+	 return GL_TRUE;
+      }
+
+      /* convert to screen coords (y=0=top) */
+      y = dPriv->h - y - height;
+      x += dPriv->x;
+      y += dPriv->y;
+
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+	 fprintf(stderr, "readpixel blit src_pitch %d dst_pitch %d\n",
+		 src_pitch, pitch);
+
+      /* We don't really have to do window clipping for readpixels.
+       * The OpenGL spec says that pixels read from outside the
+       * visible window region (pixel ownership) have undefined value.
+       */
+      for (i = 0 ; i < nbox ; i++)
+      {
+         GLint bx, by, bw, bh;
+         if (intersect_region(box+i, x, y, width, height,
+                              &bx, &by, &bw, &bh)) {
+            intelEmitCopyBlitLocked( intel,
+                                     intel->intelScreen->cpp,
+                                     src_pitch, src_offset,
+                                     pitch, dst_offset,
+                                     bx, by,
+                                     bx - x, by - y,
+                                     bw, bh );
+         }
+      }
+   }
+   UNLOCK_HARDWARE( intel );
+   intelFinish( &intel->ctx );
+
+   return GL_TRUE;
+}
+
+static void
+intelReadPixels( GLcontext *ctx,
+		 GLint x, GLint y, GLsizei width, GLsizei height,
+		 GLenum format, GLenum type,
+		 const struct gl_pixelstore_attrib *pack,
+		 GLvoid *pixels )
+{
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (!intelTryReadPixels( ctx, x, y, width, height, format, type, pack, 
+                            pixels))
+      _swrast_ReadPixels( ctx, x, y, width, height, format, type, pack, 
+			  pixels);
+}
+
+
+
+
+static void do_draw_pix( GLcontext *ctx,
+			 GLint x, GLint y, GLsizei width, GLsizei height,
+			 GLint pitch,
+			 const void *pixels,
+			 GLuint dest )
+{
+   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;
+   drm_clip_rect_t *box = dPriv->pClipRects;
+   int nbox = dPriv->numClipRects;
+   int i;
+   int src_offset = intelAgpOffsetFromVirtual( intel, pixels);
+   int src_pitch = pitch;
+
+   assert(src_offset != ~0);  /* should be caught earlier */
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   intelFlush( &intel->ctx );
+   LOCK_HARDWARE( intel );
+   if (ctx->DrawBuffer)
+   {
+      y -= height;			/* cope with pixel zoom */
+   
+      if (!clip_pixelrect(ctx, ctx->DrawBuffer,
+			  &x, &y, &width, &height)) {
+	 UNLOCK_HARDWARE( intel );
+	 return;
+      }
+
+      y = dPriv->h - y - height; 	/* convert from gl to hardware coords */
+      x += dPriv->x;
+      y += dPriv->y;
+
+      for (i = 0 ; i < nbox ; i++ )
+      {
+	 GLint bx, by, bw, bh;
+	 if (intersect_region(box + i, x, y, width, height,
+			      &bx, &by, &bw, &bh)) {
+            intelEmitCopyBlitLocked( intel,
+                                     intel->intelScreen->cpp,
+                                     src_pitch, src_offset,
+                                     intel->intelScreen->front.pitch,
+                                     intel->drawRegion->offset,
+                                     bx - x, by - y,
+                                     bx, by,
+                                     bw, bh );
+         }
+      }
+   }
+   UNLOCK_HARDWARE( intel );
+   intelFinish( &intel->ctx );
+}
+
+
+
+static GLboolean
+intelTryDrawPixels( GLcontext *ctx,
+		  GLint x, GLint y, GLsizei width, GLsizei height,
+		  GLenum format, GLenum type,
+		  const struct gl_pixelstore_attrib *unpack,
+		  const GLvoid *pixels )
+{
+   /* Blit fast path for glDrawPixels.  Returns GL_TRUE when the blit was
+    * issued, GL_FALSE when the caller must fall back to software.
+    */
+   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   GLint pitch = unpack->RowLength ? unpack->RowLength : width;
+   GLuint dest;
+   GLuint cpp = intel->intelScreen->cpp;
+   /* Extent of the source image: rows times pitch, as in intelTryReadPixels. */
+   GLint size = pitch * height * cpp;
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   switch (format) {
+   case GL_RGB:
+   case GL_RGBA:
+   case GL_BGRA:
+      dest = intel->drawRegion->offset;
+
+      /* Planemask doesn't have full support in blits.
+       */
+      if (!ctx->Color.ColorMask[RCOMP] ||
+	  !ctx->Color.ColorMask[GCOMP] ||
+	  !ctx->Color.ColorMask[BCOMP] ||
+	  !ctx->Color.ColorMask[ACOMP]) {
+	 if (INTEL_DEBUG & DEBUG_PIXEL)
+	    fprintf(stderr, "%s: planemask\n", __FUNCTION__);
+	 return GL_FALSE;
+      }
+
+      /* Can't do conversions on agp reads/draws.
+       */
+      if ( !intelIsAgpMemory( intel, pixels, size ) ) {
+	 if (INTEL_DEBUG & DEBUG_PIXEL)
+	    fprintf(stderr, "%s: not agp memory\n", __FUNCTION__);
+	 return GL_FALSE;
+      }
+
+      if (!check_color(ctx, type, format, unpack, pixels, size, pitch)) {
+	 return GL_FALSE;
+      }
+      if (!check_color_per_fragment_ops(ctx)) {
+	 return GL_FALSE;
+      }
+
+      if (ctx->Pixel.ZoomX != 1.0F ||
+	  ctx->Pixel.ZoomY != -1.0F)
+	 return GL_FALSE;
+      break;
+
+   default:
+      return GL_FALSE;
+   }
+
+   if ( !intelIsAgpMemory(intel, pixels, size) ) {
+      /* Pixels is in regular memory -- get dma buffers and perform
+       * upload through them.  No point doing this for regular uploads
+       * but once we remove some of the restrictions above (colormask,
+       * pixelformat conversion, zoom?, etc), this could be a win.
+       * Not implemented, so fall back.
+       */
+      return GL_FALSE;
+   }
+
+   /* Source is in AGP memory: blit it to the draw region. */
+   do_draw_pix( ctx, x, y, width, height, pitch, pixels, dest );
+   return GL_TRUE;
+}
+
+static void
+intelDrawPixels( GLcontext *ctx,
+		 GLint x, GLint y, GLsizei width, GLsizei height,
+		 GLenum format, GLenum type,
+		 const struct gl_pixelstore_attrib *unpack,
+		 const GLvoid *pixels )
+{
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (!intelTryDrawPixels( ctx, x, y, width, height, format, type,
+                            unpack, pixels ))
+      _swrast_DrawPixels( ctx, x, y, width, height, format, type,
+			  unpack, pixels );
+}
+
+
+
+
+/**
+ * Implement glCopyPixels for the front color buffer (or back buffer Pixmap)
+ * for the color buffer.  Don't support zooming, pixel transfer, etc.
+ * We do support copying from one window to another, ala glXMakeCurrentRead.
+ */
+static void
+intelCopyPixels( GLcontext *ctx,
+		 GLint srcx, GLint srcy, GLsizei width, GLsizei height,
+		 GLint destx, GLint desty, GLenum type )
+{
+#if 0
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   const SWcontext *swrast = SWRAST_CONTEXT( ctx );
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   const XMesaDrawable drawBuffer = xmesa->xm_draw_buffer->buffer;
+   const XMesaDrawable readBuffer = xmesa->xm_read_buffer->buffer;
+   const XMesaGC gc = xmesa->xm_draw_buffer->gc;
+
+   ASSERT(dpy);
+   ASSERT(gc);
+
+   if (drawBuffer &&  /* buffer != 0 means it's a Window or Pixmap */
+       readBuffer &&
+       type == GL_COLOR &&
+       (swrast->_RasterMask & ~CLIP_BIT) == 0 && /* no blend, z-test, etc */
+       ctx->_ImageTransferState == 0 &&  /* no color tables, scale/bias, etc */
+       ctx->Pixel.ZoomX == 1.0 &&        /* no zooming */
+       ctx->Pixel.ZoomY == 1.0) {
+      /* Note: we don't do any special clipping work here.  We could,
+       * but X will do it for us.
+       */
+      srcy = FLIP(xmesa->xm_read_buffer, srcy) - height + 1;
+      desty = FLIP(xmesa->xm_draw_buffer, desty) - height + 1;
+      XCopyArea(dpy, readBuffer, drawBuffer, gc,
+                srcx, srcy, width, height, destx, desty);
+   }
+#else
+   _swrast_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type );
+#endif
+}
+
+
+
+
+void intelInitPixelFuncs( struct dd_function_table *functions )
+{
+   functions->CopyPixels = intelCopyPixels;
+   if (!getenv("INTEL_NO_BLITS")) {
+      functions->ReadPixels = intelReadPixels;  
+      functions->DrawPixels = intelDrawPixels; 
+   }
+}
diff --git a/i915/intel_reg.h b/i915/intel_reg.h
new file mode 100644
index 0000000..1ec1532
--- /dev/null
+++ b/i915/intel_reg.h
@@ -0,0 +1,84 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#ifndef _INTEL_REG_H_
+#define _INTEL_REG_H_
+
+
+
+#define CMD_3D (0x3<<29)
+
+
+#define _3DPRIMITIVE         ((0x3<<29)|(0x1f<<24))
+#define PRIM_INDIRECT            (1<<23)
+#define PRIM_INLINE              (0<<23)
+#define PRIM_INDIRECT_SEQUENTIAL (0<<17)
+#define PRIM_INDIRECT_ELTS       (1<<17)
+
+#define PRIM3D_TRILIST		(0x0<<18)
+#define PRIM3D_TRISTRIP 	(0x1<<18)
+#define PRIM3D_TRISTRIP_RVRSE	(0x2<<18)
+#define PRIM3D_TRIFAN		(0x3<<18)
+#define PRIM3D_POLY		(0x4<<18)
+#define PRIM3D_LINELIST 	(0x5<<18)
+#define PRIM3D_LINESTRIP	(0x6<<18)
+#define PRIM3D_RECTLIST 	(0x7<<18)
+#define PRIM3D_POINTLIST	(0x8<<18)
+#define PRIM3D_DIB		(0x9<<18)
+#define PRIM3D_MASK		(0x1f<<18)
+
+#define I915PACKCOLOR4444(r,g,b,a) \
+  ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4))
+
+#define I915PACKCOLOR1555(r,g,b,a) \
+  ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \
+    ((a) ? 0x8000 : 0))
+
+#define I915PACKCOLOR565(r,g,b) \
+  ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3))
+
+/* Pack as (a<<24 | r<<16 | g<<8 | b).  Every argument is parenthesized so
+ * that expression arguments (e.g. x | y) bind before the shift. */
+#define I915PACKCOLOR8888(r,g,b,a) \
+  (((a)<<24) | ((r)<<16) | ((g)<<8) | (b))
+
+
+#define BR00_BITBLT_CLIENT   0x40000000
+#define BR00_OP_COLOR_BLT    0x10000000
+#define BR00_OP_SRC_COPY_BLT 0x10C00000
+#define BR13_SOLID_PATTERN   0x80000000
+
+#define XY_COLOR_BLT_CMD		((2<<29)|(0x50<<22)|0x4)
+#define XY_COLOR_BLT_WRITE_ALPHA	(1<<21)
+#define XY_COLOR_BLT_WRITE_RGB		(1<<20)
+
+#define XY_SRC_COPY_BLT_CMD             ((2<<29)|(0x53<<22)|6)
+#define XY_SRC_COPY_BLT_WRITE_ALPHA     (1<<21)
+#define XY_SRC_COPY_BLT_WRITE_RGB       (1<<20)
+
+#endif
diff --git a/i915/intel_render.c b/i915/intel_render.c
new file mode 100644
index 0000000..773779a
--- /dev/null
+++ b/i915/intel_render.c
@@ -0,0 +1,240 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/*
+ * Render unclipped vertex buffers by emitting vertices directly to
+ * dma buffers.  Use strip/fan hardware acceleration where possible.
+ *
+ */
+#include "glheader.h"
+#include "context.h"
+#include "macros.h"
+#include "imports.h"
+#include "mtypes.h"
+#include "enums.h"
+
+#include "tnl/t_context.h"
+#include "tnl/t_vertex.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_tris.h"
+#include "intel_batchbuffer.h"
+#include "intel_reg.h"
+
+/*
+ * Render unclipped vertex buffers by emitting vertices directly to
+ * dma buffers.  Use strip/fan hardware primitives where possible.
+ * Try to simulate missing primitives with indexed vertices.
+ */
+#define HAVE_POINTS      0  /* Has it, but can't use because subpixel has to
+			     * be adjusted for points on the INTEL/I845G
+			     */
+#define HAVE_LINES       1
+#define HAVE_LINE_STRIPS 1
+#define HAVE_TRIANGLES   1
+#define HAVE_TRI_STRIPS  1
+#define HAVE_TRI_STRIP_1 0  /* has it, template can't use it yet */
+#define HAVE_TRI_FANS    1
+#define HAVE_POLYGONS    1
+#define HAVE_QUADS       0
+#define HAVE_QUAD_STRIPS 0
+
+#define HAVE_ELTS        0
+
+static GLuint hw_prim[GL_POLYGON+1] = {
+   0,
+   PRIM3D_LINELIST,
+   PRIM3D_LINESTRIP,
+   PRIM3D_LINESTRIP,
+   PRIM3D_TRILIST,
+   PRIM3D_TRISTRIP,
+   PRIM3D_TRIFAN,
+   0,
+   0,
+   PRIM3D_POLY
+};
+
+static const GLenum reduced_prim[GL_POLYGON+1] = {  
+   GL_POINTS,
+   GL_LINES,
+   GL_LINES,
+   GL_LINES,
+   GL_TRIANGLES,
+   GL_TRIANGLES,
+   GL_TRIANGLES,
+   GL_TRIANGLES,
+   GL_TRIANGLES,
+   GL_TRIANGLES
+};
+
+static const int scale_prim[GL_POLYGON+1] = {  
+   0,				/* fallback case */
+   1,
+   2,
+   2,
+   1,
+   3,
+   3,
+   0,				/* fallback case */
+   0,				/* fallback case */
+   3
+};
+
+
+static void intelDmaPrimitive( intelContextPtr intel, GLenum prim )
+{
+   if (0) fprintf(stderr, "%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(prim));
+   INTEL_FIREVERTICES(intel);
+   intel->vtbl.reduced_primitive_state( intel, reduced_prim[prim] );
+   intelStartInlinePrimitive( intel, hw_prim[prim] );
+}
+
+
+#define LOCAL_VARS intelContextPtr intel = INTEL_CONTEXT(ctx)
+#define INIT( prim ) 				\
+do {						\
+   intelDmaPrimitive( intel, prim );		\
+} while (0)
+#define FLUSH()  INTEL_FIREVERTICES( intel )
+
+#define GET_SUBSEQUENT_VB_MAX_VERTS() \
+  (((intel->alloc.size / 2) - 1500) / (intel->vertex_size*4))
+#define GET_CURRENT_VB_MAX_VERTS() GET_SUBSEQUENT_VB_MAX_VERTS()
+
+#define ALLOC_VERTS( nr ) \
+   intelExtendInlinePrimitive( intel, (nr) * intel->vertex_size )
+  
+#define EMIT_VERTS( ctx, j, nr, buf ) \
+  _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf )  
+
+#define TAG(x) intel_##x
+#include "tnl_dd/t_dd_dmatmp.h"
+  
+  
+/**********************************************************************/
+/*                          Render pipeline stage                     */
+/**********************************************************************/
+
+/* Heuristic to choose between the two render paths: GL_TRUE selects the
+ * inline-dma path in this file, GL_FALSE leaves the work to the fallback. */
+static GLboolean choose_render( intelContextPtr intel,
+				struct vertex_buffer *VB )
+{
+   int vertsz = intel->vertex_size;
+   int cost_render = 0, cost_fallback = 0;
+   int nr_prims = 0, nr_rprims = 0;
+   int nr_verts = 0;    /* vertices counted once each, for the render path */
+   int nr_rverts = 0;   /* vertices scaled by scale_prim[], for the fallback */
+   int rprim = intel->reduced_primitive;
+   int i = 0;
+
+   for (i = 0 ; i < VB->PrimitiveCount ; i++) {
+      GLuint prim = VB->Primitive[i].mode;
+      GLuint length = VB->Primitive[i].count;
+
+      if (!length)
+	 continue;
+
+      nr_prims++;
+      nr_verts += length;
+      nr_rverts += length * scale_prim[prim & PRIM_MODE_MASK];
+
+      if (reduced_prim[prim & PRIM_MODE_MASK] != rprim) {
+	 nr_rprims++;
+	 rprim = reduced_prim[prim & PRIM_MODE_MASK];
+      }
+   }
+
+   /* One point for each generated primitive:
+    */
+   cost_render = nr_prims;
+   cost_fallback = nr_rprims;
+
+   /* One point for every 1024 dwords (4k) of dma.  Use the vertex totals;
+    * the loop index only counts primitives. */
+   cost_render += (vertsz * nr_verts) / 1024;
+   cost_fallback += (vertsz * nr_rverts) / 1024;
+
+   if (0)
+      fprintf(stderr, "cost render: %d fallback: %d\n",
+	      cost_render, cost_fallback);
+
+   if (cost_render > cost_fallback)
+      return GL_FALSE;
+
+   return GL_TRUE;
+}
+
+
+static GLboolean intel_run_render( GLcontext *ctx, 
+				 struct tnl_pipeline_stage *stage )
+{
+   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   GLuint i;
+
+   /* Don't handle clipping or indexed vertices.
+    */
+   if (intel->RenderIndex != 0 || 
+       !intel_validate_render( ctx, VB ) || 
+       !choose_render( intel, VB )) {
+      return GL_TRUE;
+   }
+
+   tnl->clipspace.new_inputs |= VERT_BIT_POS;
+
+   tnl->Driver.Render.Start( ctx );
+   
+   for (i = 0 ; i < VB->PrimitiveCount ; i++)
+   {
+      GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
+      GLuint start = VB->Primitive[i].start;
+      GLuint length = VB->Primitive[i].count;
+
+      if (!length)
+	 continue;
+
+      intel_render_tab_verts[prim & PRIM_MODE_MASK]( ctx, start, start + length,
+						     prim );
+   }
+      
+   tnl->Driver.Render.Finish( ctx );
+
+   return GL_FALSE;     /* finished the pipe */
+}
+
+const struct tnl_pipeline_stage _intel_render_stage =
+{
+   "intel render",
+   NULL,
+   NULL,
+   NULL,
+   NULL,
+   intel_run_render	/* run */
+};
diff --git a/i915/intel_rotate.c b/i915/intel_rotate.c
new file mode 100644
index 0000000..a77640e
--- /dev/null
+++ b/i915/intel_rotate.c
@@ -0,0 +1,221 @@
+
+/**
+ * Routines for simple 2D->2D transformations for rotated, flipped screens.
+ *
+ * XXX This code is not intel-specific.  Move it into a common/utility
+ * someday.
+ */
+
+#include "intel_rotate.h"
+
+#define MIN2(A, B)   ( ((A) < (B)) ? (A) : (B) )
+
+#define ABS(A)  ( ((A) < 0) ? -(A) : (A) )
+
+
+void
+matrix23Set(struct matrix23 *m,
+            int m00, int m01, int m02,
+            int m10, int m11, int m12)
+{
+   m->m00 = m00;   m->m01 = m01;   m->m02 = m02;
+   m->m10 = m10;   m->m11 = m11;   m->m12 = m12;
+}
+
+
+/*
+ * Transform (x,y) coordinate by the given matrix.
+ */
+void
+matrix23TransformCoordf(const struct matrix23 *m, float *x, float *y)
+{
+   const float x0 = *x;
+   const float y0 = *y;
+
+   *x = m->m00 * x0 + m->m01 * y0 + m->m02;
+   *y = m->m10 * x0 + m->m11 * y0 + m->m12;
+}
+
+
+void
+matrix23TransformCoordi(const struct matrix23 *m, int *x, int *y)
+{
+   const int x0 = *x;
+   const int y0 = *y;
+
+   *x = m->m00 * x0 + m->m01 * y0 + m->m02;
+   *y = m->m10 * x0 + m->m11 * y0 + m->m12;
+}
+
+
+/*
+ * Transform a width and height by the given matrix.
+ * XXX this could be optimized quite a bit.
+ */
+void
+matrix23TransformDistance(const struct matrix23 *m, int *xDist, int *yDist)
+{
+   int x0 = 0, y0 = 0;
+   int x1 = *xDist, y1 = 0;
+   int x2 = 0, y2 = *yDist;
+   matrix23TransformCoordi(m, &x0, &y0);
+   matrix23TransformCoordi(m, &x1, &y1);
+   matrix23TransformCoordi(m, &x2, &y2);
+
+   *xDist = (x1 - x0) + (x2 - x0);
+   *yDist = (y1 - y0) + (y2 - y0);
+
+   if (*xDist < 0)
+       *xDist = -*xDist;
+   if (*yDist < 0)
+       *yDist = -*yDist;
+}
+
+
+/**
+ * Transform the rect defined by (x, y, w, h) by m, in place.
+ * Three corners are transformed; the new size is the summed |dx|, |dy|
+ * along two adjacent edges and the new origin is the corner minimum, which
+ * is exact for the axis-aligned rotate/flip matrices built in this file.
+ */
+void
+matrix23TransformRect(const struct matrix23 *m, int *x, int *y, int *w, int *h)
+{
+   int x0 = *x, y0 = *y;
+   int x1 = *x + *w, y1 = *y;
+   int x2 = *x + *w, y2 = *y + *h;
+
+   matrix23TransformCoordi(m, &x0, &y0);
+   matrix23TransformCoordi(m, &x1, &y1);
+   matrix23TransformCoordi(m, &x2, &y2);
+
+   *w = ABS(x1 - x0) + ABS(x2 - x1);
+   *h = ABS(y1 - y0) + ABS(y2 - y1);
+   /* Three corners of an axis-aligned rect already hold both extremes. */
+   *x = MIN2(MIN2(x0, x1), x2);
+   *y = MIN2(MIN2(y0, y1), y2);
+}
+
+
+/*
+ * Make rotation matrix for width X height screen.  angle must be one of
+ * 0, 90, 180 or 270; any other value leaves *m unmodified. */
+void
+matrix23Rotate(struct matrix23 *m, int width, int height, int angle)
+{
+   switch (angle) {
+   case 0:   /* identity */
+      matrix23Set(m, 1, 0, 0, 0, 1, 0);
+      break;
+   case 90:  /* x' = y, y' = width - x */
+      matrix23Set(m, 0, 1, 0,  -1, 0, width);
+      break;
+   case 180: /* x' = width - x, y' = height - y */
+      matrix23Set(m, -1, 0, width,  0, -1, height);
+      break;
+   case 270: /* x' = height - y, y' = x */
+      matrix23Set(m, 0, -1, height,  1, 0, 0);
+      break;
+   default:  /* unsupported angle: nothing is written to *m */
+      /*abort()*/;
+   }
+}
+
+
+/*
+ * Make flip/reflection matrix for width X height screen.
+ */
+void
+matrix23Flip(struct matrix23 *m, int width, int height, int xflip, int yflip)
+{
+   /* First row: x' = x, or x' = (width - 1) - x when xflip is set. */
+   const int xScale = xflip ? -1 : 1;
+   const int xShift = xflip ? width - 1 : 0;
+   /* Second row: y' = y, or y' = (height - 1) - y when yflip is set. */
+   const int yScale = yflip ? -1 : 1;
+   const int yShift = yflip ? height - 1 : 0;
+   m->m00 = xScale;
+   m->m01 = 0;
+   m->m02 = xShift;
+   m->m10 = 0;
+   m->m11 = yScale;
+   m->m12 = yShift;
+}
+
+
+/*
+ * result = a * b (2x3 matrices, treated as having a 0 0 1 bottom row).
+ * result must not alias a or b: it is written while they are still read. */
+void
+matrix23Multiply(struct matrix23 *result,
+                 const struct matrix23 *a, const struct matrix23 *b)
+{
+   result->m00 = a->m00 * b->m00 + a->m01 * b->m10;
+   result->m01 = a->m00 * b->m01 + a->m01 * b->m11;
+   result->m02 = a->m00 * b->m02 + a->m01 * b->m12 + a->m02;
+
+   result->m10 = a->m10 * b->m00 + a->m11 * b->m10;
+   result->m11 = a->m10 * b->m01 + a->m11 * b->m11;
+   result->m12 = a->m10 * b->m02 + a->m11 * b->m12 + a->m12;
+}
+
+
+#if 000
+
+#include <stdio.h>
+
+int
+main(int argc, char *argv[])
+{
+   int width = 500, height = 400;
+   int rot;
+   int fx = 0, fy = 0;  /* flip x and/or y ? */
+   int coords[4][2];
+
+   /* four corner coords to test with */
+   coords[0][0] = 0;  coords[0][1] = 0;
+   coords[1][0] = width-1;  coords[1][1] = 0;
+   coords[2][0] = width-1;  coords[2][1] = height-1;
+   coords[3][0] = 0;  coords[3][1] = height-1;
+
+
+   for (rot = 0; rot < 360; rot += 90) {
+      struct matrix23 rotate, flip, m;
+      int i;
+
+      printf("Rot %d, xFlip %d, yFlip %d:\n", rot, fx, fy);
+
+      /* make transformation matrix 'm' */
+      matrix23Rotate(&rotate, width, height, rot);
+      matrix23Flip(&flip, width, height, fx, fy);
+      matrix23Multiply(&m, &rotate, &flip);
+
+      /* xform four coords */
+      for (i = 0; i < 4; i++) {
+         int x = coords[i][0];
+         int y = coords[i][1];
+         matrix23TransformCoordi(&m, &x, &y);
+         printf("  %d, %d  -> %d %d\n", coords[i][0], coords[i][1], x, y);
+      }
+
+      /* xform width, height */
+      {
+         int x = width;
+         int y = height;
+         matrix23TransformDistance(&m, &x, &y);
+         printf("  %d x %d -> %d x %d\n", width, height, x, y);
+      }
+
+      /* xform rect */
+      {
+         int x = 50, y = 10, w = 200, h = 100;
+         matrix23TransformRect(&m, &x, &y, &w, &h);
+         printf("  %d,%d %d x %d -> %d, %d %d x %d\n", 50, 10, 200, 100,
+                x, y, w, h);
+      }
+
+   }
+
+   return 0;
+}
+#endif
diff --git a/i915/intel_rotate.h b/i915/intel_rotate.h
new file mode 100644
index 0000000..0da45d2
--- /dev/null
+++ b/i915/intel_rotate.h
@@ -0,0 +1,41 @@
+#ifndef INTEL_ROTATE_H
+#define INTEL_ROTATE_H 1
+
+/*
+ * 2x3 integer matrix: two rows of three elements, named m<row><column>.
+ * matrix23Multiply() composes two of these as if each had an implicit
+ * third row of (0, 0, 1), so m02 and m12 act as the translation terms.
+ */
+struct matrix23
+{
+   int m00, m01, m02;
+   int m10, m11, m12;
+};
+
+
+
+extern void
+matrix23Set(struct matrix23 *m,
+            int m00, int m01, int m02,
+            int m10, int m11, int m12);
+
+extern void
+matrix23TransformCoordi(const struct matrix23 *m, int *x, int *y);
+
+extern void
+matrix23TransformCoordf(const struct matrix23 *m, float *x, float *y);
+
+extern void
+matrix23TransformDistance(const struct matrix23 *m, int *xDist, int *yDist);
+
+extern void
+matrix23TransformRect(const struct matrix23 *m,
+                      int *x, int *y, int *w, int *h);
+
+extern void
+matrix23Rotate(struct matrix23 *m, int width, int height, int angle);
+
+extern void
+matrix23Flip(struct matrix23 *m, int width, int height, int xflip, int yflip);
+
+extern void
+matrix23Multiply(struct matrix23 *result,
+                 const struct matrix23 *a, const struct matrix23 *b);
+
+
+#endif /* INTEL_ROTATE_H */
diff --git a/i915/intel_screen.c b/i915/intel_screen.c
new file mode 100644
index 0000000..ca8610b
--- /dev/null
+++ b/i915/intel_screen.c
@@ -0,0 +1,690 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "context.h"
+#include "framebuffer.h"
+#include "matrix.h"
+#include "renderbuffer.h"
+#include "simple_list.h"
+#include "utils.h"
+#include "vblank.h"
+#include "xmlpool.h"
+
+
+#include "intel_screen.h"
+
+#include "intel_tex.h"
+#include "intel_span.h"
+#include "intel_tris.h"
+#include "intel_ioctl.h"
+
+#include "i830_dri.h"
+
+/*
+ * Driver configuration option table, assembled from the DRI_CONF_* macros.
+ * Four options are listed (frame throttle mode, vblank mode, forced S3TC,
+ * large-texture policy) and __driNConfigOptions carries that count.  Both
+ * symbols are handed to driParseOptionInfo() in intelInitDriver(), so the
+ * count has to be updated whenever an option is added or removed here.
+ */
+PUBLIC const char __driConfigOptions[] =
+DRI_CONF_BEGIN
+    DRI_CONF_SECTION_PERFORMANCE
+       DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) 
+       DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
+    DRI_CONF_SECTION_END
+    DRI_CONF_SECTION_QUALITY
+       DRI_CONF_FORCE_S3TC_ENABLE(false)
+       DRI_CONF_ALLOW_LARGE_TEXTURES(1)
+      DRI_CONF_SECTION_END
+DRI_CONF_END;
+const GLuint __driNConfigOptions = 4;
+
+#ifdef USE_NEW_INTERFACE
+static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;
+#endif /*USE_NEW_INTERFACE*/
+
+extern const struct dri_extension card_extensions[];
+
+/**
+ * Map a single screen region with drmMap().
+ *
+ * libdrm's drmMap() stores mmap()'s return value through the address
+ * pointer before checking it, so a failed call can leave MAP_FAILED (a
+ * non-NULL value) in region->map.  The rest of this file treats a non-NULL
+ * map as "mapped", so the pointer is reset to NULL on failure.
+ *
+ * \param fd      DRM file descriptor.
+ * \param region  region whose handle/size describe the mapping; its map
+ *                field receives the address.
+ * \param name    region name used in the error message.
+ * \return GL_TRUE if success, GL_FALSE if drmMap() failed.
+ */
+static GLboolean
+map_screen_region(int fd, intelRegion *region, const char *name)
+{
+   if (drmMap(fd, region->handle, region->size,
+              (drmAddress *) &region->map) != 0) {
+      region->map = NULL;
+      _mesa_problem(NULL, "drmMap(%s) failed!", name);
+      return GL_FALSE;
+   }
+   return GL_TRUE;
+}
+
+
+/**
+ * Map all the memory regions described by the screen.
+ *
+ * The front buffer is only mapped when it has a handle; a missing handle
+ * produces a warning but is not an error.  The back, depth and texture
+ * regions are always mapped.  If any of those three fails, every region
+ * mapped so far is unmapped again before returning.
+ *
+ * \return GL_TRUE if success, GL_FALSE if error.
+ */
+GLboolean
+intelMapScreenRegions(__DRIscreenPrivate *sPriv)
+{
+   intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private;
+
+   if (intelScreen->front.handle) {
+      /* Nothing else is mapped yet, so there is nothing to undo here. */
+      if (!map_screen_region(sPriv->fd, &intelScreen->front, "frontbuffer"))
+         return GL_FALSE;
+   }
+   else {
+      _mesa_warning(NULL, "no front buffer handle in intelMapScreenRegions!");
+   }
+
+   /* Short-circuit evaluation keeps the original order: back, depth, tex. */
+   if (!map_screen_region(sPriv->fd, &intelScreen->back, "backbuffer") ||
+       !map_screen_region(sPriv->fd, &intelScreen->depth, "depthbuffer") ||
+       !map_screen_region(sPriv->fd, &intelScreen->tex, "textures")) {
+      intelUnmapScreenRegions(intelScreen);
+      return GL_FALSE;
+   }
+
+   /* Debug aid, disabled by default. */
+   if (0)
+      printf("Mappings:  front: %p  back: %p  depth: %p  tex: %p\n",
+          intelScreen->front.map,
+          intelScreen->back.map,
+          intelScreen->depth.map,
+          intelScreen->tex.map);
+   return GL_TRUE;
+}
+
+
+/* Set to 0 to skip the actual drmUnmap() calls (debugging aid). */
+#define REALLY_UNMAP 1
+
+/**
+ * Unmap one region if it is currently mapped and clear its map pointer.
+ * A drmUnmap() failure is reported but otherwise ignored.
+ *
+ * \param region  region to unmap; untouched when region->map is NULL.
+ * \param name    region name used in the failure message.
+ */
+static void
+unmap_screen_region(intelRegion *region, const char *name)
+{
+   (void) name;  /* only referenced when REALLY_UNMAP is enabled */
+
+   if (!region->map)
+      return;
+
+#if REALLY_UNMAP
+   if (drmUnmap(region->map, region->size) != 0)
+      printf("drmUnmap %s failed!\n", name);
+#endif
+   region->map = NULL;
+}
+
+
+/**
+ * Unmap every screen region that is currently mapped (front, back, depth
+ * and texture) and reset the corresponding map pointers to NULL.
+ * Regions whose map pointer is already NULL are skipped, so this can be
+ * called on a partially mapped screen.
+ */
+void
+intelUnmapScreenRegions(intelScreenPrivate *intelScreen)
+{
+   unmap_screen_region(&intelScreen->front, "front");
+   unmap_screen_region(&intelScreen->back, "back");
+   unmap_screen_region(&intelScreen->depth, "depth");
+   unmap_screen_region(&intelScreen->tex, "tex");
+}
+
+
+/**
+ * Debug helper: dump size, offset and pitch of each screen region, plus
+ * the memory value from the DDX private record, to stderr.
+ * The sPriv argument is accepted but not used.
+ */
+static void
+intelPrintDRIInfo(intelScreenPrivate *intelScreen,
+                  __DRIscreenPrivate *sPriv,
+                  I830DRIPtr gDRIPriv)
+{
+   const intelRegion *const frontRegion   = &intelScreen->front;
+   const intelRegion *const backRegion    = &intelScreen->back;
+   const intelRegion *const depthRegion   = &intelScreen->depth;
+   const intelRegion *const rotatedRegion = &intelScreen->rotated;
+   const intelRegion *const texRegion     = &intelScreen->tex;
+
+   fprintf(stderr, "*** Front size:   0x%x  offset: 0x%x  pitch: %d\n",
+           frontRegion->size, frontRegion->offset, frontRegion->pitch);
+   fprintf(stderr, "*** Back size:    0x%x  offset: 0x%x  pitch: %d\n",
+           backRegion->size, backRegion->offset, backRegion->pitch);
+   fprintf(stderr, "*** Depth size:   0x%x  offset: 0x%x  pitch: %d\n",
+           depthRegion->size, depthRegion->offset, depthRegion->pitch);
+   fprintf(stderr, "*** Rotated size: 0x%x  offset: 0x%x  pitch: %d\n",
+           rotatedRegion->size, rotatedRegion->offset, rotatedRegion->pitch);
+   /* The texture region has no pitch to report. */
+   fprintf(stderr, "*** Texture size: 0x%x  offset: 0x%x\n",
+           texRegion->size, texRegion->offset);
+   fprintf(stderr, "*** Memory : 0x%x\n", gDRIPriv->mem);
+}
+
+
+/**
+ * Debug helper: dump the driver-private SAREA fields (dimensions, pitch,
+ * per-buffer offset/size/handle and the rotation state) to stderr.
+ */
+static void
+intelPrintSAREA(const drmI830Sarea *sarea)
+{
+   FILE *const out = stderr;
+
+   /* overall geometry */
+   fprintf(out, "SAREA: sarea width %d  height %d\n",
+           sarea->width, sarea->height);
+   fprintf(out, "SAREA: pitch: %d\n", sarea->pitch);
+
+   /* one line per buffer; handles are printed as plain unsigned values */
+   fprintf(out, "SAREA: front offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
+           sarea->front_offset,
+           sarea->front_size,
+           (unsigned) sarea->front_handle);
+   fprintf(out, "SAREA: back  offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
+           sarea->back_offset,
+           sarea->back_size,
+           (unsigned) sarea->back_handle);
+   fprintf(out, "SAREA: depth offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
+           sarea->depth_offset,
+           sarea->depth_size,
+           (unsigned) sarea->depth_handle);
+   fprintf(out, "SAREA: tex   offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
+           sarea->tex_offset,
+           sarea->tex_size,
+           (unsigned) sarea->tex_handle);
+
+   /* rotation state */
+   fprintf(out, "SAREA: rotation: %d\n", sarea->rotation);
+   fprintf(out, "SAREA: rotated offset: 0x%08x  size: 0x%x\n",
+           sarea->rotated_offset, sarea->rotated_size);
+   fprintf(out, "SAREA: rotated pitch: %d\n", sarea->rotated_pitch);
+}
+
+
+/**
+ * Refresh the screen parameters that are derived from the SAREA.
+ *
+ * Copies the screen dimensions and the offset/handle/size of each buffer
+ * out of the SAREA, converts the SAREA pitches to bytes by multiplying
+ * with intelScreen->cpp, and rebuilds the rotation matrix from the
+ * SAREA's width, height and rotation.
+ */
+void
+intelUpdateScreenFromSAREA(intelScreenPrivate *intelScreen,
+                           drmI830Sarea *sarea)
+{
+   intelRegion *const frontRegion   = &intelScreen->front;
+   intelRegion *const backRegion    = &intelScreen->back;
+   intelRegion *const depthRegion   = &intelScreen->depth;
+   intelRegion *const texRegion     = &intelScreen->tex;
+   intelRegion *const rotatedRegion = &intelScreen->rotated;
+   const int bytesPerPixel = intelScreen->cpp;
+
+   intelScreen->width  = sarea->width;
+   intelScreen->height = sarea->height;
+
+   /* front, back and depth all use the same SAREA pitch */
+   frontRegion->offset = sarea->front_offset;
+   frontRegion->pitch  = sarea->pitch * bytesPerPixel;
+   frontRegion->handle = sarea->front_handle;
+   frontRegion->size   = sarea->front_size;
+
+   backRegion->offset = sarea->back_offset;
+   backRegion->pitch  = sarea->pitch * bytesPerPixel;
+   backRegion->handle = sarea->back_handle;
+   backRegion->size   = sarea->back_size;
+
+   depthRegion->offset = sarea->depth_offset;
+   depthRegion->pitch  = sarea->pitch * bytesPerPixel;
+   depthRegion->handle = sarea->depth_handle;
+   depthRegion->size   = sarea->depth_size;
+
+   /* texture memory: no pitch, but a granularity value instead */
+   texRegion->offset = sarea->tex_offset;
+   intelScreen->logTextureGranularity = sarea->log_tex_granularity;
+   texRegion->handle = sarea->tex_handle;
+   texRegion->size   = sarea->tex_size;
+
+   /* rotated buffer: only offset, pitch and size are taken from the SAREA */
+   rotatedRegion->offset = sarea->rotated_offset;
+   rotatedRegion->pitch  = sarea->rotated_pitch * bytesPerPixel;
+   rotatedRegion->size   = sarea->rotated_size;
+
+   intelScreen->current_rotation = sarea->rotation;
+   matrix23Rotate(&intelScreen->rotMatrix,
+                  sarea->width, sarea->height, sarea->rotation);
+   intelScreen->rotatedWidth  = sarea->virtualX;
+   intelScreen->rotatedHeight = sarea->virtualY;
+
+   /* Debug aid, disabled by default. */
+   if (0)
+      intelPrintSAREA(sarea);
+}
+
+
+/**
+ * Driver-private screen initialisation (the InitDriver hook in intelAPI).
+ *
+ * Allocates the intelScreenPrivate, parses the configuration options,
+ * copies the parameters supplied through the device-private record and
+ * the SAREA, maps the screen regions, queries the kernel for IRQ and
+ * batchbuffer support, and finally advertises the supported GLX
+ * extensions and memory-allocation hooks.
+ *
+ * Every failure path releases whatever was acquired up to that point and
+ * clears sPriv->private, so a failed call leaves nothing behind for the
+ * caller to clean up.
+ *
+ * \return GL_TRUE on success, GL_FALSE on any failure.
+ */
+static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv)
+{
+   intelScreenPrivate *intelScreen;
+   I830DRIPtr         gDRIPriv = (I830DRIPtr)sPriv->pDevPriv;
+   drmI830Sarea *sarea;
+   PFNGLXSCRENABLEEXTENSIONPROC glx_enable_extension =
+     (PFNGLXSCRENABLEEXTENSIONPROC) (*dri_interface->getProcAddress("glxEnableExtension"));
+   void * const psc = sPriv->psc->screenConfigs;
+   drmI830GetParam gp;
+   int ret;
+
+   if (sPriv->devPrivSize != sizeof(I830DRIRec)) {
+      fprintf(stderr,"\nERROR!  sizeof(I830DRIRec) does not match passed size from device driver\n");
+      return GL_FALSE;
+   }
+
+   /* Allocate the private area */
+   intelScreen = (intelScreenPrivate *)CALLOC(sizeof(intelScreenPrivate));
+   if (!intelScreen) {
+      fprintf(stderr,"\nERROR!  Allocating private area failed\n");
+      return GL_FALSE;
+   }
+   /* parse information in __driConfigOptions */
+   driParseOptionInfo (&intelScreen->optionCache,
+                       __driConfigOptions, __driNConfigOptions);
+
+   intelScreen->driScrnPriv = sPriv;
+   sPriv->private = (void *)intelScreen;
+   intelScreen->sarea_priv_offset = gDRIPriv->sarea_priv_offset;
+   /* The driver-private SAREA lives at a byte offset inside the shared one. */
+   sarea = (drmI830Sarea *)
+         (((GLubyte *)sPriv->pSAREA)+intelScreen->sarea_priv_offset);
+
+   intelScreen->deviceID = gDRIPriv->deviceID;
+   intelScreen->mem = gDRIPriv->mem;
+   intelScreen->cpp = gDRIPriv->cpp;
+
+   switch (gDRIPriv->bitsPerPixel) {
+   case 15: intelScreen->fbFormat = DV_PF_555; break;
+   case 16: intelScreen->fbFormat = DV_PF_565; break;
+   case 32: intelScreen->fbFormat = DV_PF_8888; break;
+   default:
+      /* Any other depth leaves fbFormat at the zero set by CALLOC. */
+      break;
+   }
+
+   intelUpdateScreenFromSAREA(intelScreen, sarea);
+
+   /* Debug aid, disabled by default. */
+   if (0)
+      intelPrintDRIInfo(intelScreen, sPriv, gDRIPriv);
+
+   if (!intelMapScreenRegions(sPriv)) {
+      fprintf(stderr,"\nERROR!  mapping regions\n");
+      goto fail_free;
+   }
+
+   intelScreen->drmMinor = sPriv->drmMinor;
+
+   /* Determine if IRQs are active? */
+   gp.param = I830_PARAM_IRQ_ACTIVE;
+   gp.value = &intelScreen->irq_active;
+
+   ret = drmCommandWriteRead( sPriv->fd, DRM_I830_GETPARAM,
+                              &gp, sizeof(gp));
+   if (ret) {
+      fprintf(stderr, "drmI830GetParam: %d\n", ret);
+      goto fail_unmap;
+   }
+
+   /* Determine if batchbuffers are allowed */
+   gp.param = I830_PARAM_ALLOW_BATCHBUFFER;
+   gp.value = &intelScreen->allow_batchbuffer;
+
+   ret = drmCommandWriteRead( sPriv->fd, DRM_I830_GETPARAM,
+                              &gp, sizeof(gp));
+   if (ret) {
+      fprintf(stderr, "drmI830GetParam: (%d) %d\n", gp.param, ret);
+      goto fail_unmap;
+   }
+
+   if (glx_enable_extension != NULL) {
+      (*glx_enable_extension)( psc, "GLX_SGI_swap_control" );
+      (*glx_enable_extension)( psc, "GLX_SGI_video_sync" );
+      (*glx_enable_extension)( psc, "GLX_MESA_swap_control" );
+      (*glx_enable_extension)( psc, "GLX_MESA_swap_frame_usage" );
+      (*glx_enable_extension)( psc, "GLX_SGI_make_current_read" );
+      (*glx_enable_extension)( psc, "GLX_MESA_allocate_memory" );
+      (*glx_enable_extension)( psc, "GLX_MESA_copy_sub_buffer" );
+   }
+
+   sPriv->psc->allocateMemory = (void *) intelAllocateMemoryMESA;
+   sPriv->psc->freeMemory     = (void *) intelFreeMemoryMESA;
+   sPriv->psc->memoryOffset   = (void *) intelGetMemoryOffsetMESA;
+
+   return GL_TRUE;
+
+fail_unmap:
+   /* Regions were mapped successfully above; undo that first. */
+   intelUnmapScreenRegions(intelScreen);
+fail_free:
+   driDestroyOptionInfo(&intelScreen->optionCache);
+   _mesa_free(intelScreen);
+   sPriv->private = NULL;
+   return GL_FALSE;
+}
+		
+		
+/**
+ * Tear down the driver-private screen state: unmap the screen regions,
+ * release the option cache, free the private struct and clear the
+ * pointer to it in sPriv.
+ */
+static void intelDestroyScreen(__DRIscreenPrivate *sPriv)
+{
+   intelScreenPrivate *screen = (intelScreenPrivate *) sPriv->private;
+
+   intelUnmapScreenRegions(screen);
+   driDestroyOptionInfo(&screen->optionCache);
+
+   /* Drop the reference first, then release the storage it pointed at. */
+   sPriv->private = NULL;
+   FREE(screen);
+}
+
+
+/**
+ * Create a DRI renderbuffer backed by \p region, install the span
+ * functions for it and attach it to \p fb under \p bufferName.
+ *
+ * \return GL_TRUE on success, GL_FALSE if the renderbuffer could not be
+ *         created (in which case \p fb is left untouched).
+ */
+static GLboolean
+intel_attach_renderbuffer(struct gl_framebuffer *fb, GLuint bufferName,
+                          GLenum format, const intelRegion *region,
+                          GLint cpp,
+                          __DRIdrawablePrivate *driDrawPriv,
+                          const __GLcontextModes *mesaVis)
+{
+   driRenderbuffer *drb = driNewRenderbuffer(format, region->map, cpp,
+                                             region->offset, region->pitch,
+                                             driDrawPriv);
+   if (!drb)
+      return GL_FALSE;
+
+   intelSetSpanFunctions(drb, mesaVis);
+   _mesa_add_renderbuffer(fb, bufferName, &drb->Base);
+   return GL_TRUE;
+}
+
+
+/**
+ * Create the framebuffer for a drawable (the CreateBuffer hook in intelAPI).
+ *
+ * A front-left colour buffer is always attached; back-left, depth and
+ * stencil buffers are attached according to \p mesaVis.  Depth and
+ * stencil both use the screen's depth region.  Stencil is done in
+ * software unless the visual has a 24-bit depth buffer, and the
+ * accumulation buffer is always a software buffer.
+ *
+ * \param isPixmap  pixmaps are not implemented; GL_TRUE always fails.
+ * \return GL_TRUE on success, GL_FALSE if the drawable is a pixmap or an
+ *         allocation failed.  On failure nothing is stored in
+ *         driDrawPriv->driverPrivate.
+ */
+static GLboolean intelCreateBuffer( __DRIscreenPrivate *driScrnPriv,
+				    __DRIdrawablePrivate *driDrawPriv,
+				    const __GLcontextModes *mesaVis,
+				    GLboolean isPixmap )
+{
+   intelScreenPrivate *screen = (intelScreenPrivate *) driScrnPriv->private;
+   struct gl_framebuffer *fb;
+   GLboolean swStencil;
+
+   if (isPixmap) {
+      return GL_FALSE; /* not implemented */
+   }
+
+   /* Hardware stencil is only used together with a 24-bit depth buffer. */
+   swStencil = (mesaVis->stencilBits > 0 &&
+                mesaVis->depthBits != 24);
+
+   fb = _mesa_create_framebuffer(mesaVis);
+   if (!fb)
+      return GL_FALSE;
+
+   if (!intel_attach_renderbuffer(fb, BUFFER_FRONT_LEFT, GL_RGBA,
+                                  &screen->front, screen->cpp,
+                                  driDrawPriv, mesaVis))
+      goto fail;
+
+   if (mesaVis->doubleBufferMode) {
+      if (!intel_attach_renderbuffer(fb, BUFFER_BACK_LEFT, GL_RGBA,
+                                     &screen->back, screen->cpp,
+                                     driDrawPriv, mesaVis))
+         goto fail;
+   }
+
+   if (mesaVis->depthBits == 16 || mesaVis->depthBits == 24) {
+      const GLenum depthFormat = (mesaVis->depthBits == 16)
+         ? GL_DEPTH_COMPONENT16 : GL_DEPTH_COMPONENT24;
+
+      if (!intel_attach_renderbuffer(fb, BUFFER_DEPTH, depthFormat,
+                                     &screen->depth, screen->cpp,
+                                     driDrawPriv, mesaVis))
+         goto fail;
+   }
+
+   if (mesaVis->stencilBits > 0 && !swStencil) {
+      /* Hardware stencil is attached on top of the depth region. */
+      if (!intel_attach_renderbuffer(fb, BUFFER_STENCIL,
+                                     GL_STENCIL_INDEX8_EXT,
+                                     &screen->depth, screen->cpp,
+                                     driDrawPriv, mesaVis))
+         goto fail;
+   }
+
+   _mesa_add_soft_renderbuffers(fb,
+                                GL_FALSE, /* color */
+                                GL_FALSE, /* depth */
+                                swStencil,
+                                mesaVis->accumRedBits > 0,
+                                GL_FALSE, /* alpha */
+                                GL_FALSE /* aux */);
+   driDrawPriv->driverPrivate = (void *) fb;
+
+   return GL_TRUE;
+
+fail:
+   /* Drops the only reference to the partially built framebuffer. */
+   _mesa_unreference_framebuffer(&fb);
+   return GL_FALSE;
+}
+
+/**
+ * Release the drawable's framebuffer (the DestroyBuffer hook in intelAPI)
+ * by dropping the reference held in driDrawPriv->driverPrivate.
+ */
+static void intelDestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
+{
+   GLframebuffer **fbSlot = (GLframebuffer **) &driDrawPriv->driverPrivate;
+
+   _mesa_unreference_framebuffer(fbSlot);
+}
+
+
+/**
+ * Report statistics about previous buffer swaps.
+ *
+ * The counters are copied from the context currently bound to the
+ * drawable.  The missed-swap usage is only computed when at least one
+ * swap was missed; otherwise it is reported as 0.0.
+ *
+ * \return 0 on success, -1 if either argument is NULL or the drawable has
+ *         no context with driver-private data.
+ */
+static int
+intelGetSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo )
+{
+   intelContextPtr ctxPriv;
+
+   if ( dPriv == NULL || sInfo == NULL ) {
+      return -1;
+   }
+   if ( dPriv->driContextPriv == NULL
+        || dPriv->driContextPriv->driverPrivate == NULL ) {
+      return -1;
+   }
+
+   ctxPriv = dPriv->driContextPriv->driverPrivate;
+
+   sInfo->swap_count = ctxPriv->swap_count;
+   sInfo->swap_ust = ctxPriv->swap_ust;
+   sInfo->swap_missed_count = ctxPriv->swap_missed_count;
+
+   if ( sInfo->swap_missed_count != 0 ) {
+      sInfo->swap_missed_usage =
+         driCalculateSwapUsage( dPriv, 0, ctxPriv->swap_missed_ust );
+   }
+   else {
+      sInfo->swap_missed_usage = 0.0;
+   }
+
+   return 0;
+}
+
+
+/* There are probably better ways to do this, such as an
+ * init-designated function to register chipids and createcontext
+ * functions.
+ */
+extern GLboolean i830CreateContext( const __GLcontextModes *mesaVis,
+				    __DRIcontextPrivate *driContextPriv,
+				    void *sharedContextPrivate);
+
+extern GLboolean i915CreateContext( const __GLcontextModes *mesaVis,
+				   __DRIcontextPrivate *driContextPriv,
+				   void *sharedContextPrivate);
+
+
+
+
+/**
+ * Create a rendering context (the CreateContext hook in intelAPI).
+ *
+ * Picks i830CreateContext or i915CreateContext from the screen's PCI
+ * device ID and forwards all arguments to it.
+ *
+ * \return the chosen constructor's result, or GL_FALSE (after printing a
+ *         message to stderr) when the device ID is not recognised.
+ */
+static GLboolean intelCreateContext( const __GLcontextModes *mesaVis,
+				   __DRIcontextPrivate *driContextPriv,
+				   void *sharedContextPrivate)
+{
+   __DRIscreenPrivate *screenPriv = driContextPriv->driScreenPriv;
+   intelScreenPrivate *screen = (intelScreenPrivate *) screenPriv->private;
+   GLboolean (*createFn)( const __GLcontextModes *,
+                          __DRIcontextPrivate *,
+                          void * ) = NULL;
+
+   switch (screen->deviceID) {
+   /* i830-class hardware */
+   case PCI_CHIP_845_G:
+   case PCI_CHIP_I830_M:
+   case PCI_CHIP_I855_GM:
+   case PCI_CHIP_I865_G:
+      createFn = i830CreateContext;
+      break;
+
+   /* i915-class hardware */
+   case PCI_CHIP_I915_G:
+   case PCI_CHIP_I915_GM:
+   case PCI_CHIP_I945_G:
+   case PCI_CHIP_I945_GM:
+   case PCI_CHIP_I945_GME:
+   case PCI_CHIP_G33_G:
+   case PCI_CHIP_Q35_G:
+   case PCI_CHIP_Q33_G:
+      createFn = i915CreateContext;
+      break;
+
+   default:
+      break;
+   }
+
+   if (createFn == NULL) {
+      fprintf(stderr, "Unrecognized deviceID %x\n", screen->deviceID);
+      return GL_FALSE;
+   }
+
+   return (*createFn)( mesaVis, driContextPriv, sharedContextPrivate );
+}
+
+
+/*
+ * Driver entry-point table.  It is passed to __driUtilCreateNewScreen() in
+ * __driCreateNewScreen_20050727() below.  The screen, buffer and
+ * swap-info hooks are implemented in this file; the context, swap and
+ * copy hooks are declared in intel_screen.h.  WaitForSBC and
+ * SwapBuffersMSC are left NULL, i.e. no implementation is supplied.
+ */
+static const struct __DriverAPIRec intelAPI = {
+   .InitDriver      = intelInitDriver,
+   .DestroyScreen   = intelDestroyScreen,
+   .CreateContext   = intelCreateContext,
+   .DestroyContext  = intelDestroyContext,
+   .CreateBuffer    = intelCreateBuffer,
+   .DestroyBuffer   = intelDestroyBuffer,
+   .SwapBuffers     = intelSwapBuffers,
+   .MakeCurrent     = intelMakeCurrent,
+   .UnbindContext   = intelUnbindContext,
+   .GetSwapInfo     = intelGetSwapInfo,
+   .GetMSC          = driGetMSC32,
+   .WaitForMSC      = driWaitForMSC32,
+   .WaitForSBC      = NULL,
+   .SwapBuffersMSC  = NULL,
+   .CopySubBuffer   = intelCopySubBuffer
+};
+
+
+/**
+ * Build the list of context modes (FBConfigs) the driver offers.
+ *
+ * Modes are generated for both GLX_TRUE_COLOR and GLX_DIRECT_COLOR from
+ * the combinations of three depth/stencil settings and three back-buffer
+ * settings.  Modes whose stencil depth differs from \p stencil_bits use a
+ * software stencil buffer and are marked GLX_SLOW_CONFIG.
+ *
+ * \param pixel_bits        colour buffer depth; 16 selects RGB565, any
+ *                          other value selects BGRA8888.
+ * \param depth_bits        hardware depth buffer depth.
+ * \param stencil_bits      hardware stencil depth (0 if none).
+ * \param have_back_buffer  whether double-buffered modes are generated.
+ * \return head of the mode list, or NULL if the list could not be
+ *         allocated or filled in (nothing is leaked in that case).
+ */
+static __GLcontextModes *
+intelFillInModes( unsigned pixel_bits, unsigned depth_bits,
+		 unsigned stencil_bits, GLboolean have_back_buffer )
+{
+   __GLcontextModes * modes;
+   __GLcontextModes * m;
+   unsigned num_modes;
+   unsigned depth_buffer_factor;
+   unsigned back_buffer_factor;
+   unsigned i;
+   GLenum fb_format;
+   GLenum fb_type;
+
+   /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't
+    * support pageflipping at all.
+    */
+   static const GLenum back_buffer_modes[] = {
+      GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML
+   };
+
+   /* Each visual class gets the same depth/stencil/back-buffer variants. */
+   static const int visual_types[] = {
+      GLX_TRUE_COLOR, GLX_DIRECT_COLOR
+   };
+
+   u_int8_t depth_bits_array[3];
+   u_int8_t stencil_bits_array[3];
+
+
+   depth_bits_array[0] = 0;
+   depth_bits_array[1] = depth_bits;
+   depth_bits_array[2] = depth_bits;
+
+   /* Just like with the accumulation buffer, always provide some modes
+    * with a stencil buffer.  It will be a sw fallback, but some apps won't
+    * care about that.
+    */
+   stencil_bits_array[0] = 0;
+   stencil_bits_array[1] = 0;
+   stencil_bits_array[2] = (stencil_bits == 0) ? 8 : stencil_bits;
+
+   depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 3 : 1;
+   back_buffer_factor  = (have_back_buffer) ? 3 : 1;
+
+   /* NOTE(review): the factor 4 presumably covers the two visual types
+    * times two variants generated inside driFillInModes -- confirm.
+    */
+   num_modes = depth_buffer_factor * back_buffer_factor * 4;
+
+   if ( pixel_bits == 16 ) {
+      fb_format = GL_RGB;
+      fb_type = GL_UNSIGNED_SHORT_5_6_5;
+   }
+   else {
+      fb_format = GL_BGRA;
+      fb_type = GL_UNSIGNED_INT_8_8_8_8_REV;
+   }
+
+   modes = (*dri_interface->createContextModes)( num_modes, sizeof( __GLcontextModes ) );
+   if ( modes == NULL ) {
+      fprintf( stderr, "[%s:%u] Error allocating FBConfigs!\n",
+               __func__, __LINE__ );
+      return NULL;
+   }
+
+   /* driFillInModes advances 'm' through the list as it fills entries. */
+   m = modes;
+   for ( i = 0 ; i < sizeof(visual_types) / sizeof(visual_types[0]) ; i++ ) {
+      if ( ! driFillInModes( & m, fb_format, fb_type,
+                             depth_bits_array, stencil_bits_array, depth_buffer_factor,
+                             back_buffer_modes, back_buffer_factor,
+                             visual_types[i] ) ) {
+         fprintf( stderr, "[%s:%u] Error creating FBConfig!\n",
+                  __func__, __LINE__ );
+         /* Do not leak the partially filled list. */
+         (*dri_interface->destroyContextModes)( modes );
+         return NULL;
+      }
+   }
+
+   /* Mark the visual as slow if there are "fake" stencil bits.
+    */
+   for ( m = modes ; m != NULL ; m = m->next ) {
+      if ( (m->stencilBits != 0) && (m->stencilBits != stencil_bits) ) {
+         m->visualRating = GLX_SLOW_CONFIG;
+      }
+   }
+
+   return modes;
+}
+
+
+/**
+ * Bootstrap entry point of the driver.
+ *
+ * libGL passes in everything it knows about the system.  The DDX, DRI and
+ * DRM versions are checked against the minimum versions this driver
+ * expects, the screen is created with the intelAPI hook table, and
+ * \c driver_modes is pointed at the list of \c __GLcontextModes the
+ * driver can support.
+ *
+ * \return A pointer to a \c __DRIscreenPrivate on success, or \c NULL on
+ *         failure.
+ */
+PUBLIC
+void * __driCreateNewScreen_20050727( __DRInativeDisplay *dpy, int scrn, __DRIscreen *psc,
+			     const __GLcontextModes * modes,
+			     const __DRIversion * ddx_version,
+			     const __DRIversion * dri_version,
+			     const __DRIversion * drm_version,
+			     const __DRIframebuffer * frame_buffer,
+			     drmAddress pSAREA, int fd,
+			     int internal_api_version,
+			     const __DRIinterfaceMethods * interface,
+			     __GLcontextModes ** driver_modes )
+{
+   static const __DRIversion ddx_expected = { 1, 5, 0 };
+   static const __DRIversion dri_expected = { 4, 0, 0 };
+   static const __DRIversion drm_expected = { 1, 4, 0 };
+   __DRIscreenPrivate *screenPriv;
+   I830DRIPtr devPriv;
+   GLboolean is16bpp;
+
+   dri_interface = interface;
+
+   if ( ! driCheckDriDdxDrmVersions2( "i915",
+                                      dri_version, & dri_expected,
+                                      ddx_version, & ddx_expected,
+                                      drm_version, & drm_expected ) ) {
+      return NULL;
+   }
+
+   screenPriv = __driUtilCreateNewScreen(dpy, scrn, psc, NULL,
+                                         ddx_version, dri_version, drm_version,
+                                         frame_buffer, pSAREA, fd,
+                                         internal_api_version, &intelAPI);
+   if ( screenPriv == NULL ) {
+      return NULL;
+   }
+
+   /* 2 bytes per pixel: 16-bit depth, no stencil; otherwise 24 + 8. */
+   devPriv = (I830DRIPtr) screenPriv->pDevPriv;
+   is16bpp = (devPriv->cpp == 2);
+   *driver_modes = intelFillInModes( devPriv->cpp * 8,
+                                     is16bpp ? 16 : 24,
+                                     is16bpp ? 0  : 8,
+                                     1 );
+
+   /* Calling driInitExtensions with a NULL context pointer does not enable
+    * any extension.  It only makes sure the dispatch offsets of every
+    * extension that *might* be enabled are known.  The offsets have to be
+    * known when _mesa_context_create is called, but the extensions cannot
+    * be enabled until a context pointer exists.
+    */
+   driInitExtensions( NULL, card_extensions, GL_FALSE );
+
+   return (void *) screenPriv;
+}
diff --git a/i915/intel_screen.h b/i915/intel_screen.h
new file mode 100644
index 0000000..24cfd9b
--- /dev/null
+++ b/i915/intel_screen.h
@@ -0,0 +1,112 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef _INTEL_INIT_H_
+#define _INTEL_INIT_H_
+
+#include <sys/time.h>
+#include "xmlconfig.h"
+#include "dri_util.h"
+#include "intel_rotate.h"
+#include "i830_common.h"
+
+
+/*
+ * Description of one memory region of the screen (front, back, depth,
+ * rotated or texture memory).  This roughly corresponds to a
+ * gl_renderbuffer (Mesa 6.4).
+ *
+ * handle and size are the arguments given to drmMap(); map holds the
+ * resulting address and is NULL while the region is unmapped.
+ */
+typedef struct {
+   drm_handle_t handle;
+   drmSize size;        /* region size in bytes */
+   char *map;           /* memory map */
+   int offset;          /* from start of video mem, in bytes */
+   int pitch;           /* row stride, in bytes */
+} intelRegion;
+
+/*
+ * Driver-private per-screen state.  Allocated in intelInitDriver() and
+ * stored in __DRIscreenPrivate::private; most fields are refreshed from
+ * the SAREA by intelUpdateScreenFromSAREA().
+ */
+typedef struct 
+{
+   /* Memory regions.  intelMapScreenRegions() maps front, back, depth
+    * and tex; it does not map the rotated region.
+    */
+   intelRegion front;
+   intelRegion back;
+   intelRegion rotated;
+   intelRegion depth;
+   intelRegion tex;
+   
+   int deviceID;    /* PCI chip ID, selects the i830 or i915 context code */
+   int width;
+   int height;
+   int mem;         /* unused */
+   
+   int cpp;         /* for front and back buffers */
+   int fbFormat;    /* DV_PF_* value chosen from the bits per pixel */
+
+   int logTextureGranularity;
+   
+   __DRIscreenPrivate *driScrnPriv;  /* back pointer to the DRI screen */
+   unsigned int sarea_priv_offset;   /* byte offset of drmI830Sarea in the SAREA */
+
+   int drmMinor;
+
+   /* Filled in by DRM_I830_GETPARAM queries in intelInitDriver(). */
+   int irq_active;
+   int allow_batchbuffer;
+
+   /* Built by matrix23Rotate() from the SAREA width, height and rotation. */
+   struct matrix23 rotMatrix;
+
+   int current_rotation;  /* 0, 90, 180 or 270 */
+   int rotatedWidth, rotatedHeight;
+
+   /**
+   * Configuration cache with default values for all contexts
+   */
+   driOptionCache optionCache;
+} intelScreenPrivate;
+
+
+extern GLboolean
+intelMapScreenRegions(__DRIscreenPrivate *sPriv);
+
+extern void
+intelUnmapScreenRegions(intelScreenPrivate *intelScreen);
+
+extern void
+intelUpdateScreenFromSAREA(intelScreenPrivate *intelScreen,
+                           drmI830Sarea *sarea);
+
+extern void
+intelDestroyContext(__DRIcontextPrivate *driContextPriv);
+
+extern GLboolean
+intelUnbindContext(__DRIcontextPrivate *driContextPriv);
+
+extern GLboolean
+intelMakeCurrent(__DRIcontextPrivate *driContextPriv,
+                 __DRIdrawablePrivate *driDrawPriv,
+                 __DRIdrawablePrivate *driReadPriv);
+
+extern void
+intelSwapBuffers(__DRIdrawablePrivate *dPriv);
+
+extern void
+intelCopySubBuffer( __DRIdrawablePrivate *dPriv, int x, int y, int w, int h );
+
+#endif
diff --git a/i915/intel_span.c b/i915/intel_span.c
new file mode 100644
index 0000000..c3ffc4b
--- /dev/null
+++ b/i915/intel_span.c
@@ -0,0 +1,258 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "macros.h"
+#include "mtypes.h"
+#include "colormac.h"
+
+#include "intel_screen.h"
+
+#include "intel_span.h"
+#include "intel_ioctl.h"
+#include "swrast/swrast.h"
+
+
+#define DBG 0
+/* Locals put in scope for the code included from spantmp.h; p is a 16-bit pixel temporary. */
+#define LOCAL_VARS						\
+   intelContextPtr intel = INTEL_CONTEXT(ctx);			\
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;		\
+   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
+   GLuint pitch = drb->pitch;					\
+   GLuint height = dPriv->h;					\
+   char *buf = (char *) drb->Base.Data +			\
+			dPriv->x * drb->cpp +			\
+			dPriv->y * pitch;			\
+   GLushort p;							\
+   (void) buf; (void) p
+/* Same locals without the pixel temporary, shared by the depth and stencil code. */
+#define LOCAL_DEPTH_VARS					\
+   intelContextPtr intel = INTEL_CONTEXT(ctx);			\
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;		\
+   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
+   GLuint pitch = drb->pitch;					\
+   GLuint height = dPriv->h;					\
+   char *buf = (char *) drb->Base.Data +			\
+			dPriv->x * drb->cpp +			\
+			dPriv->y * pitch
+
+#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS 
+/* Mono colors are packed as 565.  NOTE(review): not redefined before the 555 section -- confirm that is intended. */
+#define INIT_MONO_PIXEL(p,color)\
+	 p = INTEL_PACKCOLOR565(color[0],color[1],color[2])
+/* Mirror a row index within the drawable's height. */
+#define Y_FLIP(_y) (height - _y - 1)
+/* Per-span locking is empty; intelSpanRenderStart/Finish take and drop the lock around the whole pass. */
+#define HW_LOCK()
+
+#define HW_UNLOCK()
+
+/* 16 bit, 565 rgb color spanline and pixel functions.  Macro arguments are
+ * parenthesized, as in the depth macros, so expression arguments stay intact. */
+#define WRITE_RGBA( _x, _y, r, g, b, a )				\
+   *(GLushort *)(buf + (_x)*2 + (_y)*pitch) = ( (((int)(r) & 0xf8) << 8) |	\
+		                             (((int)(g) & 0xfc) << 3) |	\
+		                             (((int)(b) & 0xf8) >> 3))
+#define WRITE_PIXEL( _x, _y, p )  \
+   *(GLushort *)(buf + (_x)*2 + (_y)*pitch) = (p)
+
+#define READ_RGBA( rgba, _x, _y )				\
+do {								\
+   GLushort p = *(GLushort *)(buf + (_x)*2 + (_y)*pitch);	\
+   rgba[0] = (((p >> 11) & 0x1f) * 255) / 31;			\
+   rgba[1] = (((p >>  5) & 0x3f) * 255) / 63;			\
+   rgba[2] = (((p >>  0) & 0x1f) * 255) / 31;			\
+   rgba[3] = 255;						\
+} while(0)
+
+#define TAG(x) intel##x##_565
+#include "spantmp.h"
+
+/* 15 bit, 555 rgb color spanline and pixel functions.  Layout is R[14:10]
+ * G[9:5] B[4:0]; reads scale each 5-bit field to 0..255 like the 565 code. */
+#define WRITE_RGBA( _x, _y, r, g, b, a )			\
+   *(GLushort *)(buf + (_x)*2 + (_y)*pitch) = ((((r) & 0xf8) << 7) |	\
+		                            (((g) & 0xf8) << 2) |	\
+                         		    (((b) & 0xf8) >> 3))
+
+#define WRITE_PIXEL( _x, _y, p )  \
+   *(GLushort *)(buf + (_x)*2 + (_y)*pitch) = (p)
+
+#define READ_RGBA( rgba, _x, _y )				\
+do {								\
+   GLushort p = *(GLushort *)(buf + (_x)*2 + (_y)*pitch);	\
+   rgba[0] = (((p >> 10) & 0x1f) * 255) / 31;			\
+   rgba[1] = (((p >>  5) & 0x1f) * 255) / 31;			\
+   rgba[2] = (((p >>  0) & 0x1f) * 255) / 31;			\
+   rgba[3] = 255;						\
+} while(0)
+
+#define TAG(x) intel##x##_555
+#include "spantmp.h"
+
+/* 16 bit depthbuffer functions: each depth value is one GLushort, 2 bytes per pixel.
+ */
+#define WRITE_DEPTH( _x, _y, d ) \
+   *(GLushort *)(buf + (_x)*2 + (_y)*pitch)  = d;
+/* NOTE(review): both macros end in ';' -- presumably depthtmp.h tolerates that; confirm before changing. */
+#define READ_DEPTH( d, _x, _y )	\
+   d = *(GLushort *)(buf + (_x)*2 + (_y)*pitch);	 
+
+/* Generated function names get the intel..._z16 pattern from TAG. */
+#define TAG(x) intel##x##_z16
+#include "depthtmp.h"
+
+/* From here on the color code works on 32-bit pixels: the pixel temporary p becomes a GLuint. */
+#undef LOCAL_VARS
+#define LOCAL_VARS						\
+   intelContextPtr intel = INTEL_CONTEXT(ctx);			\
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;		\
+   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
+   GLuint pitch = drb->pitch;					\
+   GLuint height = dPriv->h;					\
+   char *buf = (char *)drb->Base.Data +				\
+			dPriv->x * drb->cpp +			\
+			dPriv->y * pitch;			\
+   GLuint p;							\
+   (void) buf; (void) p
+/* Mono colors are now packed as 8888, alpha included. */
+#undef INIT_MONO_PIXEL
+#define INIT_MONO_PIXEL(p,color)\
+	 p = INTEL_PACKCOLOR8888(color[0],color[1],color[2],color[3])
+
+/* 32 bit, 8888 argb color spanline and pixel functions.  Channels are
+ * widened to GLuint before shifting: alpha << 24 overflows a signed int. */
+#define WRITE_RGBA(_x, _y, r, g, b, a)				\
+    *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = (((GLuint)(r) << 16) |	\
+					      ((GLuint)(g) << 8)  |	\
+					      ((GLuint)(b) << 0)  |	\
+					      ((GLuint)(a) << 24) )
+
+#define WRITE_PIXEL(_x, _y, p)			\
+    *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = (p)
+
+
+#define READ_RGBA(rgba, _x, _y)					\
+    do {							\
+	GLuint p = *(GLuint *)(buf + (_x)*4 + (_y)*pitch);	\
+	rgba[0] = (p >> 16) & 0xff;				\
+	rgba[1] = (p >> 8)  & 0xff;				\
+	rgba[2] = (p >> 0)  & 0xff;				\
+	rgba[3] = (p >> 24) & 0xff;				\
+    } while (0)
+
+#define TAG(x) intel##x##_8888
+#include "spantmp.h"
+
+
+/* 24/8 bit interleaved depth/stencil functions: depth is the low 24 bits of
+ * each 32-bit word; a depth write keeps the top 8 bits as they were. */
+#define WRITE_DEPTH( _x, _y, d ) {			\
+   GLuint tmp = *(GLuint *)(buf + (_x)*4 + (_y)*pitch);	\
+   tmp &= 0xff000000;					\
+   tmp |= (d) & 0xffffff;				\
+   *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = tmp;		\
+}
+/* Read back only the 24 depth bits. */
+#define READ_DEPTH( d, _x, _y )		\
+   d = *(GLuint *)(buf + (_x)*4 + (_y)*pitch) & 0xffffff;
+
+/* Generated function names get the intel..._z24_s8 pattern from TAG. */
+#define TAG(x) intel##x##_z24_s8
+#include "depthtmp.h"
+/* Stencil is the top 8 bits of each 32-bit word; d is widened to GLuint so << 24 never reaches int's sign bit. */
+#define WRITE_STENCIL( _x, _y, d ) {			\
+   GLuint tmp = *(GLuint *)(buf + (_x)*4 + (_y)*pitch);	\
+   tmp &= 0xffffff;					\
+   tmp |= ((GLuint)(d) << 24);				\
+   *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = tmp;		\
+}
+/* Read the stencil byte back out of the top 8 bits. */
+#define READ_STENCIL( d, _x, _y )			\
+   d = *(GLuint *)(buf + (_x)*4 + (_y)*pitch) >> 24;
+
+#define TAG(x) intel##x##_z24_s8
+#include "stenciltmp.h"
+
+
+/* Move locking out to get reasonable span performance: the lock is taken
+ * once here for the whole span pass and dropped in intelSpanRenderFinish(). */
+void intelSpanRenderStart( GLcontext *ctx )
+{
+   intelContextPtr intel = INTEL_CONTEXT(ctx);
+
+   intelFlush(&intel->ctx);
+   LOCK_HARDWARE(intel);      /* paired with UNLOCK_HARDWARE in intelSpanRenderFinish() */
+   intelWaitForIdle(intel);   /* called with the lock held */
+}
+/* End of a span pass: flush swrast, then drop the lock taken in intelSpanRenderStart(). */
+void intelSpanRenderFinish( GLcontext *ctx )
+{
+   intelContextPtr intel = INTEL_CONTEXT( ctx );
+   _swrast_flush( ctx );
+   UNLOCK_HARDWARE( intel );
+}
+/* Hook the span start/finish callbacks into this context's swrast device driver table. */
+void intelInitSpanFuncs( GLcontext *ctx )
+{
+   struct swrast_device_driver *swrast_drv = _swrast_GetDeviceDriverReference(ctx);
+   swrast_drv->SpanRenderFinish = intelSpanRenderFinish;
+   swrast_drv->SpanRenderStart = intelSpanRenderStart;
+}
+
+
+/* Install the span read/write functions matching a renderbuffer's format;
+ * color buffers are further selected by the visual's channel widths.
+ * Unrecognised internal formats leave the renderbuffer untouched. */
+void
+intelSetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis)
+{
+   switch (drb->Base.InternalFormat) {
+   case GL_RGBA:
+      if (vis->redBits == 5 && vis->greenBits == 5 && vis->blueBits == 5)
+         intelInitPointers_555(&drb->Base);
+      else if (vis->redBits == 5 && vis->greenBits == 6 && vis->blueBits == 5)
+         intelInitPointers_565(&drb->Base);
+      else {
+         assert(vis->redBits == 8);
+         assert(vis->greenBits == 8);
+         assert(vis->blueBits == 8);
+         intelInitPointers_8888(&drb->Base);
+      }
+      break;
+   case GL_DEPTH_COMPONENT16:
+      intelInitDepthPointers_z16(&drb->Base);
+      break;
+   case GL_DEPTH_COMPONENT24:
+      intelInitDepthPointers_z24_s8(&drb->Base);
+      break;
+   case GL_STENCIL_INDEX8_EXT:
+      intelInitStencilPointers_z24_s8(&drb->Base);
+      break;
+   }
+}
diff --git a/i915/intel_span.h b/i915/intel_span.h
new file mode 100644
index 0000000..2d4f858
--- /dev/null
+++ b/i915/intel_span.h
@@ -0,0 +1,41 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef _INTEL_SPAN_H
+#define _INTEL_SPAN_H
+
+#include "drirenderbuffer.h"
+/* Hooks defined in intel_span.c: swrast span setup and per-renderbuffer span function selection. */
+extern void intelInitSpanFuncs( GLcontext *ctx );
+
+extern void intelSpanRenderFinish( GLcontext *ctx );
+extern void intelSpanRenderStart( GLcontext *ctx );
+
+extern void
+intelSetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis);
+
+#endif
diff --git a/i915/intel_state.c b/i915/intel_state.c
new file mode 100644
index 0000000..e5988a5
--- /dev/null
+++ b/i915/intel_state.c
@@ -0,0 +1,281 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "glheader.h"
+#include "context.h"
+#include "macros.h"
+#include "enums.h"
+#include "dd.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "swrast/swrast.h"
+
+/**
+ * Translate a GL comparison function enum into the hardware's
+ * COMPAREFUNC_* encoding.
+ *
+ * An unknown enum is reported on stderr and mapped to COMPAREFUNC_ALWAYS.
+ */
+int intel_translate_compare_func( GLenum func )
+{
+   switch (func) {
+   case GL_NEVER:    return COMPAREFUNC_NEVER;
+   case GL_LESS:     return COMPAREFUNC_LESS;
+   case GL_LEQUAL:   return COMPAREFUNC_LEQUAL;
+   case GL_GREATER:  return COMPAREFUNC_GREATER;
+   case GL_GEQUAL:   return COMPAREFUNC_GEQUAL;
+   case GL_NOTEQUAL: return COMPAREFUNC_NOTEQUAL;
+   case GL_EQUAL:    return COMPAREFUNC_EQUAL;
+   case GL_ALWAYS:   return COMPAREFUNC_ALWAYS;
+   default:
+      break;
+   }
+
+   fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func);
+   return COMPAREFUNC_ALWAYS;
+}
+
+/**
+ * Translate a GL stencil operation enum into the hardware's STENCILOP_*
+ * encoding.
+ *
+ * GL_INCR/GL_DECR map to the saturating STENCILOP_INCRSAT/DECRSAT codes,
+ * while the _WRAP variants map to the plain STENCILOP_INCR/DECR codes.
+ * Any enum not listed maps to STENCILOP_ZERO without a diagnostic.
+ */
+int intel_translate_stencil_op( GLenum op )
+{
+   switch (op) {
+   case GL_KEEP:      return STENCILOP_KEEP;
+   case GL_ZERO:      return STENCILOP_ZERO;
+   case GL_REPLACE:   return STENCILOP_REPLACE;
+   case GL_INCR:      return STENCILOP_INCRSAT;
+   case GL_DECR:      return STENCILOP_DECRSAT;
+   case GL_INCR_WRAP: return STENCILOP_INCR;
+   case GL_DECR_WRAP: return STENCILOP_DECR;
+   case GL_INVERT:    return STENCILOP_INVERT;
+   default:
+      return STENCILOP_ZERO;
+   }
+}
+
+/**
+ * Translate a GL blend factor enum into the hardware's BLENDFACT_*
+ * encoding.  An unknown enum is reported on stderr and mapped to
+ * BLENDFACT_ZERO.
+ */
+int intel_translate_blend_factor( GLenum factor )
+{
+   switch (factor) {
+   /* Fixed factors */
+   case GL_ZERO:                     return BLENDFACT_ZERO;
+   case GL_ONE:                      return BLENDFACT_ONE;
+
+   /* Source-derived factors */
+   case GL_SRC_COLOR:                return BLENDFACT_SRC_COLR;
+   case GL_ONE_MINUS_SRC_COLOR:      return BLENDFACT_INV_SRC_COLR;
+   case GL_SRC_ALPHA:                return BLENDFACT_SRC_ALPHA;
+   case GL_ONE_MINUS_SRC_ALPHA:      return BLENDFACT_INV_SRC_ALPHA;
+   case GL_SRC_ALPHA_SATURATE:       return BLENDFACT_SRC_ALPHA_SATURATE;
+
+   /* Destination-derived factors */
+   case GL_DST_COLOR:                return BLENDFACT_DST_COLR;
+   case GL_ONE_MINUS_DST_COLOR:      return BLENDFACT_INV_DST_COLR;
+   case GL_DST_ALPHA:                return BLENDFACT_DST_ALPHA;
+   case GL_ONE_MINUS_DST_ALPHA:      return BLENDFACT_INV_DST_ALPHA;
+
+   /* Blend-color (constant) factors */
+   case GL_CONSTANT_COLOR:           return BLENDFACT_CONST_COLOR;
+   case GL_ONE_MINUS_CONSTANT_COLOR: return BLENDFACT_INV_CONST_COLOR;
+   case GL_CONSTANT_ALPHA:           return BLENDFACT_CONST_ALPHA;
+   case GL_ONE_MINUS_CONSTANT_ALPHA: return BLENDFACT_INV_CONST_ALPHA;
+
+   default:
+      break;
+   }
+
+   fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, factor);
+   return BLENDFACT_ZERO;
+}
+/* Translate a GL logic op enum into the hardware's LOGICOP_* code; enums not listed map to LOGICOP_SET without a diagnostic. */
+int intel_translate_logic_op( GLenum opcode )
+{
+   switch(opcode) {
+   case GL_CLEAR: 
+      return LOGICOP_CLEAR; 
+   case GL_AND: 
+      return LOGICOP_AND; 
+   case GL_AND_REVERSE: 
+      return LOGICOP_AND_RVRSE; 
+   case GL_COPY: 
+      return LOGICOP_COPY; 
+   case GL_COPY_INVERTED: 
+      return LOGICOP_COPY_INV; 
+   case GL_AND_INVERTED: 
+      return LOGICOP_AND_INV; 
+   case GL_NOOP: 
+      return LOGICOP_NOOP; 
+   case GL_XOR: 
+      return LOGICOP_XOR; 
+   case GL_OR: 
+      return LOGICOP_OR; 
+   case GL_OR_INVERTED: 
+      return LOGICOP_OR_INV; 
+   case GL_NOR: 
+      return LOGICOP_NOR; 
+   case GL_EQUIV: 
+      return LOGICOP_EQUIV; 
+   case GL_INVERT: 
+      return LOGICOP_INV; 
+   case GL_OR_REVERSE: 
+      return LOGICOP_OR_RVRSE; 
+   case GL_NAND: 
+      return LOGICOP_NAND; 
+   case GL_SET: 
+      return LOGICOP_SET; 
+   default:
+      return LOGICOP_SET;   /* same code as the explicit GL_SET case above */
+   }
+}
+/* DrawBuffer driver hook: choose the front or back region for drawing and reading; `mode` is not consulted. */
+static void intelDrawBuffer(GLcontext *ctx, GLenum mode )
+{
+   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   int front = 0;
+ 
+   if (!ctx->DrawBuffer)
+      return;
+   /* Only a lone front-left or back-left target is handled; anything else turns the fallback on and returns. */
+   switch ( ctx->DrawBuffer->_ColorDrawBufferMask[0] ) {
+   case BUFFER_BIT_FRONT_LEFT:
+      front = 1;
+      FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE );
+      break;
+   case BUFFER_BIT_BACK_LEFT:
+      front = 0;
+      FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE );
+      break;
+   default:
+      FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_TRUE );
+      return;
+   }
+   /* When the SAREA reports page 1 as current, the front/back roles are swapped. */
+   if ( intel->sarea->pf_current_page == 1 ) 
+      front ^= 1;
+   
+   intelSetFrontClipRects( intel );
+
+   if (front) {
+      intel->drawRegion = &intel->intelScreen->front;
+      intel->readRegion = &intel->intelScreen->front;
+   } else {
+      intel->drawRegion = &intel->intelScreen->back;
+      intel->readRegion = &intel->intelScreen->back;
+   }
+   /* Hand the chosen region on through the context's vtbl hook. */
+   intel->vtbl.set_color_region( intel, intel->drawRegion );
+}
+/* ReadBuffer driver hook: deliberately empty; both arguments are ignored. */
+static void intelReadBuffer( GLcontext *ctx, GLenum mode )
+{
+   /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */
+}
+
+/* ClearColor driver hook: keep the clear color both as four clamped ubytes
+ * and as one pixel packed for the screen's framebuffer format. */
+static void intelClearColor(GLcontext *ctx, const GLfloat color[4])
+{
+   intelContextPtr intel = INTEL_CONTEXT(ctx);
+
+   CLAMPED_FLOAT_TO_UBYTE(intel->clear_red,   color[0]);
+   CLAMPED_FLOAT_TO_UBYTE(intel->clear_green, color[1]);
+   CLAMPED_FLOAT_TO_UBYTE(intel->clear_blue,  color[2]);
+   CLAMPED_FLOAT_TO_UBYTE(intel->clear_alpha, color[3]);
+
+   intel->ClearColor = INTEL_PACKCOLOR(intel->intelScreen->fbFormat,
+                                       intel->clear_red,
+                                       intel->clear_green,
+                                       intel->clear_blue,
+                                       intel->clear_alpha);
+}
+
+/* Rebuild intel->ViewportMatrix from ctx->Viewport._WindowMap: Y is negated and biased, Z is scaled by depth_scale. */
+static void intelCalcViewport( GLcontext *ctx )
+{
+   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   const GLfloat *src = ctx->Viewport._WindowMap.m;
+   GLfloat *dst = intel->ViewportMatrix.m;
+   GLint y_bias = 0;   /* stays 0 while no drawable is bound */
+
+   if (intel->driDrawable)
+      y_bias = intel->driDrawable->h + SUBPIXEL_Y;
+
+   /* The sub-pixel offsets are folded in here; they could also be applied
+    * through state variables.  See also intel_translate_vertex.
+    */
+   dst[MAT_SX] =   src[MAT_SX];
+   dst[MAT_TX] =   src[MAT_TX] + SUBPIXEL_X;
+   dst[MAT_SY] = - src[MAT_SY];
+   dst[MAT_TY] = - src[MAT_TY] + y_bias;
+   dst[MAT_SZ] =   src[MAT_SZ] * intel->depth_scale;
+   dst[MAT_TZ] =   src[MAT_TZ] * intel->depth_scale;
+}
+/* Viewport driver hook: x/y/width/height are ignored; intelCalcViewport() rebuilds the matrix from ctx. */
+static void intelViewport( GLcontext *ctx,
+			  GLint x, GLint y,
+			  GLsizei width, GLsizei height )
+{
+   intelCalcViewport( ctx );
+}
+/* DepthRange driver hook: nearval/farval are ignored; intelCalcViewport() rebuilds the matrix from ctx. */
+static void intelDepthRange( GLcontext *ctx,
+			    GLclampd nearval, GLclampd farval )
+{
+   intelCalcViewport( ctx );
+}
+
+/* RenderMode driver hook: fall back to swrast for select and feedback,
+ * i.e. the fallback is on whenever the mode is anything but GL_RENDER. */
+static void intelRenderMode( GLcontext *ctx, GLenum mode )
+{
+   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   FALLBACK( intel, INTEL_FALLBACK_RENDERMODE, (mode != GL_RENDER) );
+}
+
+/* Install this file's state-related callbacks into the driver function table. */
+void intelInitStateFuncs( struct dd_function_table *functions )
+{
+   functions->ClearColor = intelClearColor;
+   functions->DepthRange = intelDepthRange;
+   functions->DrawBuffer = intelDrawBuffer;
+   functions->ReadBuffer = intelReadBuffer;
+   functions->RenderMode = intelRenderMode;
+   functions->Viewport = intelViewport;
+}
+
diff --git a/i915/intel_tex.c b/i915/intel_tex.c
new file mode 100644
index 0000000..5bd2806
--- /dev/null
+++ b/i915/intel_tex.c
@@ -0,0 +1,877 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "mtypes.h"
+#include "imports.h"
+#include "macros.h"
+#include "simple_list.h"
+#include "enums.h"
+#include "image.h"
+#include "texstore.h"
+#include "texformat.h"
+#include "teximage.h"
+#include "texmem.h"
+#include "texobj.h"
+#include "swrast/swrast.h"
+
+#include "mm.h"
+
+#include "intel_screen.h"
+#include "intel_batchbuffer.h"
+#include "intel_context.h"
+#include "intel_tex.h"
+#include "intel_ioctl.h"
+
+/* Decide whether a texture image can use the application's own pixels (client storage) instead of a copy. */
+/* Returns 1 after pointing texImage at `pixels`; returns 0 when the caller must store the image normally. */
+static GLboolean
+intelValidateClientStorage( intelContextPtr intel, GLenum target,
+			    GLint internalFormat,
+			    GLint srcWidth, GLint srcHeight, 
+			    GLenum format, GLenum type,  const void *pixels,
+			    const struct gl_pixelstore_attrib *packing,
+			    struct gl_texture_object *texObj,
+			    struct gl_texture_image *texImage)
+
+{
+   GLcontext *ctx = &intel->ctx;
+   int texelBytes;
+   /* Disabled debug trace. */
+   if (0)
+      fprintf(stderr, "intformat %s format %s type %s\n",
+	      _mesa_lookup_enum_by_nr( internalFormat ),
+	      _mesa_lookup_enum_by_nr( format ),
+	      _mesa_lookup_enum_by_nr( type ));
+   /* Nothing to do unless the client-storage unpack flag is set. */
+   if (!ctx->Unpack.ClientStorage)
+      return 0;
+   /* Active image transfer state, compressed images and mipmap generation are all rejected. */
+   if (ctx->_ImageTransferState ||
+       texImage->IsCompressed ||
+       texObj->GenerateMipmap)
+      return 0;
+
+
+   /* This list is incomplete
+    */
+   switch ( internalFormat ) {
+   case GL_RGBA:
+      if ( format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV ) {
+	 texImage->TexFormat = &_mesa_texformat_argb8888;
+	 texelBytes = 4;
+      }
+      else
+	 return 0;
+      break;
+
+   case GL_RGB:
+      if ( format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 ) {
+	 texImage->TexFormat = &_mesa_texformat_rgb565;
+	 texelBytes = 2;
+      }
+      else
+	 return 0;
+      break;
+
+   case GL_YCBCR_MESA:
+      if ( format == GL_YCBCR_MESA && 
+	   type == GL_UNSIGNED_SHORT_8_8_REV_APPLE ) {
+	 texImage->TexFormat = &_mesa_texformat_ycbcr_rev;
+	 texelBytes = 2;
+      }
+      else if ( format == GL_YCBCR_MESA && 
+		(type == GL_UNSIGNED_SHORT_8_8_APPLE || 
+		 type == GL_UNSIGNED_BYTE)) {
+	 texImage->TexFormat = &_mesa_texformat_ycbcr;
+	 texelBytes = 2;
+      }
+      else
+	 return 0;
+      break;
+      
+	 
+   default:
+      return 0;
+   }
+   /* NOTE(review): TexFormat was already overwritten above even if one of the checks below rejects the image. */
+   /* Could deal with these packing issues, but currently don't:
+    */
+   if (packing->SkipPixels || 
+       packing->SkipRows || 
+       packing->SwapBytes ||
+       packing->LsbFirst) {
+      return 0;
+   }
+
+   {      
+      GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth,
+						  format, type);
+      /* Disabled debug trace. */
+      
+      if (0)
+	 fprintf(stderr, "%s: srcRowStride %d/%x\n", 
+		 __FUNCTION__, srcRowStride, srcRowStride);
+
+      /* Could check this later in upload, pitch restrictions could be
+       * relaxed, but would need to store the image pitch somewhere,
+       * as packing details might change before image is uploaded:
+       */
+      if (!intelIsAgpMemory( intel, pixels, srcHeight * srcRowStride ) ||
+	  (srcRowStride & 63))   /* row stride must be a multiple of 64 bytes */
+	 return 0;
+
+
+      /* Have validated that _mesa_transfer_teximage would be a straight
+       * memcpy at this point.  NOTE: future calls to TexSubImage will
+       * overwrite the client data.  This is explicitly mentioned in the
+       * extension spec.
+       */
+      texImage->Data = (void *)pixels;
+      texImage->IsClientData = GL_TRUE;
+      texImage->RowStride = srcRowStride / texelBytes;   /* stride in texels, not bytes */
+      return 1;
+   }
+}
+
+ 
+/* TexImage1D driver hook: swap the driver texture object out, store the image through Mesa, mark the level dirty. */
+static void intelTexImage1D( GLcontext *ctx, GLenum target, GLint level,
+			    GLint internalFormat,
+			    GLint width, GLint border,
+			    GLenum format, GLenum type, const GLvoid *pixels,
+			    const struct gl_pixelstore_attrib *packing,
+			    struct gl_texture_object *texObj,
+			    struct gl_texture_image *texImage )
+{
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+
+   assert(t);
+   intelFlush( ctx );
+   driSwapOutTextureObject( t );
+   /* 1D images never go through the client-storage check. */
+   texImage->IsClientData = GL_FALSE;
+
+   _mesa_store_teximage1d( ctx, target, level, internalFormat,
+			   width, border, format, type,
+			   pixels, packing, texObj, texImage );
+
+   t->dirty_images[0] |= (1 << level);
+}
+/* TexSubImage1D driver hook: swap the driver texture object out, then let Mesa store the sub-image. */
+static void intelTexSubImage1D( GLcontext *ctx, 
+			       GLenum target,
+			       GLint level,	
+			       GLint xoffset,
+				GLsizei width,
+			       GLenum format, GLenum type,
+			       const GLvoid *pixels,
+			       const struct gl_pixelstore_attrib *packing,
+			       struct gl_texture_object *texObj,
+			       struct gl_texture_image *texImage )
+{
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+
+   assert(t);
+   intelFlush( ctx );
+   driSwapOutTextureObject( t );
+   /* NOTE(review): unlike the 2D/3D sub-image hooks, no dirty_images bit is set here -- confirm intended. */
+   _mesa_store_texsubimage1d(ctx, target, level, xoffset, width, 
+			     format, type, pixels, packing, texObj,
+			     texImage);
+}
+
+
+/* TexImage2D driver hook; handles 2D, CUBE, RECT.  Uses the application's
+ * pixels in place when client storage validates, else copies through Mesa. */
+static void intelTexImage2D( GLcontext *ctx, GLenum target, GLint level,
+			    GLint internalFormat,
+			    GLint width, GLint height, GLint border,
+			    GLenum format, GLenum type, const GLvoid *pixels,
+			    const struct gl_pixelstore_attrib *packing,
+			    struct gl_texture_object *texObj,
+			    struct gl_texture_image *texImage )
+{
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+   GLuint face;   /* index into t->dirty_images[]: 0..5 for cube faces, 0 otherwise */
+
+   /* which cube face or ordinary 2D image */
+   switch (target) {
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+      ASSERT(face < 6);
+      break;
+   default:
+      face = 0;
+   }
+
+   assert(t);
+   intelFlush( ctx );
+   driSwapOutTextureObject( t );
+   texImage->IsClientData = GL_FALSE;
+   /* Try client storage first; only the copy path below sets the level's dirty bit. */
+   if (intelValidateClientStorage( INTEL_CONTEXT(ctx), target, 
+				   internalFormat, 
+				   width, height, 
+				   format, type, pixels, 
+				   packing, texObj, texImage)) {
+      if (INTEL_DEBUG & DEBUG_TEXTURE)
+	 fprintf(stderr, "%s: Using client storage\n", __FUNCTION__); 
+   }
+   else {
+      _mesa_store_teximage2d( ctx, target, level, internalFormat,
+			      width, height, border, format, type,
+			      pixels, packing, texObj, texImage );
+
+      t->dirty_images[face] |= (1 << level);
+   }
+}
+/* TexSubImage2D driver hook (2D and cube faces): skips the copy when the data already sits in a client-storage image. */
+static void intelTexSubImage2D( GLcontext *ctx, 
+			       GLenum target,
+			       GLint level,	
+			       GLint xoffset, GLint yoffset,
+			       GLsizei width, GLsizei height,
+			       GLenum format, GLenum type,
+			       const GLvoid *pixels,
+			       const struct gl_pixelstore_attrib *packing,
+			       struct gl_texture_object *texObj,
+			       struct gl_texture_image *texImage )
+{
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+   GLuint face;   /* index into t->dirty_images[]: 0..5 for cube faces, 0 otherwise */
+
+   /* which cube face or ordinary 2D image */
+   switch (target) {
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+      ASSERT(face < 6);
+      break;
+   default:
+      face = 0;
+   }
+   /* `pixels` pointing at texel (xoffset, yoffset) of the client-storage image means the data is already in place. */
+   if (texImage->IsClientData &&
+       (char *)pixels == (char *)texImage->Data + 
+       ((xoffset + yoffset * texImage->RowStride) * 
+	texImage->TexFormat->TexelBytes)) {
+
+      /* Notification only - no upload required */
+   }
+   else {
+      assert( t ); /* this _should_ be true */
+      intelFlush( ctx );
+      driSwapOutTextureObject( t );
+
+      _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width, 
+				height, format, type, pixels, packing, texObj,
+				texImage);
+
+      t->dirty_images[face] |= (1 << level);
+   }
+}
+/* CompressedTexImage2D driver hook (2D and cube faces): always stores through Mesa, never client storage. */
+static void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
+                              GLint internalFormat,
+                              GLint width, GLint height, GLint border,
+                              GLsizei imageSize, const GLvoid *data,
+                              struct gl_texture_object *texObj,
+                              struct gl_texture_image *texImage )
+{
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+   GLuint face;   /* index into t->dirty_images[]: 0..5 for cube faces, 0 otherwise */
+
+   /* which cube face or ordinary 2D image */
+   switch (target) {
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+      ASSERT(face < 6);
+      break;
+   default:
+      face = 0;
+   }
+
+   assert(t);
+   intelFlush( ctx );
+   
+   driSwapOutTextureObject( t );
+   texImage->IsClientData = GL_FALSE;
+
+   if (INTEL_DEBUG & DEBUG_TEXTURE)
+     fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__); 
+   
+   _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width,
+				     height, border, imageSize, data, texObj, texImage);
+   
+   t->dirty_images[face] |= (1 << level);
+}
+
+/* CompressedTexSubImage2D driver hook (2D and cube faces): swap out, store through Mesa, mark the level dirty. */
+static void intelCompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
+                                 GLint xoffset, GLint yoffset,
+                                 GLsizei width, GLsizei height,
+                                 GLenum format,
+                                 GLsizei imageSize, const GLvoid *data,
+                                 struct gl_texture_object *texObj,
+                                 struct gl_texture_image *texImage )
+{
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+   GLuint face;   /* index into t->dirty_images[]: 0..5 for cube faces, 0 otherwise */
+
+
+   /* which cube face or ordinary 2D image */
+   switch (target) {
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+      ASSERT(face < 6);
+      break;
+   default:
+      face = 0;
+   }
+
+   assert( t ); /* this _should_ be true */
+   intelFlush( ctx );
+   driSwapOutTextureObject( t );
+   
+   _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
+					height, format, imageSize, data, texObj, texImage);
+   
+   t->dirty_images[face] |= (1 << level);
+}
+
+/* TexImage3D driver hook: swap the driver texture object out, store the image through Mesa, mark the level dirty. */
+static void intelTexImage3D( GLcontext *ctx, GLenum target, GLint level,
+                            GLint internalFormat,
+                            GLint width, GLint height, GLint depth,
+                            GLint border,
+                            GLenum format, GLenum type, const GLvoid *pixels,
+                            const struct gl_pixelstore_attrib *packing,
+                            struct gl_texture_object *texObj,
+                            struct gl_texture_image *texImage )
+{
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+
+   assert(t);
+   driSwapOutTextureObject( t );   /* NOTE(review): the 1D/2D hooks call intelFlush() first -- confirm the omission */
+   texImage->IsClientData = GL_FALSE;
+
+   /* Honour the caller's unpack state instead of reaching into ctx->Unpack,
+    * as the 1D and 2D TexImage hooks in this file do. */
+   _mesa_store_teximage3d(ctx, target, level, internalFormat,
+			  width, height, depth, border, format, type,
+			  pixels, packing, texObj, texImage);
+   t->dirty_images[0] |= (1 << level);
+}
+
+/* TexSubImage3D driver hook: swap the driver texture object out, store the sub-image through Mesa, mark the level dirty. */
+static void
+intelTexSubImage3D( GLcontext *ctx, GLenum target, GLint level,
+                   GLint xoffset, GLint yoffset, GLint zoffset,
+                   GLsizei width, GLsizei height, GLsizei depth,
+                   GLenum format, GLenum type,
+                   const GLvoid *pixels,
+                   const struct gl_pixelstore_attrib *packing,
+                   struct gl_texture_object *texObj,
+                   struct gl_texture_image *texImage )
+{
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+
+   assert( t ); /* this _should_ be true */
+   driSwapOutTextureObject( t );   /* NOTE(review): the 1D/2D hooks call intelFlush() first -- confirm the omission */
+
+   _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset,
+                             width, height, depth,
+                             format, type, pixels, packing, texObj, texImage);
+
+   t->dirty_images[0] |= (1 << level);
+}
+
+
+
+
+/* ctx->Driver.DeleteTexture hook: destroy the driver-side texture object (when
+ * one is attached), then let core Mesa free the mipmap images and the object. */
+static void intelDeleteTexture( GLcontext *ctx, struct gl_texture_object *tObj )
+{
+   driTextureObject *driTex = (driTextureObject *) tObj->DriverData;
+
+   if ( driTex ) {
+      intelFlush( ctx ); /* flush before the driver object is destroyed */
+      driDestroyTextureObject( driTex );
+   }
+   _mesa_delete_texture_object(ctx, tObj);
+}
+
+
+/* ctx->Driver.ChooseTextureFormat hook.
+ * Picks the Mesa texture format used to store an image from the requested GL
+ * internalFormat and, for a few cases, the client format/type.  Generic and
+ * wide RGB/RGBA requests get ARGB8888 only when the screen's cpp is 4 and
+ * tex.size exceeds 4*1024*1024; otherwise a 16-bit format is used.
+ * Returns NULL, after printing a diagnostic, for an unhandled internalFormat.
+ */
+static const struct gl_texture_format *
+intelChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
+			 GLenum format, GLenum type )
+{
+   intelContextPtr intel = INTEL_CONTEXT( ctx );
+   const GLboolean do32bpt = ( intel->intelScreen->cpp == 4 &&
+			       intel->intelScreen->tex.size > 4*1024*1024);
+   /* Defaults for RGBA-like and RGB-like requests without an exact match. */
+   const struct gl_texture_format *rgbaDefault =
+      do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444;
+   const struct gl_texture_format *rgbDefault =
+      do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_rgb565;
+
+   switch ( internalFormat ) {
+   case 4:
+   case GL_RGBA:
+   case GL_COMPRESSED_RGBA:
+      /* Keep the client's packed BGRA layout when it maps onto a format. */
+      if ( format == GL_BGRA ) {
+         switch ( type ) {
+         case GL_UNSIGNED_INT_8_8_8_8_REV:   return &_mesa_texformat_argb8888;
+         case GL_UNSIGNED_SHORT_4_4_4_4_REV: return &_mesa_texformat_argb4444;
+         case GL_UNSIGNED_SHORT_1_5_5_5_REV: return &_mesa_texformat_argb1555;
+         }
+      }
+      return rgbaDefault;
+
+   case 3:
+   case GL_RGB:
+   case GL_COMPRESSED_RGB:
+      if ( format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 )
+         return &_mesa_texformat_rgb565;
+      return rgbDefault;
+
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+      return rgbaDefault;
+
+   case GL_RGBA4:
+   case GL_RGBA2:
+      return &_mesa_texformat_argb4444;
+
+   case GL_RGB5_A1:
+      return &_mesa_texformat_argb1555;
+
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      return rgbDefault;
+
+   case GL_RGB5:
+   case GL_RGB4:
+   case GL_R3_G3_B2:
+      return &_mesa_texformat_rgb565;
+
+   case GL_ALPHA:
+   case GL_ALPHA4:
+   case GL_ALPHA8:
+   case GL_ALPHA12:
+   case GL_ALPHA16:
+   case GL_COMPRESSED_ALPHA:
+      return &_mesa_texformat_a8;
+
+   case 1:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE4:
+   case GL_LUMINANCE8:
+   case GL_LUMINANCE12:
+   case GL_LUMINANCE16:
+   case GL_COMPRESSED_LUMINANCE:
+      return &_mesa_texformat_l8;
+
+   case 2:
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE4_ALPHA4:
+   case GL_LUMINANCE6_ALPHA2:
+   case GL_LUMINANCE8_ALPHA8:
+   case GL_LUMINANCE12_ALPHA4:
+   case GL_LUMINANCE12_ALPHA12:
+   case GL_LUMINANCE16_ALPHA16:
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
+      return &_mesa_texformat_al88;
+
+   case GL_INTENSITY:
+   case GL_INTENSITY4:
+   case GL_INTENSITY8:
+   case GL_INTENSITY12:
+   case GL_INTENSITY16:
+   case GL_COMPRESSED_INTENSITY:
+      return &_mesa_texformat_i8;
+
+   case GL_YCBCR_MESA:
+      return (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE)
+         ? &_mesa_texformat_ycbcr : &_mesa_texformat_ycbcr_rev;
+
+   case GL_COMPRESSED_RGB_FXT1_3DFX:
+      return &_mesa_texformat_rgb_fxt1;
+   case GL_COMPRESSED_RGBA_FXT1_3DFX:
+      return &_mesa_texformat_rgba_fxt1;
+
+   case GL_RGB_S3TC:
+   case GL_RGB4_S3TC:
+   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+      return &_mesa_texformat_rgb_dxt1;
+   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+      return &_mesa_texformat_rgba_dxt1;
+   case GL_RGBA_S3TC:
+   case GL_RGBA4_S3TC:
+   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+      return &_mesa_texformat_rgba_dxt3;
+   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+      return &_mesa_texformat_rgba_dxt5;
+
+   case GL_DEPTH_COMPONENT:
+   case GL_DEPTH_COMPONENT16:
+   case GL_DEPTH_COMPONENT24:
+   case GL_DEPTH_COMPONENT32:
+      return &_mesa_texformat_z16;
+
+   default:
+      fprintf(stderr, "unexpected texture format %s in %s\n", 
+	      _mesa_lookup_enum_by_nr(internalFormat), __FUNCTION__);
+      break;
+   }
+   return NULL;
+}
+
+
+
+/* Forget texture object `t` in this context: raise intel->dirtyAge to t->age
+ * when that is larger and clear every CurrentTexObj slot that still points
+ * at `t`.  Does nothing when `intel` is NULL. */
+void intelDestroyTexObj(intelContextPtr intel, intelTextureObjectPtr t)
+{
+   unsigned unit;
+
+   if ( !intel )
+      return;
+   if ( intel->dirtyAge < t->age )
+      intel->dirtyAge = t->age;
+   for ( unit = 0 ; unit < MAX_TEXTURE_UNITS ; unit++ )
+      if ( intel->CurrentTexObj[ unit ] == t )
+	 intel->CurrentTexObj[ unit ] = NULL;
+}
+
+
+
+/* Upload an image from mesa's internal copy to t->BufAddr + offset.  Image may
+ * be 1D, 2D or 3D.  Cubemaps are expanded elsewhere.  intelUploadTexImages
+ * calls this between LOCK_HARDWARE and UNLOCK_HARDWARE. */
+static void intelUploadTexImage( intelContextPtr intel,
+				 intelTextureObjectPtr t,
+				 const struct gl_texture_image *image,
+				 const GLuint offset )
+{
+   /* Silently skip levels that have no image or no image data. */
+   if (!image || !image->Data) 
+      return;
+
+   if (image->Depth == 1 && image->IsClientData) { /* path 1: blit from client data */
+      if (INTEL_DEBUG & DEBUG_TEXTURE)
+	 fprintf(stderr, "Blit uploading\n");
+
+      /* Do it with a blit.
+       */
+      intelEmitCopyBlitLocked( intel,
+			       image->TexFormat->TexelBytes,
+			       image->RowStride, /* ? */
+			       intelGetMemoryOffsetMESA( NULL, 0, image->Data ),
+			       t->Pitch / image->TexFormat->TexelBytes,
+			       intelGetMemoryOffsetMESA( NULL, 0, t->BufAddr + offset ),
+			       0, 0,
+			       0, 0,
+			       image->Width,
+			       image->Height);
+   }
+   else if (image->IsCompressed) { /* path 2: CPU copy of compressed block rows */
+      GLuint row_len = 0; /* stays 0 for an unsupported format, which skips the copy */
+      GLubyte *dst = (GLubyte *)(t->BufAddr + offset);
+      GLubyte *src = (GLubyte *)image->Data;
+      GLuint j;
+
+      /* must always copy whole blocks (8/16 bytes) */
+      switch (image->InternalFormat) {
+	case GL_COMPRESSED_RGB_FXT1_3DFX:
+	case GL_COMPRESSED_RGBA_FXT1_3DFX:
+	case GL_RGB_S3TC:
+	case GL_RGB4_S3TC:
+	case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+	case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+	  row_len = (image->Width * 2 + 7) & ~7; /* Width * 2 rounded up to a multiple of 8 */
+	  break;
+	case GL_RGBA_S3TC:
+	case GL_RGBA4_S3TC:
+	case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+	case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+	  row_len = (image->Width * 4 + 15) & ~15; /* Width * 4 rounded up to a multiple of 16 */
+	  break;
+	default:
+	  fprintf(stderr,"Internal Compressed format not supported %d\n", image->InternalFormat);
+	  break;
+      }
+
+      if (INTEL_DEBUG & DEBUG_TEXTURE)
+	 fprintf(stderr, 
+		 "Upload image %dx%dx%d offset %xm row_len %x "
+		 "pitch %x depth_pitch %x\n",
+		 image->Width, image->Height, image->Depth, offset,
+		 row_len, t->Pitch, t->depth_pitch);
+
+      if (row_len) {
+	 /* (Height + 3) / 4 iterations: presumably one per row of 4-line blocks. */
+	 for (j = 0 ; j < (image->Height + 3)/4 ; j++, dst += (t->Pitch)) {
+	   __memcpy(dst, src, row_len );
+	   src += row_len;
+	 }
+      }
+   }
+   /* Time for another vtbl entry:
+    */
+   else if (intel->intelScreen->deviceID == PCI_CHIP_I945_G ||
+            intel->intelScreen->deviceID == PCI_CHIP_I945_GM ||
+            intel->intelScreen->deviceID == PCI_CHIP_I945_GME ||
+            intel->intelScreen->deviceID == PCI_CHIP_G33_G ||
+            intel->intelScreen->deviceID == PCI_CHIP_Q33_G ||
+            intel->intelScreen->deviceID == PCI_CHIP_Q35_G) { /* path 3: listed chips */
+      GLuint row_len = image->Width * image->TexFormat->TexelBytes;
+      GLubyte *dst = (GLubyte *)(t->BufAddr + offset);
+      GLubyte *src = (GLubyte *)image->Data;
+      GLuint d, j;
+
+      if (INTEL_DEBUG & DEBUG_TEXTURE)
+	 fprintf(stderr, 
+		 "Upload image %dx%dx%d offset %xm row_len %x "
+		 "pitch %x depth_pitch %x\n",
+		 image->Width, image->Height, image->Depth, offset,
+		 row_len, t->Pitch, t->depth_pitch);
+
+      if (row_len == t->Pitch) { /* source rows already match the pitch: one copy */
+	 memcpy( dst, src, row_len * image->Height * image->Depth );
+      }
+      else { 
+	 GLuint x = 0, y = 0; /* byte column / row at which the current slice starts */
+
+	 for (d = 0 ; d < image->Depth ; d++) {
+	    GLubyte *dst0 = dst + x + y * t->Pitch;
+
+	    for (j = 0 ; j < image->Height ; j++) {
+	       __memcpy(dst0, src, row_len );
+	       src += row_len;
+	       dst0 += t->Pitch;
+	    }
+
+	    x += MIN2(4, row_len); /* Guess: 4 byte minimum alignment */
+	    if (x > t->Pitch) {
+	       x = 0;
+	       y += image->Height;
+	    }
+	 }
+      }
+      /* NOTE(review): unlike the default path below, depth_pitch is not used here. */
+   }
+   else { /* path 4: every other chip; depth slices are t->depth_pitch bytes apart */
+      GLuint row_len = image->Width * image->TexFormat->TexelBytes;
+      GLubyte *dst = (GLubyte *)(t->BufAddr + offset);
+      GLubyte *src = (GLubyte *)image->Data;
+      GLuint d, j;
+
+      if (INTEL_DEBUG & DEBUG_TEXTURE)
+	 fprintf(stderr, 
+		 "Upload image %dx%dx%d offset %xm row_len %x "
+		 "pitch %x depth_pitch %x\n",
+		 image->Width, image->Height, image->Depth, offset,
+		 row_len, t->Pitch, t->depth_pitch);
+
+      if (row_len == t->Pitch) { /* whole slice in one copy */
+	 for (d = 0; d < image->Depth; d++) {
+	    memcpy( dst, src, t->Pitch * image->Height );
+	    dst += t->depth_pitch;
+	    src += row_len * image->Height;
+	 }
+      }
+      else { 
+	 for (d = 0 ; d < image->Depth ; d++) {
+	    for (j = 0 ; j < image->Height ; j++) {
+	       __memcpy(dst, src, row_len );
+	       src += row_len;
+	       dst += t->Pitch;
+	    }
+
+	    dst += t->depth_pitch - (t->Pitch * image->Height); /* skip to the next slice */
+	 }
+      }
+   }
+}
+
+
+
+/* Make the images of `face` of texture `t` available to the hardware.
+ *
+ * A single-level client-data texture whose pitch is a multiple of 4 is
+ * textured from in place.  Otherwise texture memory is allocated on first use
+ * and every level flagged in dirty_images[face] is copied into it.
+ * Returns GL_TRUE on success, GL_FALSE if driAllocateTexture() fails.
+ *
+ * t->image[face][] is indexed relative to firstLevel (see the upload loop), so
+ * the first image is read from slot 0 rather than slot firstLevel.
+ */
+int intelUploadTexImages( intelContextPtr intel, 
+			  intelTextureObjectPtr t,
+			  GLuint face)
+{
+   const int numLevels = t->base.lastLevel - t->base.firstLevel + 1;
+   const struct gl_texture_image *firstImage = t->image[face][0].image;
+   int pitch = firstImage->RowStride * firstImage->TexFormat->TexelBytes;
+
+   /* Can we texture out of the existing client data? */
+   if ( numLevels == 1 && firstImage->IsClientData && (pitch & 3) == 0) {
+      if (INTEL_DEBUG & DEBUG_TEXTURE)
+	 fprintf(stderr, "AGP texturing from client memory\n");
+
+      t->TextureOffset = intelAgpOffsetFromVirtual( intel, firstImage->Data );
+      t->BufAddr = 0;
+      t->dirty = ~0;
+      return GL_TRUE;
+   }
+
+   if (INTEL_DEBUG & DEBUG_TEXTURE) 
+      fprintf(stderr, "Uploading client data to agp\n");
+
+   /* Everything below runs with the hardware lock held. */
+   INTEL_FIREVERTICES( intel );
+   LOCK_HARDWARE( intel );
+
+   if ( t->base.memBlock == NULL ) {
+      const int heap = driAllocateTexture( intel->texture_heaps, intel->nr_heaps,
+					   (driTextureObject *) t );
+      if ( heap == -1 ) {
+	 UNLOCK_HARDWARE( intel );
+	 return GL_FALSE;
+      }
+
+      /* Set the base offset of the texture image */
+      t->BufAddr = (GLubyte *) (intel->intelScreen->tex.map + 
+				t->base.memBlock->ofs);
+      t->TextureOffset = intel->intelScreen->tex.offset + t->base.memBlock->ofs;
+      t->dirty = ~0;
+   }
+
+   /* Let the world know we've used this memory recently. */
+   driUpdateTextureLRU( (driTextureObject *) t );
+
+   /* Upload any images that are new */
+   if (t->base.dirty_images[face]) {
+      int i;
+
+      intelWaitForIdle( intel );
+
+      for (i = 0 ; i < numLevels ; i++) { 
+	 const int level = i + t->base.firstLevel;
+
+	 if (t->base.dirty_images[face] & (1<<level)) {
+	    const struct gl_texture_image *image = t->image[face][i].image;
+	    GLuint offset = t->image[face][i].offset;
+
+	    if (INTEL_DEBUG & DEBUG_TEXTURE)
+	       fprintf(stderr, "upload level %d, offset %x\n", level, offset);
+
+	    intelUploadTexImage( intel, t, image, offset );
+	 }
+      }
+      t->base.dirty_images[face] = 0;
+      intel->perf_boxes |= I830_BOX_TEXTURE_LOAD;
+   }
+
+   UNLOCK_HARDWARE( intel );
+   return GL_TRUE;
+}
+
+/**
+ * Allocate a new texture object; called via ctx->Driver.NewTextureObject,
+ * including during context creation for the default texture objects.
+ * Note: we could use containment here to 'derive' the driver-specific
+ * texture object from the core mesa gl_texture_object.  Not done at this time.
+ * vtbl.alloc_tex_obj() only runs when core Mesa returned a non-NULL object.
+ */
+static struct gl_texture_object *
+intelNewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
+{
+   struct gl_texture_object *obj = _mesa_new_texture_object(ctx, name, target);
+   if (obj != NULL)
+      INTEL_CONTEXT(ctx)->vtbl.alloc_tex_obj( obj );
+   return obj;
+}
+
+
+/* Fill in the texture-related entries of the device-driver function table. */
+void intelInitTextureFuncs( struct dd_function_table *functions )
+{
+   functions->NewTextureObject          = intelNewTextureObject;
+   functions->ChooseTextureFormat       = intelChooseTextureFormat;
+   functions->TexImage1D                = intelTexImage1D;
+   functions->TexImage2D                = intelTexImage2D;
+   functions->TexImage3D                = intelTexImage3D;
+   functions->TexSubImage1D             = intelTexSubImage1D;
+   functions->TexSubImage2D             = intelTexSubImage2D;
+   functions->TexSubImage3D             = intelTexSubImage3D;
+   functions->CopyTexImage1D            = _swrast_copy_teximage1d;
+   functions->CopyTexImage2D            = _swrast_copy_teximage2d;
+   functions->CopyTexSubImage1D         = _swrast_copy_texsubimage1d;
+   functions->CopyTexSubImage2D         = _swrast_copy_texsubimage2d;
+   functions->CopyTexSubImage3D         = _swrast_copy_texsubimage3d;
+   functions->DeleteTexture             = intelDeleteTexture;
+   functions->UpdateTexturePalette      = NULL;
+   functions->IsTextureResident         = driIsTextureResident;
+   functions->TestProxyTexImage         = _mesa_test_proxy_teximage;
+   functions->CompressedTexImage2D      = intelCompressedTexImage2D;
+   functions->CompressedTexSubImage2D   = intelCompressedTexSubImage2D;
+}
diff --git a/i915/intel_tex.h b/i915/intel_tex.h
new file mode 100644
index 0000000..9b7e550
--- /dev/null
+++ b/i915/intel_tex.h
@@ -0,0 +1,45 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTELTEX_INC
+#define INTELTEX_INC
+
+#include "mtypes.h"
+#include "intel_context.h"
+#include "texmem.h"
+
+/* Install the driver's texture hooks into a dd_function_table. */
+void intelInitTextureFuncs( struct dd_function_table *functions );
+/* Drop a context's references to texture object t (no-op when intel is NULL). */
+void intelDestroyTexObj( intelContextPtr intel, intelTextureObjectPtr t );
+int intelUploadTexImages( intelContextPtr intel, intelTextureObjectPtr t,
+			  GLuint face ); /* returns GL_TRUE or GL_FALSE */
+/* Kick every texture out of heap and rebuild its memory manager for size. */
+GLboolean 
+intel_driReinitTextureHeap( driTexHeap *heap,
+			    unsigned size );
+#endif /* INTELTEX_INC */
diff --git a/i915/intel_texmem.c b/i915/intel_texmem.c
new file mode 100644
index 0000000..09beec9
--- /dev/null
+++ b/i915/intel_texmem.c
@@ -0,0 +1,72 @@
+#include "texmem.h"
+#include "simple_list.h"
+#include "imports.h"
+#include "macros.h"
+
+#include "intel_tex.h"
+
+/* One more than floor(log2(n)) for n >= 1; returns 1 for n == 0 as well. */
+static GLuint
+driLog2( GLuint n )
+{
+   GLuint result = 1;
+   while ( n > 1 ) {
+      n >>= 1;
+      result++;
+   }
+   return result;
+}
+
+/* Derive heap->logGranularity from driLog2((size - 1) / nr_regions), never
+ * going below alignmentShift, and set heap->size to `size` rounded down to a
+ * multiple of 2^logGranularity.  nr_regions is used as a divisor, so it must
+ * be non-zero. */
+static void calculate_heap_size( driTexHeap * heap, unsigned size, 
+				 unsigned nr_regions, unsigned alignmentShift )
+{
+   const unsigned regionLog2 = driLog2( (size - 1) / nr_regions );
+   const unsigned shift =
+      (regionLog2 < alignmentShift) ? alignmentShift : regionLog2;
+
+   heap->logGranularity = shift;
+   heap->size = size & ~((1L << shift) - 1);
+}
+
+
+/* Throw away the contents of `heap` and rebuild its memory manager for a
+ * texture region of `size`.
+ *
+ * Texture objects with a non-NULL tObj are swapped out; the others are
+ * destroyed.  Returns GL_TRUE on success.  On failure GL_FALSE is returned
+ * and heap->memory_heap is left NULL, but the heap structure itself stays
+ * allocated: it was handed in by the caller, which still holds the pointer,
+ * so freeing it here would leave that pointer dangling.
+ * NOTE(review): assumes the mm* allocator tolerates a NULL heap -- confirm.
+ */
+GLboolean 
+intel_driReinitTextureHeap( driTexHeap *heap,
+			    unsigned size )
+{
+   driTextureObject *t, *tmp;
+
+   /* Kick out everything: */
+   foreach_s ( t, tmp, & heap->texture_objects ) {
+      if ( t->tObj != NULL )
+	 driSwapOutTextureObject( t );
+      else
+	 driDestroyTextureObject( t );
+   }
+
+   /* Destroy, then recreate, the memory manager at the new size: */
+   mmDestroy( heap->memory_heap );
+   calculate_heap_size(heap, size, heap->nrRegions, heap->alignmentShift);
+   heap->memory_heap = mmInit( 0, heap->size );
+   if ( heap->memory_heap == NULL ) {
+      fprintf(stderr, "driReinitTextureHeap: couldn't recreate memory heap\n");
+      return GL_FALSE;
+   }
+   make_empty_list( & heap->texture_objects );
+   return GL_TRUE;
+}
+
+
diff --git a/i915/intel_tris.c b/i915/intel_tris.c
new file mode 100644
index 0000000..b2787ee
--- /dev/null
+++ b/i915/intel_tris.c
@@ -0,0 +1,945 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "context.h"
+#include "macros.h"
+#include "enums.h"
+#include "dd.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+#include "tnl/t_vertex.h"
+
+#include "intel_screen.h"
+#include "intel_tris.h"
+#include "intel_batchbuffer.h"
+#include "intel_reg.h"
+#include "intel_span.h"
+
+/* XXX we shouldn't include these headers in this file, but we need them
+ * for fallbackStrings, below.
+ */
+#include "i830_context.h"
+#include "i915_context.h"
+
+static void intelRenderPrimitive( GLcontext *ctx, GLenum prim );
+static void intelRasterPrimitive( GLcontext *ctx, GLenum rprim, GLuint hwprim );
+
+/***********************************************************************
+ *                    Emit primitives as inline vertices               *
+ ***********************************************************************/
+
+#ifdef __i386__ /* COPY_DWORDS: copy vertsize dwords from v to vb, leaving vb advanced; j is scratch */
+#define COPY_DWORDS( j, vb, vertsize, v )			\
+do {								\
+   int __tmp;							\
+   __asm__ __volatile__( "rep ; movsl"				\
+			 : "=%c" (j), "=D" (vb), "=S" (__tmp)	\
+			 : "0" (vertsize),			\
+			 "D" ((long)vb),			\
+			 "S" ((long)v) );			\
+} while (0)
+#else /* portable loop with the same effect; the "if (0)" fprintf calls are disabled debug output */
+#define COPY_DWORDS( j, vb, vertsize, v )	\
+do {						\
+   if (0) fprintf(stderr, "\n");	\
+   for ( j = 0 ; j < vertsize ; j++ ) {		\
+      if (0) fprintf(stderr, "   -- v(%d): %x/%f\n",j,	\
+	      ((GLuint *)v)[j],			\
+	      ((GLfloat *)v)[j]);		\
+      vb[j] = ((GLuint *)v)[j];			\
+   }						\
+   vb += vertsize;				\
+} while (0)
+#endif /* __i386__ */
+
+/* Emit a quad as two triangles, (v0, v1, v3) and (v1, v2, v3), into space for
+ * six vertices obtained from intelExtendInlinePrimitive(). */
+static __inline__ void intel_draw_quad( intelContextPtr intel,
+					intelVertexPtr v0, intelVertexPtr v1,
+					intelVertexPtr v2, intelVertexPtr v3 )
+{
+   const GLuint dwords = intel->vertex_size;
+   GLuint *out = intelExtendInlinePrimitive( intel, 6 * dwords );
+   int n;
+
+   COPY_DWORDS( n, out, dwords, v0 );
+   COPY_DWORDS( n, out, dwords, v1 );
+   COPY_DWORDS( n, out, dwords, v3 );
+   COPY_DWORDS( n, out, dwords, v1 );
+   COPY_DWORDS( n, out, dwords, v2 );
+   COPY_DWORDS( n, out, dwords, v3 );
+}
+
+/* Copy triangle (v0, v1, v2) into space from intelExtendInlinePrimitive(). */
+static __inline__ void intel_draw_triangle( intelContextPtr intel,
+					    intelVertexPtr v0,
+					    intelVertexPtr v1, intelVertexPtr v2 )
+{
+   const GLuint dwords = intel->vertex_size;
+   GLuint *out = intelExtendInlinePrimitive( intel, 3 * dwords );
+   int n;
+
+   COPY_DWORDS( n, out, dwords, v0 );
+   COPY_DWORDS( n, out, dwords, v1 );
+   COPY_DWORDS( n, out, dwords, v2 );
+}
+
+
+/* Copy the two vertices of a line into space from intelExtendInlinePrimitive(). */
+static __inline__ void intel_draw_line( intelContextPtr intel,
+					intelVertexPtr v0, intelVertexPtr v1 )
+{
+   const GLuint dwords = intel->vertex_size;
+   GLuint *out = intelExtendInlinePrimitive( intel, 2 * dwords );
+   int n;
+
+   COPY_DWORDS( n, out, dwords, v0 );
+   COPY_DWORDS( n, out, dwords, v1 );
+}
+
+
+/* Emit one point.  x and y are shifted by -0.125 (sub pixel adjustment, still
+ * required for conform); the remaining dwords are copied unchanged. */
+static __inline__ void intel_draw_point( intelContextPtr intel, intelVertexPtr v0 )
+{
+   const GLuint dwords = intel->vertex_size;
+   GLuint *out = intelExtendInlinePrimitive( intel, dwords );
+   float *xy = (float *) out;
+   int n;
+   xy[0] = v0->v.x - 0.125;
+   xy[1] = v0->v.y - 0.125;
+   for (n = 2 ; n < dwords ; n++)
+      out[n] = v0->ui[n];
+}
+
+
+
+/***********************************************************************
+ *                Fixup for ARB_point_parameters                       *
+ ***********************************************************************/
+
+/* Draw a point with the ARB_point_parameters size attenuation applied here.
+ * Sizes at or above ctx->Point.Threshold are clamped to Point.MaxSize.
+ * Smaller sizes become MAX2(Threshold, MinSize) and alpha is scaled by
+ * (size / Threshold) squared.  The size is never left below 1.0.  When
+ * either value changed, the vertex is patched for the draw and then restored.
+ */
+static void intel_atten_point( intelContextPtr intel, intelVertexPtr v0 )
+{
+   GLcontext *ctx = &intel->ctx;
+   GLfloat size[4], color[4], origSize, origAlpha;
+
+   _tnl_get_attr( ctx, v0, _TNL_ATTRIB_POINTSIZE, size );
+   _tnl_get_attr( ctx, v0, _TNL_ATTRIB_COLOR0, color );
+   origSize = size[0];
+   origAlpha = color[3];
+
+   if (size[0] >= ctx->Point.Threshold) {
+      size[0] = MIN2(size[0], ctx->Point.MaxSize);
+   }
+   else {
+      const GLfloat fade = size[0] / ctx->Point.Threshold;
+      size[0] = MAX2(ctx->Point.Threshold, ctx->Point.MinSize);
+      color[3] *= fade * fade;
+   }
+   if (size[0] < 1.0)
+      size[0] = 1.0;
+   if (origSize == size[0] && origAlpha == color[3]) {
+      intel_draw_point( intel, v0 );   /* nothing changed: draw the vertex as it is */
+      return;
+   }
+   _tnl_set_attr( ctx, v0, _TNL_ATTRIB_POINTSIZE, size );
+   _tnl_set_attr( ctx, v0, _TNL_ATTRIB_COLOR0, color );
+   intel_draw_point( intel, v0 );
+   size[0] = origSize;
+   color[3] = origAlpha;
+   _tnl_set_attr( ctx, v0, _TNL_ATTRIB_POINTSIZE, size );
+   _tnl_set_attr( ctx, v0, _TNL_ATTRIB_COLOR0, color );
+}
+
+
+
+
+
+/***********************************************************************
+ *                Fixup for I915 WPOS texture coordinate                *
+ ***********************************************************************/
+
+
+
+/* Copy the first wpos_size bytes of each vertex to byte offset wpos_offset
+ * inside that same vertex, then draw the triangle with intel_draw_triangle. */
+static void intel_wpos_triangle( intelContextPtr intel,
+				 intelVertexPtr v0,
+				 intelVertexPtr v1, intelVertexPtr v2 )
+{
+   const GLuint wposOffset = intel->wpos_offset;
+   const GLuint wposSize = intel->wpos_size;
+
+   __memcpy( (char *)v0 + wposOffset, v0, wposSize );
+   __memcpy( (char *)v1 + wposOffset, v1, wposSize );
+   __memcpy( (char *)v2 + wposOffset, v2, wposSize );
+   intel_draw_triangle( intel, v0, v1, v2 );
+}
+
+
+/* Line variant of the WPOS fixup: duplicate the leading wpos_size bytes of
+ * both vertices at byte offset wpos_offset, then draw the line. */
+static void intel_wpos_line( intelContextPtr intel,
+			     intelVertexPtr v0, intelVertexPtr v1 )
+{
+   const GLuint wposOffset = intel->wpos_offset;
+   const GLuint wposSize = intel->wpos_size;
+
+   __memcpy( (char *)v0 + wposOffset, v0, wposSize );
+   __memcpy( (char *)v1 + wposOffset, v1, wposSize );
+   intel_draw_line( intel, v0, v1 );
+}
+
+
+/* Point variant of the WPOS fixup, followed by a normal intel_draw_point(). */
+static void intel_wpos_point( intelContextPtr intel, intelVertexPtr v0 )
+{
+   const GLuint wposOffset = intel->wpos_offset;
+   const GLuint wposSize = intel->wpos_size;
+
+   /* Duplicate the leading wposSize bytes of the vertex at wposOffset. */
+   __memcpy( (char *)v0 + wposOffset, v0, wposSize );
+   intel_draw_point( intel, v0 );
+}
+
+
+
+
+
+
+/***********************************************************************
+ *          Macros for t_dd_tritmp.h to draw basic primitives          *
+ ***********************************************************************/
+
+#define TRI( a, b, c )	/* intel->draw_tri when DO_FALLBACK, else inline */	\
+do { 						\
+   if (DO_FALLBACK)				\
+      intel->draw_tri( intel, a, b, c );	\
+   else						\
+      intel_draw_triangle( intel, a, b, c );	\
+} while (0)
+/* QUAD: with DO_FALLBACK set the quad is split into triangles (a,b,d) and (b,c,d). */
+#define QUAD( a, b, c, d )			\
+do { 						\
+   if (DO_FALLBACK) {				\
+      intel->draw_tri( intel, a, b, d );	\
+      intel->draw_tri( intel, b, c, d );	\
+   } else					\
+      intel_draw_quad( intel, a, b, c, d );	\
+} while (0)
+/* LINE: intel->draw_line when DO_FALLBACK is set, inline vertices otherwise. */
+#define LINE( v0, v1 )				\
+do { 						\
+   if (DO_FALLBACK)				\
+      intel->draw_line( intel, v0, v1 );	\
+   else						\
+      intel_draw_line( intel, v0, v1 );		\
+} while (0)
+/* POINT: intel->draw_point when DO_FALLBACK is set, inline vertices otherwise. */
+#define POINT( v0 )				\
+do { 						\
+   if (DO_FALLBACK)				\
+      intel->draw_point( intel, v0 );		\
+   else						\
+      intel_draw_point( intel, v0 );		\
+} while (0)
+
+
+/***********************************************************************
+ *              Build render functions from dd templates               *
+ ***********************************************************************/
+
+#define INTEL_OFFSET_BIT 	0x01
+#define INTEL_TWOSIDE_BIT	0x02
+#define INTEL_UNFILLED_BIT	0x04
+#define INTEL_FALLBACK_BIT	0x08
+#define INTEL_MAX_TRIFUNC	0x10	/* one past the largest OR of the four bits above */
+/* One set of point/line/triangle/quad functions per combination of those bits;
+ * registered as TAB for the templates, which presumably fill it in -- confirm. */
+static struct {
+   tnl_points_func	        points;
+   tnl_line_func		line;
+   tnl_triangle_func	triangle;
+   tnl_quad_func		quad;
+} rast_tab[INTEL_MAX_TRIFUNC];
+
+
+#define DO_FALLBACK (IND & INTEL_FALLBACK_BIT)	/* IND is redefined before each template include */
+#define DO_OFFSET   (IND & INTEL_OFFSET_BIT)
+#define DO_UNFILLED (IND & INTEL_UNFILLED_BIT)
+#define DO_TWOSIDE  (IND & INTEL_TWOSIDE_BIT)
+#define DO_FLAT      0
+#define DO_TRI       1
+#define DO_QUAD      1
+#define DO_LINE      1
+#define DO_POINTS    1
+#define DO_FULL_QUAD 1
+/* Fixed settings, presumably read by the tnl_dd templates -- confirm their meaning there. */
+#define HAVE_RGBA         1
+#define HAVE_SPEC         1
+#define HAVE_BACK_COLORS  0
+#define HAVE_HW_FLATSHADE 1
+#define VERTEX            intelVertex
+#define TAB               rast_tab
+
+/* Only used to pull back colors into vertices (ie, we know color is
+ * floating point).  Components 0 and 2 are swapped on the way in.
+ */
+#define INTEL_COLOR( dst, src )				\
+do {							\
+   UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE((dst)[3], (src)[3]);	\
+} while (0)
+/* Same conversion for the first three components only; (dst)[3] is not written. */
+#define INTEL_SPEC( dst, src )				\
+do {							\
+   UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]);	\
+} while (0)
+
+/* Vertex accessors and helper names; presumably consumed by the tnl_dd templates. */
+#define DEPTH_SCALE intel->polygon_offset_scale
+#define UNFILLED_TRI unfilled_tri
+#define UNFILLED_QUAD unfilled_quad
+#define VERT_X(_v) _v->v.x
+#define VERT_Y(_v) _v->v.y
+#define VERT_Z(_v) _v->v.z
+#define AREA_IS_CCW( a ) (a > 0)
+#define GET_VERTEX(e) (intel->verts + (e * intel->vertex_size * sizeof(GLuint)))
+/* Colour helpers: each one does nothing when coloroffset is 0. */
+#define VERT_SET_RGBA( v, c )    if (coloroffset) INTEL_COLOR( v->ub4[coloroffset], c )
+#define VERT_COPY_RGBA( v0, v1 ) if (coloroffset) v0->ui[coloroffset] = v1->ui[coloroffset]
+#define VERT_SAVE_RGBA( idx )    if (coloroffset) color[idx] = v[idx]->ui[coloroffset]
+#define VERT_RESTORE_RGBA( idx ) if (coloroffset) v[idx]->ui[coloroffset] = color[idx]
+/* Specular helpers: each one does nothing when specoffset is 0. */
+#define VERT_SET_SPEC( v, c )    if (specoffset) INTEL_SPEC( v->ub4[specoffset], c )
+#define VERT_COPY_SPEC( v0, v1 ) if (specoffset) COPY_3V(v0->ub4[specoffset], v1->ub4[specoffset])
+#define VERT_SAVE_SPEC( idx )    if (specoffset) spec[idx] = v[idx]->ui[specoffset]
+#define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx]
+
+#define LOCAL_VARS(n)	/* both offsets index ui[] / ub4[], hence GLuint */	\
+   intelContextPtr intel = INTEL_CONTEXT(ctx);				\
+   GLuint color[n], spec[n];						\
+   GLuint coloroffset = intel->coloroffset;				\
+   GLuint specoffset = intel->specoffset;				\
+   (void) color; (void) spec; (void) coloroffset; (void) specoffset;
+
+
+/***********************************************************************
+ *                Helpers for rendering unfilled primitives            *
+ ***********************************************************************/
+
+static const GLuint hw_prim[GL_POLYGON+1] = {	/* indexed by GL primitive enum, see RASTERIZE */
+   PRIM3D_POINTLIST,	/* GL_POINTS */
+   PRIM3D_LINELIST,	/* GL_LINES */
+   PRIM3D_LINELIST,	/* GL_LINE_LOOP */
+   PRIM3D_LINELIST,	/* GL_LINE_STRIP */
+   PRIM3D_TRILIST,	/* GL_TRIANGLES */
+   PRIM3D_TRILIST,	/* GL_TRIANGLE_STRIP */
+   PRIM3D_TRILIST,	/* GL_TRIANGLE_FAN */
+   PRIM3D_TRILIST,	/* GL_QUADS */
+   PRIM3D_TRILIST,	/* GL_QUAD_STRIP */
+   PRIM3D_TRILIST	/* GL_POLYGON */
+};
+
+#define RASTERIZE(x) intelRasterPrimitive( ctx, x, hw_prim[x] )	/* x also selects the hw primitive */
+#define RENDER_PRIMITIVE intel->render_primitive
+#define TAG(x) x
+#define IND INTEL_FALLBACK_BIT	/* makes DO_FALLBACK non-zero while the unfilled helpers are built */
+#include "tnl_dd/t_dd_unfilled.h"
+#undef IND
+
+/***********************************************************************
+ *                      Generate GL render functions                   *
+ ***********************************************************************/
+
+#define IND (0)	/* each IND/TAG pair below instantiates tnl_dd/t_dd_tritmp.h once */
+#define TAG(x) x
+#include "tnl_dd/t_dd_tritmp.h"
+/* NOTE(review): IND/TAG are never #undef'd here; presumably the template does it. */
+#define IND (INTEL_OFFSET_BIT)
+#define TAG(x) x##_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_TWOSIDE_BIT)
+#define TAG(x) x##_twoside
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_TWOSIDE_BIT|INTEL_OFFSET_BIT)
+#define TAG(x) x##_twoside_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_UNFILLED_BIT)
+#define TAG(x) x##_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_OFFSET_BIT|INTEL_UNFILLED_BIT)
+#define TAG(x) x##_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_TWOSIDE_BIT|INTEL_UNFILLED_BIT)
+#define TAG(x) x##_twoside_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_TWOSIDE_BIT|INTEL_OFFSET_BIT|INTEL_UNFILLED_BIT)
+#define TAG(x) x##_twoside_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+/* The same eight combinations again, this time with INTEL_FALLBACK_BIT set. */
+#define IND (INTEL_FALLBACK_BIT)
+#define TAG(x) x##_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_OFFSET_BIT|INTEL_FALLBACK_BIT)
+#define TAG(x) x##_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_TWOSIDE_BIT|INTEL_FALLBACK_BIT)
+#define TAG(x) x##_twoside_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_TWOSIDE_BIT|INTEL_OFFSET_BIT|INTEL_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_UNFILLED_BIT|INTEL_FALLBACK_BIT)
+#define TAG(x) x##_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_OFFSET_BIT|INTEL_UNFILLED_BIT|INTEL_FALLBACK_BIT)
+#define TAG(x) x##_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_TWOSIDE_BIT|INTEL_UNFILLED_BIT|INTEL_FALLBACK_BIT)
+#define TAG(x) x##_twoside_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_TWOSIDE_BIT|INTEL_OFFSET_BIT|INTEL_UNFILLED_BIT| \
+	     INTEL_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+
+static void init_rast_tab( void ) /* calls the init function of each of the 16 template instances */
+{  /* NOTE(review): presumably each call fills its own rast_tab[IND] entry -- confirm in t_dd_tritmp.h */
+   init();
+   init_offset();
+   init_twoside();
+   init_twoside_offset();
+   init_unfilled();
+   init_offset_unfilled();
+   init_twoside_unfilled();
+   init_twoside_offset_unfilled();
+   init_fallback();
+   init_offset_fallback();
+   init_twoside_fallback();
+   init_twoside_offset_fallback();
+   init_unfilled_fallback();
+   init_offset_unfilled_fallback();
+   init_twoside_unfilled_fallback();
+   init_twoside_offset_unfilled_fallback();
+}
+
+
+/***********************************************************************
+ *                    Rasterization fallback helpers                   *
+ ***********************************************************************/
+
+
+/* Software path for a single triangle.  Only reached for the
+ * unaccelerated primitives when accelerated and unaccelerated ones are
+ * mixed: the vertices are translated to SWvertex and drawn by swrast.
+ */
+static void
+intel_fallback_tri( intelContextPtr intel,
+		   intelVertex *v0,
+		   intelVertex *v1,
+		   intelVertex *v2 )
+{
+   SWvertex sw0, sw1, sw2;
+   GLcontext *ctx = &intel->ctx;
+
+   if (0)
+      fprintf(stderr, "\n%s\n", __FUNCTION__);
+
+   _swsetup_Translate( ctx, v0, &sw0 );
+   _swsetup_Translate( ctx, v1, &sw1 );
+   _swsetup_Translate( ctx, v2, &sw2 );
+   intelSpanRenderStart( ctx );
+   _swrast_Triangle( ctx, &sw0, &sw1, &sw2 );
+   intelSpanRenderFinish( ctx );
+}
+
+/* Software path for a single line: translate both endpoints to SWvertex and draw with swrast inside a span-render start/finish pair. */
+static void
+intel_fallback_line( intelContextPtr intel,
+		    intelVertex *v0,
+		    intelVertex *v1 )
+{
+   SWvertex sw0, sw1;
+   GLcontext *ctx = &intel->ctx;
+
+   if (0)
+      fprintf(stderr, "\n%s\n", __FUNCTION__);
+
+   _swsetup_Translate( ctx, v0, &sw0 );
+   _swsetup_Translate( ctx, v1, &sw1 );
+   intelSpanRenderStart( ctx );
+   _swrast_Line( ctx, &sw0, &sw1 );
+   intelSpanRenderFinish( ctx );
+}
+
+/* Software path for a single point: translate the vertex to SWvertex and draw with swrast inside a span-render start/finish pair. */
+static void
+intel_fallback_point( intelContextPtr intel,
+		     intelVertex *v0 )
+{
+   SWvertex sw0;
+   GLcontext *ctx = &intel->ctx;
+
+   if (0)
+      fprintf(stderr, "\n%s\n", __FUNCTION__);
+
+   _swsetup_Translate( ctx, v0, &sw0 );
+   intelSpanRenderStart( ctx );
+   _swrast_Point( ctx, &sw0 );
+   intelSpanRenderFinish( ctx );
+}
+
+
+
+/**********************************************************************/
+/*               Render unclipped begin/end objects                   */
+/**********************************************************************/
+/* Instantiate tnl/t_vb_rendertmp.h twice: intel_*_verts with ELT(x) = x, then intel_*_elts with ELT(x) = elt[x] (indices taken from vb.Elts). */
+#define IND 0
+#define V(x) (intelVertex *)(vertptr + ((x)*vertsize*sizeof(GLuint))) /* vertex x; vertsize counts GLuints */
+#define RENDER_POINTS( start, count )	\
+   for ( ; start < count ; start++) POINT( V(ELT(start)) );
+#define RENDER_LINE( v0, v1 )         LINE( V(v0), V(v1) )
+#define RENDER_TRI(  v0, v1, v2 )     TRI(  V(v0), V(v1), V(v2) )
+#define RENDER_QUAD( v0, v1, v2, v3 ) QUAD( V(v0), V(v1), V(v2), V(v3) )
+#define INIT(x) intelRenderPrimitive( ctx, x )
+#undef LOCAL_VARS
+#define LOCAL_VARS						\
+    intelContextPtr intel = INTEL_CONTEXT(ctx);			\
+    GLubyte *vertptr = (GLubyte *)intel->verts;			\
+    const GLuint vertsize = intel->vertex_size;       	\
+    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
+    (void) elt;
+#define RESET_STIPPLE /* defined empty */
+#define RESET_OCCLUSION /* defined empty */
+#define PRESERVE_VB_DEFS /* presumably keeps the macros above defined for the second #include -- confirm in t_vb_rendertmp.h */
+#define ELT(x) x
+#define TAG(x) intel_##x##_verts
+#include "tnl/t_vb_rendertmp.h"
+#undef ELT
+#undef TAG
+#define TAG(x) intel_##x##_elts
+#define ELT(x) elt[x]
+#include "tnl/t_vb_rendertmp.h"
+
+/**********************************************************************/
+/*                   Render clipped primitives                        */
+/**********************************************************************/
+
+
+/* Clipped-polygon hook for the non-fast path: draw the clipped vertex list through the installed GL_POLYGON elts routine. */
+static void intelRenderClippedPoly( GLcontext *ctx, const GLuint *elts,
+				   GLuint n )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   GLuint *saved_elts = VB->Elts;
+   const GLuint prim = INTEL_CONTEXT(ctx)->render_primitive;
+
+   /* Temporarily point the vertex buffer's element list at the clipped
+    * indices and render them as one unclipped polygon.
+    */
+   VB->Elts = (GLuint *)elts;
+   tnl->Driver.Render.PrimTabElts[GL_POLYGON]( ctx, 0, n,
+					       PRIM_BEGIN|PRIM_END );
+   VB->Elts = saved_elts;
+
+   /* Notify the driver of the primitive that was current on entry,
+    * unless that already was GL_POLYGON.
+    */
+   if (prim == GL_POLYGON)
+      return;
+   tnl->Driver.Render.PrimitiveNotify( ctx, prim );
+}
+/* Clipped-line hook for the non-fast path. */
+static void intelRenderClippedLine( GLcontext *ctx, GLuint ii, GLuint jj )
+{
+   /* Dispatch through whichever Render.Line entry is currently
+    * installed in the tnl context. */
+   TNL_CONTEXT(ctx)->Driver.Render.Line( ctx, ii, jj );
+}
+/* Fast clipped-polygon hook (installed when the render index is 0): copy the polygon into the inline primitive as a fan of n-2 triangles. */
+static void intelFastRenderClippedPoly( GLcontext *ctx, const GLuint *elts,
+				       GLuint n )
+{
+   intelContextPtr intel = INTEL_CONTEXT( ctx );
+   const GLuint vertsize = intel->vertex_size;   /* in GLuints; read by V() */
+   GLubyte *vertptr = (GLubyte *)intel->verts;   /* read by V() */
+   GLuint *vb, i;   /* i is unsigned to match n */
+   int j;   /* scratch counter handed to COPY_DWORDS */
+   if (n < 3) return;   /* nothing to emit; the unsigned (n-2) below would wrap for n < 2 */
+   vb = intelExtendInlinePrimitive( intel, (n-2) * 3 * vertsize );
+   for (i = 2 ; i < n ; i++) {   /* triangle (i-1, i, 0); COPY_DWORDS presumably advances vb -- confirm */
+      COPY_DWORDS( j, vb, vertsize, V(elts[i-1]) );
+      COPY_DWORDS( j, vb, vertsize, V(elts[i]) );
+      COPY_DWORDS( j, vb, vertsize, V(elts[0]) );
+   }
+}
+
+/**********************************************************************/
+/*                    Choose render functions                         */
+/**********************************************************************/
+
+
+
+/* _TriangleCaps bits that need a software or special-case draw function.  POINT_FALLBACK and TRI_FALLBACK are 0, so the tests on them below never fire. */
+#define POINT_FALLBACK (0)
+#define LINE_FALLBACK (DD_LINE_STIPPLE)
+#define TRI_FALLBACK (0)
+#define ANY_FALLBACK_FLAGS (POINT_FALLBACK|LINE_FALLBACK|TRI_FALLBACK|\
+                            DD_TRI_STIPPLE|DD_POINT_ATTEN)
+#define ANY_RASTER_FLAGS (DD_TRI_LIGHT_TWOSIDE|DD_TRI_OFFSET|DD_TRI_UNFILLED)
+/* Select the rast_tab[] entry and the draw_point/draw_line/draw_tri callbacks for the current GL state, then install the matching tnl render hooks. */
+void intelChooseRenderState(GLcontext *ctx)
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   GLuint flags = ctx->_TriangleCaps;
+   const struct gl_fragment_program *fprog = ctx->FragmentProgram._Current;
+   GLboolean have_wpos = (fprog && (fprog->Base.InputsRead & FRAG_BIT_WPOS));
+   GLuint index = 0;
+
+   if (INTEL_DEBUG & DEBUG_STATE)
+     fprintf(stderr,"\n%s\n",__FUNCTION__);
+
+   if ((flags & (ANY_FALLBACK_FLAGS|ANY_RASTER_FLAGS)) || have_wpos) {
+      /* Translate the rasterization caps into rast_tab index bits. */
+      if (flags & ANY_RASTER_FLAGS) {
+	 if (flags & DD_TRI_LIGHT_TWOSIDE)    index |= INTEL_TWOSIDE_BIT;
+	 if (flags & DD_TRI_OFFSET)	      index |= INTEL_OFFSET_BIT;
+	 if (flags & DD_TRI_UNFILLED)	      index |= INTEL_UNFILLED_BIT;
+      }
+
+      if (have_wpos) {
+	 intel->draw_point = intel_wpos_point;
+	 intel->draw_line = intel_wpos_line;
+	 intel->draw_tri = intel_wpos_triangle;
+
+	 /* Make sure these get called:
+	  */
+	 index |= INTEL_FALLBACK_BIT;
+      }
+      else {
+	 intel->draw_point = intel_draw_point;
+	 intel->draw_line = intel_draw_line;
+	 intel->draw_tri = intel_draw_triangle;
+      }
+
+      /* Hook in fallbacks for specific primitives.  These override the
+       * draw_* callbacks chosen just above. */
+      if (flags & ANY_FALLBACK_FLAGS)
+      {
+	 if (flags & POINT_FALLBACK)
+	    intel->draw_point = intel_fallback_point;
+
+	 if (flags & LINE_FALLBACK)
+	    intel->draw_line = intel_fallback_line;
+
+	 if (flags & TRI_FALLBACK)
+	    intel->draw_tri = intel_fallback_tri;
+
+	 if ((flags & DD_TRI_STIPPLE) && !intel->hw_stipple) 
+	    intel->draw_tri = intel_fallback_tri;
+
+	 if (flags & DD_POINT_ATTEN)
+	    intel->draw_point = intel_atten_point;
+
+	 index |= INTEL_FALLBACK_BIT;
+      }
+   }
+   /* The tnl hooks are reinstalled only when the selected table entry changed. */
+   if (intel->RenderIndex != index) {
+      intel->RenderIndex = index;
+
+      tnl->Driver.Render.Points = rast_tab[index].points;
+      tnl->Driver.Render.Line = rast_tab[index].line;
+      tnl->Driver.Render.Triangle = rast_tab[index].triangle;
+      tnl->Driver.Render.Quad = rast_tab[index].quad;
+      /* Index 0 (no special state) uses the driver's own render tables and fast clip hooks; anything else uses the generic tnl tables. */
+      if (index == 0) {
+	 tnl->Driver.Render.PrimTabVerts = intel_render_tab_verts;
+	 tnl->Driver.Render.PrimTabElts = intel_render_tab_elts;
+	 tnl->Driver.Render.ClippedLine = line; /* from tritmp.h */
+	 tnl->Driver.Render.ClippedPolygon = intelFastRenderClippedPoly;
+      } else {
+	 tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+	 tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+	 tnl->Driver.Render.ClippedLine = intelRenderClippedLine;
+	 tnl->Driver.Render.ClippedPolygon = intelRenderClippedPoly;
+      }
+   }
+}
+/* Indexed by GL primitive type (GL_POINTS..GL_POLYGON): the basic primitive each one is rasterized as.  Read by intelRenderPrimitive. */
+static const GLenum reduced_prim[GL_POLYGON+1] = {
+   GL_POINTS,		/* GL_POINTS */
+   GL_LINES,		/* GL_LINES */
+   GL_LINES,		/* GL_LINE_LOOP */
+   GL_LINES,		/* GL_LINE_STRIP */
+   GL_TRIANGLES,	/* GL_TRIANGLES */
+   GL_TRIANGLES,	/* GL_TRIANGLE_STRIP */
+   GL_TRIANGLES,	/* GL_TRIANGLE_FAN */
+   GL_TRIANGLES,	/* GL_QUADS */
+   GL_TRIANGLES,	/* GL_QUAD_STRIP */
+   GL_TRIANGLES		/* GL_POLYGON */
+};
+
+
+/**********************************************************************/
+/*                 High level hooks for t_vb_render.c                 */
+/**********************************************************************/
+
+
+
+/* tnl RunPipeline hook: bring driver state up to date for the accumulated NewGLState bits, then run the tnl pipeline. */
+static void intelRunPipeline( GLcontext *ctx )
+{
+   intelContextPtr intel = INTEL_CONTEXT(ctx);
+
+   if (intel->NewGLState) {
+      if (intel->NewGLState & _NEW_TEXTURE)
+	 intel->vtbl.update_texture_state( intel );
+
+      /* NewGLState and Fallback are read again here rather than cached:
+       * the opaque texture update above could change them. */
+      if (!intel->Fallback &&
+	  (intel->NewGLState & _INTEL_NEW_RENDERSTATE))
+	 intelChooseRenderState( ctx );
+
+      intel->NewGLState = 0;
+   }
+
+   _tnl_run_pipeline( ctx );
+}
+static void intelRenderStart( GLcontext *ctx )   /* tnl Render.Start hook */
+{
+   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   intel->vtbl.render_start( intel );   /* forwarded to the context's vtbl */
+}
+static void intelRenderFinish( GLcontext *ctx )   /* tnl Render.Finish hook */
+{
+   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   if (intel->RenderIndex & INTEL_FALLBACK_BIT)
+      _swrast_flush( ctx );   /* only when the render index carries the fallback bit */
+}
+
+
+
+
+/* Emit the state that depends on the reduced primitive type and, when
+ * the hardware primitive differs from the current one, start a new
+ * inline primitive. */
+static void intelRasterPrimitive( GLcontext *ctx, GLenum rprim, GLuint hwprim )
+{
+   intelContextPtr intel = INTEL_CONTEXT(ctx);
+
+   if (0)
+      fprintf(stderr, "%s %s %x\n", __FUNCTION__, 
+	      _mesa_lookup_enum_by_nr(rprim), hwprim);
+
+   intel->vtbl.reduced_primitive_state( intel, rprim );
+
+   /* Checked after the state call above; same primitive: nothing to start. */
+   if (intel->prim.primitive == hwprim)
+      return;
+   intelStartInlinePrimitive( intel, hwprim );
+}
+
+
+/* tnl PrimitiveNotify hook: record the GL primitive being rendered and
+ * set up the corresponding reduced / hardware primitive.
+ */
+static void intelRenderPrimitive( GLcontext *ctx, GLenum prim )
+{
+   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   const GLenum rprim = reduced_prim[prim];
+   GLboolean unfilled;
+
+   if (0)
+      fprintf(stderr, "%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(prim));
+
+   /* Some clipping routines read this to learn which primitive they
+    * are dealing with.
+    */
+   intel->render_primitive = prim;
+
+   /* With unfilled triangles the lower level functions always reset the
+    * rasterized primitive themselves; doing it here too could ping-pong
+    * the state, so that case is skipped. */
+   unfilled = (ctx->_TriangleCaps & DD_TRI_UNFILLED) != 0;
+
+   /* Otherwise set the primitive-dependent state and possibly start a
+    * new primitive. */
+   if (rprim != GL_TRIANGLES || !unfilled)
+      intelRasterPrimitive( ctx, rprim, hw_prim[prim] );
+}
+
+
+/**********************************************************************/
+/*           Transition to/from hardware rasterization.               */
+/**********************************************************************/
+/* Names of the fallback bits, printed in the DEBUG_FALLBACKS messages.  Ends with a { 0, NULL } sentinel.  const because getFallbackString only reads it. */
+static const struct {
+   GLuint bit;
+   const char *str;
+} fallbackStrings[] = {
+   { INTEL_FALLBACK_DRAW_BUFFER, "Draw buffer" },
+   { INTEL_FALLBACK_READ_BUFFER, "Read buffer" },
+   { INTEL_FALLBACK_USER, "User" },
+   { INTEL_FALLBACK_NO_BATCHBUFFER, "No Batchbuffer" },
+   { INTEL_FALLBACK_NO_TEXMEM, "No Texmem" },
+   { INTEL_FALLBACK_RENDERMODE, "Rendermode" },
+
+   { I830_FALLBACK_TEXTURE, "i830 texture" },
+   { I830_FALLBACK_COLORMASK, "i830 colormask" },
+   { I830_FALLBACK_STENCIL, "i830 stencil" },
+   { I830_FALLBACK_STIPPLE, "i830 stipple" },
+   { I830_FALLBACK_LOGICOP, "i830 logicop" },
+
+   { I915_FALLBACK_TEXTURE, "i915 texture" },
+   { I915_FALLBACK_COLORMASK, "i915 colormask" },
+   { I915_FALLBACK_STENCIL, "i915 stencil" },
+   { I915_FALLBACK_STIPPLE, "i915 stipple" },
+   { I915_FALLBACK_PROGRAM, "i915 program" },
+   { I915_FALLBACK_LOGICOP, "i915 logicop" },
+   { I915_FALLBACK_POLYGON_SMOOTH, "i915 polygon smooth" },
+   { I915_FALLBACK_POINT_SMOOTH, "i915 point smooth" },
+
+   { 0, NULL }
+};
+
+/* Return the fallbackStrings[] name of a fallback bit (exact match), or a fixed placeholder string when the bit is not listed. */
+static const char *getFallbackString(GLuint bit)
+{
+   int idx = 0;
+   while (fallbackStrings[idx].bit != 0) {
+      if (bit == fallbackStrings[idx].bit)
+         return fallbackStrings[idx].str;
+      idx++;
+   }
+   return "unknown fallback bit";
+}
+
+/* Set (mode true) or clear (mode false) one fallback reason bit.  Extra work is done only when the first bit is set or the last remaining bit is cleared. */
+void intelFallback( intelContextPtr intel, GLuint bit, GLboolean mode )
+{
+   GLcontext *ctx = &intel->ctx;
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   const GLuint oldfallback = intel->Fallback;
+
+   if (mode) {
+      intel->Fallback |= bit;
+      if (oldfallback != 0)
+         return;   /* some fallback bit was already set */
+      /* First bit set: flush, call _swsetup_Wakeup and invalidate RenderIndex (~0). */
+      intelFlush(ctx);
+      if (INTEL_DEBUG & DEBUG_FALLBACKS) 
+         fprintf(stderr, "ENTER FALLBACK 0x%x: %s\n",
+                 bit, getFallbackString(bit));
+      _swsetup_Wakeup( ctx );
+      intel->RenderIndex = ~0;
+      return;
+   }
+
+   intel->Fallback &= ~bit;
+   if (oldfallback != bit)
+      return;   /* other bits remain set, or this bit was not the one set */
+
+   _swrast_flush( ctx );
+   if (INTEL_DEBUG & DEBUG_FALLBACKS) 
+      fprintf(stderr, "LEAVE FALLBACK 0x%x: %s\n",
+              bit, getFallbackString(bit));
+   tnl->Driver.Render.Start = intelRenderStart;
+   tnl->Driver.Render.PrimitiveNotify = intelRenderPrimitive;
+   tnl->Driver.Render.Finish = intelRenderFinish;
+   tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+   tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+   tnl->Driver.Render.Interp = _tnl_interp;
+
+   _tnl_invalidate_vertex_state( ctx, ~0 );
+   _tnl_invalidate_vertices( ctx, ~0 );
+   _tnl_install_attrs( ctx, intel->vertex_attrs, intel->vertex_attr_count,
+                       intel->ViewportMatrix.m, 0 ); 
+
+   intel->NewGLState |= _INTEL_NEW_RENDERSTATE;   /* intelRunPipeline then re-chooses the render state */
+}
+
+
+
+
+/**********************************************************************/
+/*                            Initialization.                         */
+/**********************************************************************/
+
+/* Install this driver's tnl pipeline and render hooks for ctx.  init_rast_tab() runs on the first call only (function-static flag). */
+void intelInitTriFuncs( GLcontext *ctx )
+{
+   static int rast_tab_ready = 0;
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   if (!rast_tab_ready) {
+      init_rast_tab();
+      rast_tab_ready = 1;
+   }
+
+   tnl->Driver.RunPipeline = intelRunPipeline;
+   tnl->Driver.Render.Start = intelRenderStart;
+   tnl->Driver.Render.PrimitiveNotify = intelRenderPrimitive;
+   tnl->Driver.Render.Finish = intelRenderFinish;
+   tnl->Driver.Render.ResetLineStipple = _swrast_ResetLineStipple;
+   tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+   tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+   tnl->Driver.Render.Interp = _tnl_interp;
+}
diff --git a/i915/intel_tris.h b/i915/intel_tris.h
new file mode 100644
index 0000000..d7e382f
--- /dev/null
+++ b/i915/intel_tris.h
@@ -0,0 +1,46 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTELTRIS_INC
+#define INTELTRIS_INC
+
+#include "mtypes.h"
+/* State-change flags that make intelRunPipeline() call intelChooseRenderState(); intelFallback() also raises them when the last fallback bit is cleared. */
+#define _INTEL_NEW_RENDERSTATE (_DD_NEW_LINE_STIPPLE |		\
+			       _DD_NEW_TRI_UNFILLED |		\
+			       _DD_NEW_TRI_LIGHT_TWOSIDE |	\
+			       _DD_NEW_TRI_OFFSET |		\
+			       _DD_NEW_TRI_STIPPLE |		\
+			       _NEW_PROGRAM |		\
+			       _NEW_POLYGONSTIPPLE)
+/* Entry points of intel_tris.c. */
+extern void intelInitTriFuncs( GLcontext *ctx );
+/* NOTE(review): no definition of intelPrintRenderState is visible in the part of intel_tris.c reviewed here -- confirm it still exists. */
+extern void intelPrintRenderState( const char *msg, GLuint state );
+extern void intelChooseRenderState( GLcontext *ctx );
+
+#endif
diff --git a/i915/server/i830_common.h b/i915/server/i830_common.h
new file mode 100644
index 0000000..fb6ceaa
--- /dev/null
+++ b/i915/server/i830_common.h
@@ -0,0 +1,212 @@
+/**************************************************************************
+
+Copyright 2001 VA Linux Systems Inc., Fremont, California.
+Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/i810/i830_common.h,v 1.1 2002/09/11 00:29:32 dawes Exp $ */
+
+#ifndef _I830_COMMON_H_
+#define _I830_COMMON_H_
+
+/* Texture-region limits; drmI830Sarea.texList below has I830_NR_TEX_REGIONS+1 entries. */
+#define I830_NR_TEX_REGIONS 255	/* maximum due to use of chars for next/prev */
+#define I830_LOG_MIN_TEX_REGION_SIZE 14	/* presumably log2 of the smallest region size in bytes -- confirm */
+
+
+/* Driver specific DRM command indices
+ * NOTE: these are not OS specific, but they are driver specific
+ */
+#define DRM_I830_INIT                     0x00
+#define DRM_I830_FLUSH                    0x01
+#define DRM_I830_FLIP                     0x02
+#define DRM_I830_BATCHBUFFER              0x03
+#define DRM_I830_IRQ_EMIT                 0x04
+#define DRM_I830_IRQ_WAIT                 0x05
+#define DRM_I830_GETPARAM                 0x06
+#define DRM_I830_SETPARAM                 0x07
+#define DRM_I830_ALLOC                    0x08
+#define DRM_I830_FREE                     0x09
+#define DRM_I830_INIT_HEAP                0x0a
+#define DRM_I830_CMDBUFFER                0x0b
+#define DRM_I830_DESTROY_HEAP             0x0c
+/* DMA init/cleanup/resume request; `func` selects the operation.  NOTE(review): presumably the argument of DRM_I830_INIT -- confirm. */
+typedef struct {
+   enum {
+      I830_INIT_DMA = 0x01,
+      I830_CLEANUP_DMA = 0x02,
+      I830_RESUME_DMA = 0x03
+   } func;
+   unsigned int mmio_offset;
+   int sarea_priv_offset;
+   unsigned int ring_start;
+   unsigned int ring_end;
+   unsigned int ring_size;
+   unsigned int front_offset;
+   unsigned int back_offset;
+   unsigned int depth_offset;
+   unsigned int w;
+   unsigned int h;
+   unsigned int pitch;
+   unsigned int pitch_bits;
+   unsigned int back_pitch;
+   unsigned int depth_pitch;
+   unsigned int cpp;
+   unsigned int chipset;
+} drmI830Init;
+/* Driver-private shared area.  The perf_boxes flag comments below show it is written by both kernel and client, so the layout is presumably ABI -- do not reorder without confirming. */
+typedef struct {
+	drmTextureRegion texList[I830_NR_TEX_REGIONS+1];
+        int last_upload;	/* last time texture was uploaded */
+        int last_enqueue;	/* last time a buffer was enqueued */
+	int last_dispatch;	/* age of the most recently dispatched buffer */
+	int ctxOwner;		/* last context to upload state */
+	int texAge;
+        int pf_enabled;		/* is pageflipping allowed? */
+        int pf_active;               
+        int pf_current_page;	/* which buffer is being displayed? */
+        int perf_boxes;	        /* performance boxes to be displayed (I830_BOX_* flags) */   
+	int width, height;      /* screen size in pixels */
+	/* handle / offset / size triples for the front, back, depth and texture areas */
+	drm_handle_t front_handle;
+	int front_offset;
+	int front_size;
+
+	drm_handle_t back_handle;
+	int back_offset;
+	int back_size;
+
+	drm_handle_t depth_handle;
+	int depth_offset;
+	int depth_size;
+
+	drm_handle_t tex_handle;
+	int tex_offset;
+	int tex_size;
+	int log_tex_granularity;
+	int pitch;
+	int rotation;           /* 0, 90, 180 or 270 */
+	int rotated_offset;
+	int rotated_size;
+	int rotated_pitch;
+	int virtualX, virtualY;
+
+	unsigned int front_tiled;
+	unsigned int back_tiled;
+	unsigned int depth_tiled;
+	unsigned int rotated_tiled;
+	unsigned int rotated2_tiled;
+	/* x, y, width and height for pipe A and pipe B */
+	int pipeA_x;
+	int pipeA_y;
+	int pipeA_w;
+	int pipeA_h;
+	int pipeB_x;
+	int pipeB_y;
+	int pipeB_w;
+	int pipeB_h;
+} drmI830Sarea;
+
+/* Bit flags stored in drmI830Sarea.perf_boxes; each comment names who sets the flag.
+ */
+#define I830_BOX_RING_EMPTY    0x1 /* populated by kernel */
+#define I830_BOX_FLIP          0x2 /* populated by kernel */
+#define I830_BOX_WAIT          0x4 /* populated by kernel & client */
+#define I830_BOX_TEXTURE_LOAD  0x8 /* populated by kernel */
+#define I830_BOX_LOST_CONTEXT  0x10 /* populated by client */
+
+/* Describes a batch buffer by AGP offset and length, plus draw-rectangle state and cliprects.  NOTE(review): presumably the DRM_I830_BATCHBUFFER argument -- confirm. */
+typedef struct {
+   	int start;		/* agp offset */
+	int used;		/* nr bytes in use */
+	int DR1;		/* hw flags for GFX_OP_DRAWRECT_INFO */
+        int DR4;		/* window origin for GFX_OP_DRAWRECT_INFO*/
+	int num_cliprects;	/* multipass with multiple cliprects? */
+        drm_clip_rect_t *cliprects; /* pointer to userspace cliprects */
+} drmI830BatchBuffer;
+/* Same as drmI830BatchBuffer, but the commands are passed through a char pointer.  NOTE(review): presumably the DRM_I830_CMDBUFFER argument -- confirm. */
+typedef struct {
+   	char *buf;		/* command data; NOTE(review): was labelled "agp offset" (copied from drmI830BatchBuffer.start) but is a pointer -- confirm */
+	int sz; 		/* nr bytes in use */
+	int DR1;		/* hw flags for GFX_OP_DRAWRECT_INFO */
+        int DR4;		/* window origin for GFX_OP_DRAWRECT_INFO*/
+	int num_cliprects;	/* multipass with multiple cliprects? */
+        drm_clip_rect_t *cliprects; /* pointer to userspace cliprects */
+} drmI830CmdBuffer;
+/* Irq and get/set-parameter structs.  The pointer members (IrqEmit.irq_seq, GetParam.value) are presumably outputs filled in by the kernel -- confirm. */
+typedef struct {
+	int *irq_seq;
+} drmI830IrqEmit;
+
+typedef struct {
+	int irq_seq;
+} drmI830IrqWait;
+
+typedef struct {
+	int param;
+	int *value;
+} drmI830GetParam;
+/* I830_PARAM_*: presumably the values accepted in drmI830GetParam.param -- confirm. */
+#define I830_PARAM_IRQ_ACTIVE     1
+#define I830_PARAM_ALLOW_BATCHBUFFER   2 
+
+typedef struct {
+	int param;
+	int value;
+} drmI830SetParam;
+/* I830_SETPARAM_*: presumably the values accepted in drmI830SetParam.param -- confirm. */
+#define I830_SETPARAM_USE_MI_BATCHBUFFER_START  1
+#define I830_SETPARAM_TEX_LRU_LOG_GRANULARITY   2
+#define I830_SETPARAM_ALLOW_BATCHBUFFER         3
+
+
+/* A memory manager for regions of shared memory.  `region` in the structs
+ * below takes an I830_MEM_REGION_* value (only AGP is defined here). */
+#define I830_MEM_REGION_AGP 1
+
+typedef struct {
+	int region;
+	int alignment;
+	int size;
+	int *region_offset;	/* offset from start of fb or agp */
+} drmI830MemAlloc;
+
+typedef struct {
+	int region;
+	int region_offset;	/* passed by value here; drmI830MemAlloc returns it through a pointer */
+} drmI830MemFree;
+
+typedef struct {
+	int region;
+	int size;
+	int start;	
+} drmI830MemInitHeap;
+
+typedef struct {
+	int region;
+} drmI830MemDestroyHeap;
+
+
+#endif /* _I830_COMMON_H_ */
diff --git a/i915/server/i830_dri.h b/i915/server/i830_dri.h
new file mode 100644
index 0000000..6c9a709
--- /dev/null
+++ b/i915/server/i830_dri.h
@@ -0,0 +1,73 @@
+/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/i810/i830_dri.h,v 1.4 2002/10/30 12:52:18 alanh Exp $ */
+
+#ifndef _I830_DRI_H
+#define _I830_DRI_H
+
+#include "xf86drm.h"
+#include "i830_common.h"
+/* Limits and version numbers for the I830 DRI interface. */
+#define I830_MAX_DRAWABLES 256
+
+#define I830_MAJOR_VERSION 1
+#define I830_MINOR_VERSION 3
+#define I830_PATCHLEVEL 0
+
+#define I830_REG_SIZE 0x80000	/* NOTE(review): presumably the size of the MMIO register map in bytes -- confirm */
+/* Device description record: handles and sizes of the mapped areas, then screen geometry and buffer offsets/pitches. */
+typedef struct _I830DRIRec {
+   drm_handle_t regs;
+   drmSize regsSize;
+
+   drmSize backbufferSize;
+   drm_handle_t backbuffer;
+
+   drmSize depthbufferSize;
+   drm_handle_t depthbuffer;
+
+   drmSize rotatedSize;
+   drm_handle_t rotatedbuffer;
+
+   drm_handle_t textures;
+   int textureSize;		/* int, unlike the drmSize used for the other sizes */
+
+   drm_handle_t agp_buffers;
+   drmSize agp_buf_size;
+
+   int deviceID;
+   int width;
+   int height;
+   int mem;
+   int cpp;
+   int bitsPerPixel;
+
+   int fbOffset;
+   int fbStride;
+
+   int backOffset;
+   int backPitch;
+
+   int depthOffset;
+   int depthPitch;
+
+   int rotatedOffset;
+   int rotatedPitch;
+
+   int logTextureGranularity;
+   int textureOffset;
+
+   int irq;
+   int sarea_priv_offset;
+} I830DRIRec, *I830DRIPtr;
+/* Placeholder records: each holds only a dummy member so the struct is not empty. */
+typedef struct {
+   /* Nothing here yet */
+   int dummy;
+} I830ConfigPrivRec, *I830ConfigPrivPtr;
+
+typedef struct {
+   /* Nothing here yet */
+   int dummy;
+} I830DRIContextRec, *I830DRIContextPtr;
+
+
+#endif
diff --git a/i915/server/intel.h b/i915/server/intel.h
new file mode 100644
index 0000000..d7858a2
--- /dev/null
+++ b/i915/server/intel.h
@@ -0,0 +1,328 @@
+#ifndef _INTEL_H_
+#define _INTEL_H_
+
+#include "xf86drm.h"		/* drm_handle_t, etc */
+
+/* Intel PCI device IDs.  Most groups are wrapped in #ifndef so an earlier definition is kept; 845_G and I830_M are defined unconditionally. */
+#ifndef PCI_CHIP_I810
+#define PCI_CHIP_I810              0x7121
+#define PCI_CHIP_I810_DC100        0x7123
+#define PCI_CHIP_I810_E            0x7125
+#define PCI_CHIP_I815              0x1132
+#define PCI_CHIP_I810_BRIDGE       0x7120
+#define PCI_CHIP_I810_DC100_BRIDGE 0x7122
+#define PCI_CHIP_I810_E_BRIDGE     0x7124
+#define PCI_CHIP_I815_BRIDGE       0x1130
+#endif
+
+#define PCI_CHIP_845_G			0x2562
+#define PCI_CHIP_I830_M			0x3577
+
+#ifndef PCI_CHIP_I855_GM
+#define PCI_CHIP_I855_GM	   0x3582
+#define PCI_CHIP_I855_GM_BRIDGE	   0x3580
+#endif
+
+#ifndef PCI_CHIP_I865_G
+#define PCI_CHIP_I865_G		   0x2572
+#define PCI_CHIP_I865_G_BRIDGE	   0x2570
+#endif
+
+#ifndef PCI_CHIP_I915_G
+#define PCI_CHIP_I915_G		   0x2582
+#define PCI_CHIP_I915_G_BRIDGE	   0x2580
+#endif
+
+#ifndef PCI_CHIP_I915_GM
+#define PCI_CHIP_I915_GM	   0x2592
+#define PCI_CHIP_I915_GM_BRIDGE	   0x2590
+#endif
+
+#ifndef PCI_CHIP_E7221_G
+#define PCI_CHIP_E7221_G	   0x258A
+/* Same as I915_G_BRIDGE */
+#define PCI_CHIP_E7221_G_BRIDGE	   0x2580
+#endif
+
+#ifndef PCI_CHIP_I945_G
+#define PCI_CHIP_I945_G        0x2772
+#define PCI_CHIP_I945_G_BRIDGE 0x2770
+#endif
+
+#ifndef PCI_CHIP_I945_GM
+#define PCI_CHIP_I945_GM        0x27A2
+#define PCI_CHIP_I945_GM_BRIDGE 0x27A0
+#endif
+/* Chipset tests for a pointer to a struct with a Chipset member.  The argument is parenthesized so calls such as IS_I830(&rec) or IS_I830(p + 1) expand correctly. */
+#define IS_I810(pI810) ((pI810)->Chipset == PCI_CHIP_I810 ||	\
+			(pI810)->Chipset == PCI_CHIP_I810_DC100 || \
+			(pI810)->Chipset == PCI_CHIP_I810_E)
+#define IS_I815(pI810) ((pI810)->Chipset == PCI_CHIP_I815)
+#define IS_I830(pI810) ((pI810)->Chipset == PCI_CHIP_I830_M)
+#define IS_845G(pI810) ((pI810)->Chipset == PCI_CHIP_845_G)
+#define IS_I85X(pI810)  ((pI810)->Chipset == PCI_CHIP_I855_GM)
+#define IS_I852(pI810)  ((pI810)->Chipset == PCI_CHIP_I855_GM && ((pI810)->variant == I852_GM || (pI810)->variant == I852_GME))
+#define IS_I855(pI810)  ((pI810)->Chipset == PCI_CHIP_I855_GM && ((pI810)->variant == I855_GM || (pI810)->variant == I855_GME))
+#define IS_I865G(pI810) ((pI810)->Chipset == PCI_CHIP_I865_G)
+/* NOTE(review): IS_I852/IS_I855 need a `variant` member and I852_GM/I855_GM constants that this header does not define (I830Rec has no such member). */
+#define IS_I915G(pI810) ((pI810)->Chipset == PCI_CHIP_I915_G || (pI810)->Chipset == PCI_CHIP_E7221_G)
+#define IS_I915GM(pI810) ((pI810)->Chipset == PCI_CHIP_I915_GM)
+#define IS_I945G(pI810) ((pI810)->Chipset == PCI_CHIP_I945_G)
+#define IS_I945GM(pI810) ((pI810)->Chipset == PCI_CHIP_I945_GM)
+#define IS_I9XX(pI810) (IS_I915G(pI810) || IS_I915GM(pI810) || IS_I945G(pI810) || IS_I945GM(pI810))
+
+#define IS_MOBILE(pI810) (IS_I830(pI810) || IS_I85X(pI810) || IS_I915GM(pI810) || IS_I945GM(pI810))
+
+#define I830_GMCH_CTRL		0x52
+/* *_GMCH_GMS_* are field values under the matching *_GMS_MASK (both masks equal 0x70).  NOTE(review): presumably stolen-memory size encodings in the GMCH control register -- confirm. */
+
+#define I830_GMCH_GMS_MASK			0x70
+#define I830_GMCH_GMS_DISABLED		0x00
+#define I830_GMCH_GMS_LOCAL			0x10
+#define I830_GMCH_GMS_STOLEN_512	0x20
+#define I830_GMCH_GMS_STOLEN_1024	0x30
+#define I830_GMCH_GMS_STOLEN_8192	0x40
+
+#define I855_GMCH_GMS_MASK			(0x7 << 4)
+#define I855_GMCH_GMS_DISABLED			0x00
+#define I855_GMCH_GMS_STOLEN_1M			(0x1 << 4)
+#define I855_GMCH_GMS_STOLEN_4M			(0x2 << 4)
+#define I855_GMCH_GMS_STOLEN_8M			(0x3 << 4)
+#define I855_GMCH_GMS_STOLEN_16M		(0x4 << 4)
+#define I855_GMCH_GMS_STOLEN_32M		(0x5 << 4)
+#define I915G_GMCH_GMS_STOLEN_48M		(0x6 << 4)
+#define I915G_GMCH_GMS_STOLEN_64M		(0x7 << 4)
+/* Local boolean type and constants.  NOTE(review): TRUE/FALSE are defined unconditionally and would clash with a header that defines them differently. */
+typedef unsigned char Bool;
+#define TRUE 1
+#define FALSE 0
+/* PIPE_* bit flags.  Each value is parenthesized so it combines safely with any operator (unparenthesized, PIPE_TV + 1 would expand to 1<<1 + 1 == 4). */
+#define PIPE_NONE	(0<<0)
+#define PIPE_CRT	(1<<0)
+#define PIPE_TV		(1<<1)
+#define PIPE_DFP	(1<<2)
+#define PIPE_LFP	(1<<3)
+#define PIPE_CRT2	(1<<4)
+#define PIPE_TV2	(1<<5)
+#define PIPE_DFP2	(1<<6)
+#define PIPE_LFP2	(1<<7)
+/* Memory bookkeeping: a range, a pool made of four ranges, and the ring buffer state built on a range. */
+typedef struct _I830MemPool *I830MemPoolPtr;
+typedef struct _I830MemRange *I830MemRangePtr;
+typedef struct _I830MemRange {
+   long Start;
+   long End;
+   long Size;
+   unsigned long Physical;
+   unsigned long Offset;		/* Offset of AGP-allocated portion */
+   unsigned long Alignment;
+   drm_handle_t Key;
+   unsigned long Pitch; /* add pitch */
+   I830MemPoolPtr Pool;
+} I830MemRange;
+
+typedef struct _I830MemPool {
+   I830MemRange Total;
+   I830MemRange Free;
+   I830MemRange Fixed;
+   I830MemRange Allocated;
+} I830MemPool;
+
+typedef struct {
+   int tail_mask;
+   I830MemRange mem;
+   unsigned char *virtual_start;
+   int head;
+   int tail;
+   int space;
+} I830RingBuffer;
+/* Per-device driver state: mapped bases, memory pools and the ranges allocated from them, plus chipset and register-map information. */
+typedef struct _I830Rec {
+   unsigned char *MMIOBase;
+   unsigned char *FbBase;
+   int cpp;
+
+   unsigned int bios_version;
+
+   /* These are set in PreInit and never changed. */
+   long FbMapSize;
+   long TotalVideoRam;
+   I830MemRange StolenMemory;		/* pre-allocated memory */
+   long BIOSMemorySize;			/* min stolen pool size */
+   int BIOSMemSizeLoc;
+
+   /* These change according to what has been allocated. */
+   long FreeMemory;
+   I830MemRange MemoryAperture;
+   I830MemPool StolenPool;
+   long allocatedMemory;
+
+   /* Regions allocated either from the above pools, or from agpgart. */
+   /* for single and dual head configurations */
+   I830MemRange FrontBuffer;
+   I830MemRange FrontBuffer2;
+   I830MemRange Scratch;
+   I830MemRange Scratch2;
+
+   I830RingBuffer *LpRing;
+
+   I830MemRange BackBuffer;
+   I830MemRange DepthBuffer;
+   I830MemRange TexMem;
+   int TexGranularity;
+   I830MemRange ContextMem;
+   int drmMinor;
+   Bool have3DWindows;
+
+   Bool NeedRingBufferLow;
+   Bool allowPageFlip;
+   Bool disableTiling;
+
+   int Chipset;				/* compared against PCI_CHIP_* by the IS_* macros */
+   unsigned long LinearAddr;
+   unsigned long MMIOAddr;
+
+   drmSize           registerSize;     /**< \brief MMIO register map size */
+   drm_handle_t         registerHandle;   /**< \brief MMIO register map handle */
+  /*   IOADDRESS ioBase;   (member disabled in the original) */
+   int               irq;              /**< \brief IRQ number */
+   int GttBound;
+
+   drm_handle_t ring_map;
+   unsigned int Fence[8];		/* 8 entries, same count as FENCE_NR below */
+
+} I830Rec;
+
+/*
+ * 12288 is set as the maximum, chosen because it is enough for
+ * 1920x1440@32bpp with a 2048 pixel line pitch with some to spare.
+ */
+#define I830_MAXIMUM_VBIOS_MEM		12288
+#define I830_DEFAULT_VIDEOMEM_2D	(MB(32) / 1024)	/* MB() is defined further down; fine, macros expand at the point of use */
+#define I830_DEFAULT_VIDEOMEM_3D	(MB(64) / 1024)
+
+/* Flags for memory allocation function */
+#define FROM_ANYWHERE			0x00000000
+#define FROM_POOL_ONLY			0x00000001
+#define FROM_NEW_ONLY			0x00000002
+#define FROM_MASK			0x0000000f
+
+#define ALLOCATE_AT_TOP			0x00000010
+#define ALLOCATE_AT_BOTTOM		0x00000020
+#define FORCE_GAPS			0x00000040
+
+#define NEED_PHYSICAL_ADDR		0x00000100
+#define ALIGN_BOTH_ENDS			0x00000200
+#define FORCE_LOW			0x00000400
+
+#define ALLOC_NO_TILING			0x00001000
+#define ALLOC_INITIAL			0x00002000
+
+#define ALLOCATE_DRY_RUN		0x80000000
+
+/* Chipset registers for VIDEO BIOS memory RW access */
+#define _855_DRAM_RW_CONTROL 0x58
+#define _845_DRAM_RW_CONTROL 0x90
+#define DRAM_WRITE    0x33330000
+
+#define KB(x) ((x) * 1024)
+#define MB(x) ((x) * KB(1024))
+
+#define GTT_PAGE_SIZE			KB(4)
+#define ROUND_TO(x, y)			(((x) + (y) - 1) / (y) * (y))	/* round x up to a multiple of y */
+#define ROUND_DOWN_TO(x, y)		((x) / (y) * (y))	/* round x down to a multiple of y */
+#define ROUND_TO_PAGE(x)		ROUND_TO((x), GTT_PAGE_SIZE)
+#define ROUND_TO_MB(x)			ROUND_TO((x), MB(1))
+#define PRIMARY_RINGBUFFER_SIZE		KB(128)
+
+
+/* Ring buffer registers, p277, overview p19.  NOTE(review): RING_TAIL/HEAD/START/LEN
+ * are presumably offsets from LP_RING or HP_RING -- confirm. */
+#define LP_RING     0x2030
+#define HP_RING     0x2040
+
+#define RING_TAIL      0x00
+#define TAIL_ADDR           0x000FFFF8
+#define I830_TAIL_MASK	    0x001FFFF8
+
+#define RING_HEAD      0x04
+#define HEAD_WRAP_COUNT     0xFFE00000
+#define HEAD_WRAP_ONE       0x00200000
+#define HEAD_ADDR           0x001FFFFC
+#define I830_HEAD_MASK      0x001FFFFC
+
+#define RING_START     0x08
+#define START_ADDR          0x03FFFFF8
+#define I830_RING_START_MASK	0xFFFFF000
+
+#define RING_LEN       0x0C
+#define RING_NR_PAGES       0x001FF000 
+#define I830_RING_NR_PAGES	0x001FF000
+#define RING_REPORT_MASK    0x00000006
+#define RING_REPORT_64K     0x00000002
+#define RING_REPORT_128K    0x00000004
+#define RING_NO_REPORT      0x00000000
+#define RING_VALID_MASK     0x00000001
+#define RING_VALID          0x00000001
+#define RING_INVALID        0x00000000
+
+
+/* Fence/Tiling ranges [0..7].  The same size-field value means twice the size
+ * in the I915G_ encoding (0x100 is FENCE_SIZE_1M but I915G_FENCE_SIZE_2M). */
+#define FENCE            0x2000
+#define FENCE_NR         8
+
+#define I915G_FENCE_START_MASK	0x0ff00000
+
+#define I830_FENCE_START_MASK	0x07f80000
+
+#define FENCE_START_MASK    0x03F80000
+#define FENCE_X_MAJOR       0x00000000
+#define FENCE_Y_MAJOR       0x00001000
+#define FENCE_SIZE_MASK     0x00000700
+#define FENCE_SIZE_512K     0x00000000
+#define FENCE_SIZE_1M       0x00000100
+#define FENCE_SIZE_2M       0x00000200
+#define FENCE_SIZE_4M       0x00000300
+#define FENCE_SIZE_8M       0x00000400
+#define FENCE_SIZE_16M      0x00000500
+#define FENCE_SIZE_32M      0x00000600
+#define FENCE_SIZE_64M	    0x00000700
+#define I915G_FENCE_SIZE_1M       0x00000000
+#define I915G_FENCE_SIZE_2M       0x00000100
+#define I915G_FENCE_SIZE_4M       0x00000200
+#define I915G_FENCE_SIZE_8M       0x00000300
+#define I915G_FENCE_SIZE_16M      0x00000400
+#define I915G_FENCE_SIZE_32M      0x00000500
+#define I915G_FENCE_SIZE_64M	0x00000600
+#define I915G_FENCE_SIZE_128M	0x00000700
+#define FENCE_PITCH_1       0x00000000
+#define FENCE_PITCH_2       0x00000010
+#define FENCE_PITCH_4       0x00000020
+#define FENCE_PITCH_8       0x00000030
+#define FENCE_PITCH_16      0x00000040
+#define FENCE_PITCH_32      0x00000050
+#define FENCE_PITCH_64	    0x00000060
+#define FENCE_VALID         0x00000001
+
+#include <mmio.h>
+/* Raw register accessors at base + offset (offset in bytes).  Expansions are fully parenthesized so the IN forms and the OUT assignments are safe inside larger expressions. */
+#  define MMIO_IN8(base, offset) \
+	(*(volatile unsigned char *)(((unsigned char*)(base)) + (offset)))
+#  define MMIO_IN32(base, offset) \
+	read_MMIO_LE32(base, offset)
+#  define MMIO_OUT8(base, offset, val) \
+	(*(volatile unsigned char *)(((unsigned char*)(base)) + (offset)) = (val))
+#  define MMIO_OUT32(base, offset, val) \
+	(*(volatile unsigned int *)(void *)(((unsigned char*)(base)) + (offset)) = CPU_TO_LE32(val))
+
+
+				/* Memory mapped register access macros; they use an identifier named MMIO that must be in scope at the call site */
+#define INREG8(addr)        MMIO_IN8(MMIO, addr)
+#define INREG(addr)         MMIO_IN32(MMIO, addr)
+#define OUTREG8(addr, val)  MMIO_OUT8(MMIO, addr, val)
+#define OUTREG(addr, val)   MMIO_OUT32(MMIO, addr, val)
+
+#define DSPABASE		0x70184
+
+#endif
diff --git a/i915/server/intel_dri.c b/i915/server/intel_dri.c
new file mode 100644
index 0000000..b6946b7
--- /dev/null
+++ b/i915/server/intel_dri.c
@@ -0,0 +1,1283 @@
+/**
+ * \file server/intel_dri.c
+ * \brief File to perform the device-specific initialization tasks typically
+ * done in the X server.
+ *
+ * Here they are converted to run in the client (or perhaps a standalone
+ * process), and to work with the frame buffer device rather than the X
+ * server infrastructure.
+ * 
+ * Copyright (C) 2006 Dave Airlie (airlied@linux.ie)
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sub license, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial portions
+ of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND/OR THEIR SUPPLIERS BE LIABLE FOR
+ ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include "driver.h"
+#include "drm.h"
+
+#include "intel.h"
+#include "i830_dri.h"
+
+#include "memops.h"
+#include "pciaccess.h"
+
+static size_t drm_page_size;	/* assigned from getpagesize() in I830ScreenInit() */
+static int nextTile = 0;	/* next fence index MakeTiles() will hand to SetFence() */
+#define xf86DrvMsg(...) do {} while(0)	/* every xf86DrvMsg() call expands to an empty statement */
+/* Widths that i830InitFBDev() rounds virtualWidth up to (first entry that fits); 0 ends the list. */
+static const int pitches[] = {
+  128 * 8,
+  128 * 16,
+  128 * 32,
+  128 * 64,
+  0
+};
+
+static Bool I830DRIDoMappings(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea);
+
+/* Smallest of 512KB, 1MB, 2MB ... 64MB that is >= size (64MB if none is). */
+static unsigned long
+GetBestTileAlignment(unsigned long size)
+{
+   unsigned long align = KB(512);
+
+   /* Stop at the 64MB cap instead of doubling past it: the old open-ended
+    * loop could shift its counter to zero and never finish for huge sizes. */
+   while (align < size && align < MB(64))
+      align <<= 1;
+
+   return align;
+}
+
+/* Write the eight cached values in pI830->Fence[] to the FENCE registers, logging each. */
+static void SetFenceRegs(const DRIDriverContext *ctx, I830Rec *pI830)
+{
+  unsigned char *MMIO = ctx->MMIOAddress;  /* OUTREG() expands to an access through MMIO */
+  int reg;
+
+  for (reg = 0; reg < 8; ++reg) {
+    OUTREG(FENCE + reg * 4, pI830->Fence[reg]);
+    fprintf(stderr,"Fence Register : %x\n", pI830->Fence[reg]);
+  }
+}
+
+/* Compute the fence-register value for one tiled region and cache it in
+ * pI830->Fence[nr]; SetFenceRegs() is what writes the cached values out.
+ * For an in-range nr the slot is zeroed first and stays zero if a check fails.
+ *
+ * Original note: tiled memory is good.  To make it less likely that we miss
+ * out, move the frontbuffer away from the 'guaranteed' alignment of the first
+ * memory segment, or allocate a discontiguous framebuffer for more 'sweet spots'. */
+static void
+SetFence(const DRIDriverContext *ctx, I830Rec *pI830,
+	 int nr, unsigned int start, unsigned int pitch,
+         unsigned int size)
+{
+   unsigned int val;
+   unsigned int fence_mask = 0;
+   unsigned int fence_pitch;
+
+   if (nr < 0 || nr > 7) {
+      fprintf(stderr,
+		 "SetFence: fence %d out of range\n",nr);
+      return;
+   }
+
+   pI830->Fence[nr] = 0;	/* stays 0 unless every check below passes */
+   /* 9xx and older chips accept different start-address bits. */
+   if (IS_I9XX(pI830))
+   	fence_mask = ~I915G_FENCE_START_MASK;
+   else
+   	fence_mask = ~I830_FENCE_START_MASK;
+
+   if (start & fence_mask) {
+      fprintf(stderr,
+		 "SetFence: %d: start (0x%08x) is not %s aligned\n",
+		 nr, start, (IS_I9XX(pI830)) ? "1MB" : "512k");
+      return;
+   }
+
+   if (start % size) {	/* NOTE(review): size == 0 would divide by zero; the visible caller passes Alignment >= 512KB */
+      fprintf(stderr,
+		 "SetFence: %d: start (0x%08x) is not size (%dk) aligned\n",
+		 nr, start, size / 1024);
+      return;
+   }
+
+   if (pitch & 127) {
+      fprintf(stderr,
+		 "SetFence: %d: pitch (%d) not a multiple of 128 bytes\n",
+		 nr, pitch);
+      return;
+   }
+
+   val = (start | FENCE_X_MAJOR | FENCE_VALID);
+   /* Encode the region size; the two chip families use different size codes. */
+   if (IS_I9XX(pI830)) {
+   	switch (size) {
+	   case MB(1):
+      		val |= I915G_FENCE_SIZE_1M;
+      		break;
+   	   case MB(2):
+      		val |= I915G_FENCE_SIZE_2M;
+      		break;
+   	   case MB(4):
+      		val |= I915G_FENCE_SIZE_4M;
+      		break;
+   	   case MB(8):
+      		val |= I915G_FENCE_SIZE_8M;
+      		break;
+   	   case MB(16):
+      		val |= I915G_FENCE_SIZE_16M;
+      		break;
+   	   case MB(32):
+      		val |= I915G_FENCE_SIZE_32M;
+      		break;
+   	   case MB(64):
+      		val |= I915G_FENCE_SIZE_64M;
+      		break;
+   	   default:
+      		fprintf(stderr,
+		 "SetFence: %d: illegal size (%d kByte)\n", nr, size / 1024);
+      		return;
+   	}
+    } else {
+   	switch (size) {
+	   case KB(512):
+      		val |= FENCE_SIZE_512K;
+      		break;
+	   case MB(1):
+      		val |= FENCE_SIZE_1M;
+      		break;
+   	   case MB(2):
+      		val |= FENCE_SIZE_2M;
+      		break;
+   	   case MB(4):
+      		val |= FENCE_SIZE_4M;
+      		break;
+   	   case MB(8):
+      		val |= FENCE_SIZE_8M;
+      		break;
+   	   case MB(16):
+      		val |= FENCE_SIZE_16M;
+      		break;
+   	   case MB(32):
+      		val |= FENCE_SIZE_32M;
+      		break;
+   	   case MB(64):
+      		val |= FENCE_SIZE_64M;
+      		break;
+   	   default:
+      		fprintf(stderr,
+		 "SetFence: %d: illegal size (%d kByte)\n", nr, size / 1024);
+      		return;
+   	}
+   }
+   /* 9xx divides the byte pitch by 512, older chips by 128, before encoding it. */
+   if (IS_I9XX(pI830))
+	fence_pitch = pitch / 512;
+   else
+	fence_pitch = pitch / 128;
+
+   switch (fence_pitch) {	/* only powers of two from 1 to 64 have an encoding */
+   case 1:
+      val |= FENCE_PITCH_1;
+      break;
+   case 2:
+      val |= FENCE_PITCH_2;
+      break;
+   case 4:
+      val |= FENCE_PITCH_4;
+      break;
+   case 8:
+      val |= FENCE_PITCH_8;
+      break;
+   case 16:
+      val |= FENCE_PITCH_16;
+      break;
+   case 32:
+      val |= FENCE_PITCH_32;
+      break;
+   case 64:
+      val |= FENCE_PITCH_64;
+      break;
+   default:
+      fprintf(stderr,
+		 "SetFence: %d: illegal pitch (%d)\n", nr, pitch);
+      return;
+   }
+
+   pI830->Fence[nr] = val;
+}
+
+/* Cover pMem with fences of pMem->Alignment bytes each, taking consecutive slots
+ * from nextTile.  Returns FALSE, programming nothing, if four or more are needed. */
+static Bool
+MakeTiles(const DRIDriverContext *ctx, I830Rec *pI830, I830MemRange *pMem)
+{
+   int bytes_per_row = pMem->Pitch * ctx->cpp;
+   int count = ROUND_TO(pMem->Size, pMem->Alignment) / pMem->Alignment;
+   int n;
+
+   /* NOTE(review): the old comment said "at most four pieces" but the test
+    * rejects exactly four as well -- confirm which limit is intended. */
+   if (count >= 4)
+      return FALSE;
+   /* SetFence() reports its own errors; its failures do not change our result. */
+   for (n = 0; n < count; n++) {
+     SetFence(ctx, pI830, nextTile, pMem->Start + n * pMem->Alignment,
+	       bytes_per_row, pMem->Alignment);
+     nextTile++;
+   }
+   return TRUE;
+}
+
+/* Reset all eight cached fences, then try to tile the back and depth buffers.
+ * Buffers aligned to less than 512KB are left untiled; page flipping is turned
+ * off if MakeTiles() fails for the back buffer. */
+static void I830SetupMemoryTiling(const DRIDriverContext *ctx, I830Rec *pI830)
+{
+  int slot;
+
+  /* Start handing out fence slots from the beginning again. */
+  nextTile = 0;
+  for (slot = 0; slot < 8; slot++)
+    pI830->Fence[slot] = 0;
+
+  if (pI830->BackBuffer.Alignment >= KB(512)) {
+    if (!MakeTiles(ctx, pI830, &pI830->BackBuffer)) {
+      fprintf(stderr,
+		 "MakeTiles failed for the back buffer.\n");
+      pI830->allowPageFlip = FALSE;
+    } else {
+      fprintf(stderr,
+		 "Activating tiled memory for the back buffer.\n");
+    }
+  }
+
+  if (pI830->DepthBuffer.Alignment >= KB(512)) {
+    if (!MakeTiles(ctx, pI830, &pI830->DepthBuffer)) {
+      fprintf(stderr,
+		 "MakeTiles failed for the depth buffer.\n");
+    } else {
+      fprintf(stderr,
+		 "Activating tiled memory for the depth buffer.\n");
+    }
+  }
+}
+
+/* Usable stolen memory in bytes (GMS size from the GMCH control word minus `range` KB), or 0. */
+static int I830DetectMemory(const DRIDriverContext *ctx, I830Rec *pI830)
+{
+  struct pci_device host_bridge;
+  uint32_t gmch_ctrl = 0;
+  int memsize = 0;
+  int range;
+
+  /* NOTE(review): host_bridge is only zero-filled, never looked up -- confirm which device is read. */
+  memset(&host_bridge, 0, sizeof(host_bridge));
+  if (pci_device_cfg_read_u32(&host_bridge, &gmch_ctrl, I830_GMCH_CTRL) != 0)
+    gmch_ctrl = 0;	/* a failed read must not leave the value indeterminate */
+
+  /* We need to reduce the stolen size by the GTT and the popup.
+   * The GTT varies according to the FbMapSize and the popup is 4KB. */
+  range = (ctx->shared.fbSize / (1024*1024)) + 4;
+
+   if (IS_I85X(pI830) || IS_I865G(pI830) || IS_I9XX(pI830)) {
+      switch (gmch_ctrl & I830_GMCH_GMS_MASK) {
+      case I855_GMCH_GMS_STOLEN_1M:
+	 memsize = MB(1) - KB(range);
+	 break;
+      case I855_GMCH_GMS_STOLEN_4M:
+	 memsize = MB(4) - KB(range);
+	 break;
+      case I855_GMCH_GMS_STOLEN_8M:
+	 memsize = MB(8) - KB(range);
+	 break;
+      case I855_GMCH_GMS_STOLEN_16M:
+	 memsize = MB(16) - KB(range);
+	 break;
+      case I855_GMCH_GMS_STOLEN_32M:
+	 memsize = MB(32) - KB(range);
+	 break;
+      case I915G_GMCH_GMS_STOLEN_48M:
+	 if (IS_I9XX(pI830))
+	    memsize = MB(48) - KB(range);
+	 break;
+      case I915G_GMCH_GMS_STOLEN_64M:
+	 if (IS_I9XX(pI830))
+	    memsize = MB(64) - KB(range);
+	 break;
+      }
+   } else {
+      switch (gmch_ctrl & I830_GMCH_GMS_MASK) {
+      case I830_GMCH_GMS_STOLEN_512:
+	 memsize = KB(512) - KB(range);
+	 break;
+      case I830_GMCH_GMS_STOLEN_1024:
+	 memsize = MB(1) - KB(range);
+	 break;
+      case I830_GMCH_GMS_STOLEN_8192:
+	 memsize = MB(8) - KB(range);
+	 break;
+      case I830_GMCH_GMS_LOCAL:
+	 memsize = 0;
+	 xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "Local memory found, but won't be used.\n");
+	 break;
+      }
+   }
+   if (memsize > 0) {
+     fprintf(stderr, "detected %d kB stolen memory.\n", memsize / 1024);
+   } else {
+     fprintf(stderr, "no video memory detected.\n");
+     memsize = 0;	/* the subtraction above can go negative; never report that as a size */
+   }
+   return memsize;
+}
+
+/* Acquire the AGP device and enable it in the fixed mode 0x4; returns 1 on success, 0 on failure. */
+static int AgpInit(const DRIDriverContext *ctx, I830Rec *info)
+{
+  unsigned long mode = 0x4;
+
+  if (drmAgpAcquire(ctx->drmFD) < 0) {
+    fprintf(stderr, "[gart] AGP not available\n");
+    return 0;
+  }
+
+  if (drmAgpEnable(ctx->drmFD, mode) < 0) {
+    fprintf(stderr, "[gart] AGP not enabled\n");
+    drmAgpRelease(ctx->drmFD);
+    return 0;
+  }
+  /* Log the mode passed to drmAgpEnable(), not ctx->agpmode, which is never used to enable anything here. */
+  fprintf(stderr, "[gart] AGP enabled at %lux\n", mode);
+  return 1;
+}
+
+/*
+ * Carve `size` bytes out of the front of pool->Free and describe them in
+ * *result.  The pool is never grown here: returns 0 when the request does
+ * not fit, otherwise the number of bytes consumed from the pool.
+ */
+static unsigned long
+AllocFromPool(const DRIDriverContext *ctx, I830Rec *pI830, 
+	      I830MemRange *result, I830MemPool *pool,
+	      long size, unsigned long alignment, int flags)
+{
+   long consumed;
+
+   if (!result || !pool || !size)
+      return 0;
+
+   if (alignment > GTT_PAGE_SIZE) {
+      /* Large alignments pay for the padding before the aligned start and
+       * for rounding the end of the range up to the alignment as well. */
+      long first = ROUND_TO(pool->Free.Start, alignment);
+      long last = ROUND_TO(first + size, alignment);
+      consumed = last - pool->Free.Start;
+   } else {
+      consumed = size;
+   }
+   if (consumed > pool->Free.Size)
+      return 0;
+
+   result->Start = ROUND_TO(pool->Free.Start, alignment);
+   pool->Free.Start += consumed;
+   result->End = pool->Free.Start;
+   pool->Free.Size = pool->Free.End - pool->Free.Start;
+   result->Size = result->End - result->Start;
+   result->Pool = pool;
+   result->Alignment = alignment;
+   return consumed;
+}
+
+/* Allocate `size` bytes of AGP memory through the DRM and place them at the
+ * next suitably aligned offset of pI830->MemoryAperture.  Returns size, or 0
+ * on failure.  NOTE(review): nothing checks the range against MemoryAperture.End. */
+static unsigned long AllocFromAGP(const DRIDriverContext *ctx, I830Rec *pI830, long size, unsigned long alignment, I830MemRange  *result)
+{
+   unsigned long offset, next_free;
+   int err;
+
+   if (!result || !size)
+      return 0;
+   if (!alignment)
+      alignment = 4;
+
+   offset = ROUND_TO(pI830->MemoryAperture.Start, alignment);
+   next_free = ROUND_TO(offset + size, alignment);
+
+   err = drmAgpAlloc(ctx->drmFD, size, 0, &(result->Physical), (drm_handle_t *)&(result->Key));
+   if (err) {
+     fprintf(stderr,"drmAgpAlloc failed %d\n", err);
+     return 0;
+   }
+
+   /* The aperture is consumed from the front; its end stays where it was,
+    * so only Start and Size need updating. */
+   pI830->allocatedMemory += size;
+   pI830->MemoryAperture.Start = next_free;
+   pI830->MemoryAperture.Size = pI830->MemoryAperture.End - next_free;
+
+   result->Start = offset;
+   result->End = offset + size;
+   result->Size = size;
+   result->Offset = offset;
+   result->Alignment = alignment;
+   result->Pool = NULL;
+
+   return size;
+}
+
+/* Allocate video memory for *result: try the given pool first and fall back
+ * to a fresh AGP allocation when the pool is absent, too small, or refuses.
+ * Returns the byte count reported by the allocator used, or 0 on failure. */
+unsigned long
+I830AllocVidMem(const DRIDriverContext *ctx, I830Rec *pI830,
+                I830MemRange *result, I830MemPool *pool, long size,
+                unsigned long alignment, int flags)
+{
+   unsigned long ret = 0;
+
+   if (!result)
+      return 0;
+
+   /* Make sure these are initialised. */
+   result->Size = 0;
+   result->Key = -1;
+
+   if (!size)
+      return 0;
+
+   /* A NULL pool used to be dereferenced here; treat it as "no pool space". */
+   if (pool && pool->Free.Size >= size)
+      ret = AllocFromPool(ctx, pI830, result, pool, size, alignment, flags);
+
+   if (ret == 0)
+      ret = AllocFromAGP(ctx, pI830, size, alignment, result);
+   return ret;
+}
+
+/* Bind a range at mem->Offset with drmAgpBind().  Ranges whose Key is still -1
+ * are skipped and count as success; a NULL range is an error. */
+static Bool BindAgpRange(const DRIDriverContext *ctx, I830MemRange *mem)
+{
+  if (mem == NULL)
+    return FALSE;
+  if (mem->Key != -1 && drmAgpBind(ctx->drmFD, mem->Key, mem->Offset) != 0)
+    return FALSE;
+  return TRUE;
+}
+
+/* Lay out all fixed allocations, in this order: primary ring buffer, front,
+ * back and depth buffers, context memory (all through I830AllocVidMem() with
+ * the stolen pool first) and finally 32MB of texture memory taken straight
+ * from AGP.  The three screen buffers share one size and one tile alignment.
+ * Returns FALSE, after printing a message, at the first allocation that
+ * comes back short; earlier allocations are not released.
+ */
+static Bool
+I830AllocateMemory(const DRIDriverContext *ctx, I830Rec *pI830)
+{
+  unsigned long size, ret;
+  unsigned long lines, lineSize, align;
+
+  /* allocate ring buffer */
+  memset(pI830->LpRing, 0, sizeof(I830RingBuffer));
+  pI830->LpRing->mem.Key = -1;
+
+  size = PRIMARY_RINGBUFFER_SIZE;
+
+  ret = I830AllocVidMem(ctx, pI830, &pI830->LpRing->mem, &pI830->StolenPool, size, 0x1000, 0);
+
+  if (ret != size)
+  {
+    fprintf(stderr,"unable to allocate ring buffer %ld\n", ret);
+    return FALSE;
+  }
+
+  pI830->LpRing->tail_mask = pI830->LpRing->mem.Size - 1;
+
+
+  /* allocate front buffer */
+  memset(&(pI830->FrontBuffer), 0, sizeof(pI830->FrontBuffer));
+  pI830->FrontBuffer.Key = -1;
+  pI830->FrontBuffer.Pitch = ctx->shared.virtualWidth;
+
+  /* Height is rounded up to a multiple of 16 lines, the size to whole pages. */
+
+  lineSize = ctx->shared.virtualWidth * ctx->cpp;
+  lines = (ctx->shared.virtualHeight + 15) / 16 * 16;
+  size = lineSize * lines;
+  size = ROUND_TO_PAGE(size);
+
+  align = GetBestTileAlignment(size);
+
+  ret = I830AllocVidMem(ctx, pI830, &pI830->FrontBuffer, &pI830->StolenPool, size, align, 0);
+  if (ret < size)
+  {
+    fprintf(stderr,"unable to allocate front buffer %ld\n", ret);
+    return FALSE;
+  }
+  /* allocate back buffer (same size and alignment as the front buffer) */
+  memset(&(pI830->BackBuffer), 0, sizeof(pI830->BackBuffer));
+  pI830->BackBuffer.Key = -1;
+  pI830->BackBuffer.Pitch = ctx->shared.virtualWidth;
+
+  ret = I830AllocVidMem(ctx, pI830, &pI830->BackBuffer, &pI830->StolenPool, size, align, 0);
+  if (ret < size)
+  {
+    fprintf(stderr,"unable to allocate back buffer %ld\n", ret);
+    return FALSE;
+  }
+  /* allocate depth buffer (same size and alignment again) */
+  memset(&(pI830->DepthBuffer), 0, sizeof(pI830->DepthBuffer));
+  pI830->DepthBuffer.Key = -1;
+  pI830->DepthBuffer.Pitch = ctx->shared.virtualWidth;
+
+  ret = I830AllocVidMem(ctx, pI830, &pI830->DepthBuffer, &pI830->StolenPool, size, align, 0);
+  if (ret < size)
+  {
+    fprintf(stderr,"unable to allocate depth buffer %ld\n", ret);
+    return FALSE;
+  }
+  /* allocate context memory; NOTE(review): 32KB inherits the buffers' tile alignment -- confirm */
+  memset(&(pI830->ContextMem), 0, sizeof(pI830->ContextMem));
+  pI830->ContextMem.Key = -1;
+  size = KB(32);
+
+  ret = I830AllocVidMem(ctx, pI830, &pI830->ContextMem, &pI830->StolenPool, size, align, 0);
+  if (ret < size)
+  {
+    fprintf(stderr,"unable to allocate context buffer %ld\n", ret);
+    return FALSE;
+  }
+  /* allocate texture memory directly from AGP, bypassing the stolen pool */
+  memset(&(pI830->TexMem), 0, sizeof(pI830->TexMem));
+  pI830->TexMem.Key = -1;
+
+  size = 32768 * 1024;
+  ret = AllocFromAGP(ctx, pI830, size, align, &pI830->TexMem);
+  if (ret < size)
+  {
+    fprintf(stderr,"unable to allocate texture memory %ld\n", ret);
+    return FALSE;
+  }
+
+  return TRUE;
+}
+
+/* Bind every allocated range with BindAgpRange(), stopping at the first failure. */
+static Bool
+I830BindMemory(const DRIDriverContext *ctx, I830Rec *pI830)
+{
+  I830MemRange *ranges[6];
+  int n;
+
+  ranges[0] = &pI830->LpRing->mem;
+  ranges[1] = &pI830->FrontBuffer;
+  ranges[2] = &pI830->BackBuffer;
+  ranges[3] = &pI830->DepthBuffer;
+  ranges[4] = &pI830->ContextMem;
+  ranges[5] = &pI830->TexMem;
+  for (n = 0; n < 6; n++)
+    if (!BindAgpRange(ctx, ranges[n]))
+      return FALSE;
+  return TRUE;
+}
+
+/* Send an I830_CLEANUP_DMA request to the kernel driver; FALSE if the ioctl fails. */
+static Bool
+I830CleanupDma(const DRIDriverContext *ctx)
+{
+   drmI830Init req;
+   int err;
+
+   memset(&req, 0, sizeof(req));
+   req.func = I830_CLEANUP_DMA;
+
+   err = drmCommandWrite(ctx->drmFD, DRM_I830_INIT, &req, sizeof(req));
+   if (err == 0)
+      return TRUE;
+   fprintf(stderr, "I830 Dma Cleanup Failed\n");
+   return FALSE;
+}
+
+/* Hand the kernel driver the ring buffer, MMIO offset and screen-buffer layout
+ * through an I830_INIT_DMA request.  Returns FALSE if the ioctl fails. */
+static Bool
+I830InitDma(const DRIDriverContext *ctx, I830Rec *pI830)
+{
+   I830RingBuffer *lp = pI830->LpRing;
+   drmI830Init req;
+
+   memset(&req, 0, sizeof(req));
+   req.func = I830_INIT_DMA;
+   req.mmio_offset = (unsigned int)ctx->MMIOStart;
+   req.sarea_priv_offset = sizeof(drm_sarea_t);
+   req.cpp = ctx->cpp;
+
+   /* Ring start and end have LinearAddr added; the size is passed as is. */
+   req.ring_start = lp->mem.Start + pI830->LinearAddr;
+   req.ring_end = lp->mem.End + pI830->LinearAddr;
+   req.ring_size = lp->mem.Size;
+
+   /* pitch is the virtual width; back and depth carry their own Pitch fields. */
+   req.w = ctx->shared.virtualWidth;
+   req.h = ctx->shared.virtualHeight;
+   req.pitch = ctx->shared.virtualWidth;
+
+   req.front_offset = pI830->FrontBuffer.Start;
+   req.back_offset = pI830->BackBuffer.Start;
+   req.back_pitch = pI830->BackBuffer.Pitch;
+   req.depth_offset = pI830->DepthBuffer.Start;
+   req.depth_pitch = pI830->DepthBuffer.Pitch;
+
+   if (drmCommandWrite(ctx->drmFD, DRM_I830_INIT, &req, sizeof(req)) != 0) {
+      fprintf(stderr, "I830 Dma Initialization Failed\n");
+      return FALSE;
+   }
+   return TRUE;
+}
+
+/* Check that the kernel DRM module is version 1.4.0 or newer within major 1
+ * and remember its minor number in pI830->drmMinor.  Returns 1 if acceptable.
+ * NOTE(review): a NULL version from drmGetVersion() is also accepted, leaving
+ * drmMinor untouched -- confirm that is intended. */
+static int I830CheckDRMVersion( const DRIDriverContext *ctx,
+				  I830Rec *pI830 )
+{
+   const int need_minor = 4;
+   const int need_patch = 0;
+   drmVersionPtr found = drmGetVersion(ctx->drmFD);
+   int too_old;
+
+   if (!found)
+      return 1;
+
+   too_old = found->version_major != 1 ||
+	     found->version_minor < need_minor ||
+	     (found->version_minor == need_minor &&
+	      found->version_patchlevel < need_patch);
+   if (too_old) {
+      /* Incompatible drm version */
+      fprintf(stderr,
+	       "[dri] I830DRIScreenInit failed because of a version "
+	       "mismatch.\n"
+	       "[dri] i915.o kernel module version is %d.%d.%d "
+	       "but version 1.%d.%d or newer is needed.\n"
+	       "[dri] Disabling DRI.\n",
+	       found->version_major,
+	       found->version_minor,
+	       found->version_patchlevel,
+	       need_minor,
+	       need_patch);
+   } else {
+      pI830->drmMinor = found->version_minor;
+   }
+   drmFreeVersion(found);
+   return !too_old;
+}
+
+/* Program the LP ring registers from pI830->LpRing, then rewrite the fences and DSPABASE. */
+static void
+I830SetRingRegs(const DRIDriverContext *ctx, I830Rec *pI830)
+{
+  unsigned int itemp;
+  unsigned char *MMIO = ctx->MMIOAddress;	/* base used by the OUTREG()/INREG() macros */
+
+   OUTREG(LP_RING + RING_LEN, 0);
+   OUTREG(LP_RING + RING_TAIL, 0);
+   OUTREG(LP_RING + RING_HEAD, 0);
+
+   if ((long)(pI830->LpRing->mem.Start & I830_RING_START_MASK) !=
+       pI830->LpRing->mem.Start) {
+      fprintf(stderr,
+		 "I830SetRingRegs: Ring buffer start (%lx) violates its "
+		 "mask (%x)\n", pI830->LpRing->mem.Start, I830_RING_START_MASK);
+   }
+   /* Don't care about the old value.  Reserved bits must be zero anyway. */
+   itemp = pI830->LpRing->mem.Start & I830_RING_START_MASK;
+   OUTREG(LP_RING + RING_START, itemp);
+
+   if (((pI830->LpRing->mem.Size - 4096) & I830_RING_NR_PAGES) !=
+       pI830->LpRing->mem.Size - 4096) {
+      fprintf(stderr,
+		 "I830SetRingRegs: Ring buffer size - 4096 (%lx) violates its "
+		 "mask (%x)\n", pI830->LpRing->mem.Size - 4096,
+		 I830_RING_NR_PAGES);
+   }
+   /* Don't care about the old value.  Reserved bits must be zero anyway. */
+   itemp = (pI830->LpRing->mem.Size - 4096) & I830_RING_NR_PAGES;
+   itemp |= (RING_NO_REPORT | RING_VALID);
+   OUTREG(LP_RING + RING_LEN, itemp);
+   /* Read head and tail back and derive the free space, wrapping by the ring size. */
+   pI830->LpRing->head = INREG(LP_RING + RING_HEAD) & I830_HEAD_MASK;
+   pI830->LpRing->tail = INREG(LP_RING + RING_TAIL);
+   pI830->LpRing->space = pI830->LpRing->head - (pI830->LpRing->tail + 8);
+   if (pI830->LpRing->space < 0)
+      pI830->LpRing->space += pI830->LpRing->mem.Size;
+
+   SetFenceRegs(ctx, pI830);
+   
+   /* RESET THE DISPLAY PIPE TO POINT TO THE FRONTBUFFER - hacky
+      hacky hacky */
+   OUTREG(DSPABASE, pI830->FrontBuffer.Start + pI830->LinearAddr);
+}
+
+/* Issue DRM_I830_SETPARAM for one (param, value) pair; FALSE if the ioctl fails. */
+static Bool
+I830SetParam(const DRIDriverContext *ctx, int param, int value)
+{
+   drmI830SetParam req;
+
+   memset(&req, 0, sizeof(req));
+   req.param = param;
+   req.value = value;
+
+   if (drmCommandWrite(ctx->drmFD, DRM_I830_SETPARAM, &req, sizeof(req)) == 0)
+      return TRUE;
+
+   fprintf(stderr, "I830 SetParam Failed\n");
+   return FALSE;
+}
+
+/* Register DRM maps for the front, back, depth and texture areas described in
+ * *sarea, storing each handle back into it.  The front buffer is mapped as
+ * DRM_FRAME_BUFFER at front_offset + LinearAddr; the rest are DRM_AGP maps at
+ * their raw offsets.  Also records the front map as ctx->shared.hFrameBuffer
+ * and overwrites ctx->shared.fbSize.  Stops at the first failure (FALSE)
+ * without removing maps that were already added. */
+static Bool
+I830DRIMapScreenRegions(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea)
+{
+   const int fd = ctx->drmFD;
+   drm_handle_t front_base = (drm_handle_t)(sarea->front_offset + pI830->LinearAddr);
+
+   fprintf(stderr,
+              "[drm] Mapping front buffer\n");
+
+   if (drmAddMap(fd, front_base, sarea->front_size,
+                 DRM_FRAME_BUFFER,  /*DRM_AGP,*/
+                 0, &sarea->front_handle) < 0) {
+     fprintf(stderr,
+	     "[drm] drmAddMap(front_handle) failed. Disabling DRI\n");
+      return FALSE;
+   }
+   ctx->shared.hFrameBuffer = sarea->front_handle;
+   ctx->shared.fbSize = sarea->front_size;
+   fprintf(stderr, "[drm] Front Buffer = 0x%08x\n",
+	   sarea->front_handle);
+
+   if (drmAddMap(fd, (drm_handle_t)(sarea->back_offset), sarea->back_size,
+                 DRM_AGP, 0, &sarea->back_handle) < 0) {
+      fprintf(stderr,
+                 "[drm] drmAddMap(back_handle) failed. Disabling DRI\n");
+      return FALSE;
+   }
+   fprintf(stderr, "[drm] Back Buffer = 0x%08x\n",
+              sarea->back_handle);
+
+   if (drmAddMap(fd, (drm_handle_t)sarea->depth_offset, sarea->depth_size,
+                 DRM_AGP, 0, &sarea->depth_handle) < 0) {
+      fprintf(stderr,
+                 "[drm] drmAddMap(depth_handle) failed. Disabling DRI\n");
+      return FALSE;
+   }
+   fprintf(stderr, "[drm] Depth Buffer = 0x%08x\n",
+              sarea->depth_handle);
+
+   if (drmAddMap(fd, (drm_handle_t)sarea->tex_offset, sarea->tex_size,
+		 DRM_AGP, 0, &sarea->tex_handle) < 0) {
+      fprintf(stderr,
+		 "[drm] drmAddMap(tex_handle) failed. Disabling DRI\n");
+      return FALSE;
+   }
+   fprintf(stderr, "[drm] textures = 0x%08x\n",
+	      sarea->tex_handle);
+
+   return TRUE;
+}
+
+
+/* Remove whichever of the front, back, depth and texture DRM maps are still
+ * recorded in *sarea, zeroing each handle after it has been removed. */
+static void
+I830DRIUnmapScreenRegions(const DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea)
+{
+   if (sarea->front_handle != 0) {
+      drmRmMap(ctx->drmFD, sarea->front_handle);
+      sarea->front_handle = 0;
+   }
+   if (sarea->back_handle != 0) {
+      drmRmMap(ctx->drmFD, sarea->back_handle);
+      sarea->back_handle = 0;
+   }
+   if (sarea->depth_handle != 0) {
+      drmRmMap(ctx->drmFD, sarea->depth_handle);
+      sarea->depth_handle = 0;
+   }
+   if (sarea->tex_handle != 0) {
+      drmRmMap(ctx->drmFD, sarea->tex_handle);
+      sarea->tex_handle = 0;
+   }
+}
+
+/* Start the kernel's simple memory manager for the AGP region (start 0, size
+ * sarea->tex_size) and, if that succeeds, pass on the texture LRU granularity. */
+static void
+I830InitTextureHeap(const DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea)
+{
+   drmI830MemInitHeap heap;
+
+   heap.region = I830_MEM_REGION_AGP;
+   heap.start  = 0;
+   heap.size   = sarea->tex_size;
+
+   if (drmCommandWrite(ctx->drmFD, DRM_I830_INIT_HEAP, &heap, sizeof(heap))) {
+      fprintf(stderr,
+		    "[drm] Failed to initialized agp heap manager\n");
+      return;
+   }
+
+   fprintf(stderr, "[drm] Initialized kernel agp heap manager, %d\n",
+		    sarea->tex_size);
+   I830SetParam(ctx, I830_SETPARAM_TEX_LRU_LOG_GRANULARITY,
+		      sarea->log_tex_granularity);
+}
+
+/* Map the ring buffer, initialise kernel DMA, map the screen regions, start
+ * the texture heap and install the IRQ handler.  Returns FALSE at the first
+ * failure; nothing that was set up before the failure is undone here. */
+static Bool
+I830DRIDoMappings(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea)
+{
+  if (drmAddMap(ctx->drmFD,
+		(drm_handle_t)pI830->LpRing->mem.Start,
+		pI830->LpRing->mem.Size, DRM_AGP, 0,
+		&pI830->ring_map) < 0) {
+    fprintf(stderr,
+	    "[drm] drmAddMap(ring_map) failed. Disabling DRI\n");
+    return FALSE;
+  }
+  fprintf(stderr, "[drm] ring buffer = 0x%08x\n",
+	  pI830->ring_map);
+
+  if (I830InitDma(ctx, pI830) == FALSE)
+    return FALSE;
+
+  /* This result used to be ignored, letting setup continue with missing maps. */
+  if (!I830DRIMapScreenRegions(ctx, pI830, sarea))
+    return FALSE;
+  I830InitTextureHeap(ctx, pI830, sarea);
+
+  if (ctx->pciDevice != PCI_CHIP_845_G &&
+      ctx->pciDevice != PCI_CHIP_I830_M) {
+    I830SetParam(ctx, I830_SETPARAM_USE_MI_BATCHBUFFER_START, 1 );
+  }
+
+  /* Install the IRQ handler.  NOTE(review): ctx->pciDevice is compared with chip IDs above yet used as a bus-address part here -- confirm. */
+  pI830->irq = drmGetInterruptFromBusID(ctx->drmFD,
+					ctx->pciBus,
+					ctx->pciDevice,
+					ctx->pciFunc);
+
+  if (drmCtlInstHandler(ctx->drmFD, pI830->irq)) {
+    fprintf(stderr,
+	    "[drm] failure adding irq handler\n");
+    pI830->irq = 0;
+    return FALSE;
+  }
+  else
+    fprintf(stderr,
+	    "[drm] dma control initialized, using IRQ %d\n",
+	    pI830->irq);
+
+  fprintf(stderr, "[dri] visual configs initialized\n");
+
+  return TRUE;
+}
+
+/* Zero the front and back buffers by mapping each DRM handle into this
+ * process, clearing it with drimemsetio() and unmapping it again.
+ * Returns FALSE as soon as a buffer cannot be mapped; a front buffer that
+ * was already cleared stays cleared. */
+static Bool
+I830ClearScreen(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea)
+{
+  drmAddress mapped;
+  int err;
+
+  /* Front buffer. */
+  err = drmMap(ctx->drmFD,
+	       sarea->front_handle,
+	       sarea->front_size,
+	       &mapped);
+  if (err != 0) {
+    fprintf(stderr, "Unable to map front buffer\n");
+    return FALSE;
+  }
+
+  drimemsetio((char *)mapped,
+	      0,
+	      sarea->front_size);
+  drmUnmap(mapped, sarea->front_size);
+
+  /* Back buffer. */
+  err = drmMap(ctx->drmFD,
+	       sarea->back_handle,
+	       sarea->back_size,
+	       &mapped);
+  if (err != 0) {
+    fprintf(stderr, "Unable to map back buffer\n");
+    return FALSE;
+  }
+
+  drimemsetio((char *)mapped,
+	      0,
+	      sarea->back_size);
+  drmUnmap(mapped, sarea->back_size);
+
+  return TRUE;
+}
+
+/* Open the i915 DRM device, create and map the SAREA, register the MMIO range,
+ * carve up stolen/AGP memory, publish the layout in the SAREA and in
+ * ctx->driverClientMsg, then set up mappings, tiling and the ring registers.
+ * Returns TRUE on success; failure paths return 0/FALSE with no cleanup of their own. */
+static Bool
+I830ScreenInit(DRIDriverContext *ctx, I830Rec *pI830)
+{
+   I830DRIPtr pI830DRI;
+   drmI830Sarea *pSAREAPriv;
+   int err;
+      
+   drm_page_size = getpagesize();   
+
+   pI830->registerSize = ctx->MMIOSize;
+   /* This is a hack for now.  We have to have more than a 4k page here
+    * because of the size of the state.  However, the state should be
+    * in a per-context mapping.  This will be added in the Mesa 3.5 port
+    * of the I830 driver.
+    */
+   ctx->shared.SAREASize = SAREA_MAX;
+
+   /* Note that drmOpen will try to load the kernel module, if needed. */
+   ctx->drmFD = drmOpen("i915", NULL );
+   if (ctx->drmFD < 0) {
+      fprintf(stderr, "[drm] drmOpen failed\n");
+      return 0;
+   }
+
+   if ((err = drmSetBusid(ctx->drmFD, ctx->pciBusID)) < 0) {
+      fprintf(stderr, "[drm] drmSetBusid failed (%d, %s), %s\n",
+	      ctx->drmFD, ctx->pciBusID, strerror(-err));
+      return 0;
+   }
+
+   if (drmAddMap( ctx->drmFD,
+		  0,
+		  ctx->shared.SAREASize,
+		  DRM_SHM,
+		  DRM_CONTAINS_LOCK,
+		  &ctx->shared.hSAREA) < 0)
+   {
+     fprintf(stderr, "[drm] drmAddMap failed\n");
+     return 0;
+   }
+
+   fprintf(stderr, "[drm] added %d byte SAREA at 0x%08x\n",
+	   ctx->shared.SAREASize, ctx->shared.hSAREA);
+   if (drmMap( ctx->drmFD,
+	       ctx->shared.hSAREA,
+	       ctx->shared.SAREASize,
+	       (drmAddressPtr)(&ctx->pSAREA)) < 0)
+   {
+      fprintf(stderr, "[drm] drmMap failed\n");
+      return 0;
+   
+   }
+   
+   memset(ctx->pSAREA, 0, ctx->shared.SAREASize);
+   fprintf(stderr, "[drm] mapped SAREA 0x%08x to %p, size %d\n",
+	   ctx->shared.hSAREA, ctx->pSAREA, ctx->shared.SAREASize);
+   
+   if (drmAddMap(ctx->drmFD, 
+		 ctx->MMIOStart,
+		 ctx->MMIOSize,
+		 DRM_REGISTERS, 
+		 DRM_READ_ONLY, 
+		 &pI830->registerHandle) < 0) {
+      fprintf(stderr, "[drm] drmAddMap mmio failed\n");	
+      return 0;
+   }
+   fprintf(stderr,
+	   "[drm] register handle = 0x%08x\n", pI830->registerHandle);
+
+   if (!I830CheckDRMVersion(ctx, pI830)) {
+     return FALSE;
+   }
+
+   /* Create a 'server' context so we can grab the lock for
+    * initialization ioctls.
+    */
+   if ((err = drmCreateContext(ctx->drmFD, &ctx->serverContext)) != 0) {
+      fprintf(stderr, "%s: drmCreateContext failed %d\n", __FUNCTION__, err);
+      return 0;
+   }
+
+   DRM_LOCK(ctx->drmFD, ctx->pSAREA, ctx->serverContext, 0); 
+   /* NOTE(review): no matching DRM_UNLOCK appears in this function, including on the error returns below. */
+   /* Initialize the SAREA private data structure */
+   pSAREAPriv = (drmI830Sarea *)(((char*)ctx->pSAREA) + 
+				 sizeof(drm_sarea_t));
+   memset(pSAREAPriv, 0, sizeof(*pSAREAPriv));
+
+   pI830->StolenMemory.Size = I830DetectMemory(ctx, pI830);
+   pI830->StolenMemory.Start = 0;
+   pI830->StolenMemory.End = pI830->StolenMemory.Size;
+   /* The aperture used by AllocFromAGP() starts where stolen memory ends; NOTE(review): KB(40000) end is unexplained. */
+   pI830->MemoryAperture.Start = pI830->StolenMemory.End;
+   pI830->MemoryAperture.End = KB(40000);
+   pI830->MemoryAperture.Size = pI830->MemoryAperture.End - pI830->MemoryAperture.Start;
+
+   pI830->StolenPool.Fixed = pI830->StolenMemory;
+   pI830->StolenPool.Total = pI830->StolenMemory;
+   pI830->StolenPool.Free = pI830->StolenPool.Total;
+   pI830->FreeMemory = pI830->StolenPool.Total.Size;
+
+   if (!AgpInit(ctx, pI830))
+     return FALSE;
+
+   if (I830AllocateMemory(ctx, pI830) == FALSE)
+   {
+     return FALSE;
+   }
+
+   if (I830BindMemory(ctx, pI830) == FALSE)
+   {
+     return FALSE;
+   }
+
+   pSAREAPriv->front_offset = pI830->FrontBuffer.Start;
+   pSAREAPriv->front_size = pI830->FrontBuffer.Size;
+   pSAREAPriv->width = ctx->shared.virtualWidth;
+   pSAREAPriv->height = ctx->shared.virtualHeight;
+   pSAREAPriv->pitch = ctx->shared.virtualWidth;
+   pSAREAPriv->virtualX = ctx->shared.virtualWidth;
+   pSAREAPriv->virtualY = ctx->shared.virtualHeight;
+   pSAREAPriv->back_offset = pI830->BackBuffer.Start;
+   pSAREAPriv->back_size = pI830->BackBuffer.Size;
+   pSAREAPriv->depth_offset = pI830->DepthBuffer.Start;
+   pSAREAPriv->depth_size = pI830->DepthBuffer.Size;
+   pSAREAPriv->tex_offset = pI830->TexMem.Start;
+   pSAREAPriv->tex_size = pI830->TexMem.Size;
+   pSAREAPriv->log_tex_granularity = pI830->TexGranularity;
+   /* NOTE(review): the malloc() result below is written through without a NULL check. */
+   ctx->driverClientMsg = malloc(sizeof(I830DRIRec));
+   ctx->driverClientMsgSize = sizeof(I830DRIRec);
+   pI830DRI = (I830DRIPtr)ctx->driverClientMsg;
+   pI830DRI->deviceID = pI830->Chipset;
+   pI830DRI->regsSize = I830_REG_SIZE;
+   pI830DRI->width = ctx->shared.virtualWidth;
+   pI830DRI->height = ctx->shared.virtualHeight;
+   pI830DRI->mem = ctx->shared.fbSize;
+   pI830DRI->cpp = ctx->cpp;
+   pI830DRI->backOffset = pI830->BackBuffer.Start;
+   pI830DRI->backPitch = pI830->BackBuffer.Pitch; 
+
+   pI830DRI->depthOffset = pI830->DepthBuffer.Start;
+   pI830DRI->depthPitch = pI830->DepthBuffer.Pitch; 
+
+   pI830DRI->fbOffset = pI830->FrontBuffer.Start;
+   pI830DRI->fbStride = pI830->FrontBuffer.Pitch;
+
+   pI830DRI->bitsPerPixel = ctx->bpp;
+   pI830DRI->sarea_priv_offset = sizeof(drm_sarea_t);
+   
+   err = I830DRIDoMappings(ctx, pI830, pSAREAPriv);
+   if (err == FALSE)
+       return FALSE;
+
+   I830SetupMemoryTiling(ctx, pI830);
+
+   /* Quick hack to clear the front & back buffers.  Could also use
+    * the clear ioctl to do this, but would need to setup hw state
+    * first.
+    */
+   I830ClearScreen(ctx, pI830, pSAREAPriv);	/* result is not checked */
+
+   I830SetRingRegs(ctx, pI830);
+
+   return TRUE;
+}
+
+
+/**
+ * \brief Validate the fbdev mode.
+ * 
+ * \param ctx display handle (not used).
+ *
+ * \return always one.
+ *
+ * This implementation performs no checks and saves no state; it is the
+ * first entry of the exported \c __driDriver table.
+ *
+ */
+static int i830ValidateMode( const DRIDriverContext *ctx )
+{
+  return 1;
+}
+
+/**
+ * \brief Examine mode returned by fbdev.
+ * 
+ * \param ctx display handle.
+ *
+ * \return always one.
+ *
+ * Reprograms the ring, fence and DSPABASE registers by calling
+ * I830SetRingRegs() with the driver-private record from \p ctx.
+ * \sa i830ValidateMode().
+ */
+static int i830PostValidateMode( const DRIDriverContext *ctx )
+{
+  I830Rec *pI830 = ctx->driverPrivate;
+
+  I830SetRingRegs(ctx, pI830);
+  return 1;
+}
+
+
+/**
+ * \brief Initialize the framebuffer device mode
+ *
+ * \param ctx display handle.
+ *
+ * \return one on success, or zero on failure.
+ *
+ * Rounds the virtual width up for the pixel size and then to one of the
+ * entries in \c pitches, allocates the driver-private I830Rec and its ring
+ * buffer record, and calls I830ScreenInit() for the screen initialization.
+ * Allocation failures are reported as zero instead of being dereferenced.
+ */
+static int i830InitFBDev( DRIDriverContext *ctx )
+{
+  I830Rec *pI830 = calloc(1, sizeof(I830Rec));
+  int i;
+
+  if (!pI830)
+    return 0;
+  pI830->LpRing = calloc(1, sizeof(I830RingBuffer));
+  if (!pI830->LpRing) {
+    free(pI830);
+    return 0;
+  }
+
+  /* Width alignment depends on bytes per pixel; other depths are left as is. */
+  switch (ctx->bpp / 8) {
+  case 1: ctx->shared.virtualWidth = (ctx->shared.virtualWidth + 127) & ~127; break;
+  case 2: ctx->shared.virtualWidth = (ctx->shared.virtualWidth +  31) &  ~31; break;
+  case 3:
+  case 4: ctx->shared.virtualWidth = (ctx->shared.virtualWidth +  15) &  ~15; break;
+  }
+  ctx->shared.Width = ctx->shared.virtualWidth;
+
+  for (i = 0; pitches[i] != 0; i++) {
+    if (pitches[i] >= ctx->shared.virtualWidth) {
+      ctx->shared.virtualWidth = pitches[i];
+      break;
+    }
+  }
+
+  ctx->driverPrivate = (void *)pI830;
+  pI830->Chipset = ctx->chipset;
+  pI830->LinearAddr = ctx->FBStart;
+
+  /* On failure pI830 stays reachable through ctx->driverPrivate, as before. */
+  if (!I830ScreenInit( ctx, pI830 ))
+    return 0;
+
+  return 1;
+}
+
+
+/**
+ * \brief The screen is being closed, so clean up any state and free any
+ * resources used by the DRI.
+ *
+ * \param ctx display handle.
+ *
+ * Removes the IRQ handler and kernel DMA state, removes the buffer maps, unmaps
+ * the SAREA, closes the DRM file descriptor and frees the driver private data.
+ */
+static void i830HaltFBDev( DRIDriverContext *ctx )
+{
+  drmI830Sarea *pSAREAPriv;
+  I830Rec *pI830 = ctx->driverPrivate;
+
+  if (pI830 && pI830->irq) {	/* the private record used to be read before its NULL check */
+    drmCtlUninstHandler(ctx->drmFD);
+    pI830->irq = 0;
+  }
+  I830CleanupDma(ctx);
+
+  pSAREAPriv = (drmI830Sarea *)(((char*)ctx->pSAREA) + 
+				sizeof(drm_sarea_t));
+  I830DRIUnmapScreenRegions(ctx, pI830, pSAREAPriv);
+  drmUnmap( ctx->pSAREA, ctx->shared.SAREASize );
+  drmClose(ctx->drmFD);
+
+  if (pI830) {
+    free(pI830->LpRing);	/* allocated in i830InitFBDev(); previously leaked */
+    free(pI830);
+    ctx->driverPrivate = 0;
+  }
+}
+
+
+extern void i810NotifyFocus( int );	/* referenced below only when _EMBEDDED is defined */
+
+/**
+ * \brief Exported driver interface for Mini GLX.
+ *
+ * \sa DRIDriverRec.
+ */
+const struct DRIDriverRec __driDriver = {
+   i830ValidateMode,
+   i830PostValidateMode,
+   i830InitFBDev,
+   i830HaltFBDev,
+   NULL,	/* presumably the engine-shutdown slot; I830EngineShutdown is not provided in this file */
+   NULL,	/* presumably the engine-restore slot; I830EngineRestore is not provided in this file */
+#ifndef _EMBEDDED
+   0,
+#else
+   i810NotifyFocus, 
+#endif
+};
diff --git a/i965/Makefile.am b/i965/Makefile.am
new file mode 100644
index 0000000..163ad0f
--- /dev/null
+++ b/i965/Makefile.am
@@ -0,0 +1,79 @@
+AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING
+# Libraries belong in _LIBADD (not _LDFLAGS) so they follow the objects when linking.
+i965_dri_la_LTLIBRARIES = i965_dri.la
+i965_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) -Iserver -I../shared
+i965_dri_la_LDFLAGS = -module -noprefix -avoid-version
+i965_dri_la_LIBADD = -lm -ldl $(DRM_LIBS) $(DRI_LIBS)
+i965_dri_ladir = @libdir@/dri
+i965_dri_la_SOURCES = \
+	bufmgr_fake.c \
+	intel_batchbuffer.c \
+	intel_blit.c \
+	intel_buffer_objects.c \
+	intel_buffers.c \
+	intel_context.c \
+	intel_ioctl.c \
+	intel_mipmap_tree.c \
+	intel_regions.c \
+	intel_screen.c \
+	intel_span.c \
+	intel_pixel_copy.c \
+	intel_pixel_bitmap.c \
+	intel_state.c \
+	intel_tex.c \
+	../shared/intel_tex_layout.c \
+	intel_tex_validate.c \
+	brw_aub.c \
+	brw_aub_playback.c \
+	brw_cc.c \
+	brw_clip.c \
+	brw_clip_line.c \
+	brw_clip_point.c \
+	brw_clip_state.c \
+	brw_clip_tri.c \
+	brw_clip_unfilled.c \
+	brw_clip_util.c \
+	brw_context.c \
+	brw_curbe.c \
+	brw_draw.c \
+	brw_draw_upload.c \
+	brw_eu.c \
+	brw_eu_debug.c \
+	brw_eu_emit.c \
+	brw_eu_util.c \
+	brw_fallback.c \
+	brw_gs.c \
+	brw_gs_emit.c \
+	brw_gs_state.c \
+	brw_hal.c \
+	brw_metaops.c \
+	brw_misc_state.c \
+	brw_program.c \
+	brw_sf.c \
+	brw_sf_emit.c \
+	brw_sf_state.c \
+	brw_state_batch.c \
+	brw_state_cache.c \
+	brw_state_pool.c \
+	brw_state_upload.c \
+	brw_tex.c \
+	brw_tex_layout.c \
+	brw_urb.c \
+	brw_util.c \
+	brw_vs.c \
+	brw_vs_constval.c \
+	brw_vs_emit.c \
+	brw_vs_state.c \
+	brw_vs_tnl.c \
+	brw_vtbl.c \
+	brw_wm.c \
+	brw_wm_debug.c \
+	brw_wm_emit.c \
+	brw_wm_fp.c \
+	brw_wm_iz.c \
+	brw_wm_pass0.c \
+	brw_wm_pass1.c \
+	brw_wm_pass2.c \
+	brw_wm_sampler_state.c \
+	brw_wm_state.c \
+	brw_wm_surface_state.c 
diff --git a/i965/brw_aub.c b/i965/brw_aub.c
new file mode 100644
index 0000000..c549f7a
--- /dev/null
+++ b/i965/brw_aub.c
@@ -0,0 +1,353 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "brw_context.h"
+#include "brw_aub.h"
+#include "intel_regions.h"
+#include <stdio.h>
+
+extern char *__progname;
+
+
+/* Registers to control page table
+ */
+#define PGETBL_CTL       0x2020
+#define PGETBL_ENABLED   0x1
+
+#define NR_GTT_ENTRIES  65536	/* 256 mb */
+/* Report the failing function and line on stderr, then terminate the process. */
+#define FAIL										\
+do {											\
+   fprintf(stderr, "failed to write aub data at %s/%d\n", __FUNCTION__, __LINE__);	\
+   exit(1);										\
+} while (0)
+
+
+/* Emit the headers at the top of each aubfile and enable the page table
+ * through a PGETBL_CTL register write.  Write failures exit via FAIL.
+ */
+static void init_aubfile( FILE *aub_file )
+{   
+   struct aub_file_header fh;
+   struct aub_block_header bh;
+   unsigned int data;
+   static int nr;		/* bumped per call; stored in the header's time fields */
+
+   nr++;
+
+   /* Emit the aub header:
+    */
+   memset(&fh, 0, sizeof(fh));
+
+   fh.instruction_type = AUB_FILE_HEADER;
+   fh.minor = 0x0;
+   fh.major = 0x7;
+   /* Copy only up to __progname's NUL (strncpy zero-fills the remainder)
+    * rather than always reading 32 bytes from a possibly shorter string. */
+   strncpy((char *)fh.application, __progname, sizeof(fh.application));
+   fh.day = (nr>>24) & 0xff;
+   fh.month = 0x0;
+   fh.year = 0x0;
+   fh.timezone = 0x0;
+   fh.second = nr & 0xff;
+   fh.minute = (nr>>8) & 0xff;
+   fh.hour = (nr>>16) & 0xff;
+   fh.comment_length = 0x0;   
+   if (fwrite(&fh, sizeof(fh), 1, aub_file) < 1) 
+      FAIL;
+         
+   /* Setup the GTT starting at main memory address zero (!):
+    */
+   memset(&bh, 0, sizeof(bh));
+   
+   bh.instruction_type = AUB_BLOCK_HEADER;
+   bh.operation = BH_MMI0_WRITE32;
+   bh.type = 0x0;
+   bh.address_space = ADDR_GTT;	/* ??? */
+   bh.general_state_type = 0x0;
+   bh.surface_state_type = 0x0;
+   bh.address = PGETBL_CTL;
+   bh.length = 0x4;
+
+   if (fwrite(&bh, sizeof(bh), 1, aub_file) < 1) 
+      FAIL;
+
+   data = 0x0 | PGETBL_ENABLED;	/* the 4-byte payload announced by bh.length */
+   if (fwrite(&data, sizeof(data), 1, aub_file) < 1) 
+      FAIL;
+}
+
+/* Write GTT entries mapping [start_offset, start_offset + size) to fresh pages. */
+static void init_aub_gtt( struct brw_context *brw,
+			  GLuint start_offset, 
+			  GLuint size )
+{
+   FILE *aub_file = brw->intel.aub_file;
+   struct aub_block_header bh;
+   unsigned int i;
+
+   assert(start_offset + size < NR_GTT_ENTRIES * 4096);
+   /* One 4-byte entry per 4096-byte page: address and length below count entry
+    * bytes.  NOTE(review): a size that is not a page multiple is rounded down. */
+   memset(&bh, 0, sizeof(bh));
+   
+   bh.instruction_type = AUB_BLOCK_HEADER;
+   bh.operation = BH_DATA_WRITE;
+   bh.type = 0x0;
+   bh.address_space = ADDR_MAIN;
+   bh.general_state_type = 0x0;
+   bh.surface_state_type = 0x0;
+   bh.address =  start_offset / 4096 * 4;
+   bh.length = size / 4096 * 4;
+
+   if (fwrite(&bh, sizeof(bh), 1, aub_file) < 1) 
+      FAIL;
+   /* Each entry is the next free page address with bit 0 set (presumably "valid"). */
+   for (i = 0; i < size / 4096; i++) {
+      GLuint data = brw->next_free_page | 1;
+
+      brw->next_free_page += 4096;
+
+      if (fwrite(&data, sizeof(data), 1, aub_file) < 1) 
+	 FAIL;
+   }
+
+}
+/* Write a block header, then its payload zero-padded to a dword multiple. */
+static void write_block_header( FILE *aub_file,
+				struct aub_block_header *bh,
+				const GLuint *data,
+				GLuint sz )
+{
+   static const char zeros[4];		/* source of the padding bytes */
+   GLuint pad = (4 - (sz & 3)) & 3;	/* bytes missing to the next dword */
+   if (fwrite(bh, sizeof(*bh), 1, aub_file) < 1) 
+      FAIL;
+   /* Emit exactly sz payload bytes, then pad; a 0-byte fwrite() is no error. */
+   if ((sz && fwrite(data, sz, 1, aub_file) < 1) ||
+       (pad && fwrite(zeros, pad, 1, aub_file) < 1))
+      FAIL;
+   fflush(aub_file);
+}
+
+
+static void write_dump_bmp( FILE *aub_file,
+			    struct aub_dump_bmp *db )
+{
+   const size_t written = fwrite(db, sizeof(*db), 1, aub_file);
+   if (written != 1)		/* the record did not go out: abort via FAIL */
+      FAIL;
+   fflush(aub_file);		/* push the record out immediately */
+}
+
+
+
+static void brw_aub_gtt_data( struct intel_context *intel,
+			      GLuint offset,
+			      const void *data,
+			      GLuint sz,
+			      GLuint type,
+			      GLuint state_type )
+{
+   /* Record a data write of sz bytes at GTT offset `offset` in the aubfile.
+    * state_type lands in the general-state slot for DW_GENERAL_STATE writes
+    * and in the surface-state slot for every other type.
+    */
+   const int is_general = (type == DW_GENERAL_STATE);
+   struct aub_block_header hdr;
+
+   /* Every field, padding included, is assigned before the header is used. */
+   hdr.instruction_type = AUB_BLOCK_HEADER;
+   hdr.operation = BH_DATA_WRITE;
+   hdr.type = type;
+   hdr.address_space = ADDR_GTT;
+   hdr.pad0 = 0;
+   hdr.general_state_type = is_general ? state_type : 0;
+   hdr.surface_state_type = is_general ? 0 : state_type;
+   hdr.pad1 = 0;
+   hdr.address = offset;
+   hdr.length = sz;
+
+   /* The header carries the caller's sz unchanged; the same sz is passed on
+    * to write_block_header together with the payload. */
+   write_block_header(intel->aub_file, &hdr, data, sz);
+}
+
+
+/* Record sz bytes of ring commands at GTT offset `offset` in the aubfile. */
+static void brw_aub_gtt_cmds( struct intel_context *intel,
+			      GLuint offset,
+			      const void *data,
+			      GLuint sz )
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   struct aub_block_header bh;   
+   GLuint type = CW_PRIMARY_RING_A;
+   /* Command blocks are always tagged as primary ring A here; both
+    * state-type fields and the padding are written as zero. */
+   bh.instruction_type = AUB_BLOCK_HEADER;
+   bh.operation = BH_COMMAND_WRITE;
+   bh.type = type;
+   bh.address_space = ADDR_GTT;
+   bh.pad0 = 0;
+   bh.general_state_type = 0;
+   bh.surface_state_type = 0;
+   bh.pad1 = 0;
+   bh.address = offset;
+   bh.length = sz;
+
+   write_block_header(brw->intel.aub_file, &bh, data, sz);
+}
+
+static void brw_aub_dump_bmp( struct intel_context *intel,
+			      GLuint buffer )
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   intelScreenPrivate *intelScreen = brw->intel.intelScreen;
+   struct aub_dump_bmp db;
+
+   /* Emit a "dump bitmap" record describing the front buffer (buffer == 0)
+    * or the back buffer (any other value).
+    */
+   /* Fields common to both buffers.  The format code is chosen from the
+    * screen's cpp: 0x7 when it is 4, 0x3 otherwise.
+    */
+   db.instruction_type = AUB_DUMP_BMP;
+   db.xmin = 0;
+   db.ymin = 0;
+   db.format = (intelScreen->cpp == 4) ? 0x7 : 0x3;
+
+   if (buffer == 0) {
+      /* Front buffer: geometry comes from the screen description; the
+       * stored pitch is front.pitch divided by cpp.
+       */
+      db.bpp = intelScreen->cpp * 8;
+      db.pitch = intelScreen->front.pitch / intelScreen->cpp;
+      db.xsize = intelScreen->width;
+      db.ysize = intelScreen->height;
+      db.addr = intelScreen->front.offset;
+      db.unknown = 0x0;		/* 4: xmajor tiled, 0: not tiled */
+   }
+   else {
+      /* Back buffer: geometry comes from the back region, whose pitch is
+       * used both as pitch and as x size.
+       */
+      db.bpp = intel->back_region->cpp * 8;
+      db.pitch = intel->back_region->pitch;
+      db.xsize = intel->back_region->pitch;
+      db.ysize = intel->back_region->height;
+      db.addr = intelScreen->back.offset;
+      db.unknown = intel->back_region->tiled ? 0x4 : 0x0;
+   }
+
+   write_dump_bmp(brw->intel.aub_file, &db);
+}
+
+/* Attempt to prevent monster aubfiles by closing and reopening the capture
+ * file when the state pools wrap.  brw->wrap is set even with no file open.
+ */
+static void brw_aub_wrap( struct intel_context *intel )
+{
+   struct brw_context *brw = brw_context(&intel->ctx);   
+   if (intel->aub_file) {
+      brw_aub_destroy(brw);	/* closes the current capture file */
+      brw_aub_init(brw);	/* opens a file again and re-emits the headers */
+   }
+   brw->wrap = 1;		/* ??? */
+}
+
+/* Start an aubfile capture if INTEL_AUBFILE or INTEL_AUB is set and
+ * INTEL_REPLAY is not.  Always returns 0; exits if the file can't be opened. */
+int brw_aub_init( struct brw_context *brw )
+{
+   struct intel_context *intel = &brw->intel;
+   intelScreenPrivate *intelScreen = intel->intelScreen;
+   char filename[80];
+   int val;
+   static int i = 0;
+
+   i++;
+
+   if (_mesa_getenv("INTEL_REPLAY"))
+      return 0;
+
+   /* INTEL_AUBFILE gives a name prefix and rotates through four numbered
+    * files; INTEL_AUB alone derives the name from the program name. */
+   if (_mesa_getenv("INTEL_AUBFILE"))
+      val = snprintf(filename, sizeof(filename), "%s%d.aub", _mesa_getenv("INTEL_AUBFILE"), i%4);
+   else if (_mesa_getenv("INTEL_AUB"))
+      val = snprintf(filename, sizeof(filename), "%s.aub", __progname);
+   else
+      return 0;
+
+   /* snprintf returns the untruncated length, so val >= sizeof(filename)
+    * means the name did not fit; fall back to a fixed name in that case. */
+   if (val < 0 || (size_t)val >= sizeof(filename)) 
+      strcpy(filename, "default.aub");   
+
+   _mesa_printf("--> Aub file: %s\n", filename);
+   brw->intel.aub_file = fopen(filename, "w");
+   if (!brw->intel.aub_file) {
+      _mesa_printf("couldn't open aubfile\n");
+      exit(1);
+   }
+
+   brw->intel.vtbl.aub_commands = brw_aub_gtt_cmds;
+   brw->intel.vtbl.aub_dump_bmp = brw_aub_dump_bmp;
+   brw->intel.vtbl.aub_gtt_data = brw_aub_gtt_data;
+   brw->intel.vtbl.aub_wrap = brw_aub_wrap;
+   
+   init_aubfile(brw->intel.aub_file);
+
+   /* The GTT is located starting address zero in main memory.  Pages
+    * to populate the gtt start after this point.
+    */
+   brw->next_free_page = (NR_GTT_ENTRIES * 4 + 4095) & ~4095;
+
+   /* More or less correspond with all the agp regions mapped by the
+    * driver:
+    */
+   init_aub_gtt(brw, 0, 4096*4); /* so new fulsim doesn't crash */
+   init_aub_gtt(brw, intelScreen->front.offset, intelScreen->back.size);
+   init_aub_gtt(brw, intelScreen->back.offset, intelScreen->back.size);
+   init_aub_gtt(brw, intelScreen->depth.offset, intelScreen->back.size);
+   init_aub_gtt(brw, intelScreen->tex.offset, intelScreen->tex.size);
+
+   return 0;
+}
+
+void brw_aub_destroy( struct brw_context *brw )
+{
+   if (!brw->intel.aub_file)		/* no capture file open: nothing to do */
+      return;
+   fclose(brw->intel.aub_file);
+   brw->intel.aub_file = NULL;		/* mark the capture as closed */
+}
diff --git a/i965/brw_aub.h b/i965/brw_aub.h
new file mode 100644
index 0000000..198e36d
--- /dev/null
+++ b/i965/brw_aub.h
@@ -0,0 +1,172 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#ifndef BRW_AUB_H
+#define BRW_AUB_H
+
+struct aub_file_header {
+   unsigned int instruction_type;	/* AUB_FILE_HEADER */
+   unsigned int pad0:16;
+   unsigned int minor:8;		/* presumably the format version; the aub writer stores 7.0 */
+   unsigned int major:8;
+   unsigned char application[8*4];	/* 32 bytes; the aub writer stores the program name */
+   unsigned int day:8;			/* the aub writer packs a per-file counter, not a date, */
+   unsigned int month:8;		/* into day/hour/minute/second and zeroes the rest */
+   unsigned int year:16;
+   unsigned int timezone:8;
+   unsigned int second:8;
+   unsigned int minute:8;
+   unsigned int hour:8;
+   unsigned int comment_length:16;   	/* the aub writer stores 0 */
+   unsigned int pad1:16;
+};
+
+struct aub_block_header {
+   unsigned int instruction_type;	/* AUB_BLOCK_HEADER */
+   unsigned int operation:8;		/* enum bh_operation */
+   unsigned int type:8;			/* data_write_type or command_write_type, by operation */
+   unsigned int address_space:8;	/* enum address_space */
+   unsigned int pad0:8;
+   unsigned int general_state_type:8;	/* enum data_write_general_state_type */
+   unsigned int surface_state_type:8;	/* enum data_write_surface_state_type */
+   unsigned int pad1:16;
+   unsigned int address;		/* destination offset of the payload */
+   unsigned int length;			/* payload bytes; the stored payload is dword-padded */
+};
+
+struct aub_dump_bmp {
+   unsigned int instruction_type;	/* AUB_DUMP_BMP */
+   unsigned int xmin:16;
+   unsigned int ymin:16;
+   unsigned int pitch:16;
+   unsigned int bpp:8;			/* the aub writer stores cpp * 8 */
+   unsigned int format:8;		/* the aub writer stores 0x7 for cpp 4, else 0x3 */
+   unsigned int xsize:16;
+   unsigned int ysize:16;
+   unsigned int addr;			/* offset of the buffer to dump */
+   unsigned int unknown;		/* the aub writer stores 4 for tiled, 0 for untiled */
+};
+
+enum bh_operation {			/* values of aub_block_header.operation */
+   BH_COMMENT,
+   BH_DATA_WRITE,
+   BH_COMMAND_WRITE,
+   BH_MMI0_WRITE32,
+   BH_END_SCENE,
+   BH_CONFIG_MEMORY_MAP,
+   BH_MAX_OPERATION
+};
+
+enum command_write_type {		/* aub_block_header.type for BH_COMMAND_WRITE; starts at 1 */
+   CW_HWB_RING = 1,
+   CW_PRIMARY_RING_A,
+   CW_PRIMARY_RING_B,		/* XXX - disagreement with listaub! */
+   CW_PRIMARY_RING_C,
+   CW_MAX_TYPE
+};
+
+enum data_write_type {			/* aub_block_header.type for BH_DATA_WRITE blocks */
+   DW_NOTYPE,
+   DW_BATCH_BUFFER,
+   DW_BIN_BUFFER,
+   DW_BIN_POINTER_LIST,
+   DW_SLOW_STATE_BUFFER,
+   DW_VERTEX_BUFFER,
+   DW_2D_MAP,
+   DW_CUBE_MAP,
+   DW_INDIRECT_STATE_BUFFER,
+   DW_VOLUME_MAP,
+   DW_1D_MAP,
+   DW_CONSTANT_BUFFER,
+   DW_CONSTANT_URB_ENTRY,
+   DW_INDEX_BUFFER,
+   DW_GENERAL_STATE,		/* sub-type goes in general_state_type */
+   DW_SURFACE_STATE,
+   DW_MEDIA_OBJECT_INDIRECT_DATA,
+   DW_MAX_TYPE
+};
+
+enum data_write_general_state_type {	/* values of aub_block_header.general_state_type */
+   DWGS_NOTYPE,
+   DWGS_VERTEX_SHADER_STATE,
+   DWGS_GEOMETRY_SHADER_STATE ,
+   DWGS_CLIPPER_STATE,
+   DWGS_STRIPS_FANS_STATE,
+   DWGS_WINDOWER_IZ_STATE,
+   DWGS_COLOR_CALC_STATE,
+   DWGS_CLIPPER_VIEWPORT_STATE,	/* was 0x7 */
+   DWGS_STRIPS_FANS_VIEWPORT_STATE,
+   DWGS_COLOR_CALC_VIEWPORT_STATE, /* was 0x9 */
+   DWGS_SAMPLER_STATE,
+   DWGS_KERNEL_INSTRUCTIONS,
+   DWGS_SCRATCH_SPACE,
+   DWGS_SAMPLER_DEFAULT_COLOR,
+   DWGS_INTERFACE_DESCRIPTOR,
+   DWGS_VLD_STATE,
+   DWGS_VFE_STATE,
+   DWGS_MAX_TYPE
+};
+
+enum data_write_surface_state_type {	/* values of aub_block_header.surface_state_type */
+   DWSS_NOTYPE,
+   DWSS_BINDING_TABLE_STATE,
+   DWSS_SURFACE_STATE,
+   DWSS_MAX_TYPE
+};
+
+enum memory_map_type {		/* NOTE(review): presumably for BH_CONFIG_MEMORY_MAP blocks -- confirm */
+   MM_DEFAULT,
+   MM_DYNAMIC,
+   MM_MAX_TYPE
+};
+
+enum address_space {		/* values of aub_block_header.address_space */
+   ADDR_GTT,
+   ADDR_LOCAL,
+   ADDR_MAIN,
+   ADDR_MAX
+};
+
+
+#define AUB_FILE_HEADER 0xe085000b
+#define AUB_BLOCK_HEADER 0xe0c10003
+#define AUB_DUMP_BMP 0xe09e0004
+
+struct brw_context;
+struct intel_context;
+
+int brw_aub_init( struct brw_context *brw );
+void brw_aub_destroy( struct brw_context *brw );
+
+int brw_playback_aubfile(struct brw_context *brw,
+			 const char *filename);
+
+#endif
diff --git a/i965/brw_aub_playback.c b/i965/brw_aub_playback.c
new file mode 100644
index 0000000..99d9475
--- /dev/null
+++ b/i965/brw_aub_playback.c
@@ -0,0 +1,446 @@
+
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "brw_aub.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "intel_ioctl.h"
+#include "bufmgr.h"
+
+struct aub_state {
+   struct intel_context *intel;	/* context the file is replayed on */
+   const char *map;		/* start of the mmap'ed aubfile */
+   unsigned int csr;		/* byte offset of the next record in map */
+   unsigned int sz;		/* size of the mapping in bytes */
+};
+
+
+static int gobble( struct aub_state *s, int size )
+{
+   const unsigned int end = s->csr + size;	/* cursor position after the skip */
+   if (end <= s->sz) {
+      s->csr = end;
+      return 0;
+   }
+   _mesa_printf("EOF in %s\n", __FUNCTION__);	/* skip would run past the mapping */
+   return 1;
+}
+/* Submit a flush command, then emit an IRQ and wait for it. */
+static void flush_and_fence( struct aub_state *s )
+{
+   struct intel_context *intel = s->intel;
+   GLuint buf[2];
+   /* Two dwords are sent: the flush command followed by a zero dword. */
+   buf[0] = intel->vtbl.flush_cmd();
+   buf[1] = 0;
+
+   intel_cmd_ioctl(intel, (char *)&buf, sizeof(buf));
+   /* Block until the IRQ emitted after the flush has been signalled. */
+   intelWaitIrq( intel, intelEmitIrqLocked( intel ));
+}
+/* Submit len bytes of commands (padded to an even dword count), then fence. */
+static void flush_cmds( struct aub_state *s,
+			const void *data,
+			int len )
+{
+   DBG("%s %d\n", __FUNCTION__, len);
+   if (len & 0x4) {		/* odd dword count: append one MI_NOOP */
+      unsigned int *tmp = malloc(len + 4);
+      if (tmp == NULL) {
+	 _mesa_printf("out of memory in %s\n", __FUNCTION__);
+	 return;
+      }
+      DBG("padding to octword\n");
+      memcpy(tmp, data, len);
+      tmp[len/4] = MI_NOOP;
+      flush_cmds(s, tmp, len+4);
+      free(tmp);
+      return;
+   }
+
+   /* For ring data, just send off immediately via an ioctl.  This differs
+    * slightly from the original execution, which used a batchbuffer. */
+   intel_cmd_ioctl(s->intel, (void *)data, len);
+   if (1)
+      flush_and_fence(s);
+}
+
+static const char *pstrings[] = {	/* primitive topology names, in topology-code order */
+   "none",
+   "POINTLIST",
+   "LINELIST",
+   "LINESTRIP",
+   "TRILIST",
+   "TRISTRIP",
+   "TRIFAN",
+   "QUADLIST",
+   "QUADSTRIP",
+   "LINELIST_ADJ",
+   "LINESTRIP_ADJ",
+   "TRILIST_ADJ",
+   "TRISTRIP_ADJ",
+   "TRISTRIP_REVERSE",
+   "POLYGON",
+   "RECTLIST",
+   "LINELOOP",
+   "POINTLIST_BF",
+   "LINESTRIP_CONT",
+   "LINESTRIP_BF",
+   "LINESTRIP_CONT_BF",
+   "TRIFAN_NOSTIPPLE",
+};
+/* Replay a 3D primitive; large ones are re-issued as BLOCK-vertex draws stepped by BLOCK/2. */
+static void do_3d_prim( struct aub_state *s,
+			const void *data,
+			int len )
+{
+   struct brw_3d_primitive prim;
+   const struct brw_3d_primitive *orig = data;
+   unsigned int topo;
+   int i;
+   assert(len == sizeof(prim));
+   memcpy(&prim, data, sizeof(prim));
+   topo = prim.header.topology;		/* indexes pstrings below, bounds-checked */
+#define START 0
+#define BLOCK (12*28)
+   if (orig->verts_per_instance < BLOCK)
+      flush_cmds(s, &prim, sizeof(prim));
+   else {
+      for (i = START; i + BLOCK < orig->verts_per_instance; i += BLOCK/2) {
+	 prim.start_vert_location = i;
+	 prim.verts_per_instance = BLOCK;
+	 _mesa_printf("%sprim %d/%s verts %d..%d (of %d)\n", 
+		      prim.header.indexed ? "INDEXED " : "",
+		      prim.header.topology,
+		      topo < sizeof(pstrings)/sizeof(pstrings[0]) ? pstrings[topo] : "unknown",
+		      prim.start_vert_location, 
+		      prim.start_vert_location + prim.verts_per_instance,
+		      orig->verts_per_instance);
+	 flush_cmds(s, &prim, sizeof(prim));
+      }
+   }
+}
+
+
+
+static struct {
+   int cmd;			/* opcode: upper 16 bits of a command's first dword */
+   const char *name;		/* name printed during playback */
+   int has_length;		/* nonzero: low byte of dword 0 holds (total dwords - 2) */
+} cmd_info[] = {
+   { 0, "NOOP", 0 },
+   { 0x5410, "XY_COLOR_BLT_RGB", 1 },
+   { 0x5430, "XY_COLOR_BLT_RGBA", 1 },
+   { 0x54d0, "XY_SRC_COPY_BLT_RGB", 1 },
+   { 0x54f0, "XY_SRC_COPY_BLT_RGBA", 1 },
+   { CMD_URB_FENCE, "URB_FENCE",  1 },
+   { CMD_CONST_BUFFER_STATE, "CONST_BUFFER_STATE",  1 },
+   { CMD_CONST_BUFFER, "CONST_BUFFER",  1 },
+   { CMD_STATE_BASE_ADDRESS, "STATE_BASE_ADDRESS",  1 },
+   { CMD_STATE_INSN_POINTER, "STATE_INSN_POINTER",  1 },
+   { CMD_PIPELINE_SELECT_965, "PIPELINE_SELECT", 0, },
+   { CMD_PIPELINE_SELECT_IGD, "PIPELINE_SELECT", 0,},
+   { CMD_PIPELINED_STATE_POINTERS, "PIPELINED_STATE_POINTERS", 1 },
+   { CMD_BINDING_TABLE_PTRS, "BINDING_TABLE_PTRS", 1 },
+   { CMD_VERTEX_BUFFER, "VERTEX_BUFFER", 1 },
+   { CMD_VERTEX_ELEMENT, "VERTEX_ELEMENT", 1 },
+   { CMD_INDEX_BUFFER, "INDEX_BUFFER", 1 },
+   { CMD_VF_STATISTICS_965, "VF_STATISTICS", 0 },
+   { CMD_VF_STATISTICS_IGD, "VF_STATISTICS", 0 },
+   { CMD_DRAW_RECT, "DRAW_RECT", 1 },
+   { CMD_BLEND_CONSTANT_COLOR, "BLEND_CONSTANT_COLOR", 1 },
+   { CMD_CHROMA_KEY, "CHROMA_KEY", 1 },
+   { CMD_DEPTH_BUFFER, "DEPTH_BUFFER", 1 },
+   { CMD_POLY_STIPPLE_OFFSET, "POLY_STIPPLE_OFFSET", 1 },
+   { CMD_POLY_STIPPLE_PATTERN, "POLY_STIPPLE_PATTERN", 1 },
+   { CMD_LINE_STIPPLE_PATTERN, "LINE_STIPPLE_PATTERN", 1 },
+   { CMD_AA_LINE_PARAMETERS, "AA_LINE_PARAMETERS", 1},
+   { CMD_GLOBAL_DEPTH_OFFSET_CLAMP, "GLOBAL_DEPTH_OFFSET_CLAMP", 1 },
+   { CMD_PIPE_CONTROL, "PIPE_CONTROL", 1 },
+   { CMD_MI_FLUSH, "MI_FLUSH", 0 },
+   { CMD_3D_PRIM, "3D_PRIM", 1 },
+};
+
+#define NR_CMDS (sizeof(cmd_info)/sizeof(cmd_info[0]))	/* number of table entries */
+
+
+static int find_command( unsigned int cmd )
+{
+   /* Linear search of cmd_info; yields the entry's index, or -1 if absent. */
+   int idx = 0;
+
+   while (idx < NR_CMDS && cmd != cmd_info[idx].cmd)
+      idx++;
+
+   return (idx < NR_CMDS) ? idx : -1;
+}
+
+
+/* Walk a command stream of len bytes, printing and submitting each command. */
+static int parse_commands( struct aub_state *s,
+			   const unsigned int *data,
+			   int len )
+{
+   while (len) {
+      int cmd = data[0] >> 16;		/* opcode: upper half of the first dword */
+      int dwords;
+      int i;
+
+      i = find_command(cmd);
+
+      if (i < 0) {
+	 _mesa_printf("couldn't find info for cmd %x\n", cmd);
+	 return 1;
+      }
+      /* Commands with a length field keep (total dwords - 2) in the low byte. */
+      if (cmd_info[i].has_length)
+	 dwords = (data[0] & 0xff) + 2;
+      else
+	 dwords = 1;
+
+      _mesa_printf("%s (%d dwords) 0x%x\n", cmd_info[i].name, dwords, data[0]);
+
+      if (len < dwords * 4) {
+	 _mesa_printf("EOF in %s (%d bytes)\n", __FUNCTION__, len);
+	 return 1;
+      }
+
+      /* The do_3d_prim path is compiled in but disabled by the constant 0. */
+      if (0 && cmd == CMD_3D_PRIM)
+	 do_3d_prim(s, data, dwords * 4);
+      else
+	 flush_cmds(s, data, dwords * 4);
+
+      data += dwords;
+      len -= dwords * 4;
+   }
+
+   return 0;
+}
+
+
+/* Copy a data-write payload of len bytes to dest, via a typed copy for some unit states. */
+static void parse_data_write( struct aub_state *s,
+			     const struct aub_block_header *bh,
+			     void *dest,
+			     const unsigned int *data,
+			     int len )
+{
+   switch (bh->type) {
+   case DW_GENERAL_STATE:
+      switch (bh->general_state_type) {
+      case DWGS_VERTEX_SHADER_STATE: {
+	 struct brw_vs_unit_state vs;
+	 assert(len == sizeof(vs));
+
+	 _mesa_printf("DWGS_VERTEX_SHADER_STATE\n");
+	 memcpy(&vs, data, sizeof(vs));
+	 /* Hook for tweaking the state before it is written (currently disabled): */
+/* 	 vs.vs6.vert_cache_disable = 1;  */
+/*  	 vs.thread4.max_threads = 4;  */
+
+	 memcpy(dest, &vs, sizeof(vs));
+	 return;
+      }
+      case DWGS_CLIPPER_STATE: {
+	 struct brw_clip_unit_state clip;
+	 assert(len == sizeof(clip));
+
+	 _mesa_printf("DWGS_CLIPPER_STATE\n");
+	 memcpy(&clip, data, sizeof(clip));
+	 /* Hook for tweaking the state before it is written (currently disabled): */
+/* 	 clip.thread4.max_threads = 0; */
+/*   	 clip.clip5.clip_mode = BRW_CLIPMODE_REJECT_ALL;   */
+
+	 memcpy(dest, &clip, sizeof(clip));
+	 return;
+      }
+
+      case DWGS_NOTYPE:
+      case DWGS_GEOMETRY_SHADER_STATE:
+      case DWGS_STRIPS_FANS_STATE:
+	 break;
+
+      case DWGS_WINDOWER_IZ_STATE: {
+	    struct brw_wm_unit_state wm;
+	    assert(len == sizeof(wm));
+
+	    _mesa_printf("DWGS_WINDOWER_IZ_STATE\n");
+	    memcpy(&wm, data, sizeof(wm));
+	    /* Hook for tweaking the state before it is written (currently disabled): */
+/* 	    wm.wm5.max_threads = 10; */
+
+	    memcpy(dest, &wm, sizeof(wm));
+	    return;
+	 }
+
+      case DWGS_COLOR_CALC_STATE:
+      case DWGS_CLIPPER_VIEWPORT_STATE:
+      case DWGS_STRIPS_FANS_VIEWPORT_STATE:
+      case DWGS_COLOR_CALC_VIEWPORT_STATE:
+      case DWGS_SAMPLER_STATE:
+      case DWGS_KERNEL_INSTRUCTIONS:
+      case DWGS_SCRATCH_SPACE:
+      case DWGS_SAMPLER_DEFAULT_COLOR:
+      case DWGS_INTERFACE_DESCRIPTOR:
+      case DWGS_VLD_STATE:
+      case DWGS_VFE_STATE:
+      default:
+	 break;
+      }
+      break;
+   case DW_SURFACE_STATE:
+      break;
+   case DW_1D_MAP:
+   case DW_2D_MAP:
+   case DW_CUBE_MAP:
+   case DW_VOLUME_MAP:
+   case DW_CONSTANT_BUFFER:
+   case DW_CONSTANT_URB_ENTRY:
+   case DW_VERTEX_BUFFER:
+   case DW_INDEX_BUFFER:
+   default:
+      break;
+   }
+   /* Every case that did not return above copies the payload unmodified. */
+   memcpy(dest, data, len);
+}
+
+
+/* Replay one block record and step the cursor past it; returns 1 when the
+ * block is truncated or its target can't be located.  To work, the memory
+ * layout has to match the X server which created the aubfile. */
+static int parse_block_header( struct aub_state *s )
+{
+   struct aub_block_header *bh = (struct aub_block_header *)(s->map + s->csr);
+   void *data = (void *)(bh + 1);
+   unsigned int avail = s->sz - s->csr;	/* bytes left from the cursor on */
+   unsigned int len;
+
+   _mesa_printf("block header at 0x%x\n", s->csr);
+
+   /* The header must lie inside the mapping before bh->length is read. */
+   if (avail < sizeof(*bh)) {
+      _mesa_printf("EOF in data in %s\n", __FUNCTION__);
+      return 1;
+   }
+   /* Payloads are stored padded to a dword multiple.  Reject lengths whose
+    * rounding wraps around as well as payloads that overrun the mapping. */
+   len = (bh->length + 3) & ~3;
+   if (len < bh->length || avail - sizeof(*bh) < len) {
+      _mesa_printf("EOF in data in %s\n", __FUNCTION__);
+      return 1;
+   }
+
+   if (bh->address_space == ADDR_GTT) {
+      switch (bh->operation)
+      {
+      case BH_DATA_WRITE: {
+	 void *dest = bmFindVirtual( s->intel, bh->address, len );
+	 if (dest == NULL) {
+	    _mesa_printf("Couldn't find virtual address for offset %x\n", bh->address);
+	    return 1;
+	 }
+	 parse_data_write(s, bh, dest, data, len);
+	 break;
+      }
+      case BH_COMMAND_WRITE:
+	 /* A failure here is only reported; playback carries on. */
+	 if (parse_commands(s, data, len) != 0)
+	    _mesa_printf("parse_commands failed\n");
+	 break;
+      default:
+	 break;
+      }
+   }
+
+   s->csr += sizeof(*bh) + len;
+   return 0;
+}
+
+
+#define AUB_FILE_HEADER 0xe085000b
+#define AUB_BLOCK_HEADER 0xe0c10003
+#define AUB_DUMP_BMP 0xe09e0004
+/* Replay the aubfile `filename` on the hardware, one record at a time.
+ * Returns 0 on success, 1 if the file can't be opened or mapped (an empty
+ * file can't be mapped) or holds an unknown or truncated record. */
+int brw_playback_aubfile(struct brw_context *brw,
+			 const char *filename)
+{
+   struct intel_context *intel = &brw->intel;
+   struct aub_state state;
+   struct stat sb;
+   int fd;
+   int retval = 0;
+
+   state.intel = intel;
+
+   fd = open(filename, O_RDONLY, 0);
+   if (fd < 0) {
+      _mesa_printf("couldn't open aubfile: %s\n", filename);
+      return 1;
+   }
+
+   if (fstat(fd, &sb) != 0) {
+      _mesa_printf("couldn't open %s\n", filename);
+      close(fd);
+      return 1;
+   }
+
+   state.csr = 0;
+   state.sz = sb.st_size;
+   state.map = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+   close(fd);			/* the mapping outlives the descriptor */
+   /* mmap reports failure with MAP_FAILED, not NULL. */
+   if (state.map == MAP_FAILED) {
+      _mesa_printf("couldn't mmap %s\n", filename);
+      return 1;
+   }
+
+   LOCK_HARDWARE(intel); 
+   {
+      /* Replay the aubfile item by item: */
+      while (retval == 0 && 
+	     state.csr != state.sz) {
+	 unsigned int insn;
+	 /* Don't read an opcode that is cut off by the end of the file. */
+	 if (state.sz - state.csr < sizeof(insn)) {
+	    _mesa_printf("EOF in %s\n", __FUNCTION__);
+	    retval = 1;
+	    break;
+	 }
+	 insn = *(unsigned int *)(state.map + state.csr);
+	 switch (insn) {
+	 case AUB_FILE_HEADER:
+	    retval = gobble(&state, sizeof(struct aub_file_header));
+	    break;
+	 case AUB_BLOCK_HEADER:   
+	    retval = parse_block_header(&state);
+	    break;
+	 case AUB_DUMP_BMP:
+	    retval = gobble(&state, sizeof(struct aub_dump_bmp));
+	    break;
+	 default:
+	    _mesa_printf("unknown instruction %x\n", insn);
+	    retval = 1;
+	    break;
+	 }
+      }
+   }
+   UNLOCK_HARDWARE(intel);
+   munmap((void *)state.map, state.sz);
+   return retval;
+}
+
+
+
+
+
+
+		  
diff --git a/i965/brw_cc.c b/i965/brw_cc.c
new file mode 100644
index 0000000..8a1d152
--- /dev/null
+++ b/i965/brw_cc.c
@@ -0,0 +1,173 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "macros.h"
+#include "enums.h"
+/* Cache a color-calculator viewport with depth range 0.0..1.0 and record its offset. */
+static void upload_cc_vp( struct brw_context *brw )
+{
+   struct brw_cc_viewport ccv;
+
+   memset(&ccv, 0, sizeof(ccv));	/* zero first so unset fields are deterministic */
+
+   ccv.min_depth = 0.0;
+   ccv.max_depth = 1.0;
+
+   brw->cc.vp_gs_offset = brw_cache_data( &brw->cache[BRW_CC_VP], &ccv );
+}
+
+const struct brw_tracked_state brw_cc_vp = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CONTEXT,	/* the only dirty flag this state item lists */
+      .cache = 0
+   },
+   .update = upload_cc_vp	/* callback that builds and caches the viewport */
+};
+
+/* Build the color-calculator unit state from stencil, color and depth GL state and cache it. */
+static void upload_cc_unit( struct brw_context *brw )
+{
+   struct brw_cc_unit_state cc;
+   
+   memset(&cc, 0, sizeof(cc));
+
+   /* _NEW_STENCIL */
+   if (brw->attribs.Stencil->Enabled) {
+      cc.cc0.stencil_enable = brw->attribs.Stencil->Enabled;
+      cc.cc0.stencil_func = intel_translate_compare_func(brw->attribs.Stencil->Function[0]);
+      cc.cc0.stencil_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->FailFunc[0]);
+      cc.cc0.stencil_pass_depth_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->ZFailFunc[0]);
+      cc.cc0.stencil_pass_depth_pass_op = intel_translate_stencil_op(brw->attribs.Stencil->ZPassFunc[0]);
+      cc.cc1.stencil_ref = brw->attribs.Stencil->Ref[0];
+      cc.cc1.stencil_write_mask = brw->attribs.Stencil->WriteMask[0];
+      cc.cc1.stencil_test_mask = brw->attribs.Stencil->ValueMask[0];
+      /* Index 1 of the stencil arrays feeds the back-face (bf_) fields. */
+      if (brw->attribs.Stencil->TestTwoSide) {
+	 cc.cc0.bf_stencil_enable = brw->attribs.Stencil->TestTwoSide;
+	 cc.cc0.bf_stencil_func = intel_translate_compare_func(brw->attribs.Stencil->Function[1]);
+	 cc.cc0.bf_stencil_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->FailFunc[1]);
+	 cc.cc0.bf_stencil_pass_depth_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->ZFailFunc[1]);
+	 cc.cc0.bf_stencil_pass_depth_pass_op = intel_translate_stencil_op(brw->attribs.Stencil->ZPassFunc[1]);
+	 cc.cc1.bf_stencil_ref = brw->attribs.Stencil->Ref[1];
+	 cc.cc2.bf_stencil_write_mask = brw->attribs.Stencil->WriteMask[1];
+	 cc.cc2.bf_stencil_test_mask = brw->attribs.Stencil->ValueMask[1];
+      }
+
+      /* Not really sure about this:
+       */
+      if (brw->attribs.Stencil->WriteMask[0] ||
+	  (brw->attribs.Stencil->TestTwoSide && brw->attribs.Stencil->WriteMask[1]))
+	 cc.cc0.stencil_write_enable = 1;
+   }
+
+   /* _NEW_COLOR */
+   if (brw->attribs.Color->_LogicOpEnabled) {
+      cc.cc2.logicop_enable = 1;
+      cc.cc5.logicop_func = intel_translate_logic_op( brw->attribs.Color->LogicOp );
+   }
+   else if (brw->attribs.Color->BlendEnabled) {
+      GLenum eqRGB = brw->attribs.Color->BlendEquationRGB;
+      GLenum eqA = brw->attribs.Color->BlendEquationA;
+      GLenum srcRGB = brw->attribs.Color->BlendSrcRGB;
+      GLenum dstRGB = brw->attribs.Color->BlendDstRGB;
+      GLenum srcA = brw->attribs.Color->BlendSrcA;
+      GLenum dstA = brw->attribs.Color->BlendDstA;
+      /* GL_MIN/GL_MAX equations get both of their factors forced to GL_ONE. */
+      if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
+	 srcRGB = dstRGB = GL_ONE;
+      }
+
+      if (eqA == GL_MIN || eqA == GL_MAX) {
+	 srcA = dstA = GL_ONE;
+      }
+
+      cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); 
+      cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); 
+      cc.cc6.blend_function = brw_translate_blend_equation( eqRGB );
+
+      cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); 
+      cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); 
+      cc.cc5.ia_blend_function = brw_translate_blend_equation( eqA );
+      /* The separate alpha path is enabled only if alpha differs from RGB. */
+      cc.cc3.blend_enable = 1;
+      cc.cc3.ia_blend_enable = (srcA != srcRGB || 
+				dstA != dstRGB || 
+				eqA != eqRGB);
+   }
+
+   if (brw->attribs.Color->AlphaEnabled) {
+      cc.cc3.alpha_test = 1;
+      cc.cc3.alpha_test_func = intel_translate_compare_func(brw->attribs.Color->AlphaFunc);
+      /* The float reference is converted to a byte to go with UNORM8 below. */
+      UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], brw->attribs.Color->AlphaRef);
+
+      cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
+   }
+
+   if (brw->attribs.Color->DitherFlag) {
+      cc.cc5.dither_enable = 1;
+      cc.cc6.y_dither_offset = 0; 
+      cc.cc6.x_dither_offset = 0;     
+   }
+
+   /* _NEW_DEPTH */
+   if (brw->attribs.Depth->Test) {
+      cc.cc2.depth_test = brw->attribs.Depth->Test;
+      cc.cc2.depth_test_function = intel_translate_compare_func(brw->attribs.Depth->Func);
+      cc.cc2.depth_write_enable = brw->attribs.Depth->Mask;
+   }
+ 
+   /* CACHE_NEW_CC_VP */
+   cc.cc4.cc_viewport_state_offset =  brw->cc.vp_gs_offset >> 5;	/* in 32-byte units */
+ 
+   if (INTEL_DEBUG & DEBUG_STATS)
+      cc.cc5.statistics_enable = 1; 
+
+   brw->cc.state_gs_offset = brw_cache_data( &brw->cache[BRW_CC_UNIT], &cc );
+}
+
+const struct brw_tracked_state brw_cc_unit = {
+   .dirty = {
+      .mesa = _NEW_STENCIL | _NEW_COLOR | _NEW_DEPTH,	/* GL state read by upload_cc_unit */
+      .brw = 0,
+      .cache = CACHE_NEW_CC_VP	/* upload_cc_unit reads brw->cc.vp_gs_offset */
+   },
+   .update = upload_cc_unit
+};
+
+
+
diff --git a/i965/brw_clip.c b/i965/brw_clip.c
new file mode 100644
index 0000000..8f907be
--- /dev/null
+++ b/i965/brw_clip.c
@@ -0,0 +1,270 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "brw_clip.h"
+
+
+#define FRONT_UNFILLED_BIT  0x1
+#define BACK_UNFILLED_BIT   0x2
+/* Compile the clip program described by *key and upload it to the
+ * BRW_CLIP_PROG cache; the result is stored in brw->clip.prog_gs_offset. */
+static void compile_clip_prog( struct brw_context *brw,
+			     struct brw_clip_prog_key *key )
+{
+   struct brw_clip_compile c;
+   const GLuint *program;
+   GLuint program_size;
+   GLuint delta;
+   GLuint i;
+
+   memset(&c, 0, sizeof(c));
+   
+   /* Begin the compilation:
+    */
+   brw_init_compile(brw, &c.func);
+
+   c.func.single_program_flow = 1;
+
+   c.key = *key;
+
+
+   /* Need to locate the two positions present in vertex + header.
+    * These are currently hardcoded:
+    */
+   c.header_position_offset = ATTR_SIZE;
+
+   for (i = 0, delta = REG_SIZE; i < VERT_RESULT_MAX; i++)	/* first enabled attr sits REG_SIZE bytes in */
+      if (c.key.attrs & (1<<i)) {
+	 c.offset[i] = delta;
+	 delta += ATTR_SIZE;	/* enabled attrs are laid out back to back */
+      }
+
+   c.nr_attrs = brw_count_bits(c.key.attrs);
+   c.nr_regs = (c.nr_attrs + 1) / 2 + 1;  /* are vertices packed, or reg-aligned? */
+   c.nr_bytes = c.nr_regs * REG_SIZE;
+
+   c.prog_data.clip_mode = c.key.clip_mode; /* XXX */
+
+   /* For some reason the thread is spawned with only 4 channels
+    * unmasked.  
+    */
+   brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
+
+
+   /* Would ideally have the option of producing a program which could
+    * do all three:
+    */
+   switch (key->primitive) {
+   case GL_TRIANGLES: 
+      if (key->do_unfilled)
+	 brw_emit_unfilled_clip( &c );
+      else
+	 brw_emit_tri_clip( &c );
+      break;
+   case GL_LINES:
+      brw_emit_line_clip( &c );
+      break;
+   case GL_POINTS:
+      brw_emit_point_clip( &c );
+      break;
+   default:
+      assert(0);
+      return;	/* any other primitive: nothing is compiled or uploaded */
+   }
+
+	 
+
+   /* get the program
+    */
+   program = brw_get_program(&c.func, &program_size);
+
+   /* Upload
+    */
+   brw->clip.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_CLIP_PROG],
+						&c.key,
+						sizeof(c.key),
+						program,
+						program_size,
+						&c.prog_data,
+						&brw->clip.prog_data );
+}
+/* Look *key up in the BRW_CLIP_PROG cache and return brw_search_cache()'s result;
+ * presumably prog_data / prog_gs_offset are filled in on a hit -- TODO confirm. */
+static GLboolean search_cache( struct brw_context *brw, 
+			       struct brw_clip_prog_key *key )
+{
+   return brw_search_cache(&brw->cache[BRW_CLIP_PROG], 
+			   key, sizeof(*key),
+			   &brw->clip.prog_data,
+			   &brw->clip.prog_gs_offset);
+}
+
+
+
+
+/* Build the clip program key from the current GL state, then reuse a
+ * cached clip program for that key or compile a new one. */
+static void upload_clip_prog( struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_clip_prog_key key;
+
+   memset(&key, 0, sizeof(key));	/* also zeroes the pad bits; the key is hashed by size */
+
+   /* Populate the key:
+    */
+   /* BRW_NEW_REDUCED_PRIMITIVE */
+   key.primitive = brw->intel.reduced_primitive;
+   /* CACHE_NEW_VS_PROG */
+   key.attrs = brw->vs.prog_data->outputs_written;
+   /* _NEW_LIGHT */
+   key.do_flat_shading = (brw->attribs.Light->ShadeModel == GL_FLAT);
+   /* _NEW_TRANSFORM */
+   key.nr_userclip = brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled);
+   key.clip_mode = BRW_CLIPMODE_NORMAL;
+
+   /* _NEW_POLYGON: CullFaceMode only takes effect while CullFlag (GL_CULL_FACE) is set */
+   if (key.primitive == GL_TRIANGLES) {
+      if (brw->attribs.Polygon->CullFlag && brw->attribs.Polygon->CullFaceMode == GL_FRONT_AND_BACK)
+	 key.clip_mode = BRW_CLIPMODE_REJECT_ALL;
+      else {
+	 GLuint fill_front = CLIP_CULL;	/* stays CLIP_CULL if the face is culled */
+	 GLuint fill_back = CLIP_CULL;
+	 GLuint offset_front = 0;
+	 GLuint offset_back = 0;
+
+	 if (!brw->attribs.Polygon->CullFlag ||
+	     brw->attribs.Polygon->CullFaceMode != GL_FRONT) {
+	    switch (brw->attribs.Polygon->FrontMode) {
+	    case GL_FILL: 
+	       fill_front = CLIP_FILL; 
+	       offset_front = 0;
+	       break;
+	    case GL_LINE:
+	       key.do_unfilled = 1;
+	       fill_front = CLIP_LINE;
+	       offset_front = brw->attribs.Polygon->OffsetLine;
+	       break;
+	    case GL_POINT:
+	       key.do_unfilled = 1;
+	       fill_front = CLIP_POINT;
+	       offset_front = brw->attribs.Polygon->OffsetPoint;
+	       break;
+	    }
+	 }
+
+	 if (!brw->attribs.Polygon->CullFlag ||
+	     brw->attribs.Polygon->CullFaceMode != GL_BACK) {
+	    switch (brw->attribs.Polygon->BackMode) {
+	    case GL_FILL: 
+	       fill_back = CLIP_FILL; 
+	       offset_back = 0;
+	       break;
+	    case GL_LINE:
+	       key.do_unfilled = 1;
+	       fill_back = CLIP_LINE;
+	       offset_back = brw->attribs.Polygon->OffsetLine;
+	       break;
+	    case GL_POINT:
+	       key.do_unfilled = 1;
+	       fill_back = CLIP_POINT;
+	       offset_back = brw->attribs.Polygon->OffsetPoint;
+	       break;
+	    }
+	 }
+
+    if (brw->attribs.Polygon->BackMode != GL_FILL ||
+        brw->attribs.Polygon->FrontMode != GL_FILL)
+        key.do_unfilled = 1;	/* set even when the non-FILL face is the culled one */
+
+	 /* Most cases the fixed function units will handle.  Cases where
+	  * one or more polygon faces are unfilled will require help:
+	  */
+	 if (key.do_unfilled) {
+	    key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED;
+
+	    if (offset_back || offset_front) {
+	       /* _NEW_POLYGON, _NEW_BUFFERS */
+	       key.offset_units = brw->attribs.Polygon->OffsetUnits * brw->intel.polygon_offset_scale;
+	       key.offset_factor = brw->attribs.Polygon->OffsetFactor * ctx->DrawBuffer->_MRD;
+	    }
+
+	    switch (brw->attribs.Polygon->FrontFace) {	/* map front/back onto ccw/cw winding */
+	    case GL_CCW:
+	       key.fill_ccw = fill_front;
+	       key.fill_cw = fill_back;
+	       key.offset_ccw = offset_front;
+	       key.offset_cw = offset_back;
+	       if (brw->attribs.Light->Model.TwoSide &&
+		   key.fill_cw != CLIP_CULL) 
+		  key.copy_bfc_cw = 1;
+	       break;
+	    case GL_CW:
+	       key.fill_cw = fill_front;
+	       key.fill_ccw = fill_back;
+	       key.offset_cw = offset_front;
+	       key.offset_ccw = offset_back;
+	       if (brw->attribs.Light->Model.TwoSide &&
+		   key.fill_ccw != CLIP_CULL) 
+		  key.copy_bfc_ccw = 1;
+	       break;
+	    }
+	 }
+      }
+   }
+
+   if (!search_cache(brw, &key))
+      compile_clip_prog( brw, &key );
+}
+
+/* State atom for upload_clip_prog; the flags mirror the state markers inside that function. */
+const struct brw_tracked_state brw_clip_prog = {
+   .dirty = {
+      .mesa  = (_NEW_LIGHT | 
+		_NEW_TRANSFORM |
+		_NEW_POLYGON | 
+		_NEW_BUFFERS),
+      .brw   = (BRW_NEW_REDUCED_PRIMITIVE),
+      .cache = CACHE_NEW_VS_PROG
+   },
+   .update = upload_clip_prog
+};
diff --git a/i965/brw_clip.h b/i965/brw_clip.h
new file mode 100644
index 0000000..49b2770
--- /dev/null
+++ b/i965/brw_clip.h
@@ -0,0 +1,170 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#ifndef BRW_CLIP_H
+#define BRW_CLIP_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+#define MAX_VERTS (3+6+6)	
+
+/* Key used to look up / compile clip programs (handed to the cache with its
+ * sizeof).  If unfilled primitives are being emitted, polygon offset and
+ * flatshading have to be fixed up at this point, hence the fields below: */
+struct brw_clip_prog_key {
+   GLuint attrs:16;		/* bitmask taken from the VS outputs_written */
+   GLuint primitive:4;	/* reduced primitive: GL_TRIANGLES, GL_LINES or GL_POINTS */
+   GLuint nr_userclip:3;	/* number of bits set in ClipPlanesEnabled */
+   GLuint do_flat_shading:1;	/* ShadeModel == GL_FLAT */
+   GLuint do_unfilled:1;	/* a polygon face mode is GL_LINE or GL_POINT */
+   GLuint fill_cw:2;		/* includes cull information */
+   GLuint fill_ccw:2;		/* includes cull information */
+   GLuint offset_cw:1;	/* OffsetLine/OffsetPoint flag for the cw face */
+   GLuint offset_ccw:1;	/* OffsetLine/OffsetPoint flag for the ccw face */
+   GLuint pad0:1;
+
+   GLuint copy_bfc_cw:1;	/* two-sided lighting on and cw face not culled */
+   GLuint copy_bfc_ccw:1;	/* two-sided lighting on and ccw face not culled */
+   GLuint clip_mode:3;	/* one of the BRW_CLIPMODE_* values */
+   GLuint pad1:27;
+   
+   GLfloat offset_factor;	/* Polygon OffsetFactor * DrawBuffer->_MRD */
+   GLfloat offset_units;	/* Polygon OffsetUnits * polygon_offset_scale */
+};
+
+/* Per-face fill modes stored in the 2-bit fill_cw / fill_ccw key fields: */
+#define CLIP_LINE   0	/* face mode GL_LINE */
+#define CLIP_POINT  1	/* face mode GL_POINT */
+#define CLIP_FILL   2	/* face mode GL_FILL */
+#define CLIP_CULL   3	/* face is culled (initial value in upload_clip_prog) */
+
+
+#define PRIM_MASK  (0x1f)
+/* Working state for compiling one clip program: EU context, key copy, prog_data and register map. */
+struct brw_clip_compile {
+   struct brw_compile func;
+   struct brw_clip_prog_key key;
+   struct brw_clip_prog_data prog_data;
+   
+   struct {
+      struct brw_reg R0;	/* GRF 0, retyped to UD by the alloc_regs routines */
+      struct brw_reg vertex[MAX_VERTS];
+
+      struct brw_reg t;
+      struct brw_reg t0, t1;	/* line clipper: interpolation factors for the two ends */
+      struct brw_reg dp0, dp1;	/* line clipper: DP4 of each endpoint with the plane */
+
+      struct brw_reg dpPrev;
+      struct brw_reg dp;
+      struct brw_reg loopcount;
+      struct brw_reg nr_verts;
+      struct brw_reg planemask;	/* shifted right once per plane in the line clip loop */
+
+      struct brw_reg inlist;
+      struct brw_reg outlist;
+      struct brw_reg freelist;
+
+      struct brw_reg dir;
+      struct brw_reg tmp0, tmp1;
+      struct brw_reg offset;
+      
+      struct brw_reg fixed_planes;
+      struct brw_reg plane_equation;
+   } reg;
+
+   /* 3 different ways of expressing vertex size:
+    */
+   GLuint nr_attrs;
+   GLuint nr_regs;
+   GLuint nr_bytes;
+
+   GLuint first_tmp;
+   GLuint last_tmp;
+
+   GLboolean need_direction;
+
+   GLuint last_mrf;
+
+   GLuint header_position_offset;	/* set to ATTR_SIZE in compile_clip_prog */
+   GLuint offset[VERT_ATTRIB_MAX];	/* byte offset per attr; filled by VERT_RESULT_* index */
+};
+
+#define ATTR_SIZE  (4*4)
+
+/* Points are only culled, so no need for a clip routine, however it
+ * works out easier to have a dummy one.
+ */
+void brw_emit_unfilled_clip( struct brw_clip_compile *c );
+void brw_emit_tri_clip( struct brw_clip_compile *c );
+void brw_emit_line_clip( struct brw_clip_compile *c );
+void brw_emit_point_clip( struct brw_clip_compile *c );
+
+/* brw_clip_tri.c, for use by the unfilled clip routine:
+ */
+void brw_clip_tri_init_vertices( struct brw_clip_compile *c );
+void brw_clip_tri_flat_shade( struct brw_clip_compile *c );
+void brw_clip_tri( struct brw_clip_compile *c );
+void brw_clip_tri_emit_polygon( struct brw_clip_compile *c );
+void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, 
+			      GLuint nr_verts );
+
+
+/* Utils:
+ */
+
+void brw_clip_interp_vertex( struct brw_clip_compile *c,
+			     struct brw_indirect dest_ptr,
+			     struct brw_indirect v0_ptr, /* from */
+			     struct brw_indirect v1_ptr, /* to */
+			     struct brw_reg t0,
+			     GLboolean force_edgeflag );
+
+void brw_clip_init_planes( struct brw_clip_compile *c );
+
+void brw_clip_emit_vue(struct brw_clip_compile *c, 
+		       struct brw_indirect vert,
+		       GLboolean allocate,
+		       GLboolean eot,
+		       GLuint header);
+
+void brw_clip_kill_thread(struct brw_clip_compile *c);
+
+struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c );
+struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c );
+
+void brw_clip_copy_colors( struct brw_clip_compile *c,
+			   GLuint to, GLuint from );
+
+void brw_clip_init_clipmask( struct brw_clip_compile *c );
+
+#endif
diff --git a/i965/brw_clip_line.c b/i965/brw_clip_line.c
new file mode 100644
index 0000000..8318227
--- /dev/null
+++ b/i965/brw_clip_line.c
@@ -0,0 +1,233 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+
+#include "shader/program.h"
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+/* Assign fixed GRF registers for the line clipper and record the resulting
+ * curb_read_length, urb_read_length and total_grf in c->prog_data. */
+static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
+{
+   GLuint i = 0,j;	/* i: next free GRF number */
+
+   /* Register usage is static, precompute here:
+    */
+   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+   if (c->key.nr_userclip) {
+      c->reg.fixed_planes = brw_vec4_grf(i, 0);
+      i += (6 + c->key.nr_userclip + 1) / 2;	/* 6 fixed + user planes, two per register */
+
+      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
+   }
+   else
+      c->prog_data.curb_read_length = 0;
+
+
+   /* Payload vertices plus space for more generated vertices:
+    */
+   for (j = 0; j < 4; j++) {	/* vertex[0..1] inputs, vertex[2..3] used as newvtx0/1 */
+      c->reg.vertex[j] = brw_vec4_grf(i, 0);
+      i += c->nr_regs;
+   }
+
+   c->reg.t           = brw_vec1_grf(i, 0);	/* t, t0, t1, planemask share one GRF */
+   c->reg.t0          = brw_vec1_grf(i, 1);
+   c->reg.t1          = brw_vec1_grf(i, 2);	/* adjacent to t0 so both can be cleared by one vec2 MOV */
+   c->reg.planemask   = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
+   c->reg.plane_equation = brw_vec4_grf(i, 4);
+   i++;
+
+   c->reg.dp0         = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
+   c->reg.dp1         = brw_vec1_grf(i, 4);
+   i++;
+
+   if (!c->key.nr_userclip) {	/* nothing read from the curb: reserve a GRF here instead */
+      c->reg.fixed_planes = brw_vec8_grf(i, 0); 
+      i++;
+   }
+
+
+   c->first_tmp = i;
+   c->last_tmp = i;
+
+   c->prog_data.urb_read_length = c->nr_regs; /* ? */
+   c->prog_data.total_grf = i;
+}
+
+
+
+/* Line clipping, roughly following this algorithm:
+ *
+ *  for (p=0;p<MAX_PLANES;p++) {
+ *     if (clipmask & (1 << p)) {
+ *        GLfloat dp0 = DOTPROD( vtx0, plane[p] );
+ *        GLfloat dp1 = DOTPROD( vtx1, plane[p] );
+ *
+ *        if (IS_NEGATIVE(dp1)) {
+ *           GLfloat t = dp1 / (dp1 - dp0);
+ *           if (t > t1) t1 = t;
+ *        } else {
+ *           GLfloat t = dp0 / (dp0 - dp1);
+ *           if (t > t0) t0 = t;
+ *        }
+ *  
+ *        if (t0 + t1 >= 1.0)
+ *           return;
+ *     }
+ *  }
+ *
+ *  interp( ctx, newvtx0, vtx0, vtx1, t0 );
+ *  interp( ctx, newvtx1, vtx1, vtx0, t1 );
+ * Note: the emitted code does the t0 + t1 test once, after the plane loop.
+ */
+static void clip_and_emit_line( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_indirect vtx0     = brw_indirect(0, 0);
+   struct brw_indirect vtx1      = brw_indirect(1, 0);
+   struct brw_indirect newvtx0   = brw_indirect(2, 0);
+   struct brw_indirect newvtx1   = brw_indirect(3, 0);
+   struct brw_indirect plane_ptr = brw_indirect(4, 0);
+   struct brw_instruction *plane_loop;
+   struct brw_instruction *plane_active;
+   struct brw_instruction *is_negative;
+   struct brw_instruction *not_culled;
+   struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD);
+
+   brw_MOV(p, get_addr_reg(vtx0),      brw_address(c->reg.vertex[0]));
+   brw_MOV(p, get_addr_reg(vtx1),      brw_address(c->reg.vertex[1]));
+   brw_MOV(p, get_addr_reg(newvtx0),   brw_address(c->reg.vertex[2]));
+   brw_MOV(p, get_addr_reg(newvtx1),   brw_address(c->reg.vertex[3]));
+   brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));
+
+   /* Note: init t0, t1 together: 
+    */
+   brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0));
+
+   brw_clip_init_planes(c);
+   brw_clip_init_clipmask(c);
+
+   plane_loop = brw_DO(p, BRW_EXECUTE_1);
+   {
+      /* if (planemask & 1)
+       */
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1));
+      
+      plane_active = brw_IF(p, BRW_EXECUTE_1);
+      {
+	 if (c->key.nr_userclip)	/* plane element type depends on whether user planes exist */
+	    brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
+	 else
+	    brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
+
+	 /* dp = DP4(vtx->position, plane) 
+	  */
+	 brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+
+	 /* if (IS_NEGATIVE(dp1)) 
+	  */
+	 brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+	 brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+	 is_negative = brw_IF(p, BRW_EXECUTE_1);
+	 {
+	    brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0));	/* t = dp1 / (dp1 - dp0) */
+	    brw_math_invert(p, c->reg.t, c->reg.t);
+	    brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1);
+
+	    brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 );
+	    brw_MOV(p, c->reg.t1, c->reg.t);	/* presumably predicated on the CMP: if (t > t1) t1 = t */
+	    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+	 } 
+	 is_negative = brw_ELSE(p, is_negative);
+	 {
+	    /* Coming back in.  We know that both cannot be negative
+	     * because the line would have been culled in that case.
+	     */
+	    brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1));	/* t = dp0 / (dp0 - dp1) */
+	    brw_math_invert(p, c->reg.t, c->reg.t);
+	    brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0);
+
+	    brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 );
+	    brw_MOV(p, c->reg.t0, c->reg.t);	/* presumably predicated on the CMP: if (t > t0) t0 = t */
+	    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+	 }
+	 brw_ENDIF(p, is_negative);	 
+      }
+      brw_ENDIF(p, plane_active);
+      
+      /* plane_ptr++;
+       */
+      brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
+
+      /* while (planemask>>=1) != 0
+       */
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
+   }
+   brw_WHILE(p, plane_loop);
+
+   brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1);	/* emit only if t0 + t1 < 1.0 */
+   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0));
+   not_culled = brw_IF(p, BRW_EXECUTE_1);
+   {
+      brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, GL_FALSE);
+      brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, GL_FALSE);
+
+      brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);	/* allocate=1, eot=0 */
+      brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); 	/* allocate=0, eot=1 */
+   }
+   brw_ENDIF(p, not_culled);
+   brw_clip_kill_thread(c);	/* emitted after the ENDIF, outside the not-culled branch */
+}
+
+
+/* GL_LINES clip program: allocate registers, copy colors if flat shading, then emit the clip code. */
+void brw_emit_line_clip( struct brw_clip_compile *c )
+{
+   brw_clip_line_alloc_regs(c);
+
+   if (c->key.do_flat_shading)
+      brw_clip_copy_colors(c, 0, 1);	/* arguments are (to, from) per the prototype */
+                
+   clip_and_emit_line(c);
+}
diff --git a/i965/brw_clip_point.c b/i965/brw_clip_point.c
new file mode 100644
index 0000000..2346980
--- /dev/null
+++ b/i965/brw_clip_point.c
@@ -0,0 +1,54 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+
+#include "shader/program.h"
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+/* Point clipping: no clip code is emitted, only register allocation
+ * (brw_clip_tri_alloc_regs with 0 verts) followed by a thread kill. */
+void brw_emit_point_clip( struct brw_clip_compile *c )
+{
+   /* Send an empty message to kill the thread:
+    */
+   brw_clip_tri_alloc_regs(c, 0);
+   brw_clip_kill_thread(c);
+}
diff --git a/i965/brw_clip_state.c b/i965/brw_clip_state.c
new file mode 100644
index 0000000..37a25a9
--- /dev/null
+++ b/i965/brw_clip_state.c
@@ -0,0 +1,97 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "macros.h"
+
+/* Fill a zeroed brw_clip_unit_state from the current clip program, curbe and urb
+ * settings, then store it via brw_cache_data() into brw->clip.state_gs_offset. */
+static void upload_clip_unit( struct brw_context *brw )
+{
+   struct brw_clip_unit_state clip;
+
+   memset(&clip, 0, sizeof(clip));
+
+   /* CACHE_NEW_CLIP_PROG */
+   clip.thread0.grf_reg_count = ((brw->clip.prog_data->total_grf-1) & ~15) / 16;
+   clip.thread0.kernel_start_pointer = brw->clip.prog_gs_offset >> 6;	/* offset / 64 */
+   clip.thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length;
+   clip.thread3.const_urb_entry_read_length = brw->clip.prog_data->curb_read_length;
+   clip.clip5.clip_mode = brw->clip.prog_data->clip_mode;
+
+   /* BRW_NEW_CURBE_OFFSETS */
+   clip.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2;
+
+   /* BRW_NEW_URB_FENCE */
+   clip.thread4.nr_urb_entries = brw->urb.nr_clip_entries; 
+   clip.thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
+   clip.thread4.max_threads = 0; /* Hmm, maybe the max is 1 or 2 threads */
+
+   if (INTEL_DEBUG & DEBUG_STATS)
+      clip.thread4.stats_enable = 1; 
+
+   /* CONSTANT */
+   clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+   clip.thread1.single_program_flow = 1;
+   clip.thread3.dispatch_grf_start_reg = 1;
+   clip.thread3.urb_entry_read_offset = 0;
+   clip.clip5.userclip_enable_flags = 0x7f;
+   clip.clip5.userclip_must_clip = 1;
+   clip.clip5.guard_band_enable = 0;
+   clip.clip5.viewport_z_clip_enable = 1;
+   clip.clip5.viewport_xy_clip_enable = 1;
+   clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
+   clip.clip5.api_mode = BRW_CLIP_API_OGL;   
+
+   if (BRW_IS_IGD(brw))	/* only set when BRW_IS_IGD() is true for this context */
+      clip.clip5.negative_w_clip_test = 1;
+
+   clip.clip6.clipper_viewport_state_ptr = 0;
+   clip.viewport_xmin = -1;
+   clip.viewport_xmax = 1;
+   clip.viewport_ymin = -1;
+   clip.viewport_ymax = 1;
+
+   brw->clip.state_gs_offset = brw_cache_data( &brw->cache[BRW_CLIP_UNIT], &clip );
+}
+
+/* State atom for upload_clip_unit; the flags mirror the state markers inside that function. */
+const struct brw_tracked_state brw_clip_unit = {
+   .dirty = {
+      .mesa  = 0,
+      .brw   = (BRW_NEW_CURBE_OFFSETS |
+		BRW_NEW_URB_FENCE),
+      .cache = CACHE_NEW_CLIP_PROG
+   },
+   .update = upload_clip_unit
+};
diff --git a/i965/brw_clip_tri.c b/i965/brw_clip_tri.c
new file mode 100644
index 0000000..0fc7306
--- /dev/null
+++ b/i965/brw_clip_tri.c
@@ -0,0 +1,467 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+
+#include "shader/program.h"
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+/* Assign GRF registers for the tri clipper: R0, clip planes, nr_verts vertex
+ * slots of c->nr_regs registers each, scalars, lists and unfilled-mode temps. */
+void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, 
+			      GLuint nr_verts )
+{
+   GLuint i = 0,j;
+
+   /* Register usage is static, precompute here:
+    */
+   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+   if (c->key.nr_userclip) {
+      c->reg.fixed_planes = brw_vec4_grf(i, 0);
+      i += (6 + c->key.nr_userclip + 1) / 2;
+
+      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
+   }
+   else
+      c->prog_data.curb_read_length = 0;
+
+
+   /* Payload vertices plus space for more generated vertices:
+    */
+   for (j = 0; j < nr_verts; j++) {
+      c->reg.vertex[j] = brw_vec4_grf(i, 0);
+      i += c->nr_regs;
+   }
+
+   if (c->nr_attrs & 1) { /* odd attr count: write 0 at byte nr_attrs*16 + 32 of verts 0-2 */
+      for (j = 0; j < 3; j++) {
+	 GLuint delta = c->nr_attrs*16 + 32;
+	 brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
+      }
+   }
+
+   c->reg.t          = brw_vec1_grf(i, 0);
+   c->reg.loopcount  = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D);
+   c->reg.nr_verts   = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
+   c->reg.planemask  = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
+   c->reg.plane_equation = brw_vec4_grf(i, 4);
+   i++;
+
+   c->reg.dpPrev     = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
+   c->reg.dp         = brw_vec1_grf(i, 4);
+   i++;
+
+   c->reg.inlist     = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+   i++;
+
+   c->reg.outlist    = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+   i++;
+
+   c->reg.freelist   = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+   i++;
+
+   if (!c->key.nr_userclip) {
+      c->reg.fixed_planes = brw_vec8_grf(i, 0); 
+      i++;
+   }
+
+   if (c->key.do_unfilled) {
+      c->reg.dir     = brw_vec4_grf(i, 0);
+      c->reg.offset  = brw_vec4_grf(i, 4);
+      i++;
+      c->reg.tmp0    = brw_vec4_grf(i, 0);
+      c->reg.tmp1    = brw_vec4_grf(i, 4);
+      i++;
+   }
+
+   c->first_tmp = i;
+   c->last_tmp = i;
+
+   c->prog_data.urb_read_length = c->nr_regs; /* ? */
+   c->prog_data.total_grf = i;
+}
+
+/* Emit code that seeds inlist[0..2] with the payload vertex addresses (0 and 1
+ * swapped for TRISTRIP_REVERSE), writes 0 to outlist and sets nr_verts to 3. */
+void brw_clip_tri_init_vertices( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+   struct brw_instruction *is_rev;
+
+   /* Initial list of indices for incoming vertexes:
+    */
+   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); 
+   brw_CMP(p, 
+	   vec1(brw_null_reg()), 
+	   BRW_CONDITIONAL_EQ, 
+	   tmp0,
+	   brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));
+
+   /* XXX: Is there an easier way to do this?  Need to reverse every
+    * second tristrip element:  Can ignore sometimes?
+    */
+   is_rev = brw_IF(p, BRW_EXECUTE_1);
+   {   
+      brw_MOV(p, get_element(c->reg.inlist, 0),  brw_address(c->reg.vertex[1]) );
+      brw_MOV(p, get_element(c->reg.inlist, 1),  brw_address(c->reg.vertex[0]) );
+      if (c->need_direction)
+	 brw_MOV(p, c->reg.dir, brw_imm_f(-1));
+   }
+   is_rev = brw_ELSE(p, is_rev);
+   {
+      brw_MOV(p, get_element(c->reg.inlist, 0),  brw_address(c->reg.vertex[0]) );
+      brw_MOV(p, get_element(c->reg.inlist, 1),  brw_address(c->reg.vertex[1]) );
+      if (c->need_direction)
+	 brw_MOV(p, c->reg.dir, brw_imm_f(1));
+   }
+   brw_ENDIF(p, is_rev);
+
+   brw_MOV(p, get_element(c->reg.inlist, 2),  brw_address(c->reg.vertex[2]) );
+   brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0));
+   brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3));
+}
+
+/* Emit flat-shade colour copies: brw_clip_copy_colors(c, 1, 0) and (c, 2, 0)
+ * for _3DPRIM_POLYGON, else (c, 0, 2) and (c, 1, 2) - presumably (to, from). */
+void brw_clip_tri_flat_shade( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *is_poly;
+   struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+
+   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); 
+   brw_CMP(p, 
+	   vec1(brw_null_reg()), 
+	   BRW_CONDITIONAL_EQ, 
+	   tmp0,
+	   brw_imm_ud(_3DPRIM_POLYGON));
+
+   is_poly = brw_IF(p, BRW_EXECUTE_1);
+   {   
+      brw_clip_copy_colors(c, 1, 0);
+      brw_clip_copy_colors(c, 2, 0);
+   }
+   is_poly = brw_ELSE(p, is_poly);
+   {
+      brw_clip_copy_colors(c, 0, 2);
+      brw_clip_copy_colors(c, 1, 2);
+   }
+   brw_ENDIF(p, is_poly);
+}
+
+/* Use mesa's clipping algorithms, translated to GEN4 assembly.
+ * For each set bit of reg.planemask the polygon in inlist is clipped against
+ * that plane into outlist, which is then copied back over inlist.
+ */
+void brw_clip_tri( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_indirect vtx = brw_indirect(0, 0);
+   struct brw_indirect vtxPrev = brw_indirect(1, 0);
+   struct brw_indirect vtxOut = brw_indirect(2, 0);
+   struct brw_indirect plane_ptr = brw_indirect(3, 0);
+   struct brw_indirect inlist_ptr = brw_indirect(4, 0);
+   struct brw_indirect outlist_ptr = brw_indirect(5, 0);
+   struct brw_indirect freelist_ptr = brw_indirect(6, 0);
+   struct brw_instruction *plane_loop;
+   struct brw_instruction *plane_active;
+   struct brw_instruction *vertex_loop;
+   struct brw_instruction *next_test;
+   struct brw_instruction *prev_test;
+   
+   brw_MOV(p, get_addr_reg(vtxPrev),     brw_address(c->reg.vertex[2]) );
+   brw_MOV(p, get_addr_reg(plane_ptr),   brw_clip_plane0_address(c));
+   brw_MOV(p, get_addr_reg(inlist_ptr),  brw_address(c->reg.inlist));
+   brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
+
+   brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) );
+
+   plane_loop = brw_DO(p, BRW_EXECUTE_1);
+   {
+      /* if (planemask & 1)
+       */
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1));
+      
+      plane_active = brw_IF(p, BRW_EXECUTE_1);
+      {
+	 /* vtxOut = freelist_ptr++ 
+	  */
+	 brw_MOV(p, get_addr_reg(vtxOut),       get_addr_reg(freelist_ptr) );
+	 brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE));
+
+	 if (c->key.nr_userclip)
+	    brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
+	 else
+	    brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
+	    
+	 brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+	 brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0));
+
+	 vertex_loop = brw_DO(p, BRW_EXECUTE_1);
+	 {
+	    /* vtx = *input_ptr;
+	     */
+	    brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0));
+
+	    /* IS_NEGATIVE(prev) */
+	    brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+	    brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+	    prev_test = brw_IF(p, BRW_EXECUTE_1);
+	    {
+	       /* IS_POSITIVE(next)
+		*/
+	       brw_set_conditionalmod(p, BRW_CONDITIONAL_GE);
+	       brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+	       next_test = brw_IF(p, BRW_EXECUTE_1);
+	       {
+
+		  /* Coming back in.
+		   */
+		  brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp));
+		  brw_math_invert(p, c->reg.t, c->reg.t);
+		  brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev);
+
+		  /* If (vtxOut == 0) vtxOut = vtxPrev
+		   */
+		  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
+		  brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) );
+		  brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+		  brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, GL_FALSE);
+
+		  /* *outlist_ptr++ = vtxOut;
+		   * nr_verts++; 
+		   * vtxOut = 0;
+		   */
+		  brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
+		  brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+		  brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+		  brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
+	       }
+	       brw_ENDIF(p, next_test);
+	       
+	    }
+	    prev_test = brw_ELSE(p, prev_test);
+	    {
+	       /* *outlist_ptr++ = vtxPrev;
+		* nr_verts++;
+		*/
+	       brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev));
+	       brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+	       brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+
+	       /* IS_NEGATIVE(next)
+		*/
+	       brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+	       brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+	       next_test = brw_IF(p, BRW_EXECUTE_1);
+	       {
+		  /* Going out of bounds.  Avoid division by zero as we
+		   * know dp != dpPrev from DIFFERENT_SIGNS, above.
+		   */
+		  brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev));
+		  brw_math_invert(p, c->reg.t, c->reg.t);
+		  brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp);
+
+		  /* If (vtxOut == 0) vtxOut = vtx
+		   */
+		  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
+		  brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) );
+		  brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+		  brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, GL_TRUE);		  
+
+		  /* *outlist_ptr++ = vtxOut;
+		   * nr_verts++; 
+		   * vtxOut = 0;
+		   */
+		  brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
+		  brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+		  brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+		  brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
+	       } 	       
+	       brw_ENDIF(p, next_test);
+	    }
+	    brw_ENDIF(p, prev_test);
+	    
+	    /* vtxPrev = vtx;
+	     * inlist_ptr++;
+	     */
+	    brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx));
+	    brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short)));
+
+	    /* while (--loopcount != 0)
+	     */
+	    brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+	    brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+	 } 
+	 brw_WHILE(p, vertex_loop);
+
+	 /* vtxPrev = *(outlist_ptr-1)  OR: outlist[nr_verts-1]
+	  * inlist = outlist
+	  * inlist_ptr = &inlist[0]
+	  * outlist_ptr = &outlist[0]
+	  */
+	 brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2));
+	 brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0));
+	 brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0));
+	 brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
+	 brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
+      }
+      brw_ENDIF(p, plane_active);
+      
+      /* plane_ptr++;
+       */
+      brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
+
+      /* nr_verts >= 3 
+       */
+      brw_CMP(p,
+	      vec1(brw_null_reg()),
+	      BRW_CONDITIONAL_GE,
+	      c->reg.nr_verts,
+	      brw_imm_ud(3));
+   
+      /* && (planemask>>=1) != 0
+       */
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
+   }
+   brw_WHILE(p, plane_loop);
+}
+
+/* Emit the inlist vertices as one _3DPRIM_TRIFAN (start flag on the first,
+ * end flag on the last); nothing is emitted unless nr_verts - 2 > 0. */
+void brw_clip_tri_emit_polygon(struct brw_clip_compile *c)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *loop, *if_insn;
+
+   /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--)
+    */
+   brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
+   brw_ADD(p,
+	   c->reg.loopcount,
+	   c->reg.nr_verts,
+	   brw_imm_d(-2));
+
+   if_insn = brw_IF(p, BRW_EXECUTE_1);
+   {
+      struct brw_indirect v0 = brw_indirect(0, 0);
+      struct brw_indirect vptr = brw_indirect(1, 0);
+
+      brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist));
+      brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+      brw_clip_emit_vue(c, v0, 1, 0, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_START));
+      
+      brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
+      brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+      loop = brw_DO(p, BRW_EXECUTE_1);
+      {
+	 brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN << 2));
+  
+	 brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
+	 brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+	 brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+	 brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+      }
+      brw_WHILE(p, loop);
+
+      brw_clip_emit_vue(c, v0, 0, 1, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_END));
+   }
+   brw_ENDIF(p, if_insn);
+}
+
+static void do_clip_tri( struct brw_clip_compile *c )
+{
+   /* Unconditional clip: emit the plane setup followed by the clip loop. */
+   brw_clip_init_planes(c);
+   brw_clip_tri(c);
+}
+
+/* Emit the clipper inside an IF so it only runs when reg.planemask != 0. */
+static void maybe_do_clip_tri( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *do_clip;
+
+   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
+   do_clip = brw_IF(p, BRW_EXECUTE_1);
+   {
+      do_clip_tri(c);
+   }
+   brw_ENDIF(p, do_clip);
+}
+
+
+/* Build the whole triangle clip program: register layout, vertex list and
+ * clipmask setup, optional flat shading, clipping, trifan output, thread kill. */
+void brw_emit_tri_clip( struct brw_clip_compile *c )
+{
+   brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
+   brw_clip_tri_init_vertices(c);
+   brw_clip_init_clipmask(c);
+
+   /* Can't push into do_clip_tri because with polygon (or quad)
+    * flatshading, need to apply the flatshade here because we don't
+    * respect the PV when converting to trifan for emit:
+    */
+   if (c->key.do_flat_shading) 
+      brw_clip_tri_flat_shade(c); 
+      
+   if (c->key.clip_mode == BRW_CLIPMODE_NORMAL)
+      do_clip_tri(c);
+   else 
+      maybe_do_clip_tri(c);
+      
+   brw_clip_tri_emit_polygon(c);
+
+   /* Send an empty message to kill the thread:
+    */
+   brw_clip_kill_thread(c);
+}
+
+
+
diff --git a/i965/brw_clip_unfilled.c b/i965/brw_clip_unfilled.c
new file mode 100644
index 0000000..918e000
--- /dev/null
+++ b/i965/brw_clip_unfilled.c
@@ -0,0 +1,484 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+
+#include "shader/program.h"
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+/* Compute the triangle's facing direction into reg.dir: the cross product of
+ * edges (v0 - v2) and (v1 - v2), multiplied by the existing contents of
+ * reg.dir.  This is performed against the original triangles (reg.vertex[]),
+ * so no inlist indirection is used.
+ * NOTE(review): the original note ended "BZZZT!" - presumably doubting that.
+ */
+static void compute_tri_direction( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg e = c->reg.tmp0;
+   struct brw_reg f = c->reg.tmp1;
+   struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]); 
+   struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]); 
+   struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]); 
+
+
+   /* Calculate the vectors of two edges of the triangle:
+    */
+   brw_ADD(p, e, v0, negate(v2)); 
+   brw_ADD(p, f, v1, negate(v2)); 
+
+   /* Take their crossproduct:
+    */
+   brw_set_access_mode(p, BRW_ALIGN_16);
+   brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3),  brw_swizzle(f,2,0,1,3));
+   brw_MAC(p, vec4(e),  negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3));
+   brw_set_access_mode(p, BRW_ALIGN_1);
+
+   brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e));
+}
+
+/* Emit: kill the thread if dir.z >= 0 (ccw culled) or dir.z < 0 (cw culled). */
+static void cull_direction( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *ccw;
+   GLuint conditional;
+
+   assert (!(c->key.fill_ccw == CLIP_CULL &&
+	     c->key.fill_cw == CLIP_CULL));
+
+   if (c->key.fill_ccw == CLIP_CULL)
+      conditional = BRW_CONDITIONAL_GE;
+   else
+      conditional = BRW_CONDITIONAL_L;
+
+   brw_CMP(p,
+	   vec1(brw_null_reg()),
+	   conditional,
+	   get_element(c->reg.dir, 2),
+	   brw_imm_f(0));
+   
+   ccw = brw_IF(p, BRW_EXECUTE_1);
+   {
+      brw_clip_kill_thread(c);
+   }
+   brw_ENDIF(p, ccw);
+}
+
+/* Emit code copying BFC0/BFC1 over COL0/COL1 in payload vertices 0-2 when
+ * dir.z >= 0 (key.copy_bfc_ccw set) or dir.z < 0 (otherwise). */
+static void copy_bfc( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *ccw;
+   GLuint conditional;
+
+   /* Do we have any colors to copy? 
+    */
+   if (!(c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) &&
+       !(c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1]))
+      return;
+
+   /* In some weird degenerate cases we can end up testing the
+    * direction twice, once for culling and once for bfc copying.  Oh
+    * well, that's what you get for setting weird GL state.
+    */
+   if (c->key.copy_bfc_ccw)
+      conditional = BRW_CONDITIONAL_GE;
+   else
+      conditional = BRW_CONDITIONAL_L;
+
+   brw_CMP(p,
+	   vec1(brw_null_reg()),
+	   conditional,
+	   get_element(c->reg.dir, 2),
+	   brw_imm_f(0));
+   
+   ccw = brw_IF(p, BRW_EXECUTE_1);
+   {
+      GLuint i;
+
+      for (i = 0; i < 3; i++) {
+	 if (c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0])
+	    brw_MOV(p, 
+		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL0]),
+		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC0]));
+
+	 if (c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1])
+	    brw_MOV(p, 
+		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL1]),
+		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC1]));
+      }
+   }
+   brw_ENDIF(p, ccw);
+}
+
+
+/* Emit the polygon-offset computation; the result is left in element 0 of
+ * reg.offset.  Reference C (DEPTH_SCALE and MRD are not applied here -
+ * presumably already folded into key.offset_units / key.offset_factor):
+ *   GLfloat iz	= 1.0 / dir.z;
+ *   GLfloat ac	= dir.x * iz;
+ *   GLfloat bc	= dir.y * iz;
+ *   offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE;
+ *   offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor;
+ *   offset *= MRD;
+ */
+static void compute_offset( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg off = c->reg.offset;
+   struct brw_reg dir = c->reg.dir;
+   
+   brw_math_invert(p, get_element(off, 2), get_element(dir, 2));
+   brw_MUL(p, vec2(off), dir, get_element(off, 2));
+
+   brw_CMP(p, 
+	   vec1(brw_null_reg()), 
+	   BRW_CONDITIONAL_GE,
+	   brw_abs(get_element(off, 0)), 
+	   brw_abs(get_element(off, 1)));
+
+   brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1)));
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+   brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor));
+   brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units));
+}
+
+/* For polygons: clear EDGE on vertex 0/2 when R0.2 bit 8/9 is zero. */
+static void merge_edgeflags( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *is_poly;
+   struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0);
+
+   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); 
+   brw_CMP(p, 
+	   vec1(brw_null_reg()), 
+	   BRW_CONDITIONAL_EQ, 
+	   tmp0,
+	   brw_imm_ud(_3DPRIM_POLYGON));
+
+   /* Get away with using reg.vertex because we know that this is not
+    * a _3DPRIM_TRISTRIP_REVERSE:
+    */
+   is_poly = brw_IF(p, BRW_EXECUTE_1);
+   {   
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
+      brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8));
+      brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0));
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
+      brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9));
+      brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0));
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+   }
+   brw_ENDIF(p, is_poly);
+}
+
+
+/* Emit: HPOS.z of the vertex addressed by vert += element 0 of reg.offset. */
+static void apply_one_offset( struct brw_clip_compile *c,
+			  struct brw_indirect vert )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg pos = deref_4f(vert, c->offset[VERT_RESULT_HPOS]);
+   struct brw_reg z = get_element(pos, 2);
+
+   brw_ADD(p, z, z, vec1(c->reg.offset));
+}
+
+/***********************************************************************
+ * Output clipped polygon as an unfilled primitive:
+ *
+ * Lines: each edge whose first vertex has EDGE != 0 becomes a 2-vertex strip.
+ */
+static void emit_lines(struct brw_clip_compile *c,
+		       GLboolean do_offset)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *loop;
+   struct brw_instruction *draw_edge;
+   struct brw_indirect v0 = brw_indirect(0, 0);
+   struct brw_indirect v1 = brw_indirect(1, 0);
+   struct brw_indirect v0ptr = brw_indirect(2, 0);
+   struct brw_indirect v1ptr = brw_indirect(3, 0);
+
+   /* Need a separate loop for offset:
+    */
+   if (do_offset) {
+      brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+      brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+
+      loop = brw_DO(p, BRW_EXECUTE_1);
+      {
+	 brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+	 brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+	    
+	 apply_one_offset(c, v0);
+	    
+	 brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
+	 brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+      }
+      brw_WHILE(p, loop);
+   }
+
+   /* v1ptr = &inlist[nr_verts]
+    * *v1ptr = v0
+    */
+   brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+   brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+   brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
+   brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
+   brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0));
+
+   loop = brw_DO(p, BRW_EXECUTE_1);
+   {
+      brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+      brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2));
+      brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+
+      /* draw edge if edgeflag != 0 */
+      brw_CMP(p, 
+	      vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, 
+	      deref_1f(v0, c->offset[VERT_RESULT_EDGE]),
+	      brw_imm_f(0));
+      draw_edge = brw_IF(p, BRW_EXECUTE_1);
+      {
+	 brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
+	 brw_clip_emit_vue(c, v1, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END);
+      }
+      brw_ENDIF(p, draw_edge);
+
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+   }
+   brw_WHILE(p, loop);
+}
+
+/* Emit each inlist vertex whose edge flag is non-zero as a one-vertex
+ * _3DPRIM_POINTLIST, applying the z offset first when do_offset is set. */
+static void emit_points(struct brw_clip_compile *c,
+			GLboolean do_offset )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *loop;
+   struct brw_instruction *draw_point;
+
+   struct brw_indirect v0 = brw_indirect(0, 0);
+   struct brw_indirect v0ptr = brw_indirect(2, 0);
+
+   brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+   brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+
+   loop = brw_DO(p, BRW_EXECUTE_1);
+   {
+      brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+      brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+
+      /* draw if edgeflag != 0 
+       */
+      brw_CMP(p, 
+	      vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, 
+	      deref_1f(v0, c->offset[VERT_RESULT_EDGE]),
+	      brw_imm_f(0));
+      draw_point = brw_IF(p, BRW_EXECUTE_1);
+      {
+	 if (do_offset)
+	    apply_one_offset(c, v0);
+
+	 brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END);
+      }
+      brw_ENDIF(p, draw_point);
+
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+   }
+   brw_WHILE(p, loop);
+}
+
+
+
+
+
+
+
+static void emit_primitives( struct brw_clip_compile *c,
+			     GLuint mode, 
+			     GLboolean do_offset )
+{
+   /* Dispatch on the polygon mode selected for this facing.  Filled
+    * output ignores do_offset; lines and points apply it themselves.
+    * CLIP_CULL is never expected here (direction culling is emitted
+    * earlier), hence the assert.
+    */
+   if (mode == CLIP_FILL) {
+      brw_clip_tri_emit_polygon(c);
+   }
+   else if (mode == CLIP_LINE) {
+      emit_lines(c, do_offset);
+   }
+   else if (mode == CLIP_POINT) {
+      emit_points(c, do_offset);
+   }
+   else if (mode == CLIP_CULL) {
+      assert(0);
+   }
+} 
+
+/* Emit output per facing: a run-time test on dir.z when the two fill modes
+ * differ and neither is culled; otherwise the cw settings, else the ccw ones. */
+static void emit_unfilled_primitives( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *ccw;
+
+   /* Direction culling has already been done.
+    */
+   if (c->key.fill_ccw != c->key.fill_cw &&
+       c->key.fill_ccw != CLIP_CULL &&
+       c->key.fill_cw != CLIP_CULL)
+   {
+      brw_CMP(p,
+	      vec1(brw_null_reg()),
+	      BRW_CONDITIONAL_GE,
+	      get_element(c->reg.dir, 2),
+	      brw_imm_f(0));
+   
+      ccw = brw_IF(p, BRW_EXECUTE_1);
+      {
+	 emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw);
+      }
+      ccw = brw_ELSE(p, ccw);
+      {
+	 emit_primitives(c, c->key.fill_cw, c->key.offset_cw);
+      }
+      brw_ENDIF(p, ccw);
+   }
+   else if (c->key.fill_cw != CLIP_CULL) {
+      emit_primitives(c, c->key.fill_cw, c->key.offset_cw);
+   }
+   else if (c->key.fill_ccw != CLIP_CULL) { 
+      emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw);
+   }
+}
+
+
+
+/* Emit: kill the thread when reg.nr_verts is less than 3. */
+static void check_nr_verts( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *if_insn;
+
+   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.nr_verts, brw_imm_d(3));      
+   if_insn = brw_IF(p, BRW_EXECUTE_1);
+   {
+      brw_clip_kill_thread(c);
+   }
+   brw_ENDIF(p, if_insn);
+}
+
+/* Build the clip program used for unfilled (line/point/culled) polygon modes. */
+void brw_emit_unfilled_clip( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *do_clip;
+   
+
+   c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) ||
+			(c->key.fill_ccw != c->key.fill_cw) ||
+			c->key.fill_ccw == CLIP_CULL ||
+			c->key.fill_cw == CLIP_CULL ||
+			c->key.copy_bfc_cw ||
+			c->key.copy_bfc_ccw);
+
+   brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
+   brw_clip_tri_init_vertices(c);
+
+   assert(c->offset[VERT_RESULT_EDGE]);
+
+   if (c->key.fill_ccw == CLIP_CULL &&
+       c->key.fill_cw == CLIP_CULL) {
+      brw_clip_kill_thread(c);
+      return;
+   }
+
+   merge_edgeflags(c);
+
+   /* Need to use the inlist indirection here: 
+    */
+   if (c->need_direction) 
+      compute_tri_direction(c);
+   
+   if (c->key.fill_ccw == CLIP_CULL ||
+       c->key.fill_cw == CLIP_CULL)
+      cull_direction(c);
+
+   if (c->key.offset_ccw ||
+       c->key.offset_cw)
+      compute_offset(c);
+
+   if (c->key.copy_bfc_ccw ||
+       c->key.copy_bfc_cw)
+      copy_bfc(c);
+
+   /* Need to do this whether we clip or not:
+    */
+   if (c->key.do_flat_shading)
+      brw_clip_tri_flat_shade(c);
+   
+   brw_clip_init_clipmask(c);
+   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
+   do_clip = brw_IF(p, BRW_EXECUTE_1);
+   {
+      brw_clip_init_planes(c);
+      brw_clip_tri(c);
+      check_nr_verts(c);
+   }
+   brw_ENDIF(p, do_clip);
+   
+   emit_unfilled_primitives(c);
+   brw_clip_kill_thread(c);
+}
+
+
+
diff --git a/i965/brw_clip_util.c b/i965/brw_clip_util.c
new file mode 100644
index 0000000..41d9b75
--- /dev/null
+++ b/i965/brw_clip_util.c
@@ -0,0 +1,356 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+
+#include "shader/program.h"
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+
+
+
+static struct brw_reg get_tmp( struct brw_clip_compile *c )
+{
+   /* Hand out the GRF at the cursor and advance the cursor past it. */
+   struct brw_reg reg = brw_vec4_grf(c->last_tmp++, 0);
+   /* Keep total_grf at least as large as the advanced cursor. */
+   if (c->prog_data.total_grf < c->last_tmp)
+      c->prog_data.total_grf = c->last_tmp;
+   return reg;
+}
+
+static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp )
+{
+   /* Only the most recently handed-out temporary moves the cursor back. */
+   if (c->last_tmp-1 == tmp.nr) c->last_tmp -= 1;
+}
+
+/* Build a UD immediate from x, y, z, w shifted left by 0, 8, 16 and 24 bits. */
+static struct brw_reg make_plane_ud(GLuint x, GLuint y, GLuint z, GLuint w)
+{
+   return brw_imm_ud(x | (y<<8) | (z<<16) | (w<<24));
+}
+
+
+void brw_clip_init_planes( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   /* With user clip planes in the key, no fixed-plane immediates are emitted. */
+   if (c->key.nr_userclip)
+      return;
+   brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0,    0, 0xff, 1));
+   brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0,    0,    1, 1));
+   brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff,    0, 1));
+   brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0,    1,    0, 1));
+   brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff,  0,    0, 1));
+   brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1,    0,    0, 1));
+}
+
+
+
+#define W 3
+
+/* Project 'pos' to screen space (or back again), overwriting it: the W
+ * element is inverted in place, then xyz is multiplied by the new W. */
+static void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos )
+{
+   struct brw_compile *p = &c->func;
+
+   /* calc rhw: source and destination are both the W element of pos
+    */
+   brw_math_invert(p, get_element(pos, W), get_element(pos, W));
+
+   /* value.xyz *= value.rhw  (emitted in ALIGN_16 mode; ALIGN_1 is
+    * restored straight afterwards) */
+   brw_set_access_mode(p, BRW_ALIGN_16);
+   brw_MUL(p, brw_writemask(pos, WRITEMASK_XYZ), pos, brw_swizzle1(pos, W));
+   brw_set_access_mode(p, BRW_ALIGN_1);
+}
+
+
+static void brw_clip_project_vertex( struct brw_clip_compile *c, 
+				     struct brw_indirect vert_addr )
+{
+   struct brw_reg pos = get_tmp(c);
+   struct brw_compile *p = &c->func;
+
+   /* Load the vertex's HPOS attribute into a scratch register: */
+   brw_MOV(p, pos, deref_4f(vert_addr, c->offset[VERT_RESULT_HPOS]));
+   /* Project the copy in place: */
+   brw_clip_project_position(c, pos);
+   /* Store the projected value at the header position offset: */
+   brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), pos);
+
+   release_tmp(c, pos);
+}
+
+
+
+
+/* Interpolate between the vertices at v0_ptr and v1_ptr by factor t0 and
+ * write the new vertex through dest_ptr.  force_edgeflag stores 1 in the
+ * edge flag attribute instead of copying it from v0. */
+void brw_clip_interp_vertex( struct brw_clip_compile *c,
+			     struct brw_indirect dest_ptr,
+			     struct brw_indirect v0_ptr, /* from */
+			     struct brw_indirect v1_ptr, /* to */
+			     struct brw_reg t0,
+			     GLboolean force_edgeflag)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = get_tmp(c);
+   GLuint i;
+
+   /* Just copy the vertex header:
+    */
+   brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);
+      
+   /* Iterate over each attribute (could be done in pairs?)
+    */
+   for (i = 0; i < c->nr_attrs; i++) {
+      GLuint delta = i*16 + 32;	/* attribute i: 16 bytes each, starting at byte 32 */
+
+      if (delta == c->offset[VERT_RESULT_EDGE]) {
+	 if (force_edgeflag) 
+	    brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
+	 else
+	    brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta));
+      }
+      else {
+	 /* Interpolate: 
+	  * (MUL to the null register, then MAC into tmp; presumably chained via the accumulator - confirm)
+	  *        New = attr0 + t*attr1 - t*attr0
+	  */
+	 brw_MUL(p, 
+		 vec4(brw_null_reg()),
+		 deref_4f(v1_ptr, delta),
+		 t0);
+
+	 brw_MAC(p, 
+		 tmp,	      
+		 negate(deref_4f(v0_ptr, delta)),
+		 t0); 
+	      
+	 brw_ADD(p,
+		 deref_4f(dest_ptr, delta), 
+		 deref_4f(v0_ptr, delta),
+		 tmp);
+      }
+   }
+   /* Odd attribute count: zero one extra 16-byte slot after the last attribute. */
+   if (i & 1) {
+      GLuint delta = i*16 + 32;
+      brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
+   }
+
+   release_tmp(c, tmp);
+
+   /* Recreate the projected (NDC) coordinate in the new vertex
+    * header:
+    */
+   brw_clip_project_vertex(c, dest_ptr );
+}
+
+
+
+
+#define MAX_MRF 16
+
+void brw_clip_emit_vue(struct brw_clip_compile *c, 
+		       struct brw_indirect vert,
+		       GLboolean allocate,
+		       GLboolean eot,
+		       GLuint header)
+{
+   struct brw_compile *p = &c->func;
+   GLuint start = c->last_mrf;
+   /* Emit the URB write for the vertex at 'vert'; allocate and eot may not both be set. */
+   assert(!(allocate && eot));
+   
+   /* Cycle through mrf regs - probably futile as we have to wait for
+    * the allocation response anyway.  Also, the order this function
+    * is invoked doesn't correspond to the order the instructions will
+    * be executed, so it won't have any effect in many cases.
+    */
+#if 0
+   if (start + c->nr_regs + 1 >= MAX_MRF)
+      start = 0;
+
+   c->last_mrf = start + c->nr_regs + 1;
+#endif
+	
+   /* Copy the vertex from vertn into m1..mN+1:
+    */
+   brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs);
+
+   /* Overwrite PrimType and PrimStart in the message header, for
+    * each vertex in turn:
+    */
+   brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
+
+
+   /* Send each vertex as a separate write to the urb.  This
+    * is different to the concept in brw_sf_emit.c, where
+    * subsequent writes are used to build up a single urb
+    * entry.  Each of these writes instantiates a separate
+    * urb entry - (I think... what about 'allocate'?)
+    */
+   brw_urb_WRITE(p, 
+		 allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+		 start,		/* first message register; the vertex goes to start+1.. */
+		 c->reg.R0,
+		 allocate,
+		 1,		/* used */
+		 c->nr_regs + 1, /* msg length */
+		 allocate ? 1 : 0, /* response_length */ 
+		 eot,		/* eot */
+		 1,		/* writes_complete */
+		 0,		/* urb offset */
+		 BRW_URB_SWIZZLE_NONE);
+}
+
+
+
+void brw_clip_kill_thread(struct brw_clip_compile *c)
+{
+   struct brw_compile *p = &c->func;
+
+   /* Send an empty message to kill the thread and release any
+    * allocated urb entry:
+    */
+   brw_urb_WRITE(p, 
+		 retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+		 0,		/* same slot as 'start' in brw_clip_emit_vue() */
+		 c->reg.R0,
+		 0,		/* allocate */
+		 0,		/* used */
+		 1, 		/* msg len */
+		 0, 		/* response len */
+		 1, 		/* eot */
+		 1,		/* writes complete */
+		 0,		/* urb offset */
+		 BRW_URB_SWIZZLE_NONE);
+}
+
+
+/* Address of c->reg.fixed_planes, whose element 0 holds the first fixed plane. */
+struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c )
+{
+   return brw_address((c->reg.fixed_planes));
+}
+
+
+struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c )
+{
+   /* Step between consecutive planes: 16 when user clip planes are in
+    * the key, otherwise 4, matching the one-UD-per-plane layout written
+    * by brw_clip_init_planes().
+    */
+   const GLuint stride = c->key.nr_userclip ? 16 : 4;
+   return brw_imm_uw(stride);
+}
+
+
+/* Copy the colour attributes (COL0, COL1, BFC0, BFC1, in that order)
+ * from vertex 'from' to vertex 'to'.  Used when flatshading to
+ * distribute the provoking vertex's colours prior to clipping.
+ * Attributes whose offset is zero are skipped.
+ */
+void brw_clip_copy_colors( struct brw_clip_compile *c,
+			   GLuint to, GLuint from )
+{
+   static const GLuint color_results[] = {
+      VERT_RESULT_COL0,
+      VERT_RESULT_COL1,
+      VERT_RESULT_BFC0,
+      VERT_RESULT_BFC1
+   };
+   struct brw_compile *p = &c->func;
+   GLuint n;
+
+   for (n = 0; n < sizeof(color_results) / sizeof(color_results[0]); n++) {
+      const GLuint attr = color_results[n];
+
+      if (!c->offset[attr])
+	 continue;
+
+      brw_MOV(p, 
+	      byte_offset(c->reg.vertex[to], c->offset[attr]),
+	      byte_offset(c->reg.vertex[from], c->offset[attr]));
+   }
+}
+
+
+
+void brw_clip_init_clipmask( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg incoming = get_element_ud(c->reg.R0, 2);
+   /* The plane mask is built from dword 2 of R0. */
+   /* Shift so that lowest outcode bit is rightmost (bit 26 lands on bit 0):
+    */
+   brw_MOV(p, c->reg.planemask, incoming);
+   brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(26));
+
+   if (c->key.nr_userclip) {
+      struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD);
+
+      /* Rearrange userclip outcodes so that they come directly after
+       * the fixed plane bits: bits 14..19 are moved down to bits 6..11.
+       */
+      brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14));
+      brw_SHR(p, tmp, tmp, brw_imm_ud(8));
+      brw_OR(p, c->reg.planemask, c->reg.planemask, tmp);
+      
+      release_tmp(c, tmp);
+   }
+
+   if (!BRW_IS_IGD(p->brw)) {
+       /* Test for -ve rhw workaround: bit 20 is tested with the NZ conditional mod and 0x3f is
+        * ORed into the mask; presumably the OR is predicated on that test - confirm. */
+       brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+       brw_AND(p, vec1(brw_null_reg()), incoming, brw_imm_ud(1<<20));
+       brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f));
+   }
+   /* Reset predicate control for whatever is emitted next: */
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+}
+
diff --git a/i965/brw_context.c b/i965/brw_context.c
new file mode 100644
index 0000000..397a9bd
--- /dev/null
+++ b/i965/brw_context.c
@@ -0,0 +1,169 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+
+#include "brw_context.h"
+#include "brw_aub.h"
+#include "brw_defines.h"
+#include "brw_draw.h"
+#include "brw_vs.h"
+#include "imports.h"
+#include "intel_tex.h"
+#include "intel_blit.h"
+#include "intel_batchbuffer.h"
+
+#include "utils.h"
+#include "api_noop.h"
+#include "vtxfmt.h"
+
+/***************************************
+ * Mesa's Driver Functions
+ ***************************************/
+
+static const struct dri_extension brw_extensions[] =
+{   /* passed to driInitExtensions() by brwCreateContext() */
+    { "GL_ARB_depth_texture",              NULL },
+    { "GL_ARB_fragment_program",           NULL },
+    { "GL_ARB_shadow",                     NULL },
+    { "GL_EXT_shadow_funcs",               NULL },
+    /* ARB extn won't work if not enabled */
+    { "GL_SGIX_depth_texture",             NULL },
+    { "GL_ARB_texture_env_crossbar",       NULL },
+    { NULL,                                NULL }	/* end-of-list entry */
+};
+
+
+static void brwInitDriverFunctions( struct dd_function_table *functions )
+{
+   intelInitDriverFunctions( functions );	/* shared intel hooks are installed first */
+   brwInitTextureFuncs( functions );	/* then the brw_tex.c hooks */
+   brwInitFragProgFuncs( functions );	/* and the brw_program.c hooks */
+}
+
+
+static void brw_init_attribs( struct brw_context *brw )
+{
+   struct brw_state_pointers *attribs = &brw->attribs;
+   /* Point every attribs entry at the matching group in the GL context. */
+   attribs->Color = &brw->intel.ctx.Color;
+   attribs->Depth = &brw->intel.ctx.Depth;
+   attribs->Fog = &brw->intel.ctx.Fog;
+   attribs->Hint = &brw->intel.ctx.Hint;
+   attribs->Light = &brw->intel.ctx.Light;
+   attribs->Line = &brw->intel.ctx.Line;
+   attribs->Point = &brw->intel.ctx.Point;
+   attribs->Polygon = &brw->intel.ctx.Polygon;
+   attribs->Scissor = &brw->intel.ctx.Scissor;
+   attribs->Stencil = &brw->intel.ctx.Stencil;
+   attribs->Texture = &brw->intel.ctx.Texture;
+   attribs->Transform = &brw->intel.ctx.Transform;
+   attribs->Viewport = &brw->intel.ctx.Viewport;
+   attribs->VertexProgram = &brw->intel.ctx.VertexProgram;
+   attribs->FragmentProgram = &brw->intel.ctx.FragmentProgram;
+   attribs->PolygonStipple = &brw->intel.ctx.PolygonStipple[0];
+}
+
+
+/* Create the i965 context: allocate a brw_context, run the shared intel
+ * initialisation and then the brw-specific setup.  Returns GL_FALSE when
+ * the allocation or intelInitContext() fails, GL_TRUE otherwise.
+ */
+GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
+			    __DRIcontextPrivate *driContextPriv,
+			    void *sharedContextPrivate)
+{
+   struct dd_function_table functions;
+   struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context);
+   struct intel_context *intel;
+   GLcontext *ctx;
+
+   if (!brw) {
+      _mesa_printf("%s: failed to alloc context\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   /* Derive the member pointers only once brw is known to be non-NULL: */
+   intel = &brw->intel;
+   ctx = &intel->ctx;
+
+   brwInitVtbl( brw );
+   brwInitDriverFunctions( &functions );
+
+   if (!intelInitContext( intel, mesaVis, driContextPriv,
+			  sharedContextPrivate, &functions )) {
+      _mesa_printf("%s: failed to init intel context\n", __FUNCTION__);
+      FREE(brw);
+      return GL_FALSE;
+   }
+
+   ctx->Const.MaxTextureUnits = BRW_MAX_TEX_UNIT;
+   ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
+   ctx->Const.MaxTextureCoordUnits = BRW_MAX_TEX_UNIT;
+
+   /* Advertise the full hardware capabilities.  The new memory
+    * manager should cope much better with overload situations:
+    */
+   ctx->Const.MaxTextureLevels = 12;
+   ctx->Const.Max3DTextureLevels = 9;
+   ctx->Const.MaxCubeTextureLevels = 12;
+   ctx->Const.MaxTextureRectSize = (1<<11);
+/*    ctx->Const.MaxNativeVertexProgramTemps = 32; */
+
+   driInitExtensions( ctx, brw_extensions, GL_FALSE );
+
+   brw_aub_init( brw );
+
+   brw_init_attribs( brw );
+   brw_init_metaops( brw );
+   brw_init_state( brw );
+
+   brw->state.dirty.mesa = ~0;
+   brw->state.dirty.brw = ~0;
+
+   memset(&brw->wm.bind, ~0, sizeof(brw->wm.bind));
+   brw->emit_state_always = 0;
+   ctx->FragmentProgram._MaintainTexEnvProgram = 1;
+
+   brw_draw_init( brw );
+   brw_ProgramCacheInit( ctx );
+   brw_FrameBufferTexInit( brw );
+
+   {
+      const char *filename = getenv("INTEL_REPLAY");
+      if (filename) {
+	 brw_playback_aubfile(brw, filename);
+	 exit(0);
+      }
+   }
+
+   return GL_TRUE;
+}
+
diff --git a/i965/brw_context.h b/i965/brw_context.h
new file mode 100644
index 0000000..08fdc54
--- /dev/null
+++ b/i965/brw_context.h
@@ -0,0 +1,706 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+
+#ifndef BRWCONTEXT_INC
+#define BRWCONTEXT_INC
+
+#include "intel_context.h"
+#include "brw_structs.h"
+#include "imports.h"
+
+
+/* Glossary:
+ *
+ * URB - unified return buffer.  A mid-sized buffer which is
+ * partitioned between the fixed function units and used for passing
+ * values (vertices, primitives, constants) between them.
+ *
+ * CURBE - constant URB entry.  An urb region (entry) used to hold
+ * constant values which the fixed function units can be instructed to
+ * preload into the GRF when spawning a thread.
+ *
+ * VUE - vertex URB entry.  An urb entry holding a vertex and usually
+ * a vertex header.  The header contains control information and
+ * things like primitive type, Begin/end flags and clip codes.  
+ *
+ * PUE - primitive URB entry.  An urb entry produced by the setup (SF)
+ * unit holding rasterization and interpolation parameters.
+ *
+ * GRF - general register file.  One of several register files
+ * addressable by programmed threads.  The inputs (r0, payload, curbe,
+ * urb) of the thread are preloaded to this area before the thread is
+ * spawned.  The registers are individually 8 dwords wide and suitable
+ * for general usage.  Registers holding thread input values are not
+ * special and may be overwritten.
+ *
+ * MRF - message register file.  Threads communicate (and terminate)
+ * by sending messages.  Message parameters are placed in contiguous
+ * MRF registers.  All program output is via these messages.  URB
+ * entries are populated by sending a message to the shared URB
+ * function containing the new data, together with a control word,
+ * often an unmodified copy of R0.
+ *
+ * R0 - GRF register 0.  Typically holds control information used when
+ * sending messages to other threads.
+ *
+ * EU or GEN4 EU: The name of the programmable subsystem of the
+ * i965 hardware.  Threads are executed by the EU, the registers
+ * described above are part of the EU architecture.
+ *
+ * Fixed function units:
+ *
+ * CS - Command streamer.  Notional first unit, little software
+ * interaction.  Holds the URB entries used for constant data, ie the
+ * CURBEs.
+ *
+ * VF/VS - Vertex Fetch / Vertex Shader.  The fixed function part of
+ * this unit is responsible for pulling vertices out of vertex buffers
+ * in vram and injecting them into the processing pipe as VUEs.  If
+ * enabled, it first passes them to a VS thread which is a good place
+ * for the driver to implement any active vertex shader.
+ *
+ * GS - Geometry Shader.  This corresponds to a new DX10 concept.  If
+ * enabled, incoming strips etc are passed to GS threads in individual
+ * line/triangle/point units.  The GS thread may perform arbitrary
+ * computation and emit whatever primitives with whatever vertices it
+ * chooses.  This makes GS an excellent place to implement GL's
+ * unfilled polygon modes, though of course it is capable of much
+ * more.  Additionally, GS is used to translate away primitives not
+ * handled by later units, including Quads and Lineloops.
+ *
+ * CLIP - Clipper.  Mesa's clipping algorithms are imported to run on
+ * this unit.  The fixed function part performs cliptesting against
+ * the 6 fixed clipplanes and makes decisions on whether or not the
+ * incoming primitive needs to be passed to a thread for clipping.
+ * User clip planes are handled via cooperation with the VS thread.
+ *
+ * SF - Strips Fans or Setup: Triangles are prepared for
+ * rasterization.  Interpolation coefficients are calculated.
+ * Flatshading and two-side lighting usually performed here.
+ *
+ * WM - Windower.  Interpolation of vertex attributes performed here.
+ * Fragment shader implemented here.  SIMD aspects of EU taken full
+ * advantage of, as pixels are processed in blocks of 16.
+ *
+ * CC - Color Calculator.  No EU threads associated with this unit.
+ * Handles blending and (presumably) depth and stencil testing.
+ */
+
+#define BRW_FALLBACK_TEXTURE		 0x1
+#define BRW_MAX_CURBE                    (32*16)	/* length of brw_wm_prog_data.param[] */
+
+struct brw_context;
+/* One bit per flag; presumably these are the bits of brw_state_flags.brw - confirm. */
+#define BRW_NEW_URB_FENCE               0x1
+#define BRW_NEW_FRAGMENT_PROGRAM        0x2
+#define BRW_NEW_VERTEX_PROGRAM          0x4
+#define BRW_NEW_INPUT_DIMENSIONS        0x8
+#define BRW_NEW_CURBE_OFFSETS           0x10
+#define BRW_NEW_REDUCED_PRIMITIVE       0x20
+#define BRW_NEW_PRIMITIVE               0x40
+#define BRW_NEW_CONTEXT                 0x80
+#define BRW_NEW_WM_INPUT_DIMENSIONS     0x100
+#define BRW_NEW_INPUT_VARYING           0x200
+#define BRW_NEW_TNL_PROGRAM             0x400
+#define BRW_NEW_PSP                     0x800
+#define BRW_NEW_METAOPS                 0x1000
+#define BRW_NEW_FENCE                   0x2000
+#define BRW_NEW_LOCK                    0x4000
+
+
+
+struct brw_state_flags {	/* three independent dirty-bit words */
+   GLuint mesa;
+   GLuint cache;		/* CACHE_NEW_* bits */
+   GLuint brw;
+};
+
+struct brw_vertex_program {
+   struct gl_vertex_program program;	/* Mesa program object, embedded first */
+   GLuint id;			/* presumably drawn from brw_context.program_id - confirm */
+   GLuint param_state;		/* flags indicating state tracked by params */
+};
+
+
+
+struct brw_fragment_program {
+   struct gl_fragment_program program;	/* Mesa program object, embedded first */
+   GLuint id;			/* presumably drawn from brw_context.program_id - confirm */
+   GLuint param_state;		/* flags indicating state tracked by params */
+};
+
+
+
+
+/* Data about a particular attempt to compile a program.  Note that
+ * there can be many of these, each in a different GL state
+ * corresponding to a different brw_wm_prog_key struct, with different
+ * compiled programs:
+ */
+struct brw_wm_prog_data {
+   GLuint curb_read_length;
+   GLuint urb_read_length;
+
+   GLuint first_curbe_grf;
+   GLuint total_grf;
+   GLuint total_scratch;
+
+   GLuint nr_params;		/* rounded up to units of 16 by calculate_curbe_offsets() */
+   GLboolean error;
+
+   /* Pointer to tracked values (only valid once
+    * _mesa_load_state_parameters has been called at runtime).
+    */
+   const GLfloat *param[BRW_MAX_CURBE];
+};
+
+struct brw_sf_prog_data {
+   GLuint urb_read_length;
+   GLuint total_grf;
+
+   /* Each vertex may have up to 12 attributes, 4 components each,
+    * except WPOS which requires only 2.  (11*4 + 2) == 46, which
+    * would fit in 12 rows of 4.
+    *
+    * Actually we use 4 for each, so call it 12 rows.
+    */
+   GLuint urb_entry_size;
+};
+
+struct brw_clip_prog_data {
+   GLuint curb_read_length;	/* user planes? */
+   GLuint clip_mode;
+   GLuint urb_read_length;
+   GLuint total_grf;		/* raised by get_tmp() in brw_clip_util.c as temporaries are handed out */
+};
+
+struct brw_gs_prog_data {
+   GLuint urb_read_length;
+   GLuint total_grf;
+};
+
+struct brw_vs_prog_data {
+   GLuint curb_read_length;
+   GLuint urb_read_length;
+   GLuint total_grf;
+   GLuint outputs_written;	/* presumably a bitmask of VERT_RESULT_* - confirm */
+
+   GLuint inputs_read;		/* presumably a bitmask of VERT_ATTRIB_* - confirm */
+
+   /* Used for calculating urb partitions:
+    */
+   GLuint urb_entry_size;
+};
+
+
+/* Size == 0 if output either not written, or always [0,0,0,1].
+ * ("ouput" in the struct tag is a misspelling of "output".) */
+struct brw_vs_ouput_sizes {
+   GLubyte output_size[VERT_RESULT_MAX];	/* one size per VERT_RESULT_* slot */
+};
+
+
+#define BRW_MAX_TEX_UNIT 8
+#define BRW_WM_MAX_SURF (BRW_MAX_TEX_UNIT + 1)	/* parenthesized so it expands safely in expressions */
+
+/* Create a fixed sized struct for caching binding tables
+ * (brwCreateContext() fills brw->wm.bind with ~0 bytes at start-up): */
+struct brw_surface_binding_table {
+   GLuint surf_ss_offset[BRW_WM_MAX_SURF];
+};
+
+
+struct brw_cache;
+
+struct brw_mem_pool {
+   struct buffer *buffer;
+
+   GLuint size;
+   GLuint offset;		/* offset of first free byte */
+
+   struct brw_context *brw;	/* owning context */
+};
+
+struct brw_cache_item {
+   GLuint hash;
+   GLuint key_size;		/* for variable-sized keys */
+   const void *key;
+
+   GLuint offset;		/* offset within pool's buffer */
+   GLuint data_size;
+
+   struct brw_cache_item *next;	/* next item in the same chain */
+};   
+
+
+
+struct brw_cache {
+   GLuint id;
+
+   const char *name;
+
+   struct brw_context *brw;
+   struct brw_mem_pool *pool;	/* pool whose buffer the item offsets refer to */
+
+   struct brw_cache_item **items;	/* presumably the hash buckets - confirm */
+   GLuint size, n_items;
+   
+   GLuint key_size;		/* for fixed-size keys */
+   GLuint aux_size;
+
+   GLuint aub_type;
+   GLuint aub_sub_type;
+   
+   GLuint last_addr;			/* offset of active item */
+};
+
+
+
+struct brw_state_pointers {	/* brw_init_attribs() points these into the GLcontext */
+   struct gl_colorbuffer_attrib	*Color;
+   struct gl_depthbuffer_attrib	*Depth;
+   struct gl_fog_attrib		*Fog;
+   struct gl_hint_attrib	*Hint;
+   struct gl_light_attrib	*Light;
+   struct gl_line_attrib	*Line;
+   struct gl_point_attrib	*Point;
+   struct gl_polygon_attrib	*Polygon;
+   GLuint                       *PolygonStipple;	/* set to &ctx->PolygonStipple[0] */
+   struct gl_scissor_attrib	*Scissor;
+   struct gl_stencil_attrib	*Stencil;
+   struct gl_texture_attrib	*Texture;
+   struct gl_transform_attrib	*Transform;
+   struct gl_viewport_attrib	*Viewport;
+   struct gl_vertex_program_state *VertexProgram; 
+   struct gl_fragment_program_state *FragmentProgram;
+};
+
+/* Considered adding a member to this struct to document which flags
+ * an update might raise so that ordering of the state atoms can be
+ * checked or derived at runtime.  Dropped the idea in favor of having
+ * a debug mode where the state is monitored for flags which are
+ * raised that have already been tested against.
+ */
+struct brw_tracked_state {
+   struct brw_state_flags dirty;	/* presumably the flags that make update() run - confirm */
+   void (*update)( struct brw_context *brw );	/* callback given the owning context */
+};
+
+
+enum brw_cache_id {	/* each value is also the bit index of its CACHE_NEW_* flag */
+   BRW_CC_VP,
+   BRW_CC_UNIT,
+   BRW_WM_PROG,
+   BRW_SAMPLER_DEFAULT_COLOR,
+   BRW_SAMPLER,
+   BRW_WM_UNIT,
+   BRW_SF_PROG,
+   BRW_SF_VP,
+   BRW_SF_UNIT,
+   BRW_VS_UNIT,
+   BRW_VS_PROG,
+   BRW_GS_UNIT,
+   BRW_GS_PROG,
+   BRW_CLIP_VP,
+   BRW_CLIP_UNIT,
+   BRW_CLIP_PROG,
+
+   /* These two are in the SS pool:
+    */
+   BRW_SS_SURFACE,
+   BRW_SS_SURF_BIND,
+
+   BRW_MAX_CACHE		/* count; sizes brw_context.cache[] */
+};
+
+/* Flags for brw->state.dirty.cache: one bit per enum brw_cache_id value.
+ */
+#define CACHE_NEW_CC_VP                  (1<<BRW_CC_VP)
+#define CACHE_NEW_CC_UNIT                (1<<BRW_CC_UNIT)
+#define CACHE_NEW_WM_PROG                (1<<BRW_WM_PROG)
+#define CACHE_NEW_SAMPLER_DEFAULT_COLOR  (1<<BRW_SAMPLER_DEFAULT_COLOR)
+#define CACHE_NEW_SAMPLER                (1<<BRW_SAMPLER)
+#define CACHE_NEW_WM_UNIT                (1<<BRW_WM_UNIT)
+#define CACHE_NEW_SF_PROG                (1<<BRW_SF_PROG)
+#define CACHE_NEW_SF_VP                  (1<<BRW_SF_VP)
+#define CACHE_NEW_SF_UNIT                (1<<BRW_SF_UNIT)
+#define CACHE_NEW_VS_UNIT                (1<<BRW_VS_UNIT)
+#define CACHE_NEW_VS_PROG                (1<<BRW_VS_PROG)
+#define CACHE_NEW_GS_UNIT                (1<<BRW_GS_UNIT)
+#define CACHE_NEW_GS_PROG                (1<<BRW_GS_PROG)
+#define CACHE_NEW_CLIP_VP                (1<<BRW_CLIP_VP)
+#define CACHE_NEW_CLIP_UNIT              (1<<BRW_CLIP_UNIT)
+#define CACHE_NEW_CLIP_PROG              (1<<BRW_CLIP_PROG)
+#define CACHE_NEW_SURFACE                (1<<BRW_SS_SURFACE)
+#define CACHE_NEW_SURF_BIND              (1<<BRW_SS_SURF_BIND)
+
+
+
+
+enum brw_mempool_id {
+   BRW_GS_POOL,
+   BRW_SS_POOL,
+   BRW_MAX_POOL		/* count; sizes brw_context.pool[] */
+};
+
+
+struct brw_cached_batch_item {
+   struct header *header;
+   GLuint sz;
+   struct brw_cached_batch_item *next;	/* singly linked; head is brw_context.cached_batch_items */
+};
+   
+
+
+/* Protect against a future where VERT_ATTRIB_MAX > 32.  Wouldn't life
+ * be easier if C allowed arrays of packed elements?
+ */
+#define ATTRIB_BIT_DWORDS  ((VERT_ATTRIB_MAX+31)/32)	/* dwords needed for one bit per attribute */
+
+struct brw_vertex_element {
+   const struct gl_client_array *glarray;
+
+   struct brw_vertex_element_state *vep;
+
+   GLuint index;
+   GLuint element_size;
+   GLuint count;
+   GLuint vbo_rebase_offset;
+};
+
+
+
+struct brw_vertex_info {
+   GLuint varying;  /* varying:1[VERT_ATTRIB_MAX] */
+   GLuint sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX], i.e. two bits per attribute */
+};
+
+
+
+
+/* Cache for TNL programs (instantiated as brw_context.tnl_program_cache).
+ */
+struct brw_tnl_cache_item {
+   GLuint hash;
+   void *key;
+   void *data;
+   struct brw_tnl_cache_item *next;	/* next item in the same chain */
+};
+
+struct brw_tnl_cache {
+   struct brw_tnl_cache_item **items;	/* presumably the hash buckets - confirm */
+   GLuint size, n_items;
+};
+
+
+
+struct brw_context 
+{
+   struct intel_context intel;	/* first member: brw_context() casts a GLcontext pointer to this struct */
+   GLuint primitive;
+
+   GLboolean emit_state_always;	/* cleared by brwCreateContext() */
+   GLboolean wrap;
+   GLboolean tmp_fallback;
+
+   struct {
+      struct brw_state_flags dirty;	/* mesa and brw words start out as ~0 */
+      struct brw_tracked_state **atoms;
+      GLuint nr_atoms;
+
+
+      struct intel_region *draw_region;
+      struct intel_region *depth_region;
+   } state;
+
+   struct brw_state_pointers attribs;	/* filled in by brw_init_attribs() */
+   struct brw_mem_pool pool[BRW_MAX_POOL];
+   struct brw_cache cache[BRW_MAX_CACHE];
+   struct brw_cached_batch_item *cached_batch_items;
+
+   struct {
+
+      /* Arrays with buffer objects to copy non-bufferobj arrays into
+       * for upload:
+       */
+      struct gl_client_array vbo_array[VERT_ATTRIB_MAX];
+
+      struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
+
+#define BRW_NR_UPLOAD_BUFS 17
+#define BRW_UPLOAD_INIT_SIZE (128*1024)
+
+      struct {
+	 struct gl_buffer_object *vbo[BRW_NR_UPLOAD_BUFS];
+	 GLuint buf;
+	 GLuint offset;
+	 GLuint size;
+	 GLuint wrap;
+      } upload;
+
+      /* Summary of size and varying of active arrays, so we can check
+       * for changes to this state:
+       */
+      struct brw_vertex_info info;
+   } vb;
+
+   struct {
+      /* Will be allocated on demand if needed.   
+       */
+      struct brw_state_pointers attribs;
+      struct gl_vertex_program *vp;
+      struct gl_fragment_program *fp, *fp_tex;
+
+      struct gl_buffer_object *vbo;
+
+      struct intel_region *saved_draw_region;
+      struct intel_region *saved_depth_region;
+
+      GLuint restore_draw_mask;
+      struct gl_fragment_program *restore_fp;
+      
+      GLboolean active;
+   } metaops;
+
+   /* Track fixed function t&l in a vertex program:
+    */
+   struct gl_vertex_program *tnl_program;
+   struct brw_tnl_cache tnl_program_cache;
+
+   /* Active vertex program: 
+    */
+   const struct gl_vertex_program *vertex_program;
+   const struct gl_fragment_program *fragment_program;
+
+
+   /* For populating the gtt:
+    */
+   GLuint next_free_page;
+
+
+   /* BRW_NEW_URB_ALLOCATIONS:
+    */
+   struct {
+      GLuint vsize;		/* vertex size plus header in urb registers */
+      GLuint csize;		/* constant buffer size in urb registers */
+      GLuint sfsize;		/* setup data size in urb registers */
+
+      GLboolean constrained;
+
+      GLuint nr_vs_entries;
+      GLuint nr_gs_entries;
+      GLuint nr_clip_entries;
+      GLuint nr_sf_entries;
+      GLuint nr_cs_entries;
+
+/*       GLuint vs_size; */
+/*       GLuint gs_size; */
+/*       GLuint clip_size; */
+/*       GLuint sf_size; */
+/*       GLuint cs_size; */
+
+      GLuint vs_start;
+      GLuint gs_start;
+      GLuint clip_start;
+      GLuint sf_start;
+      GLuint cs_start;
+   } urb;
+
+   
+   /* BRW_NEW_CURBE_OFFSETS: 
+    */
+   struct {
+      GLuint wm_start;
+      GLuint wm_size;
+      GLuint clip_start;
+      GLuint clip_size;
+      GLuint vs_start;
+      GLuint vs_size;
+      GLuint total_size;
+
+      /* Dynamic tracker which changes to reflect the state referenced
+       * by active fp and vp program parameters:
+       */
+      struct brw_tracked_state tracked_state;
+
+      GLuint gs_offset;
+
+      GLfloat *last_buf;
+      GLuint last_bufsz;
+   } curbe;
+
+   struct {
+      struct brw_vs_prog_data *prog_data;
+
+      GLuint prog_gs_offset;
+      GLuint state_gs_offset;	
+   } vs;
+
+   struct {
+      struct brw_gs_prog_data *prog_data;
+
+      GLboolean prog_active;
+      GLuint prog_gs_offset;
+      GLuint state_gs_offset;	
+   } gs;
+
+   struct {
+      struct brw_clip_prog_data *prog_data;
+
+      GLuint prog_gs_offset;
+      GLuint vp_gs_offset;
+      GLuint state_gs_offset;	
+   } clip;
+
+
+   struct {
+      struct brw_sf_prog_data *prog_data;
+
+      GLuint prog_gs_offset;
+      GLuint vp_gs_offset;
+      GLuint state_gs_offset;
+   } sf;
+
+   struct {
+      struct brw_wm_prog_data *prog_data;
+      struct brw_wm_compile *compile_data;
+
+      /* Input sizes, calculated from active vertex program:
+       */
+      GLuint input_size_masks[4];
+
+
+      /* State structs
+       */
+      struct brw_sampler_default_color sdc[BRW_MAX_TEX_UNIT];
+      struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT];
+
+      GLuint render_surf;
+      GLuint nr_surfaces;      
+
+      GLuint max_threads;
+      struct buffer *scratch_buffer;
+      GLuint scratch_buffer_size;
+
+      GLuint sampler_count;
+      GLuint sampler_gs_offset;
+
+      struct brw_surface_binding_table bind;	/* filled with ~0 bytes at context creation */
+      GLuint bind_ss_offset;
+
+      GLuint prog_gs_offset;
+      GLuint state_gs_offset;
+   } wm;
+
+
+   struct {
+      GLuint vp_gs_offset;
+      GLuint state_gs_offset;
+   } cc;
+
+   
+   /* Used to give every program string a unique id
+    */
+   GLuint program_id;
+};
+
+/* Pack r, g, b, a into one word: r<<24 | g<<16 | b<<8 | a.  Arguments are parenthesized. */
+#define BRW_PACKCOLOR8888(r,g,b,a)  (((r)<<24) | ((g)<<16) | ((b)<<8) | (a))
+
+
+
+/*======================================================================
+ * brw_vtbl.c
+ */
+void brwInitVtbl( struct brw_context *brw );
+void brw_do_flush( struct brw_context *brw, 
+		   GLuint flags );
+
+/*======================================================================
+ * brw_context.c
+ */
+GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
+			    __DRIcontextPrivate *driContextPriv,
+			    void *sharedContextPrivate);
+
+
+
+/*======================================================================
+ * brw_state.c
+ */
+void brw_validate_state( struct brw_context *brw );
+void brw_init_state( struct brw_context *brw );
+void brw_destroy_state( struct brw_context *brw );
+
+
+
+/*======================================================================
+ * brw_tex.c
+ */
+void brwUpdateTextureState( struct intel_context *intel );
+void brwInitTextureFuncs( struct dd_function_table *functions );
+void brw_FrameBufferTexInit( struct brw_context *brw );
+void brw_FrameBufferTexDestroy( struct brw_context *brw );
+
+/*======================================================================
+ * brw_metaops.c
+ */
+
+void brw_init_metaops( struct brw_context *brw );
+void brw_destroy_metaops( struct brw_context *brw );
+
+
+/*======================================================================
+ * brw_program.c
+ */
+void brwInitFragProgFuncs( struct dd_function_table *functions );
+
+
+/* brw_urb.c
+ */
+void brw_upload_urb_fence(struct brw_context *brw);
+
+void brw_upload_constant_buffer_state(struct brw_context *brw);
+
+
+/*======================================================================
+ * Inline conversion functions.  These are better-typed than the
+ * macros used previously.  brw_context() is a plain pointer cast, so it
+ * presumably relies on the GLcontext sitting at offset 0 - confirm. */
+static inline struct brw_context *
+brw_context( GLcontext *ctx )
+{
+   return (struct brw_context *)ctx;
+}
+
+#endif
+
diff --git a/i965/brw_curbe.c b/i965/brw_curbe.c
new file mode 100644
index 0000000..5bf0ed5
--- /dev/null
+++ b/i965/brw_curbe.c
@@ -0,0 +1,380 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+
+
+#include "glheader.h"
+#include "context.h"
+#include "macros.h"
+#include "enums.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+#include "intel_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_state.h"
+#include "brw_util.h"
+#include "brw_aub.h"
+
+
+/* Partition the CURBE between the various users of constant values:
+ * the WM program, the clip planes and the vertex program, laid out in
+ * that order.  Sizes and offsets are counted in units of 16 floats.
+ */
+static void calculate_curbe_offsets( struct brw_context *brw )
+{
+   /* BRW_NEW_VERTEX_PROGRAM */
+   struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
+   GLuint vs_regs = (vp->program.Base.Parameters->NumParameters * 4 + 15) / 16;
+
+   /* CACHE_NEW_WM_PROG */
+   GLuint wm_regs = (brw->wm.prog_data->nr_params + 15) / 16;
+
+   GLuint clip_regs = 0;
+   GLuint total_regs;
+   GLuint next = 0;
+
+   /* _NEW_TRANSFORM: once any user clip plane is enabled, the six
+    * fixed planes are counted as well.
+    */
+   if (brw->attribs.Transform->ClipPlanesEnabled) {
+      GLuint planes = 6 + brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled);
+      clip_regs = (planes * 4 + 15) / 16;
+   }
+
+   total_regs = wm_regs + vs_regs + clip_regs;
+
+   /* This can happen - what to do?  Probably rather than falling
+    * back, the best thing to do is emit programs which code the
+    * constants as immediate values.  Could do this either as a static
+    * cap on WM and VS, or adaptively.
+    *
+    * Unfortunately, this is currently dependent on the results of the
+    * program generation process (in the case of wm), so this would
+    * introduce the need to re-generate programs in the event of a
+    * curbe allocation failure.
+    */
+   /* Max size is 32 - just large enough to hold the 128 parameters
+    * allowed by the fragment and vertex program api's.  It's not clear
+    * what happens when both VP and FP want to use 128 parameters, though.
+    */
+   assert(total_regs <= 32);
+
+   /* Lazy resize: keep the current layout unless a user outgrew its
+    * slice, the clip slice changed size, or the allocation exceeds 16
+    * registers while less than a quarter of it is needed.
+    */
+   if (wm_regs <= brw->curbe.wm_size &&
+       vs_regs <= brw->curbe.vs_size &&
+       clip_regs == brw->curbe.clip_size &&
+       (total_regs >= brw->curbe.total_size / 4 ||
+        brw->curbe.total_size <= 16))
+      return;
+
+   /* Calculate a new layout: wm first, then clip, then vs. */
+   brw->curbe.wm_start = next;
+   brw->curbe.wm_size = wm_regs;
+   next += wm_regs;
+   brw->curbe.clip_start = next;
+   brw->curbe.clip_size = clip_regs;
+   next += clip_regs;
+   brw->curbe.vs_start = next;
+   brw->curbe.vs_size = vs_regs;
+   next += vs_regs;
+   brw->curbe.total_size = next;
+
+   if (0)
+      _mesa_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n",
+                   brw->curbe.wm_start, brw->curbe.wm_size,
+                   brw->curbe.clip_start, brw->curbe.clip_size,
+                   brw->curbe.vs_start, brw->curbe.vs_size );
+
+   brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;
+}
+
+
+const struct brw_tracked_state brw_curbe_offsets = {
+   .dirty = {
+      .mesa = _NEW_TRANSFORM,           /* ClipPlanesEnabled -> clip register count */
+      .brw  = BRW_NEW_VERTEX_PROGRAM,   /* vp parameter count -> vs register count */
+      .cache = CACHE_NEW_WM_PROG        /* wm prog_data->nr_params -> wm register count */
+   },
+   .update = calculate_curbe_offsets
+};
+
+
+
+
+/* Define the number of curbes within CS's urb allocation.  Multiple
+ * urb entries -> multiple curbes, which the fixed-function hardware
+ * uses in a double-buffering scheme to avoid a pipeline stall each
+ * time the contents of the curbe is changed.
+ */
+void brw_upload_constant_buffer_state(struct brw_context *brw)
+{
+   struct brw_constant_buffer_state packet;
+
+   /* BRW_NEW_URB_FENCE: it appears that this is the state packet for
+    * the CS unit, ie. the urb entries detailed here are housed in the
+    * CS range from the URB_FENCE command.
+    */
+   assert(brw->urb.nr_cs_entries);
+
+   /* Start from all-zero so only the fields set below are non-zero. */
+   memset(&packet, 0, sizeof(packet));
+   packet.header.opcode = CMD_CONST_BUFFER_STATE;
+   packet.header.length = sizeof(packet)/4 - 2;
+   packet.bits0.nr_urb_entries = brw->urb.nr_cs_entries;
+   packet.bits0.urb_entry_size = brw->urb.csize - 1;
+
+   BRW_CACHED_BATCH_STRUCT(brw, &packet);
+}
+
+#if 0
+const struct brw_tracked_state brw_constant_buffer_state = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_URB_FENCE,
+      .cache = 0
+   },
+   .update = brw_upload_constant_buffer_state
+};
+#endif
+
+
+static const GLfloat fixed_plane[6][4] = {   /* read-only; written ahead of any user clip planes */
+   { 0,    0,   -1, 1 },
+   { 0,    0,    1, 1 },
+   { 0,   -1,    0, 1 },
+   { 0,    1,    0, 1 },
+   {-1,    0,    0, 1 },
+   { 1,    0,    0, 1 }
+};
+
+/* Upload a new set of constants.  Too much variability to go into the
+ * cache mechanism; instead the last uploaded image is kept in
+ * brw->curbe.last_buf and the copy into the GS pool is skipped when the
+ * new image is identical.  The CONST_BUFFER packet is emitted regardless.
+ */
+static void upload_constant_buffer(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
+   struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program;
+   struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL];
+   GLuint sz = brw->curbe.total_size;        /* in registers of 16 floats */
+   GLuint bufsz = sz * 16 * sizeof(GLfloat); /* in bytes */
+   GLfloat *buf;
+   GLuint i;
+
+   /* Update our own dependency flags.  This works because this
+    * function will also be called whenever fp or vp changes.
+    */
+   brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION);
+   brw->curbe.tracked_state.dirty.mesa |= vp->param_state;
+   brw->curbe.tracked_state.dirty.mesa |= fp->param_state;
+
+   if (sz == 0) {
+      /* Nothing to upload: emit the packet with the valid bit clear. */
+      struct brw_constant_buffer cb;
+      /* Zero the whole struct so no uninitialized bits reach the batch. */
+      memset(&cb, 0, sizeof(cb));
+      cb.header.opcode = CMD_CONST_BUFFER;
+      cb.header.length = sizeof(cb)/4 - 2;
+      cb.header.valid = 0;
+      cb.bits0.buffer_length = 0;
+      cb.bits0.buffer_address = 0;
+      BRW_BATCH_STRUCT(brw, &cb);
+
+      free(brw->curbe.last_buf);
+      brw->curbe.last_buf = NULL;
+      brw->curbe.last_bufsz = 0;
+      return;
+   }
+
+   buf = malloc(bufsz);
+   if (!buf) {
+      _mesa_printf("out of memory for curbe\n");
+      assert(0);
+      return;
+   }
+   memset(buf, 0, bufsz);
+
+   if (brw->curbe.wm_size) {
+      GLuint offset = brw->curbe.wm_start * 16;
+
+      _mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
+
+      for (i = 0; i < brw->wm.prog_data->nr_params; i++)
+         buf[offset + i] = brw->wm.prog_data->param[i][0];
+   }
+
+   /* The clipplanes are actually delivered to both CLIP and VS units.
+    * VS uses them to calculate the outcode bitmasks.
+    */
+   if (brw->curbe.clip_size) {
+      GLuint offset = brw->curbe.clip_start * 16;
+      GLuint j;
+
+      /* If any planes are going this way, send them all this way: */
+      for (i = 0; i < 6; i++) {
+         buf[offset + i * 4 + 0] = fixed_plane[i][0];
+         buf[offset + i * 4 + 1] = fixed_plane[i][1];
+         buf[offset + i * 4 + 2] = fixed_plane[i][2];
+         buf[offset + i * 4 + 3] = fixed_plane[i][3];
+      }
+
+      /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
+       * clip-space.  i carries on from the fixed planes, so the enabled
+       * user planes are packed directly after them.
+       */
+      assert(MAX_CLIP_PLANES == 6);
+      for (j = 0; j < MAX_CLIP_PLANES; j++) {
+         if (brw->attribs.Transform->ClipPlanesEnabled & (1<<j)) {
+            buf[offset + i * 4 + 0] = brw->attribs.Transform->_ClipUserPlane[j][0];
+            buf[offset + i * 4 + 1] = brw->attribs.Transform->_ClipUserPlane[j][1];
+            buf[offset + i * 4 + 2] = brw->attribs.Transform->_ClipUserPlane[j][2];
+            buf[offset + i * 4 + 3] = brw->attribs.Transform->_ClipUserPlane[j][3];
+            i++;
+         }
+      }
+   }
+
+   if (brw->curbe.vs_size) {
+      GLuint offset = brw->curbe.vs_start * 16;
+      GLuint nr = vp->program.Base.Parameters->NumParameters;
+
+      _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
+
+      for (i = 0; i < nr; i++) {
+         buf[offset + i * 4 + 0] = vp->program.Base.Parameters->ParameterValues[i][0];
+         buf[offset + i * 4 + 1] = vp->program.Base.Parameters->ParameterValues[i][1];
+         buf[offset + i * 4 + 2] = vp->program.Base.Parameters->ParameterValues[i][2];
+         buf[offset + i * 4 + 3] = vp->program.Base.Parameters->ParameterValues[i][3];
+      }
+   }
+
+   if (0) {
+      for (i = 0; i < sz*16; i+=4)
+         _mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
+                      buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
+
+      _mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
+                   brw->curbe.last_buf, buf,
+                   bufsz, brw->curbe.last_bufsz,
+                   brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
+   }
+
+   if (brw->curbe.last_buf &&
+       bufsz == brw->curbe.last_bufsz &&
+       memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
+      /* Same contents as the previous upload: reuse the data already
+       * at gs_offset, but still emit the packet below.
+       */
+      free(buf);
+   }
+   else {
+      if (!brw_pool_alloc(pool,
+                          bufsz,
+                          6,
+                          &brw->curbe.gs_offset)) {
+         /* Forget the cached image so a later call cannot match it and
+          * skip the upload while gs_offset does not point at valid data.
+          */
+         free(buf);
+         free(brw->curbe.last_buf);
+         brw->curbe.last_buf = NULL;
+         brw->curbe.last_bufsz = 0;
+         _mesa_printf("out of GS memory for curbe\n");
+         assert(0);
+         return;
+      }
+
+      /* Remember this image for the comparison on the next call: */
+      free(brw->curbe.last_buf);
+      brw->curbe.last_buf = buf;
+      brw->curbe.last_bufsz = bufsz;
+
+      /* Copy data to the buffer: */
+      bmBufferSubDataAUB(&brw->intel,
+                         pool->buffer,
+                         brw->curbe.gs_offset,
+                         bufsz,
+                         buf,
+                         DW_CONSTANT_BUFFER,
+                         0);
+   }
+
+   /* TODO: only emit the constant_buffer packet when necessary, ie:
+      - contents have changed
+      - offset has changed
+      - hw requirements due to other packets emitted.
+   */
+   {
+      struct brw_constant_buffer cb;
+      memset(&cb, 0, sizeof(cb));
+      cb.header.opcode = CMD_CONST_BUFFER;
+      cb.header.length = sizeof(cb)/4 - 2;
+      cb.header.valid = 1;
+      cb.bits0.buffer_length = sz - 1;
+      cb.bits0.buffer_address = brw->curbe.gs_offset >> 6;
+
+      /* Because this provokes an action (ie copy the constants into the
+       * URB), it shouldn't be shortcircuited if identical to the
+       * previous time - because eg. the urb destination may have
+       * changed, or the urb contents different to last time.  The data
+       * referred to is copied internally, not used in place.  It appears
+       * that the CS unit cycles through the available URB entries
+       * (Const URB Entry == CURBE), flushing when it cannot doublebuffer.
+       */
+      BRW_BATCH_STRUCT(brw, &cb);
+   }
+}
+
+/* This tracked state is unique in that the state it monitors varies
+ * dynamically depending on the parameters tracked by the fragment and
+ * vertex programs.  This is the template used as a starting point;
+ * each context maintains a copy (presumably brw->curbe.tracked_state),
+ * whose mesa flags upload_constant_buffer() recomputes on every call.
+ */
+const struct brw_tracked_state brw_constant_buffer = {
+   .dirty = {
+      .mesa = (_NEW_TRANSFORM|_NEW_PROJECTION),      /* plus fp and vp flags */
+      .brw  = (BRW_NEW_FRAGMENT_PROGRAM |
+	       BRW_NEW_VERTEX_PROGRAM |
+	       BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
+	       BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
+	       BRW_NEW_CURBE_OFFSETS),
+      .cache = (CACHE_NEW_WM_PROG) 
+   },
+   .update = upload_constant_buffer
+};
+
diff --git a/i965/brw_defines.h b/i965/brw_defines.h
new file mode 100644
index 0000000..101828b
--- /dev/null
+++ b/i965/brw_defines.h
@@ -0,0 +1,856 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+ 
+
+#ifndef BRW_DEFINES_H
+#define BRW_DEFINES_H
+
+/* MI_* command opcodes and field values:
+ */
+#define MI_NOOP                              0x00
+#define MI_USER_INTERRUPT                    0x02
+#define MI_WAIT_FOR_EVENT                    0x03
+#define MI_FLUSH                             0x04
+#define MI_REPORT_HEAD                       0x07
+#define MI_ARB_ON_OFF                        0x08
+#define MI_BATCH_BUFFER_END                  0x0A
+#define MI_OVERLAY_FLIP                      0x11
+#define MI_LOAD_SCAN_LINES_INCL              0x12
+#define MI_LOAD_SCAN_LINES_EXCL              0x13
+#define MI_DISPLAY_BUFFER_INFO               0x14
+#define MI_SET_CONTEXT                       0x18
+#define MI_STORE_DATA_IMM                    0x20
+#define MI_STORE_DATA_INDEX                  0x21
+#define MI_LOAD_REGISTER_IMM                 0x22
+#define MI_STORE_REGISTER_MEM                0x24
+#define MI_BATCH_BUFFER_START                0x31
+
+#define MI_SYNCHRONOUS_FLIP                  0x0 
+#define MI_ASYNCHRONOUS_FLIP                 0x1
+
+#define MI_BUFFER_SECURE                     0x0 
+#define MI_BUFFER_NONSECURE                  0x1
+
+#define MI_ARBITRATE_AT_CHAIN_POINTS         0x0 
+#define MI_ARBITRATE_BETWEEN_INSTS           0x1
+#define MI_NO_ARBITRATION                    0x3 
+
+#define MI_CONDITION_CODE_WAIT_DISABLED      0x0
+#define MI_CONDITION_CODE_WAIT_0             0x1
+#define MI_CONDITION_CODE_WAIT_1             0x2
+#define MI_CONDITION_CODE_WAIT_2             0x3
+#define MI_CONDITION_CODE_WAIT_3             0x4
+#define MI_CONDITION_CODE_WAIT_4             0x5
+
+#define MI_DISPLAY_PIPE_A                    0x0
+#define MI_DISPLAY_PIPE_B                    0x1
+
+#define MI_DISPLAY_PLANE_A                   0x0 
+#define MI_DISPLAY_PLANE_B                   0x1
+#define MI_DISPLAY_PLANE_C                   0x2
+
+#define MI_STANDARD_FLIP                                 0x0
+#define MI_ENQUEUE_FLIP_PERFORM_BASE_FRAME_NUMBER_LOAD   0x1
+#define MI_ENQUEUE_FLIP_TARGET_FRAME_NUMBER_RELATIVE     0x2
+#define MI_ENQUEUE_FLIP_ABSOLUTE_TARGET_FRAME_NUMBER     0x3
+
+#define MI_PHYSICAL_ADDRESS                  0x0
+#define MI_VIRTUAL_ADDRESS                   0x1
+
+#define MI_BUFFER_MEMORY_MAIN                0x0 
+#define MI_BUFFER_MEMORY_GTT                 0x2
+#define MI_BUFFER_MEMORY_PER_PROCESS_GTT     0x3 
+
+#define MI_FLIP_CONTINUE                     0x0
+#define MI_FLIP_ON                           0x1
+#define MI_FLIP_OFF                          0x2
+
+#define MI_UNTRUSTED_REGISTER_SPACE          0x0
+#define MI_TRUSTED_REGISTER_SPACE            0x1
+
+/* 3D state:
+ */
+#define _3DOP_3DSTATE_PIPELINED       0x0
+#define _3DOP_3DSTATE_NONPIPELINED    0x1
+#define _3DOP_3DCONTROL               0x2
+#define _3DOP_3DPRIMITIVE             0x3
+
+#define _3DSTATE_PIPELINED_POINTERS       0x00
+#define _3DSTATE_BINDING_TABLE_POINTERS   0x01
+#define _3DSTATE_VERTEX_BUFFERS           0x08
+#define _3DSTATE_VERTEX_ELEMENTS          0x09
+#define _3DSTATE_INDEX_BUFFER             0x0A
+#define _3DSTATE_VF_STATISTICS            0x0B
+#define _3DSTATE_DRAWING_RECTANGLE            0x00
+#define _3DSTATE_CONSTANT_COLOR               0x01
+#define _3DSTATE_SAMPLER_PALETTE_LOAD         0x02
+#define _3DSTATE_CHROMA_KEY                   0x04
+#define _3DSTATE_DEPTH_BUFFER                 0x05
+#define _3DSTATE_POLY_STIPPLE_OFFSET          0x06
+#define _3DSTATE_POLY_STIPPLE_PATTERN         0x07
+#define _3DSTATE_LINE_STIPPLE                 0x08
+#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP    0x09
+#define _3DCONTROL    0x00
+#define _3DPRIMITIVE  0x00
+
+#define PIPE_CONTROL_NOWRITE          0x00
+#define PIPE_CONTROL_WRITEIMMEDIATE   0x01
+#define PIPE_CONTROL_WRITEDEPTH       0x02
+#define PIPE_CONTROL_WRITETIMESTAMP   0x03
+
+#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00
+#define PIPE_CONTROL_GTTWRITE_GLOBAL        0x01
+
+#define _3DPRIM_POINTLIST         0x01
+#define _3DPRIM_LINELIST          0x02
+#define _3DPRIM_LINESTRIP         0x03
+#define _3DPRIM_TRILIST           0x04
+#define _3DPRIM_TRISTRIP          0x05
+#define _3DPRIM_TRIFAN            0x06
+#define _3DPRIM_QUADLIST          0x07
+#define _3DPRIM_QUADSTRIP         0x08
+#define _3DPRIM_LINELIST_ADJ      0x09
+#define _3DPRIM_LINESTRIP_ADJ     0x0A
+#define _3DPRIM_TRILIST_ADJ       0x0B
+#define _3DPRIM_TRISTRIP_ADJ      0x0C
+#define _3DPRIM_TRISTRIP_REVERSE  0x0D
+#define _3DPRIM_POLYGON           0x0E
+#define _3DPRIM_RECTLIST          0x0F
+#define _3DPRIM_LINELOOP          0x10
+#define _3DPRIM_POINTLIST_BF      0x11
+#define _3DPRIM_LINESTRIP_CONT    0x12
+#define _3DPRIM_LINESTRIP_BF      0x13
+#define _3DPRIM_LINESTRIP_CONT_BF 0x14
+#define _3DPRIM_TRIFAN_NOSTIPPLE  0x15
+
+#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
+#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM     1
+
+#define BRW_ANISORATIO_2     0 
+#define BRW_ANISORATIO_4     1 
+#define BRW_ANISORATIO_6     2 
+#define BRW_ANISORATIO_8     3 
+#define BRW_ANISORATIO_10    4 
+#define BRW_ANISORATIO_12    5 
+#define BRW_ANISORATIO_14    6 
+#define BRW_ANISORATIO_16    7
+
+#define BRW_BLENDFACTOR_ONE                 0x1
+#define BRW_BLENDFACTOR_SRC_COLOR           0x2
+#define BRW_BLENDFACTOR_SRC_ALPHA           0x3
+#define BRW_BLENDFACTOR_DST_ALPHA           0x4
+#define BRW_BLENDFACTOR_DST_COLOR           0x5
+#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE  0x6
+#define BRW_BLENDFACTOR_CONST_COLOR         0x7
+#define BRW_BLENDFACTOR_CONST_ALPHA         0x8
+#define BRW_BLENDFACTOR_SRC1_COLOR          0x9
+#define BRW_BLENDFACTOR_SRC1_ALPHA          0x0A
+#define BRW_BLENDFACTOR_ZERO                0x11
+#define BRW_BLENDFACTOR_INV_SRC_COLOR       0x12
+#define BRW_BLENDFACTOR_INV_SRC_ALPHA       0x13
+#define BRW_BLENDFACTOR_INV_DST_ALPHA       0x14
+#define BRW_BLENDFACTOR_INV_DST_COLOR       0x15
+#define BRW_BLENDFACTOR_INV_CONST_COLOR     0x17
+#define BRW_BLENDFACTOR_INV_CONST_ALPHA     0x18
+#define BRW_BLENDFACTOR_INV_SRC1_COLOR      0x19
+#define BRW_BLENDFACTOR_INV_SRC1_ALPHA      0x1A
+
+#define BRW_BLENDFUNCTION_ADD               0
+#define BRW_BLENDFUNCTION_SUBTRACT          1
+#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT  2
+#define BRW_BLENDFUNCTION_MIN               3
+#define BRW_BLENDFUNCTION_MAX               4
+
+#define BRW_ALPHATEST_FORMAT_UNORM8         0
+#define BRW_ALPHATEST_FORMAT_FLOAT32        1
+
+#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH  0
+#define BRW_CHROMAKEY_REPLACE_BLACK      1
+
+#define BRW_CLIP_API_OGL     0
+#define BRW_CLIP_API_DX      1
+
+#define BRW_CLIPMODE_NORMAL              0
+#define BRW_CLIPMODE_CLIP_ALL            1
+#define BRW_CLIPMODE_CLIP_NON_REJECTED   2
+#define BRW_CLIPMODE_REJECT_ALL          3
+#define BRW_CLIPMODE_ACCEPT_ALL          4
+
+#define BRW_CLIP_NDCSPACE     0
+#define BRW_CLIP_SCREENSPACE  1
+
+#define BRW_COMPAREFUNCTION_ALWAYS       0
+#define BRW_COMPAREFUNCTION_NEVER        1
+#define BRW_COMPAREFUNCTION_LESS         2
+#define BRW_COMPAREFUNCTION_EQUAL        3
+#define BRW_COMPAREFUNCTION_LEQUAL       4
+#define BRW_COMPAREFUNCTION_GREATER      5
+#define BRW_COMPAREFUNCTION_NOTEQUAL     6
+#define BRW_COMPAREFUNCTION_GEQUAL       7
+
+#define BRW_COVERAGE_PIXELS_HALF     0
+#define BRW_COVERAGE_PIXELS_1        1
+#define BRW_COVERAGE_PIXELS_2        2
+#define BRW_COVERAGE_PIXELS_4        3
+
+#define BRW_CULLMODE_BOTH        0
+#define BRW_CULLMODE_NONE        1
+#define BRW_CULLMODE_FRONT       2
+#define BRW_CULLMODE_BACK        3
+
+#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM      0
+#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT  1
+
+#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT     0
+#define BRW_DEPTHFORMAT_D32_FLOAT                1
+#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT        2
+#define BRW_DEPTHFORMAT_D16_UNORM                5
+
+#define BRW_FLOATING_POINT_IEEE_754        0
+#define BRW_FLOATING_POINT_NON_IEEE_754    1
+
+#define BRW_FRONTWINDING_CW      0
+#define BRW_FRONTWINDING_CCW     1
+
+#define BRW_INDEX_BYTE     0
+#define BRW_INDEX_WORD     1
+#define BRW_INDEX_DWORD    2
+
+#define BRW_LOGICOPFUNCTION_CLEAR            0
+#define BRW_LOGICOPFUNCTION_NOR              1
+#define BRW_LOGICOPFUNCTION_AND_INVERTED     2
+#define BRW_LOGICOPFUNCTION_COPY_INVERTED    3
+#define BRW_LOGICOPFUNCTION_AND_REVERSE      4
+#define BRW_LOGICOPFUNCTION_INVERT           5
+#define BRW_LOGICOPFUNCTION_XOR              6
+#define BRW_LOGICOPFUNCTION_NAND             7
+#define BRW_LOGICOPFUNCTION_AND              8
+#define BRW_LOGICOPFUNCTION_EQUIV            9
+#define BRW_LOGICOPFUNCTION_NOOP             10
+#define BRW_LOGICOPFUNCTION_OR_INVERTED      11
+#define BRW_LOGICOPFUNCTION_COPY             12
+#define BRW_LOGICOPFUNCTION_OR_REVERSE       13
+#define BRW_LOGICOPFUNCTION_OR               14
+#define BRW_LOGICOPFUNCTION_SET              15  
+
+#define BRW_MAPFILTER_NEAREST        0x0 
+#define BRW_MAPFILTER_LINEAR         0x1 
+#define BRW_MAPFILTER_ANISOTROPIC    0x2
+
+#define BRW_MIPFILTER_NONE        0   
+#define BRW_MIPFILTER_NEAREST     1   
+#define BRW_MIPFILTER_LINEAR      3
+
+#define BRW_POLYGON_FRONT_FACING     0
+#define BRW_POLYGON_BACK_FACING      1
+
+#define BRW_PREFILTER_ALWAYS     0x0 
+#define BRW_PREFILTER_NEVER      0x1
+#define BRW_PREFILTER_LESS       0x2
+#define BRW_PREFILTER_EQUAL      0x3
+#define BRW_PREFILTER_LEQUAL     0x4
+#define BRW_PREFILTER_GREATER    0x5
+#define BRW_PREFILTER_NOTEQUAL   0x6
+#define BRW_PREFILTER_GEQUAL     0x7
+
+#define BRW_PROVOKING_VERTEX_0    0
+#define BRW_PROVOKING_VERTEX_1    1 
+#define BRW_PROVOKING_VERTEX_2    2
+
+#define BRW_RASTRULE_UPPER_LEFT  0    
+#define BRW_RASTRULE_UPPER_RIGHT 1
+
+#define BRW_RENDERTARGET_CLAMPRANGE_UNORM    0
+#define BRW_RENDERTARGET_CLAMPRANGE_SNORM    1
+#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT   2
+
+#define BRW_STENCILOP_KEEP               0
+#define BRW_STENCILOP_ZERO               1
+#define BRW_STENCILOP_REPLACE            2
+#define BRW_STENCILOP_INCRSAT            3
+#define BRW_STENCILOP_DECRSAT            4
+#define BRW_STENCILOP_INCR               5
+#define BRW_STENCILOP_DECR               6
+#define BRW_STENCILOP_INVERT             7
+
+#define BRW_SURFACE_MIPMAPLAYOUT_BELOW   0
+#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT   1
+
+#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT             0x000 
+#define BRW_SURFACEFORMAT_R32G32B32A32_SINT              0x001 
+#define BRW_SURFACEFORMAT_R32G32B32A32_UINT              0x002 
+#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM             0x003 
+#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM             0x004 
+#define BRW_SURFACEFORMAT_R64G64_FLOAT                   0x005 
+#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT             0x006 
+#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED           0x007
+#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED           0x008
+#define BRW_SURFACEFORMAT_R32G32B32_FLOAT                0x040 
+#define BRW_SURFACEFORMAT_R32G32B32_SINT                 0x041 
+#define BRW_SURFACEFORMAT_R32G32B32_UINT                 0x042 
+#define BRW_SURFACEFORMAT_R32G32B32_UNORM                0x043 
+#define BRW_SURFACEFORMAT_R32G32B32_SNORM                0x044 
+#define BRW_SURFACEFORMAT_R32G32B32_SSCALED              0x045 
+#define BRW_SURFACEFORMAT_R32G32B32_USCALED              0x046 
+#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM             0x080 
+#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM             0x081 
+#define BRW_SURFACEFORMAT_R16G16B16A16_SINT              0x082 
+#define BRW_SURFACEFORMAT_R16G16B16A16_UINT              0x083 
+#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT             0x084 
+#define BRW_SURFACEFORMAT_R32G32_FLOAT                   0x085 
+#define BRW_SURFACEFORMAT_R32G32_SINT                    0x086 
+#define BRW_SURFACEFORMAT_R32G32_UINT                    0x087 
+#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS       0x088 
+#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT        0x089 
+#define BRW_SURFACEFORMAT_L32A32_FLOAT                   0x08A 
+#define BRW_SURFACEFORMAT_R32G32_UNORM                   0x08B 
+#define BRW_SURFACEFORMAT_R32G32_SNORM                   0x08C 
+#define BRW_SURFACEFORMAT_R64_FLOAT                      0x08D 
+#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM             0x08E 
+#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT             0x08F 
+#define BRW_SURFACEFORMAT_A32X32_FLOAT                   0x090 
+#define BRW_SURFACEFORMAT_L32X32_FLOAT                   0x091 
+#define BRW_SURFACEFORMAT_I32X32_FLOAT                   0x092 
+#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED           0x093
+#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED           0x094
+#define BRW_SURFACEFORMAT_R32G32_SSCALED                 0x095
+#define BRW_SURFACEFORMAT_R32G32_USCALED                 0x096
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM                 0x0C0 
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB            0x0C1 
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM              0x0C2 
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB         0x0C3 
+#define BRW_SURFACEFORMAT_R10G10B10A2_UINT               0x0C4 
+#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM       0x0C5 
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM                 0x0C7 
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB            0x0C8 
+#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM                 0x0C9 
+#define BRW_SURFACEFORMAT_R8G8B8A8_SINT                  0x0CA 
+#define BRW_SURFACEFORMAT_R8G8B8A8_UINT                  0x0CB 
+#define BRW_SURFACEFORMAT_R16G16_UNORM                   0x0CC 
+#define BRW_SURFACEFORMAT_R16G16_SNORM                   0x0CD 
+#define BRW_SURFACEFORMAT_R16G16_SINT                    0x0CE 
+#define BRW_SURFACEFORMAT_R16G16_UINT                    0x0CF 
+#define BRW_SURFACEFORMAT_R16G16_FLOAT                   0x0D0 
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM              0x0D1 
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB         0x0D2 
+#define BRW_SURFACEFORMAT_R11G11B10_FLOAT                0x0D3 
+#define BRW_SURFACEFORMAT_R32_SINT                       0x0D6 
+#define BRW_SURFACEFORMAT_R32_UINT                       0x0D7 
+#define BRW_SURFACEFORMAT_R32_FLOAT                      0x0D8 
+#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS          0x0D9 
+#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT           0x0DA 
+#define BRW_SURFACEFORMAT_L16A16_UNORM                   0x0DF 
+#define BRW_SURFACEFORMAT_I24X8_UNORM                    0x0E0 
+#define BRW_SURFACEFORMAT_L24X8_UNORM                    0x0E1 
+#define BRW_SURFACEFORMAT_A24X8_UNORM                    0x0E2 
+#define BRW_SURFACEFORMAT_I32_FLOAT                      0x0E3 
+#define BRW_SURFACEFORMAT_L32_FLOAT                      0x0E4 
+#define BRW_SURFACEFORMAT_A32_FLOAT                      0x0E5 
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM                 0x0E9 
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB            0x0EA 
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM                 0x0EB 
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB            0x0EC 
+#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP             0x0ED 
+#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM              0x0EE 
+#define BRW_SURFACEFORMAT_L16A16_FLOAT                   0x0F0 
+#define BRW_SURFACEFORMAT_R32_UNORM                      0x0F1 
+#define BRW_SURFACEFORMAT_R32_SNORM                      0x0F2 
+#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED            0x0F3
+#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED               0x0F4
+#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED               0x0F5
+#define BRW_SURFACEFORMAT_R16G16_SSCALED                 0x0F6
+#define BRW_SURFACEFORMAT_R16G16_USCALED                 0x0F7
+#define BRW_SURFACEFORMAT_R32_SSCALED                    0x0F8
+#define BRW_SURFACEFORMAT_R32_USCALED                    0x0F9
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM                   0x100 
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB              0x101 
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM                 0x102 
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB            0x103 
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM                 0x104 
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB            0x105 
+#define BRW_SURFACEFORMAT_R8G8_UNORM                     0x106 
+#define BRW_SURFACEFORMAT_R8G8_SNORM                     0x107 
+#define BRW_SURFACEFORMAT_R8G8_SINT                      0x108 
+#define BRW_SURFACEFORMAT_R8G8_UINT                      0x109 
+#define BRW_SURFACEFORMAT_R16_UNORM                      0x10A 
+#define BRW_SURFACEFORMAT_R16_SNORM                      0x10B 
+#define BRW_SURFACEFORMAT_R16_SINT                       0x10C 
+#define BRW_SURFACEFORMAT_R16_UINT                       0x10D 
+#define BRW_SURFACEFORMAT_R16_FLOAT                      0x10E 
+#define BRW_SURFACEFORMAT_I16_UNORM                      0x111 
+#define BRW_SURFACEFORMAT_L16_UNORM                      0x112 
+#define BRW_SURFACEFORMAT_A16_UNORM                      0x113 
+#define BRW_SURFACEFORMAT_L8A8_UNORM                     0x114 
+#define BRW_SURFACEFORMAT_I16_FLOAT                      0x115
+#define BRW_SURFACEFORMAT_L16_FLOAT                      0x116
+#define BRW_SURFACEFORMAT_A16_FLOAT                      0x117 
+#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM            0x119 
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM                 0x11A 
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB            0x11B
+#define BRW_SURFACEFORMAT_R8G8_SSCALED                   0x11C
+#define BRW_SURFACEFORMAT_R8G8_USCALED                   0x11D
+#define BRW_SURFACEFORMAT_R16_SSCALED                    0x11E
+#define BRW_SURFACEFORMAT_R16_USCALED                    0x11F
+#define BRW_SURFACEFORMAT_R8_UNORM                       0x140 
+#define BRW_SURFACEFORMAT_R8_SNORM                       0x141 
+#define BRW_SURFACEFORMAT_R8_SINT                        0x142 
+#define BRW_SURFACEFORMAT_R8_UINT                        0x143 
+#define BRW_SURFACEFORMAT_A8_UNORM                       0x144 
+#define BRW_SURFACEFORMAT_I8_UNORM                       0x145 
+#define BRW_SURFACEFORMAT_L8_UNORM                       0x146 
+#define BRW_SURFACEFORMAT_P4A4_UNORM                     0x147 
+#define BRW_SURFACEFORMAT_A4P4_UNORM                     0x148
+#define BRW_SURFACEFORMAT_R8_SSCALED                     0x149
+#define BRW_SURFACEFORMAT_R8_USCALED                     0x14A
+#define BRW_SURFACEFORMAT_R1_UINT                        0x181 
+#define BRW_SURFACEFORMAT_YCRCB_NORMAL                   0x182 
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY                  0x183 
+#define BRW_SURFACEFORMAT_BC1_UNORM                      0x186 
+#define BRW_SURFACEFORMAT_BC2_UNORM                      0x187 
+#define BRW_SURFACEFORMAT_BC3_UNORM                      0x188 
+#define BRW_SURFACEFORMAT_BC4_UNORM                      0x189 
+#define BRW_SURFACEFORMAT_BC5_UNORM                      0x18A 
+#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB                 0x18B 
+#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB                 0x18C 
+#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB                 0x18D 
+#define BRW_SURFACEFORMAT_MONO8                          0x18E 
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUV                   0x18F 
+#define BRW_SURFACEFORMAT_YCRCB_SWAPY                    0x190 
+#define BRW_SURFACEFORMAT_DXT1_RGB                       0x191 
+#define BRW_SURFACEFORMAT_FXT1                           0x192 
+#define BRW_SURFACEFORMAT_R8G8B8_UNORM                   0x193 
+#define BRW_SURFACEFORMAT_R8G8B8_SNORM                   0x194 
+#define BRW_SURFACEFORMAT_R8G8B8_SSCALED                 0x195 
+#define BRW_SURFACEFORMAT_R8G8B8_USCALED                 0x196 
+#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT             0x197 
+#define BRW_SURFACEFORMAT_R64G64B64_FLOAT                0x198 
+#define BRW_SURFACEFORMAT_BC4_SNORM                      0x199 
+#define BRW_SURFACEFORMAT_BC5_SNORM                      0x19A 
+#define BRW_SURFACEFORMAT_R16G16B16_UNORM                0x19C 
+#define BRW_SURFACEFORMAT_R16G16B16_SNORM                0x19D 
+#define BRW_SURFACEFORMAT_R16G16B16_SSCALED              0x19E 
+#define BRW_SURFACEFORMAT_R16G16B16_USCALED              0x19F
+
+#define BRW_SURFACERETURNFORMAT_FLOAT32  0
+#define BRW_SURFACERETURNFORMAT_S1       1
+
+#define BRW_SURFACE_1D      0
+#define BRW_SURFACE_2D      1
+#define BRW_SURFACE_3D      2
+#define BRW_SURFACE_CUBE    3
+#define BRW_SURFACE_BUFFER  4
+#define BRW_SURFACE_NULL    7
+
+#define BRW_TEXCOORDMODE_WRAP            0
+#define BRW_TEXCOORDMODE_MIRROR          1
+#define BRW_TEXCOORDMODE_CLAMP           2
+#define BRW_TEXCOORDMODE_CUBE            3
+#define BRW_TEXCOORDMODE_CLAMP_BORDER    4
+#define BRW_TEXCOORDMODE_MIRROR_ONCE     5
+
+#define BRW_THREAD_PRIORITY_NORMAL   0
+#define BRW_THREAD_PRIORITY_HIGH     1
+
+#define BRW_TILEWALK_XMAJOR                 0
+#define BRW_TILEWALK_YMAJOR                 1
+
+#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS  0
+#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS  1
+
+#define BRW_VERTEXBUFFER_ACCESS_VERTEXDATA     0
+#define BRW_VERTEXBUFFER_ACCESS_INSTANCEDATA   1
+
+#define BRW_VFCOMPONENT_NOSTORE      0
+#define BRW_VFCOMPONENT_STORE_SRC    1
+#define BRW_VFCOMPONENT_STORE_0      2
+#define BRW_VFCOMPONENT_STORE_1_FLT  3
+#define BRW_VFCOMPONENT_STORE_1_INT  4
+#define BRW_VFCOMPONENT_STORE_VID    5
+#define BRW_VFCOMPONENT_STORE_IID    6
+#define BRW_VFCOMPONENT_STORE_PID    7
+
+
+
+/* Execution Unit (EU) defines
+ */
+
+#define BRW_ALIGN_1   0
+#define BRW_ALIGN_16  1
+
+#define BRW_ADDRESS_DIRECT                        0
+#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER    1
+
+#define BRW_CHANNEL_X     0
+#define BRW_CHANNEL_Y     1
+#define BRW_CHANNEL_Z     2
+#define BRW_CHANNEL_W     3
+
+#define BRW_COMPRESSION_NONE          0
+#define BRW_COMPRESSION_2NDHALF       1
+#define BRW_COMPRESSION_COMPRESSED    2
+
+#define BRW_CONDITIONAL_NONE  0
+#define BRW_CONDITIONAL_Z     1
+#define BRW_CONDITIONAL_NZ    2
+#define BRW_CONDITIONAL_EQ    1	/* same encoding as BRW_CONDITIONAL_Z */
+#define BRW_CONDITIONAL_NEQ   2	/* same encoding as BRW_CONDITIONAL_NZ */
+#define BRW_CONDITIONAL_G     3
+#define BRW_CONDITIONAL_GE    4
+#define BRW_CONDITIONAL_L     5
+#define BRW_CONDITIONAL_LE    6
+#define BRW_CONDITIONAL_C     7
+#define BRW_CONDITIONAL_O     8
+
+#define BRW_DEBUG_NONE        0
+#define BRW_DEBUG_BREAKPOINT  1
+
+#define BRW_DEPENDENCY_NORMAL         0
+#define BRW_DEPENDENCY_NOTCLEARED     1
+#define BRW_DEPENDENCY_NOTCHECKED     2
+#define BRW_DEPENDENCY_DISABLE        3
+
+#define BRW_EXECUTE_1     0
+#define BRW_EXECUTE_2     1
+#define BRW_EXECUTE_4     2
+#define BRW_EXECUTE_8     3
+#define BRW_EXECUTE_16    4
+#define BRW_EXECUTE_32    5
+
+#define BRW_HORIZONTAL_STRIDE_0   0
+#define BRW_HORIZONTAL_STRIDE_1   1
+#define BRW_HORIZONTAL_STRIDE_2   2
+#define BRW_HORIZONTAL_STRIDE_4   3
+
+#define BRW_INSTRUCTION_NORMAL    0
+#define BRW_INSTRUCTION_SATURATE  1
+
+#define BRW_MASK_ENABLE   0
+#define BRW_MASK_DISABLE  1
+
+#define BRW_OPCODE_MOV        1
+#define BRW_OPCODE_SEL        2
+#define BRW_OPCODE_NOT        4
+#define BRW_OPCODE_AND        5
+#define BRW_OPCODE_OR         6
+#define BRW_OPCODE_XOR        7
+#define BRW_OPCODE_SHR        8
+#define BRW_OPCODE_SHL        9
+#define BRW_OPCODE_RSR        10
+#define BRW_OPCODE_RSL        11
+#define BRW_OPCODE_ASR        12
+#define BRW_OPCODE_CMP        16
+#define BRW_OPCODE_JMPI       32
+#define BRW_OPCODE_IF         34
+#define BRW_OPCODE_IFF        35
+#define BRW_OPCODE_ELSE       36
+#define BRW_OPCODE_ENDIF      37
+#define BRW_OPCODE_DO         38
+#define BRW_OPCODE_WHILE      39
+#define BRW_OPCODE_BREAK      40
+#define BRW_OPCODE_CONTINUE   41
+#define BRW_OPCODE_HALT       42
+#define BRW_OPCODE_MSAVE      44
+#define BRW_OPCODE_MRESTORE   45
+#define BRW_OPCODE_PUSH       46
+#define BRW_OPCODE_POP        47
+#define BRW_OPCODE_WAIT       48
+#define BRW_OPCODE_SEND       49
+#define BRW_OPCODE_ADD        64
+#define BRW_OPCODE_MUL        65
+#define BRW_OPCODE_AVG        66
+#define BRW_OPCODE_FRC        67
+#define BRW_OPCODE_RNDU       68
+#define BRW_OPCODE_RNDD       69
+#define BRW_OPCODE_RNDE       70
+#define BRW_OPCODE_RNDZ       71
+#define BRW_OPCODE_MAC        72
+#define BRW_OPCODE_MACH       73
+#define BRW_OPCODE_LZD        74
+#define BRW_OPCODE_SAD2       80
+#define BRW_OPCODE_SADA2      81
+#define BRW_OPCODE_DP4        84
+#define BRW_OPCODE_DPH        85
+#define BRW_OPCODE_DP3        86
+#define BRW_OPCODE_DP2        87
+#define BRW_OPCODE_DPA2       88
+#define BRW_OPCODE_LINE       89
+#define BRW_OPCODE_NOP        126
+
+#define BRW_PREDICATE_NONE             0
+#define BRW_PREDICATE_NORMAL           1
+#define BRW_PREDICATE_ALIGN1_ANYV             2	/* ALIGN1 names use values 2..11 */
+#define BRW_PREDICATE_ALIGN1_ALLV             3
+#define BRW_PREDICATE_ALIGN1_ANY2H            4
+#define BRW_PREDICATE_ALIGN1_ALL2H            5
+#define BRW_PREDICATE_ALIGN1_ANY4H            6
+#define BRW_PREDICATE_ALIGN1_ALL4H            7
+#define BRW_PREDICATE_ALIGN1_ANY8H            8
+#define BRW_PREDICATE_ALIGN1_ALL8H            9
+#define BRW_PREDICATE_ALIGN1_ANY16H           10
+#define BRW_PREDICATE_ALIGN1_ALL16H           11
+#define BRW_PREDICATE_ALIGN16_REPLICATE_X     2	/* ALIGN16 names reuse values 2..7 */
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Y     3
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Z     4
+#define BRW_PREDICATE_ALIGN16_REPLICATE_W     5
+#define BRW_PREDICATE_ALIGN16_ANY4H           6
+#define BRW_PREDICATE_ALIGN16_ALL4H           7
+
+#define BRW_ARCHITECTURE_REGISTER_FILE    0
+#define BRW_GENERAL_REGISTER_FILE         1
+#define BRW_MESSAGE_REGISTER_FILE         2
+#define BRW_IMMEDIATE_VALUE               3
+
+#define BRW_REGISTER_TYPE_UD  0
+#define BRW_REGISTER_TYPE_D   1
+#define BRW_REGISTER_TYPE_UW  2
+#define BRW_REGISTER_TYPE_W   3
+#define BRW_REGISTER_TYPE_UB  4
+#define BRW_REGISTER_TYPE_B   5
+#define BRW_REGISTER_TYPE_VF  5	/* same value as _B: packed float vector, immediates only? */
+#define BRW_REGISTER_TYPE_HF  6
+#define BRW_REGISTER_TYPE_V   6	/* same value as _HF: packed int vector, immediates only, uword dest only */
+#define BRW_REGISTER_TYPE_F   7
+
+#define BRW_ARF_NULL                  0x00
+#define BRW_ARF_ADDRESS               0x10
+#define BRW_ARF_ACCUMULATOR           0x20   
+#define BRW_ARF_FLAG                  0x30
+#define BRW_ARF_MASK                  0x40
+#define BRW_ARF_MASK_STACK            0x50
+#define BRW_ARF_MASK_STACK_DEPTH      0x60
+#define BRW_ARF_STATE                 0x70
+#define BRW_ARF_CONTROL               0x80
+#define BRW_ARF_NOTIFICATION_COUNT    0x90
+#define BRW_ARF_IP                    0xA0
+
+#define BRW_AMASK   0
+#define BRW_IMASK   1
+#define BRW_LMASK   2
+#define BRW_CMASK   3
+
+
+
+#define BRW_THREAD_NORMAL     0
+#define BRW_THREAD_ATOMIC     1
+#define BRW_THREAD_SWITCH     2
+
+#define BRW_VERTICAL_STRIDE_0                 0
+#define BRW_VERTICAL_STRIDE_1                 1
+#define BRW_VERTICAL_STRIDE_2                 2
+#define BRW_VERTICAL_STRIDE_4                 3
+#define BRW_VERTICAL_STRIDE_8                 4
+#define BRW_VERTICAL_STRIDE_16                5
+#define BRW_VERTICAL_STRIDE_32                6
+#define BRW_VERTICAL_STRIDE_64                7
+#define BRW_VERTICAL_STRIDE_128               8
+#define BRW_VERTICAL_STRIDE_256               9
+#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL   0xF
+
+#define BRW_WIDTH_1       0
+#define BRW_WIDTH_2       1
+#define BRW_WIDTH_4       2
+#define BRW_WIDTH_8       3
+#define BRW_WIDTH_16      4
+
+#define BRW_STATELESS_BUFFER_BOUNDARY_1K      0
+#define BRW_STATELESS_BUFFER_BOUNDARY_2K      1
+#define BRW_STATELESS_BUFFER_BOUNDARY_4K      2
+#define BRW_STATELESS_BUFFER_BOUNDARY_8K      3
+#define BRW_STATELESS_BUFFER_BOUNDARY_16K     4
+#define BRW_STATELESS_BUFFER_BOUNDARY_32K     5
+#define BRW_STATELESS_BUFFER_BOUNDARY_64K     6
+#define BRW_STATELESS_BUFFER_BOUNDARY_128K    7
+#define BRW_STATELESS_BUFFER_BOUNDARY_256K    8
+#define BRW_STATELESS_BUFFER_BOUNDARY_512K    9
+#define BRW_STATELESS_BUFFER_BOUNDARY_1M      10
+#define BRW_STATELESS_BUFFER_BOUNDARY_2M      11
+
+#define BRW_POLYGON_FACING_FRONT      0
+#define BRW_POLYGON_FACING_BACK       1
+
+#define BRW_MESSAGE_TARGET_NULL               0
+#define BRW_MESSAGE_TARGET_MATH               1
+#define BRW_MESSAGE_TARGET_SAMPLER            2
+#define BRW_MESSAGE_TARGET_GATEWAY            3
+#define BRW_MESSAGE_TARGET_DATAPORT_READ      4
+#define BRW_MESSAGE_TARGET_DATAPORT_WRITE     5
+#define BRW_MESSAGE_TARGET_URB                6
+#define BRW_MESSAGE_TARGET_THREAD_SPAWNER     7
+
+#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32     0
+#define BRW_SAMPLER_RETURN_FORMAT_UINT32      2
+#define BRW_SAMPLER_RETURN_FORMAT_SINT32      3
+
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE              0	/* NOTE(review): values in this group repeat across names; presumably decoded per SIMD mode -- confirm */
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE             0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS        0
+#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX             1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD        1
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD         1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS  2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS    2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE    0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE     2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO           2
+#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO             2
+#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO            2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD                3
+#define BRW_SAMPLER_MESSAGE_SIMD8_LD                  3
+#define BRW_SAMPLER_MESSAGE_SIMD16_LD                 3
+
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW   0
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH  1
+#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS     2
+#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS     3
+#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS     4
+
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD     0
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS    2
+
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS   2
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS  3
+
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ          0
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     1
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ          2
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      3
+
+#define BRW_DATAPORT_READ_TARGET_DATA_CACHE      0
+#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE    1
+#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE   2
+
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE                0
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED     1
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01         2
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23         3
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01       4
+
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE                0
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE           1
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE                2
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE            3
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE              4
+#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE     5
+#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE               7
+
+#define BRW_MATH_FUNCTION_INV                              1
+#define BRW_MATH_FUNCTION_LOG                              2
+#define BRW_MATH_FUNCTION_EXP                              3
+#define BRW_MATH_FUNCTION_SQRT                             4
+#define BRW_MATH_FUNCTION_RSQ                              5
+#define BRW_MATH_FUNCTION_SIN                              6 /* was 7 */
+#define BRW_MATH_FUNCTION_COS                              7 /* was 8 */
+#define BRW_MATH_FUNCTION_SINCOS                           8 /* was 6 */
+#define BRW_MATH_FUNCTION_TAN                              9
+#define BRW_MATH_FUNCTION_POW                              10
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER   11
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT                 12
+#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER                13
+
+#define BRW_MATH_INTEGER_UNSIGNED     0
+#define BRW_MATH_INTEGER_SIGNED       1
+
+#define BRW_MATH_PRECISION_FULL        0
+#define BRW_MATH_PRECISION_PARTIAL     1
+
+#define BRW_MATH_SATURATE_NONE         0
+#define BRW_MATH_SATURATE_SATURATE     1
+
+#define BRW_MATH_DATA_VECTOR  0
+#define BRW_MATH_DATA_SCALAR  1
+
+#define BRW_URB_OPCODE_WRITE  0
+
+#define BRW_URB_SWIZZLE_NONE          0
+#define BRW_URB_SWIZZLE_INTERLEAVE    1
+#define BRW_URB_SWIZZLE_TRANSPOSE     2
+
+#define BRW_SCRATCH_SPACE_SIZE_1K     0
+#define BRW_SCRATCH_SPACE_SIZE_2K     1
+#define BRW_SCRATCH_SPACE_SIZE_4K     2
+#define BRW_SCRATCH_SPACE_SIZE_8K     3
+#define BRW_SCRATCH_SPACE_SIZE_16K    4
+#define BRW_SCRATCH_SPACE_SIZE_32K    5
+#define BRW_SCRATCH_SPACE_SIZE_64K    6
+#define BRW_SCRATCH_SPACE_SIZE_128K   7
+#define BRW_SCRATCH_SPACE_SIZE_256K   8
+#define BRW_SCRATCH_SPACE_SIZE_512K   9
+#define BRW_SCRATCH_SPACE_SIZE_1M     10
+#define BRW_SCRATCH_SPACE_SIZE_2M     11
+
+
+
+
+#define CMD_URB_FENCE                 0x6000
+#define CMD_CONST_BUFFER_STATE        0x6001
+#define CMD_CONST_BUFFER              0x6002
+
+#define CMD_STATE_BASE_ADDRESS        0x6101
+#define CMD_STATE_INSN_POINTER        0x6102
+#define CMD_PIPELINE_SELECT_965       0x6104
+#define CMD_PIPELINE_SELECT_IGD       0x6904
+
+#define CMD_PIPELINED_STATE_POINTERS  0x7800
+#define CMD_BINDING_TABLE_PTRS        0x7801
+#define CMD_VERTEX_BUFFER             0x7808
+#define CMD_VERTEX_ELEMENT            0x7809
+#define CMD_INDEX_BUFFER              0x780a
+#define CMD_VF_STATISTICS_965         0x780b
+#define CMD_VF_STATISTICS_IGD         0x680b
+
+#define CMD_DRAW_RECT                 0x7900
+#define CMD_BLEND_CONSTANT_COLOR      0x7901
+#define CMD_CHROMA_KEY                0x7904
+#define CMD_DEPTH_BUFFER              0x7905
+#define CMD_POLY_STIPPLE_OFFSET       0x7906
+#define CMD_POLY_STIPPLE_PATTERN      0x7907
+#define CMD_LINE_STIPPLE_PATTERN      0x7908
+#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
+#define CMD_AA_LINE_PARAMETERS        0x790a
+
+#define CMD_PIPE_CONTROL              0x7a00
+
+#define CMD_3D_PRIM                   0x7b00
+
+#define CMD_MI_FLUSH                  0x0200
+
+
+/* Various values from the R0 vertex header:
+ */
+#define R02_PRIM_END    0x1
+#define R02_PRIM_START  0x2
+
+#define BRW_IS_IGD(brw)     ((brw)->intel.intelScreen->deviceID == PCI_CHIP_IGD_GM)	/* true when the screen's deviceID is PCI_CHIP_IGD_GM */
+#define CMD_PIPELINE_SELECT(brw)       ((BRW_IS_IGD(brw)) ? CMD_PIPELINE_SELECT_IGD : CMD_PIPELINE_SELECT_965)	/* picks the _IGD or _965 opcode */
+#define CMD_VF_STATISTICS(brw)         ((BRW_IS_IGD(brw)) ? CMD_VF_STATISTICS_IGD : CMD_VF_STATISTICS_965)	/* picks the _IGD or _965 opcode */
+#define URB_SIZES(brw)                 ((BRW_IS_IGD(brw)) ? 384 : 256)  /* 512 bit unit; 384 on IGD, 256 otherwise */
+
+#endif
diff --git a/i965/brw_draw.c b/i965/brw_draw.c
new file mode 100644
index 0000000..f796472
--- /dev/null
+++ b/i965/brw_draw.c
@@ -0,0 +1,523 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include <stdlib.h>
+
+#include "glheader.h"
+#include "context.h"
+#include "state.h"
+#include "api_validate.h"
+#include "enums.h"
+
+#include "brw_draw.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_aub.h"
+#include "brw_state.h"
+#include "brw_fallback.h"
+
+#include "intel_ioctl.h"
+#include "intel_batchbuffer.h"
+#include "intel_buffer_objects.h"
+
+#include "tnl/tnl.h"
+#include "vbo/vbo_context.h"
+#include "swrast/swrast.h"
+
+
+/* GL primitive mode (GL_POINTS..GL_POLYGON) -> _3DPRIM_* topology; read-only. */
+static const GLuint hw_prim[GL_POLYGON+1] = {
+   _3DPRIM_POINTLIST,		/* GL_POINTS */
+   _3DPRIM_LINELIST,		/* GL_LINES */
+   _3DPRIM_LINELOOP,		/* GL_LINE_LOOP */
+   _3DPRIM_LINESTRIP,		/* GL_LINE_STRIP */
+   _3DPRIM_TRILIST,		/* GL_TRIANGLES */
+   _3DPRIM_TRISTRIP,		/* GL_TRIANGLE_STRIP */
+   _3DPRIM_TRIFAN,		/* GL_TRIANGLE_FAN */
+   _3DPRIM_QUADLIST,		/* GL_QUADS */
+   _3DPRIM_QUADSTRIP,		/* GL_QUAD_STRIP */
+   _3DPRIM_POLYGON		/* GL_POLYGON */
+};
+
+/* GL primitive mode -> its reduced class (GL_POINTS, GL_LINES or GL_TRIANGLES). */
+static const GLenum reduced_prim[GL_POLYGON+1] = {  
+   GL_POINTS,		/* GL_POINTS */
+   GL_LINES,		/* GL_LINES */
+   GL_LINES,		/* GL_LINE_LOOP */
+   GL_LINES,		/* GL_LINE_STRIP */
+   GL_TRIANGLES,	/* GL_TRIANGLES */
+   GL_TRIANGLES,	/* GL_TRIANGLE_STRIP */
+   GL_TRIANGLES,	/* GL_TRIANGLE_FAN */
+   GL_TRIANGLES,	/* GL_QUADS */
+   GL_TRIANGLES,	/* GL_QUAD_STRIP */
+   GL_TRIANGLES		/* GL_POLYGON */
+};
+
+
+/* Make 'prim' the current primitive and return its hw_prim[] topology.
+ * A change of primitive raises BRW_NEW_PRIMITIVE (plus
+ * BRW_NEW_REDUCED_PRIMITIVE if the reduced class changed) and
+ * re-validates state, as the programs are not immune to the primitive. */
+static GLuint brw_set_prim(struct brw_context *brw, GLenum prim)
+{
+   if (INTEL_DEBUG & DEBUG_PRIMS)
+      _mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim));
+
+   /* Slight optimization: a filled, non-flat-shaded quad strip is drawn
+    * as a triangle strip, which avoids the GS program: */
+   if (prim == GL_QUAD_STRIP) {
+      if (brw->attribs.Light->ShadeModel != GL_FLAT &&
+          brw->attribs.Polygon->FrontMode == GL_FILL &&
+          brw->attribs.Polygon->BackMode == GL_FILL)
+         prim = GL_TRIANGLE_STRIP;
+   }
+
+   if (prim == brw->primitive)
+      return hw_prim[prim];
+
+   brw->primitive = prim;
+   brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
+
+   if (reduced_prim[prim] != brw->intel.reduced_primitive) {
+      brw->intel.reduced_primitive = reduced_prim[prim];
+      brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
+   }
+
+   brw_validate_state(brw);
+   return hw_prim[prim];
+}
+
+/* Drop trailing vertices that cannot complete a quad; other prims pass through. */
+static GLuint trim(GLenum prim, GLuint length)
+{
+   GLuint leftover = 0;
+   if (prim == GL_QUADS)
+      leftover = length % 4;
+   else if (prim == GL_QUAD_STRIP)
+      leftover = (length > 3) ? (length % 2) : length;
+   return length - leftover;
+}
+
+/* Emit a CMD_DRAW_RECT packet describing one cliprect into the batchbuffer. */
+static void brw_emit_cliprect( struct brw_context *brw, 
+			       const drm_clip_rect_t *rect )
+{
+   struct brw_drawrect packet;
+
+   packet.header.opcode = CMD_DRAW_RECT;
+   packet.header.length = sizeof(packet)/4 - 2;
+   packet.xmin = rect->x1;
+   packet.ymin = rect->y1;
+   packet.xmax = rect->x2 - 1;
+   packet.ymax = rect->y2 - 1;
+   packet.xorg = brw->intel.drawX;
+   packet.yorg = brw->intel.drawY;
+
+   intel_batchbuffer_data( brw->intel.batch, &packet, sizeof(packet), 
+			   INTEL_BATCH_NO_CLIPRECTS);
+}
+
+/* Build the CMD_3D_PRIM packet for one _mesa_prim and add it to the batch. */
+static void brw_emit_prim( struct brw_context *brw, 
+			   const struct _mesa_prim *prim )
+{
+   struct brw_3d_primitive packet;
+
+   if (INTEL_DEBUG & DEBUG_PRIMS)
+      _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), 
+		   prim->start, prim->count);
+
+   /* brw_set_prim() also updates driver state, so it runs even for empty prims: */
+   packet.header.topology = brw_set_prim(brw, prim->mode);
+   packet.header.opcode = CMD_3D_PRIM;
+   packet.header.length = sizeof(packet)/4 - 2;
+   packet.header.pad = 0;
+   packet.header.indexed = prim->indexed;
+
+   packet.verts_per_instance = trim(prim->mode, prim->count);
+   packet.start_vert_location = prim->start;
+   packet.instance_count = 1;
+   packet.start_instance_location = 0;
+   packet.base_vert_location = 0;
+
+   if (!packet.verts_per_instance)
+      return;
+   intel_batchbuffer_data( brw->intel.batch, &packet, sizeof(packet), 
+			   INTEL_BATCH_NO_CLIPRECTS);
+}
+
+/* Bind client arrays to brw->vb.inputs and rebuild brw->vb.info (varying
+ * mask, packed Size-1 per attribute), raising dirty bits on any change. */
+static void brw_merge_inputs( struct brw_context *brw,
+		       const struct gl_client_array *arrays[])
+{
+   struct brw_vertex_element *inputs = brw->vb.inputs;
+   struct brw_vertex_info old = brw->vb.info;
+   GLuint i;
+
+   /* NOTE(review): sizeof(*inputs) clears inputs[0] only -- confirm intent. */
+   memset(inputs, 0, sizeof(*inputs));
+   memset(&brw->vb.info, 0, sizeof(brw->vb.info));
+
+   for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+      inputs[i].glarray = arrays[i];
+      /* XXX: metaops passes null arrays */
+      if (arrays[i]) {
+	 /* Shift unsigned operands: a signed "1 << 31" is undefined in C. */
+	 if (arrays[i]->StrideB != 0)
+	    brw->vb.info.varying |= 1u << i;
+	 brw->vb.info.sizes[i/16] |= (GLuint)(arrays[i]->Size - 1) << ((i%16) * 2);
+      }
+   }
+
+   /* Raise statechanges if input sizes and varying have changed: */
+   if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0)
+      brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS;
+   if (brw->vb.info.varying != old.varying)
+      brw->state.dirty.brw |= BRW_NEW_INPUT_VARYING;
+}
+
+/* Decide whether this draw has to fall back to software.  Only applies
+ * under strict conformance.  XXX: could split the primitive list to
+ * fallback only on the non-conformant primitives. */
+static GLboolean check_fallbacks( struct brw_context *brw,
+				  const struct _mesa_prim *prim,
+				  GLuint nr_prims )
+{
+   GLboolean aa_tris, aa_lines, aa_points, stipple;
+   GLuint n;
+
+   if (!brw->intel.strict_conformance)
+      return GL_FALSE;
+
+   aa_tris = brw->attribs.Polygon->SmoothFlag;
+   aa_lines = brw->attribs.Line->SmoothFlag;
+   aa_points = brw->attribs.Point->SmoothFlag;
+   stipple = brw->attribs.Line->StippleFlag;
+
+   /* One pass: a single offending primitive forces the fallback. */
+   for (n = 0; n < nr_prims; n++) {
+      const GLenum mode = prim[n].mode;
+      const GLenum reduced = reduced_prim[mode];
+
+      if (aa_tris && reduced == GL_TRIANGLES)
+         return GL_TRUE;
+
+      /* BRW hardware will do AA lines, but they are non-conformant it
+       * seems.  TBD whether we keep this fallback:
+       */
+      if (aa_lines && reduced == GL_LINES)
+         return GL_TRUE;
+
+      if (aa_points && mode == GL_POINTS)
+         return GL_TRUE;
+
+      /* Stipple -- these fallbacks could be resolved with a little
+       * bit of work?
+       */
+      if (!stipple)
+         continue;
+
+      /* GS doesn't get enough information to know when to reset
+       * the stipple counter?!?
+       */
+      if (mode == GL_LINE_LOOP)
+         return GL_TRUE;
+      if (mode == GL_POLYGON &&
+          (brw->attribs.Polygon->FrontMode == GL_LINE ||
+           brw->attribs.Polygon->BackMode == GL_LINE))
+         return GL_TRUE;
+   }
+
+   return GL_FALSE;
+}
+
+/* Try to draw the primitives in hardware.  May fail (GL_FALSE) if out of
+ * video memory for texture or vbo upload, on fallback conditions, or if
+ * the final batchbuffer flush fails.  Holds the hardware lock while emitting. */
+static GLboolean brw_try_draw_prims( GLcontext *ctx,
+				     const struct gl_client_array *arrays[],
+				     const struct _mesa_prim *prim,
+				     GLuint nr_prims,
+				     const struct _mesa_index_buffer *ib,
+				     GLuint min_index,
+				     GLuint max_index )
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct brw_context *brw = brw_context(ctx);
+   GLboolean retval = GL_FALSE;
+   GLuint i, j;
+
+   if (ctx->NewState)
+      _mesa_update_state( ctx );
+
+   /* Bind all inputs, derive varying and size information:
+    */
+   brw_merge_inputs( brw, arrays );
+      
+   /* Have to validate state quite late.  Will rebuild tnl_program,
+    * which depends on varying information.  
+    * 
+    * Note this is where brw->vs->prog_data.inputs_read is calculated,
+    * so can't access it earlier.
+    */
+
+   LOCK_HARDWARE(intel);
+
+   if (brw->intel.numClipRects == 0) {	/* no cliprects: emit nothing, report success */
+      assert(intel->batch->ptr == intel->batch->map + intel->batch->offset);
+      UNLOCK_HARDWARE(intel);
+      return GL_TRUE;
+   }
+
+   {
+      /* Set the first primitive early, ahead of validate_state:
+       */
+      brw_set_prim(brw, prim[0].mode);
+
+      /* XXX:  Need to separate validate and upload of state.  
+       */
+      brw_validate_state( brw );
+
+      /* Various fallback checks:
+       */
+      if (brw->intel.Fallback) 
+	 goto out;
+
+      if (check_fallbacks( brw, prim, nr_prims ))
+	 goto out;
+	  
+      /* Upload index, vertex data: 
+       */
+      if (ib)
+	 brw_upload_indices( brw, ib );
+
+      if (!brw_upload_vertices( brw, min_index, max_index)) {
+	 goto out;
+      }
+
+      /* For single cliprect, state is already emitted: 
+       */
+      if (brw->intel.numClipRects == 1) {
+	 for (i = 0; i < nr_prims; i++) {
+	    brw_emit_prim(brw, &prim[i]);   
+	 }
+      }
+      else {
+	 /* Otherwise, explicitly do the cliprects at this point:
+	  */
+          GLuint nprims = 0;
+	 for (j = 0; j < brw->intel.numClipRects; j++) {
+	    brw_emit_cliprect(brw, &brw->intel.pClipRects[j]);
+
+	    /* Emit prims to batchbuffer: 
+	     */
+	    for (i = 0; i < nr_prims; i++) {
+	       brw_emit_prim(brw, &prim[i]);   
+
+          if (++nprims == VBO_MAX_PRIM) {	/* flush the batch every VBO_MAX_PRIM emitted prims */
+              intel_batchbuffer_flush(brw->intel.batch);
+              nprims = 0;
+          }
+	    }
+	 }
+      }
+      
+      intel->need_flush = GL_TRUE;
+      retval = GL_TRUE;
+   }
+
+ out:
+
+   /* Currently have to do this to synchronize with the map/unmap of
+    * the vertex buffer in brw_exec_api.c.  Not sure if there is any
+    * way around this, as not every flush is due to a buffer filling
+    * up.  A failed flush turns the whole attempt into a failure.
+    */
+   if (!intel_batchbuffer_flush( brw->intel.batch )) {
+      DBG("%s intel_batchbuffer_flush failed\n", __FUNCTION__);
+      retval = GL_FALSE;
+   }
+
+   if (retval && intel->thrashing) {
+      bmSetFence(intel);
+   }
+
+   /* Free any old data so it doesn't clog up texture memory - we
+    * won't be referencing it again.  Walks 'wrap' up to 'buf'.
+    */
+   while (brw->vb.upload.wrap != brw->vb.upload.buf) {
+      ctx->Driver.BufferData(ctx,
+			     GL_ARRAY_BUFFER_ARB,
+			     BRW_UPLOAD_INIT_SIZE,
+			     NULL,
+			     GL_DYNAMIC_DRAW_ARB,
+			     brw->vb.upload.vbo[brw->vb.upload.wrap]);
+      brw->vb.upload.wrap++;
+      brw->vb.upload.wrap %= BRW_NR_UPLOAD_BUFS;
+   }
+
+   UNLOCK_HARDWARE(intel);
+
+   if (!retval)
+      DBG("%s failed\n", __FUNCTION__);
+
+   return retval;
+}
+
+/* Decide whether brw_draw_prims() should route this draw through
+ * vbo_rebase_prims().  Never when min_index is 0; otherwise whenever
+ * vbo_all_varyings_in_vbos() reports that not every varying is in a VBO.
+ */
+static GLboolean brw_need_rebase( GLcontext *ctx,
+				  const struct gl_client_array *arrays[],
+				  const struct _mesa_index_buffer *ib,
+				  GLuint min_index )
+{
+   GLboolean all_in_vbos;
+
+   if (min_index == 0)
+      return GL_FALSE;
+
+   /* Indexed and non-indexed draws currently get the same answer, so
+    * 'ib' is not consulted.
+    *
+    * Hmm.  For the non-indexed case this isn't quite what I wanted.
+    * BRW can actually handle the mixed case well enough that we
+    * shouldn't need to rebase.  However, it's probably not very
+    * common, nor hugely expensive to do it this way:
+    */
+   all_in_vbos = vbo_all_varyings_in_vbos(arrays);
+
+   return all_in_vbos ? GL_FALSE : GL_TRUE;
+}
+				  
+
+/* Draw entry point registered with the vbo module by brw_draw_init().
+ * Tries to draw in hardware (retrying once if bmError() is set and no
+ * fallback is active) and otherwise passes the draw to software tnl.
+ */
+void brw_draw_prims( GLcontext *ctx,
+		     const struct gl_client_array *arrays[],
+		     const struct _mesa_prim *prim,
+		     GLuint nr_prims,
+		     const struct _mesa_index_buffer *ib,
+		     GLuint min_index,
+		     GLuint max_index )
+{
+   struct intel_context *intel = intel_context(ctx);
+   GLboolean drawn;
+
+   /* Decide if we want to rebase.  If so we end up recursing once
+    * only into this function, via the callback passed below.
+    */
+   if (brw_need_rebase( ctx, arrays, ib, min_index )) {
+      vbo_rebase_prims( ctx, arrays,
+			prim, nr_prims,
+			ib, min_index, max_index,
+			brw_draw_prims );
+
+      return;
+   }
+
+   /* Make a first attempt at drawing: */
+   drawn = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+
+   /* A failure looks like out-of-memory, but potentially there is
+    * enough memory and it has merely become fragmented.  The heaps are
+    * cleared out elsewhere (by faking a contended lock event), so try
+    * a second time only to upload textures and draw the primitives:
+    */
+   if (!drawn) {
+      if (!intel->Fallback && bmError(intel)) {
+         DBG("retrying\n");
+         drawn = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+      }
+   }
+
+   /* Otherwise, we really are out of memory.  Pass the drawing
+    * command to the software tnl module, which will in turn call
+    * swrast to do the drawing.
+    */
+   if (!drawn) {
+      _swsetup_Wakeup(ctx);
+      _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+   }
+
+   if (intel->aub_file && (INTEL_DEBUG & DEBUG_SYNC)) {
+      intelFinish( &intel->ctx );
+      intel->aub_wrap = 1;
+   }
+}
+
+
+static void brw_invalidate_vbo_cb( struct intel_context *intel, void *ptr )
+{
+   /* Invalidate callback registered for the upload VBOs in brw_draw_init(): nothing to do, we don't rely on the contents being preserved */
+}
+
+
+/* Install brw_draw_prims() as the vbo draw hook and create the
+ * BRW_NR_UPLOAD_BUFS upload buffer objects, sizing the first one.
+ */
+void brw_draw_init( struct brw_context *brw )
+{
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+   GLuint n;
+
+   /* Register our drawing function: */
+   vbo_context(ctx)->draw_prims = brw_draw_prims;
+
+   brw->vb.upload.size = BRW_UPLOAD_INIT_SIZE;
+
+   for (n = 0; n < BRW_NR_UPLOAD_BUFS; n++) {
+      brw->vb.upload.vbo[n] =
+         ctx->Driver.NewBufferObject(ctx, 1, GL_ARRAY_BUFFER_ARB);
+
+      /* NOTE:  These are set to no-backing-store. */
+      bmBufferSetInvalidateCB(intel,
+			      intel_bufferobj_buffer(intel_buffer_object(brw->vb.upload.vbo[n])),
+			      brw_invalidate_vbo_cb,
+			      intel,
+			      GL_TRUE);
+   }
+
+   /* Only the first buffer is given storage here: */
+   ctx->Driver.BufferData( ctx, GL_ARRAY_BUFFER_ARB,
+			   BRW_UPLOAD_INIT_SIZE, NULL,
+			   GL_DYNAMIC_DRAW_ARB, brw->vb.upload.vbo[0] );
+}
+
+/* Delete the upload buffer objects created by brw_draw_init(). */
+void brw_draw_destroy( struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   GLuint n = 0;
+   while (n < BRW_NR_UPLOAD_BUFS)
+      ctx->Driver.DeleteBuffer(ctx, brw->vb.upload.vbo[n++]);
+}
diff --git a/i965/brw_draw.h b/i965/brw_draw.h
new file mode 100644
index 0000000..0f7b738
--- /dev/null
+++ b/i965/brw_draw.h
@@ -0,0 +1,65 @@
+ /**************************************************************************
+ * 
+ * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef BRW_DRAW_H
+#define BRW_DRAW_H
+
+#include "mtypes.h"		/* for GLcontext... */
+#include "vbo/vbo.h"
+
+struct brw_context;
+
+/* Draw entry point; brw_draw_init() installs it as vbo->draw_prims: */
+void brw_draw_prims( GLcontext *ctx,
+		     const struct gl_client_array *arrays[],
+		     const struct _mesa_prim *prims,
+		     GLuint nr_prims,
+		     const struct _mesa_index_buffer *ib,
+		     GLuint min_index,
+		     GLuint max_index );
+/* Create / delete the vertex upload buffer objects: */
+void brw_draw_init( struct brw_context *brw );
+void brw_draw_destroy( struct brw_context *brw );
+
+/* brw_draw_current.c
+ */
+void brw_init_current_values(GLcontext *ctx,
+			     struct gl_client_array *arrays);
+
+
+/* brw_draw_upload.c
+ */
+void brw_upload_indices( struct brw_context *brw,
+			 const struct _mesa_index_buffer *index_buffer);
+
+/* Returns GL_FALSE if the vertex state could not be set up: */
+GLboolean brw_upload_vertices( struct brw_context *brw,
+			       GLuint min_index,
+			       GLuint max_index );
+
+
+#endif
diff --git a/i965/brw_draw_upload.c b/i965/brw_draw_upload.c
new file mode 100644
index 0000000..6150cac
--- /dev/null
+++ b/i965/brw_draw_upload.c
@@ -0,0 +1,619 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include <stdlib.h>
+
+#include "glheader.h"
+#include "context.h"
+#include "state.h"
+#include "api_validate.h"
+#include "enums.h"
+
+#include "brw_draw.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_aub.h"
+#include "brw_state.h"
+#include "brw_fallback.h"
+
+#include "intel_ioctl.h"
+#include "intel_batchbuffer.h"
+#include "intel_buffer_objects.h"
+
+/* CMD_VERTEX_BUFFER packet as staged by brw_upload_vertices(): header plus one entry per vertex buffer. */
+struct brw_array_state {
+   union header_union header;
+
+   struct {
+      union {
+	 struct {
+	    GLuint pitch:11;		/* set from the array's StrideB */
+	    GLuint pad:15;
+	    GLuint access_type:1;	/* BRW_VERTEXBUFFER_ACCESS_x */
+	    GLuint vb_index:5; 
+	 } bits;
+	 GLuint dword;			/* vb0 as emitted to the batch */
+      } vb0;
+   
+      struct buffer *buffer;		/* buffer holding the array data */
+      GLuint offset;			/* added to bmBufferOffset(buffer) on emit */
+
+      GLuint max_index;   
+      GLuint instance_data_step_rate;
+
+   } vb[BRW_VBP_MAX];
+};
+
+/* Driver buffer behind a client array's buffer object. */
+static struct buffer *array_buffer( const struct gl_client_array *array )
+{
+   return intel_bufferobj_buffer(intel_buffer_object(array->BufferObj));
+}
+/* Surface formats per GL type, indexed by component count 1..4 (entry 0 is a 0 placeholder); read-only. */
+static const GLuint double_types[5] = {
+   0,
+   BRW_SURFACEFORMAT_R64_FLOAT,
+   BRW_SURFACEFORMAT_R64G64_FLOAT,
+   BRW_SURFACEFORMAT_R64G64B64_FLOAT,
+   BRW_SURFACEFORMAT_R64G64B64A64_FLOAT
+};
+
+static const GLuint float_types[5] = {
+   0,
+   BRW_SURFACEFORMAT_R32_FLOAT,
+   BRW_SURFACEFORMAT_R32G32_FLOAT,
+   BRW_SURFACEFORMAT_R32G32B32_FLOAT,
+   BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
+};
+
+static const GLuint uint_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R32_UNORM,
+   BRW_SURFACEFORMAT_R32G32_UNORM,
+   BRW_SURFACEFORMAT_R32G32B32_UNORM,
+   BRW_SURFACEFORMAT_R32G32B32A32_UNORM
+};
+
+static const GLuint uint_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R32_USCALED,
+   BRW_SURFACEFORMAT_R32G32_USCALED,
+   BRW_SURFACEFORMAT_R32G32B32_USCALED,
+   BRW_SURFACEFORMAT_R32G32B32A32_USCALED
+};
+
+static const GLuint int_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R32_SNORM,
+   BRW_SURFACEFORMAT_R32G32_SNORM,
+   BRW_SURFACEFORMAT_R32G32B32_SNORM,
+   BRW_SURFACEFORMAT_R32G32B32A32_SNORM
+};
+
+static const GLuint int_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R32_SSCALED,
+   BRW_SURFACEFORMAT_R32G32_SSCALED,
+   BRW_SURFACEFORMAT_R32G32B32_SSCALED,
+   BRW_SURFACEFORMAT_R32G32B32A32_SSCALED
+};
+
+static const GLuint ushort_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R16_UNORM,
+   BRW_SURFACEFORMAT_R16G16_UNORM,
+   BRW_SURFACEFORMAT_R16G16B16_UNORM,
+   BRW_SURFACEFORMAT_R16G16B16A16_UNORM
+};
+
+static const GLuint ushort_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R16_USCALED,
+   BRW_SURFACEFORMAT_R16G16_USCALED,
+   BRW_SURFACEFORMAT_R16G16B16_USCALED,
+   BRW_SURFACEFORMAT_R16G16B16A16_USCALED
+};
+
+static const GLuint short_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R16_SNORM,
+   BRW_SURFACEFORMAT_R16G16_SNORM,
+   BRW_SURFACEFORMAT_R16G16B16_SNORM,
+   BRW_SURFACEFORMAT_R16G16B16A16_SNORM
+};
+
+static const GLuint short_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R16_SSCALED,
+   BRW_SURFACEFORMAT_R16G16_SSCALED,
+   BRW_SURFACEFORMAT_R16G16B16_SSCALED,
+   BRW_SURFACEFORMAT_R16G16B16A16_SSCALED
+};
+
+static const GLuint ubyte_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R8_UNORM,
+   BRW_SURFACEFORMAT_R8G8_UNORM,
+   BRW_SURFACEFORMAT_R8G8B8_UNORM,
+   BRW_SURFACEFORMAT_R8G8B8A8_UNORM
+};
+
+static const GLuint ubyte_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R8_USCALED,
+   BRW_SURFACEFORMAT_R8G8_USCALED,
+   BRW_SURFACEFORMAT_R8G8B8_USCALED,
+   BRW_SURFACEFORMAT_R8G8B8A8_USCALED
+};
+
+static const GLuint byte_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R8_SNORM,
+   BRW_SURFACEFORMAT_R8G8_SNORM,
+   BRW_SURFACEFORMAT_R8G8B8_SNORM,
+   BRW_SURFACEFORMAT_R8G8B8A8_SNORM
+};
+
+static const GLuint byte_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R8_SSCALED,
+   BRW_SURFACEFORMAT_R8G8_SSCALED,
+   BRW_SURFACEFORMAT_R8G8B8_SSCALED,
+   BRW_SURFACEFORMAT_R8G8B8A8_SSCALED
+};
+
+
+static GLuint get_surface_type( GLenum type, GLuint size, GLboolean normalized )
+{
+   /* Translate a GL vertex array layout into a BRW_SURFACEFORMAT_* value.
+    *
+    * 'type' selects one of the format tables above ('normalized' picks
+    * the _norm over the _scale variant for integer types and is ignored
+    * for float/double); 'size' then indexes the chosen table.  Unknown
+    * types assert and yield 0.
+    */
+   const GLuint *formats;
+
+   if (INTEL_DEBUG & DEBUG_VERTS)
+      _mesa_printf("type %s size %d normalized %d\n", 
+		   _mesa_lookup_enum_by_nr(type), size, normalized);
+
+   switch (type) {
+   case GL_DOUBLE: formats = double_types; break;
+   case GL_FLOAT: formats = float_types; break;
+   case GL_INT: formats = normalized ? int_types_norm : int_types_scale; break;
+   case GL_SHORT: formats = normalized ? short_types_norm : short_types_scale; break;
+   case GL_BYTE: formats = normalized ? byte_types_norm : byte_types_scale; break;
+   case GL_UNSIGNED_INT: formats = normalized ? uint_types_norm : uint_types_scale; break;
+   case GL_UNSIGNED_SHORT: formats = normalized ? ushort_types_norm : ushort_types_scale; break;
+   case GL_UNSIGNED_BYTE: formats = normalized ? ubyte_types_norm : ubyte_types_scale; break;
+   default:
+      assert(0);
+      return 0;
+   }
+
+   /* NOTE: 'size' is used unchecked; each table has five entries
+    * (indices 0..4). */
+   return formats[size];
+}
+
+
+static GLuint get_size( GLenum type )
+{
+   /* Bytes per component for a GL array type; 0 if the type is unhandled. */
+   GLuint bytes = 0;
+   if (type == GL_DOUBLE)              bytes = sizeof(GLdouble);
+   else if (type == GL_FLOAT)          bytes = sizeof(GLfloat);
+   else if (type == GL_INT)            bytes = sizeof(GLint);
+   else if (type == GL_SHORT)          bytes = sizeof(GLshort);
+   else if (type == GL_BYTE)           bytes = sizeof(GLbyte);
+   else if (type == GL_UNSIGNED_INT)   bytes = sizeof(GLuint);
+   else if (type == GL_UNSIGNED_SHORT) bytes = sizeof(GLushort);
+   else if (type == GL_UNSIGNED_BYTE)  bytes = sizeof(GLubyte);
+   return bytes;
+}
+
+static GLuint get_index_type(GLenum type) 
+{
+   /* GL index type -> BRW_INDEX_* encoding; other types assert, return 0. */
+   if (type == GL_UNSIGNED_BYTE)  return BRW_INDEX_BYTE;
+   if (type == GL_UNSIGNED_SHORT) return BRW_INDEX_WORD;
+   if (type == GL_UNSIGNED_INT)   return BRW_INDEX_DWORD;
+   assert(0);
+   return 0;
+}
+
+static void copy_strided_array( GLubyte *dest, const GLubyte *src,
+				GLuint size, GLuint stride, GLuint count )
+{
+   /* Pack 'count' elements of 'size' bytes, 'stride' bytes apart in src,
+    * tightly into dest.  Elements are addressed from the base of src: the
+    * old "src += stride - size" wrapped (unsigned) when stride < size,
+    * e.g. for a stride of 0 with a single element.
+    */
+   GLuint i, j;
+
+   if (size == stride) {
+      do_memcpy(dest, src, count * size);
+      return;
+   }
+   for (i = 0; i < count; i++)
+      for (j = 0; j < size; j++)
+	 *dest++ = src[(size_t)i * stride + j];
+}
+
+static void wrap_buffers( struct brw_context *brw,
+			  GLuint size )
+{
+   /* Advance to the next upload VBO (round-robin), rewind the write
+    * offset and give that VBO fresh storage of at least
+    * BRW_UPLOAD_INIT_SIZE bytes.
+    */
+   GLcontext *ctx = &brw->intel.ctx;
+   GLuint bytes = (size < BRW_UPLOAD_INIT_SIZE) ? BRW_UPLOAD_INIT_SIZE : size;
+
+   brw->vb.upload.buf = (brw->vb.upload.buf + 1) % BRW_NR_UPLOAD_BUFS;
+   brw->vb.upload.offset = 0;
+
+   ctx->Driver.BufferData(ctx,
+			  GL_ARRAY_BUFFER_ARB,
+			  bytes,
+			  NULL, GL_DYNAMIC_DRAW_ARB,
+			  brw->vb.upload.vbo[brw->vb.upload.buf]);
+}
+
+static void get_space( struct brw_context *brw,
+		       GLuint size,
+		       struct gl_buffer_object **vbo_return,
+		       GLuint *offset_return )
+{
+   /* Reserve 'size' bytes, rounded up to a multiple of 64, in the current
+    * upload VBO; moves on to the next VBO first if the request would run
+    * past BRW_UPLOAD_INIT_SIZE.  Returns the VBO and the byte offset. */
+   const GLuint aligned = (size + 63) & ~63;
+   if (brw->vb.upload.offset + aligned > BRW_UPLOAD_INIT_SIZE)
+      wrap_buffers(brw, aligned);
+   *offset_return = brw->vb.upload.offset;
+   *vbo_return = brw->vb.upload.vbo[brw->vb.upload.buf];
+   brw->vb.upload.offset += aligned;
+}
+
+
+
+static struct gl_client_array *
+copy_array_to_vbo_array( struct brw_context *brw,
+			 GLuint i,
+			 const struct gl_client_array *array,
+			 GLuint element_size,
+			 GLuint count)
+{
+   /* Copy 'count' elements of 'element_size' bytes from the client
+    * array into freshly reserved upload-VBO space, and describe the copy
+    * in brw->vb.vbo_array[i], which is returned.  The copy is tightly
+    * packed (stride == element_size) unless the source has StrideB == 0,
+    * in which case a single element is stored with stride 0.
+    */
+   GLcontext *ctx = &brw->intel.ctx;
+   struct gl_client_array *dst = &brw->vb.vbo_array[i];
+   struct gl_buffer_object *vbo;
+   GLuint offset;
+   GLuint packed_stride = element_size;
+   GLubyte *map;
+
+   get_space(brw, count * element_size, &vbo, &offset);
+
+   if (array->StrideB == 0) {
+      assert(count == 1);
+      packed_stride = 0;
+   }
+
+   dst->Size = array->Size;
+   dst->Type = array->Type;
+   dst->Stride = packed_stride;
+   dst->StrideB = packed_stride;
+   dst->Ptr = (const void *)offset;	/* byte offset within the VBO */
+   dst->Enabled = 1;
+   dst->Normalized = array->Normalized;
+   dst->_MaxElement = array->_MaxElement;	/* ? */
+   dst->BufferObj = vbo;
+
+   map = ctx->Driver.MapBuffer(ctx,
+			       GL_ARRAY_BUFFER_ARB,
+			       GL_DYNAMIC_DRAW_ARB,
+			       vbo);
+
+   copy_strided_array( map + offset, 
+		       array->Ptr,
+		       element_size,
+		       array->StrideB,
+		       count);
+
+   ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, dst->BufferObj);
+
+   return dst;
+}
+
+
+
+static struct gl_client_array *
+interleaved_vbo_array( struct brw_context *brw,
+		       GLuint i,
+		       const struct gl_client_array *uploaded_array,
+		       const struct gl_client_array *array,
+		       const char *ptr)
+{
+   /* Describe 'array' as a view into the uploaded interleaved block: same
+    * VBO, at the uploaded base plus the array's byte distance from 'ptr'. */
+   struct gl_client_array *dst = &brw->vb.vbo_array[i];
+   const char *base = (const char *)uploaded_array->Ptr;
+   dst->BufferObj = uploaded_array->BufferObj;
+   dst->Ptr = (const void *)(base + ((const char *)array->Ptr - ptr));
+   dst->Size = array->Size;
+   dst->Type = array->Type;
+   dst->Stride = array->Stride;
+   dst->StrideB = array->StrideB;
+   dst->Normalized = array->Normalized;
+   dst->_MaxElement = array->_MaxElement;
+   dst->Enabled = 1;
+   return dst;
+}
+/* Upload the non-VBO arrays read by the vertex program and emit vertex buffer / vertex element state.
+ * Returns GL_FALSE if a non-VBO attribute 0 has zero stride or nr_enabled >= BRW_VEP_MAX, else GL_TRUE. */
+GLboolean brw_upload_vertices( struct brw_context *brw,
+			       GLuint min_index,
+			       GLuint max_index )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct intel_context *intel = intel_context(ctx);	/* NOTE(review): presumably used by the batch macros */
+   GLuint tmp = brw->vs.prog_data->inputs_read; 
+   struct brw_vertex_element_packet vep;
+   struct brw_array_state vbp;
+   GLuint i;
+   const void *ptr = NULL;
+   GLuint interleave = 0;
+
+   struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
+   GLuint nr_enabled = 0;
+
+   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
+   GLuint nr_uploads = 0;
+   
+
+   memset(&vbp, 0, sizeof(vbp));
+   memset(&vep, 0, sizeof(vep));
+
+   /* First build an array of pointers to ve's in vb.inputs_read
+    */
+   if (0)
+      _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
+   
+   while (tmp) {
+      GLuint attr = _mesa_ffsll(tmp)-1;
+      struct brw_vertex_element *input = &brw->vb.inputs[attr];
+
+      tmp &= ~(1u<<attr);	/* unsigned shift: bit 31 of the mask may be set */
+      enabled[nr_enabled++] = input;
+
+      input->index = attr;
+      input->element_size = get_size(input->glarray->Type) * input->glarray->Size;
+      input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1;
+
+      if (!input->glarray->BufferObj->Name) {
+	 if (attr == 0) {
+	    /* Position array not properly enabled:
+	     */
+	    if (input->glarray->StrideB == 0)
+	       return GL_FALSE;
+
+	    interleave = input->glarray->StrideB;
+	    ptr = input->glarray->Ptr;
+	 }
+	 else if (interleave != input->glarray->StrideB ||
+		  (const char *)input->glarray->Ptr - (const char *)ptr < 0 ||
+		  (const char *)input->glarray->Ptr - (const char *)ptr > interleave) {
+	    interleave = 0;
+	 }
+
+	 upload[nr_uploads++] = input;
+	 
+	 /* We rebase drawing to start at element zero only when
+	  * varyings are not in vbos, which means we can end up
+	  * uploading non-varying arrays (stride != 0) when min_index
+	  * is zero.  This doesn't matter as the amount to upload is
+	  * the same for these arrays whether the draw call is rebased
+	  * or not - we just have to upload the one element.
+	  */
+	 assert(min_index == 0 || input->glarray->StrideB == 0);
+      }
+   }
+
+   /* Upload interleaved arrays if all uploads are interleaved
+    */
+   if (nr_uploads > 1 && 
+       interleave && 
+       interleave <= 256) {
+      struct brw_vertex_element *input0 = upload[0];
+
+      input0->glarray = copy_array_to_vbo_array(brw, 0,
+						input0->glarray, 
+						interleave,
+						input0->count);
+
+      for (i = 1; i < nr_uploads; i++) {
+	 upload[i]->glarray = interleaved_vbo_array(brw,
+						    i,
+						    input0->glarray,
+						    upload[i]->glarray,
+						    ptr);
+      }
+   }
+   else {
+      for (i = 0; i < nr_uploads; i++) {
+	 struct brw_vertex_element *input = upload[i];
+
+	 input->glarray = copy_array_to_vbo_array(brw, i, 
+						  input->glarray,
+						  input->element_size,
+						  input->count);
+
+      }
+   }
+
+   /* XXX: In the rare cases where this happens we fallback all
+    * the way to software rasterization, although a tnl fallback
+    * would be sufficient.  I don't know of *any* real world
+    * cases with > 17 vertex attributes enabled, so it probably
+    * isn't an issue at this point.
+    */
+   if (nr_enabled >= BRW_VEP_MAX)
+	 return GL_FALSE;
+
+   /* This still defines a hardware VB for each input, even if they
+    * are interleaved or from the same VBO.  TBD if this makes a
+    * performance difference.
+    */
+   for (i = 0; i < nr_enabled; i++) {
+      struct brw_vertex_element *input = enabled[i];
+
+      input->vep = &vep.ve[i];
+      input->vep->ve0.src_format = get_surface_type(input->glarray->Type, 
+						    input->glarray->Size,
+						    input->glarray->Normalized);
+      input->vep->ve0.valid = 1;
+      input->vep->ve1.dst_offset = (i) * 4;
+      input->vep->ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC;
+      input->vep->ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC;
+      input->vep->ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC;
+      input->vep->ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC;
+
+      switch (input->glarray->Size) {
+      case 0: input->vep->ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_0;	/* fallthrough */
+      case 1: input->vep->ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0;	/* fallthrough */
+      case 2: input->vep->ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0;	/* fallthrough */
+      case 3: input->vep->ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT;
+	 break;
+      }
+
+      input->vep->ve0.vertex_buffer_index = i;
+      input->vep->ve0.src_offset = 0;
+
+      vbp.vb[i].vb0.bits.pitch = input->glarray->StrideB;
+      vbp.vb[i].vb0.bits.pad = 0;
+      vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA;
+      vbp.vb[i].vb0.bits.vb_index = i;
+      vbp.vb[i].offset = (GLuint)input->glarray->Ptr;
+      vbp.vb[i].buffer = array_buffer(input->glarray);
+      vbp.vb[i].max_index = max_index;
+   }
+
+
+
+   /* Now emit VB and VEP state packets:
+    */
+   vbp.header.bits.length = (1 + nr_enabled * 4) - 2;
+   vbp.header.bits.opcode = CMD_VERTEX_BUFFER;
+
+   BEGIN_BATCH(vbp.header.bits.length+2, 0);
+   OUT_BATCH( vbp.header.dword );
+   
+   for (i = 0; i < nr_enabled; i++) {
+      OUT_BATCH( vbp.vb[i].vb0.dword );
+      OUT_BATCH( bmBufferOffset(&brw->intel, vbp.vb[i].buffer) + vbp.vb[i].offset);
+      OUT_BATCH( vbp.vb[i].max_index );
+      OUT_BATCH( vbp.vb[i].instance_data_step_rate );
+   }
+   ADVANCE_BATCH();
+
+   vep.header.length = (1 + nr_enabled * sizeof(vep.ve[0])/4) - 2;
+   vep.header.opcode = CMD_VERTEX_ELEMENT;
+   brw_cached_batch_struct(brw, &vep, 4 + nr_enabled * sizeof(vep.ve[0]));
+
+   return GL_TRUE;
+}
+
+
+static GLuint element_size( GLenum type )
+{
+   /* Bytes per index for a GL index type; other types assert, return 0. */
+   if (type == GL_UNSIGNED_INT)   return 4;
+   if (type == GL_UNSIGNED_SHORT) return 2;
+   if (type == GL_UNSIGNED_BYTE)  return 1;
+   assert(0);
+   return 0;
+}
+
+
+
+void brw_upload_indices( struct brw_context *brw,
+			 const struct _mesa_index_buffer *index_buffer )
+{
+   /* Emit a CMD_INDEX_BUFFER packet for 'index_buffer'.  Indices that are
+    * not in a named buffer object (obj->Name == 0) are first copied into
+    * upload-VBO space obtained from get_space().
+    */
+   GLcontext *ctx = &brw->intel.ctx;
+   struct intel_context *intel = &brw->intel;
+   struct gl_buffer_object *bufferobj = index_buffer->obj;
+   GLuint offset = (GLuint)index_buffer->ptr;
+   GLuint ib_size = get_size(index_buffer->type) * index_buffer->count;
+   struct brw_indexbuffer ib;
+   struct buffer *buffer;
+
+   if (!bufferobj->Name) {
+      /* Turn into a proper VBO: reserve space (this replaces bufferobj
+       * and offset) and upload the indices there.
+       */
+      get_space(brw, ib_size, &bufferobj, &offset);
+
+      ctx->Driver.BufferSubData( ctx,
+				 GL_ELEMENT_ARRAY_BUFFER_ARB,
+				 offset, 
+				 ib_size,
+				 index_buffer->ptr,
+				 bufferobj);
+   }
+
+   buffer = intel_bufferobj_buffer(intel_buffer_object(bufferobj));
+
+   /* Build the packet header:
+    */
+   memset(&ib, 0, sizeof(ib));
+   ib.header.bits.opcode = CMD_INDEX_BUFFER;
+   ib.header.bits.length = sizeof(ib)/4 - 2;
+   ib.header.bits.index_format = get_index_type(index_buffer->type);
+   ib.header.bits.cut_index_enable = 0;
+
+   /* Header, buffer offset + offset, the same plus ib_size, then a zero
+    * dword:
+    */
+   BEGIN_BATCH(4, 0);
+   OUT_BATCH( ib.header.dword );
+   OUT_BATCH( bmBufferOffset(intel, buffer) + offset );
+   OUT_BATCH( bmBufferOffset(intel, buffer) + offset + ib_size );
+   OUT_BATCH( 0 );
+   ADVANCE_BATCH();
+}
diff --git a/i965/brw_eu.c b/i965/brw_eu.c
new file mode 100644
index 0000000..b3ae4ee
--- /dev/null
+++ b/i965/brw_eu.c
@@ -0,0 +1,131 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+  
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+
+/* How does predicate control work when execution_size != 8?  Do I
+ * need to test/set for 0xffff when execution_size is 16?
+ */
+void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value )
+{
+   /* 0xff: no predication.  Otherwise load 'value' into the flag register
+    * (skipped if p->flag_value already matches) and predicate normally. */
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+   if (value == 0xff)
+      return;
+   if (p->flag_value != value) {
+      brw_push_insn_state(p);
+      brw_MOV(p, brw_flag_reg(), brw_imm_uw(value));
+      p->flag_value = value;
+      brw_pop_insn_state(p);
+   }
+   p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+}
+/* Setters for individual fields of p->current->header, the current entry of the instruction-state stack: */
+void brw_set_predicate_control( struct brw_compile *p, GLuint pc )
+{
+   p->current->header.predicate_control = pc;
+}
+
+void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional )
+{
+   p->current->header.destreg__conditonalmod = conditional;
+}
+
+void brw_set_access_mode( struct brw_compile *p, GLuint access_mode )
+{
+   p->current->header.access_mode = access_mode;
+}
+
+void brw_set_compression_control( struct brw_compile *p, GLboolean compression_control )
+{
+   p->current->header.compression_control = compression_control;
+}
+
+void brw_set_mask_control( struct brw_compile *p, GLuint value )
+{
+   p->current->header.mask_control = value;
+}
+
+void brw_set_saturate( struct brw_compile *p, GLuint value )
+{
+   p->current->header.saturate = value;
+}
+/* Duplicate the current instruction state into the next stack slot and make that slot current. */
+void brw_push_insn_state( struct brw_compile *p )
+{
+   assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
+   memcpy(&p->current[1], &p->current[0], sizeof(p->current[0]));
+   p->current = &p->current[1];
+}
+/* Drop the top of the instruction-state stack, making the previous entry current. */
+void brw_pop_insn_state( struct brw_compile *p )
+{
+   assert(p->current != p->stack);
+   p->current -= 1;
+}
+
+
+/***********************************************************************
+ */
+void brw_init_compile( struct brw_context *brw, struct brw_compile *p )
+{
+   /* NOTE(review): flag_value is left untouched - presumably callers zero *p; confirm. */
+   p->brw = brw;
+   p->nr_insn = 0;
+   p->current = &p->stack[0];
+   memset(p->current, 0, sizeof(*p->current));
+
+   /* Some defaults? */
+   brw_set_predicate_control_flag_value(p, 0xff); 
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_set_saturate(p, 0);
+   brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
+}
+
+
+const GLuint *brw_get_program( struct brw_compile *p,
+			       GLuint *sz )
+{
+   /* Emit eight NOPs, then return the instruction store; '*sz' receives
+    * nr_insn * sizeof(struct brw_instruction). */
+   GLuint pad = 8;
+   while (pad--)
+      brw_NOP(p);
+   *sz = p->nr_insn * sizeof(struct brw_instruction);
+   return (const GLuint *)p->store;
+}
+
diff --git a/i965/brw_eu.h b/i965/brw_eu.h
new file mode 100644
index 0000000..9d46aac
--- /dev/null
+++ b/i965/brw_eu.h
@@ -0,0 +1,865 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+   
+
+#ifndef BRW_EU_H
+#define BRW_EU_H
+
+#include "brw_structs.h"
+#include "brw_defines.h"
+#include "shader/prog_instruction.h"
+/* A swizzle packs four 2-bit channel selectors, the first in the low bits: */
+#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
+#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
+
+#define BRW_SWIZZLE_NOOP      BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XYZW      BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XXXX      BRW_SWIZZLE4(0,0,0,0)
+#define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)
+
+/* Bytes per register, as used by byte_offset(): */
+#define REG_SIZE (8*4)
+
+
+/* These aren't hardware structs, just something useful for us to pass around:
+ *
+ * Align1 operation has a lot of control over input ranges.  Used in
+ * WM programs to implement shaders decomposed into "channel serial"
+ * or "structure of array" form:
+ */
+struct brw_reg
+{
+   GLuint type:4;		/* BRW_REGISTER_TYPE_x */
+   GLuint file:2;		/* register file; BRW_IMMEDIATE_VALUE for immediates */
+   GLuint nr:8;			/* register number */
+   GLuint subnr:5;		/* byte offset within the register; :1 in align16 */
+   GLuint negate:1;		/* source only */
+   GLuint abs:1;		/* source only */
+   GLuint vstride:4;		/* source only */
+   GLuint width:3;		/* src only, align1 only */
+   GLuint hstride:2;   		/* src only, align1 only */
+   GLuint address_mode:1;	/* relative addressing, hopefully! */
+   GLuint pad0:1;
+
+   union {      
+      struct {
+	 GLuint swizzle:8;		/* src only, align16 only */
+	 GLuint writemask:4;		/* dest only, align16 only */
+	 GLint  indirect_offset:10;	/* relative addressing offset */
+	 GLuint pad1:10;		/* two dwords total */
+      } bits;
+
+      GLfloat f;		/* immediate value, see brw_imm_f() */
+      GLint   d;		/* immediate value, see brw_imm_d() / brw_imm_w() */
+      GLuint ud;		/* immediate value, see brw_imm_ud() / brw_imm_uw() */
+   } dw1;      
+};
+
+/* NOTE(review): unused in this part of the file; presumably an address sub-register plus offset for indirect addressing - confirm. */
+struct brw_indirect {
+   GLuint addr_subnr:4;
+   GLint addr_offset:10;
+   GLuint pad:18;
+};
+
+/* Depth of the push/pop state stack and capacity of the instruction store: */
+#define BRW_EU_MAX_INSN_STACK 5
+#define BRW_EU_MAX_INSN 1200
+
+struct brw_compile {
+   struct brw_instruction store[BRW_EU_MAX_INSN];	/* returned by brw_get_program() */
+   GLuint nr_insn;
+
+   /* Allow clients to push/pop instruction state:
+    */
+   struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
+   struct brw_instruction *current;	/* entry of stack[] written by the brw_set_*() calls */
+
+   GLuint flag_value;			/* last value brw_set_predicate_control_flag_value() moved to the flag reg */
+   GLboolean single_program_flow;
+   struct brw_context *brw;
+};
+
+
+
+static __inline int type_sz( GLuint type )
+{
+   /* Size in bytes of one element of a BRW_REGISTER_TYPE_*; 0 if unknown. */
+   if (type == BRW_REGISTER_TYPE_UD ||
+       type == BRW_REGISTER_TYPE_D ||
+       type == BRW_REGISTER_TYPE_F)
+      return 4;
+
+   if (type == BRW_REGISTER_TYPE_HF ||
+       type == BRW_REGISTER_TYPE_UW ||
+       type == BRW_REGISTER_TYPE_W)
+      return 2;
+
+   if (type == BRW_REGISTER_TYPE_UB ||
+       type == BRW_REGISTER_TYPE_B)
+      return 1;
+   return 0;
+}
+
+static __inline struct brw_reg brw_reg( GLuint file,
+					GLuint nr,
+					GLuint subnr,
+					GLuint type,
+					GLuint vstride,
+					GLuint width,
+					GLuint hstride,
+					GLuint swizzle,
+					GLuint writemask)
+{
+   /* Direct-addressed register; 'subnr' is given in elements of 'type' and stored in bytes. */
+   struct brw_reg r;
+   r.file = file;
+   r.nr = nr;
+   r.type = type;
+   r.subnr = subnr * type_sz(type);
+   r.vstride = vstride;
+   r.width = width;
+   r.hstride = hstride;
+   r.address_mode = BRW_ADDRESS_DIRECT;
+   r.negate = 0;
+   r.abs = 0;
+   r.pad0 = 0;
+
+   /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
+    * set swizzle and writemask to W, as the lower bits of subnr will
+    * be lost when converted to align16.  This is probably too much to
+    * keep track of as you'd want it adjusted by suboffset(), etc.
+    * Perhaps fix up when converting to align16?
+    */
+   r.dw1.bits.swizzle = swizzle;
+   r.dw1.bits.writemask = writemask;
+   r.dw1.bits.indirect_offset = 0;
+   r.dw1.bits.pad1 = 0;
+   return r;
+}
+
+static __inline struct brw_reg brw_vec16_reg( GLuint file,
+					      GLuint nr,
+					      GLuint subnr )
+{
+   /* Float register described with vertical stride 16, width 16 and
+    * horizontal stride 1.  Swizzle and writemask cover
+    * XYZW.
+    */
+   return brw_reg(file, nr, subnr,
+		  BRW_REGISTER_TYPE_F,
+		  BRW_VERTICAL_STRIDE_16, BRW_WIDTH_16, BRW_HORIZONTAL_STRIDE_1,
+		  BRW_SWIZZLE_XYZW,
+		  WRITEMASK_XYZW);
+}
+
+static __inline struct brw_reg brw_vec8_reg( GLuint file,
+					     GLuint nr,
+					     GLuint subnr )
+{
+   /* Float register described with vertical stride 8, width 8 and
+    * horizontal stride 1.  Swizzle and writemask cover
+    * XYZW.
+    */
+   return brw_reg(file, nr, subnr,
+		  BRW_REGISTER_TYPE_F,
+		  BRW_VERTICAL_STRIDE_8, BRW_WIDTH_8, BRW_HORIZONTAL_STRIDE_1,
+		  BRW_SWIZZLE_XYZW,
+		  WRITEMASK_XYZW);
+}
+
+
+static __inline struct brw_reg brw_vec4_reg( GLuint file,
+					      GLuint nr,
+					      GLuint subnr )
+{
+   /* Float register described with vertical stride 4, width 4 and
+    * horizontal stride 1.  Swizzle and writemask cover
+    * XYZW.
+    */
+   return brw_reg(file, nr, subnr,
+		  BRW_REGISTER_TYPE_F,
+		  BRW_VERTICAL_STRIDE_4, BRW_WIDTH_4, BRW_HORIZONTAL_STRIDE_1,
+		  BRW_SWIZZLE_XYZW,
+		  WRITEMASK_XYZW);
+}
+
+
+static __inline struct brw_reg brw_vec2_reg( GLuint file,
+					      GLuint nr,
+					      GLuint subnr )
+{
+   /* Float register described with vertical stride 2, width 2 and
+    * horizontal stride 1.  Swizzle is XYXY and the writemask covers
+    * XY.
+    */
+   return brw_reg(file, nr, subnr,
+		  BRW_REGISTER_TYPE_F,
+		  BRW_VERTICAL_STRIDE_2, BRW_WIDTH_2, BRW_HORIZONTAL_STRIDE_1,
+		  BRW_SWIZZLE_XYXY,
+		  WRITEMASK_XY);
+}
+
+static __inline struct brw_reg brw_vec1_reg( GLuint file,
+					     GLuint nr,
+					     GLuint subnr )
+{
+   /* Float register described with vertical stride 0, width 1 and
+    * horizontal stride 0.  Swizzle is XXXX and the writemask covers
+    * X.
+    */
+   return brw_reg(file, nr, subnr,
+		  BRW_REGISTER_TYPE_F,
+		  BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1, BRW_HORIZONTAL_STRIDE_0,
+		  BRW_SWIZZLE_XXXX,
+		  WRITEMASK_X);
+}
+
+/* Copy of 'reg' with its type field replaced; no other field (including subnr) is adjusted. */
+static __inline struct brw_reg retype( struct brw_reg reg,
+				       GLuint type )
+{
+   reg.type = type;
+   return reg;
+}
+/* Copy of 'reg' with subnr advanced by 'delta' elements of the register's current type. */
+static __inline struct brw_reg suboffset( struct brw_reg reg,
+					  GLuint delta )
+{   
+   reg.subnr += delta * type_sz(reg.type);
+   return reg;
+}
+
+/* Copy of 'reg' with its register number advanced by 'delta'. */
+static __inline struct brw_reg offset( struct brw_reg reg,
+				       GLuint delta )
+{
+   reg.nr += delta;
+   return reg;
+}
+
+/* Copy of 'reg' advanced by 'bytes' bytes; nr and subnr are recomputed from the total byte position. */
+static __inline struct brw_reg byte_offset( struct brw_reg reg,
+					    GLuint bytes )
+{
+   const GLuint total = reg.nr * REG_SIZE + reg.subnr + bytes;
+   reg.subnr = total % REG_SIZE;
+   reg.nr = total / REG_SIZE;
+   return reg;
+}
+   
+/* brw_vec16_reg() retyped to UW, then advanced by 'subnr' UW elements. */
+static __inline struct brw_reg brw_uw16_reg( GLuint file, GLuint nr,
+					     GLuint subnr )
+{
+   struct brw_reg uw = retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW);
+   return suboffset(uw, subnr);
+}
+/* brw_vec8_reg() retyped to UW, then advanced by 'subnr' UW elements. */
+static __inline struct brw_reg brw_uw8_reg( GLuint file, GLuint nr,
+					    GLuint subnr )
+{
+   struct brw_reg uw = retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW);
+   return suboffset(uw, subnr);
+}
+
+/* Single element retyped to UW; subnr goes through suboffset(), so it counts UW elements. */
+static __inline struct brw_reg brw_uw1_reg( GLuint file, GLuint nr, GLuint subnr )
+{
+   struct brw_reg uw = retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW);
+   return suboffset(uw, subnr);
+}
+
+/* Common base for the brw_imm_* constructors: an immediate-file
+ * register of the given type with a <0;1,0> region and zero passed for
+ * the last two brw_reg() arguments.  Callers fill in dw1 afterwards.
+ */
+static __inline struct brw_reg brw_imm_reg( GLuint type )
+{
+   struct brw_reg imm = brw_reg(BRW_IMMEDIATE_VALUE, 0, 0, type,
+                                BRW_VERTICAL_STRIDE_0,
+                                BRW_WIDTH_1,
+                                BRW_HORIZONTAL_STRIDE_0, 0, 0);
+   return imm;
+}
+
+static __inline struct brw_reg brw_imm_f( GLfloat f )
+{
+   struct brw_reg value = brw_imm_reg(BRW_REGISTER_TYPE_F);
+   value.dw1.f = f;   /* float immediate: payload stored through dw1.f */
+   return value;
+}
+
+static __inline struct brw_reg brw_imm_d( GLint d )
+{
+   struct brw_reg value = brw_imm_reg(BRW_REGISTER_TYPE_D);
+   value.dw1.d = d;   /* signed dword immediate: payload stored through dw1.d */
+   return value;
+}
+
+static __inline struct brw_reg brw_imm_ud( GLuint ud )
+{
+   struct brw_reg value = brw_imm_reg(BRW_REGISTER_TYPE_UD);
+   value.dw1.ud = ud;   /* unsigned dword immediate: payload stored through dw1.ud */
+   return value;
+}
+
+static __inline struct brw_reg brw_imm_uw( GLushort uw )
+{
+   struct brw_reg value = brw_imm_reg(BRW_REGISTER_TYPE_UW);
+   value.dw1.ud = uw;   /* UW immediate: the 16-bit value is widened into dw1.ud */
+   return value;
+}
+
+static __inline struct brw_reg brw_imm_w( GLshort w )
+{
+   struct brw_reg value = brw_imm_reg(BRW_REGISTER_TYPE_W);
+   value.dw1.d = w;   /* W immediate: the 16-bit value is widened into the signed dw1.d */
+   return value;
+}
+
+/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
+ * numbers alias with _V and _VF below:
+ */
+
+/* Packed-vector immediate of type V: eight signed half-byte values
+ * carried in the 32 bits of v, described as a <0;8,1> region. */
+static __inline struct brw_reg brw_imm_v( GLuint v )
+{
+   struct brw_reg packed = brw_imm_reg(BRW_REGISTER_TYPE_V);
+   packed.dw1.ud = v;
+   packed.hstride = BRW_HORIZONTAL_STRIDE_1;
+   packed.width = BRW_WIDTH_8;
+   packed.vstride = BRW_VERTICAL_STRIDE_0;
+   return packed;
+}
+
+/* Packed-vector immediate of type VF: four 8-bit float values carried
+ * in the 32 bits of v, described as a <0;4,1> region. */
+static __inline struct brw_reg brw_imm_vf( GLuint v )
+{
+   struct brw_reg packed = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+   packed.dw1.ud = v;
+   packed.hstride = BRW_HORIZONTAL_STRIDE_1;
+   packed.width = BRW_WIDTH_4;
+   packed.vstride = BRW_VERTICAL_STRIDE_0;
+   return packed;
+}
+
+#define VF_ZERO 0x0	/* VF byte for zero (one of the four bytes taken by brw_imm_vf4) */
+#define VF_ONE  0x30	/* VF byte for one */
+#define VF_NEG  (1<<7)	/* bit 7; by its name the sign bit to OR into a VF byte - confirm */
+
+/* VF immediate built from four separate 8-bit encodings: v0 lands in
+ * bits 0-7, v1 in bits 8-15, v2 in bits 16-23 and v3 in bits 24-31 of
+ * dw1.ud.  The arguments are not masked before being combined.
+ */
+static __inline struct brw_reg brw_imm_vf4( GLuint v0, GLuint v1,
+                                            GLuint v2, GLuint v3)
+{
+   const GLuint packed = (v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24);
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+   imm.vstride = BRW_VERTICAL_STRIDE_0;
+   imm.width = BRW_WIDTH_4;
+   imm.hstride = BRW_HORIZONTAL_STRIDE_1;
+   imm.dw1.ud = packed;
+   return imm;
+}
+
+
+static __inline struct brw_reg brw_address( struct brw_reg reg )
+{
+   return brw_imm_uw(reg.subnr + reg.nr * REG_SIZE);   /* reg's position (nr * REG_SIZE + subnr) as a UW immediate */
+}
+
+
+/* brw_vec1_reg() bound to the general register file. */
+static __inline struct brw_reg brw_vec1_grf( GLuint nr, GLuint subnr )
+{
+   return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+/* brw_vec8_reg() bound to the general register file. */
+static __inline struct brw_reg brw_vec8_grf( GLuint nr, GLuint subnr )
+{
+   return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+/* brw_vec4_reg() bound to the general register file. */
+static __inline struct brw_reg brw_vec4_grf( GLuint nr, GLuint subnr )
+{
+   return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+
+/* brw_vec2_reg() bound to the general register file. */
+static __inline struct brw_reg brw_vec2_grf( GLuint nr, GLuint subnr )
+{
+   return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+/* brw_uw8_reg() bound to the general register file. */
+static __inline struct brw_reg brw_uw8_grf( GLuint nr, GLuint subnr )
+{
+   return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+/* The ARF null register (BRW_ARF_NULL), described as an 8-wide float
+ * vector through brw_vec8_reg(). */
+static __inline struct brw_reg brw_null_reg( void )
+{
+   return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0);
+}
+
+/* The ARF address register as a single UW element; subnr is applied
+ * by brw_uw1_reg(), i.e. counted in UW elements. */
+static __inline struct brw_reg brw_address_reg( GLuint subnr )
+{
+   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ADDRESS, subnr);
+}
+
+/* Instruction-pointer register in the ARF, typed UD.
+ *
+ * If/else instructions break in align16 mode if writemask & swizzle
+ * aren't xyzw, so unlike the other scalar registers here this one is
+ * created with the full XYZW swizzle and writemask.
+ */
+static __inline struct brw_reg brw_ip_reg( void )
+{
+   struct brw_reg ip = brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_IP, 0,
+                               BRW_REGISTER_TYPE_UD,
+                               BRW_VERTICAL_STRIDE_4, /* ? */
+                               BRW_WIDTH_1,
+                               BRW_HORIZONTAL_STRIDE_0,
+                               BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); /* NOTE! */
+   return ip;
+}
+
+/* The ARF accumulator (BRW_ARF_ACCUMULATOR), described as an 8-wide
+ * float vector through brw_vec8_reg(). */
+static __inline struct brw_reg brw_acc_reg( void )
+{
+   return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ACCUMULATOR, 0);
+}
+
+
+/* The ARF flag register (BRW_ARF_FLAG) as a single UW element at
+ * sub-register 0. */
+static __inline struct brw_reg brw_flag_reg( void )
+{
+   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_FLAG, 0);
+}
+
+
+/* The ARF mask register (BRW_ARF_MASK) as a single UW element; subnr
+ * is applied by brw_uw1_reg(), i.e. counted in UW elements. */
+static __inline struct brw_reg brw_mask_reg( GLuint subnr )
+{
+   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_MASK, subnr);
+}
+
+/* Message register nr, described as an 8-wide float vector starting
+ * at sub-register 0. */
+static __inline struct brw_reg brw_message_reg( GLuint nr )
+{
+   return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
+}
+
+
+
+
+/* Map an element count (0, 1, 2, 4, 8, 16 or 32) to the encoding that
+ * stride() stores in the region fields: 0 -> 0 and 2^k -> k + 1.  Any
+ * other input yields 0.  This is almost always called with a numeric
+ * constant, so it is kept trivial to evaluate at compile time.
+ */
+static __inline GLuint cvt( GLuint val )
+{
+   if (val == 1)  return 1;
+   if (val == 2)  return 2;
+   if (val == 4)  return 3;
+   if (val == 8)  return 4;
+   if (val == 16) return 5;
+   if (val == 32) return 6;
+
+   return 0;   /* val == 0 and every unlisted value */
+}
+
+/* Re-describe reg's region from plain element counts: vstride and
+ * hstride are stored as cvt(count), width as cvt(count) - 1.
+ */
+static __inline struct brw_reg stride( struct brw_reg reg, GLuint vstride,
+                                       GLuint width, GLuint hstride )
+{
+   reg.hstride = cvt(hstride);
+   reg.width = cvt(width) - 1;
+   reg.vstride = cvt(vstride);
+   return reg;
+}
+
+static __inline struct brw_reg vec16( struct brw_reg reg )
+{
+   return stride(reg, 16, 16, 1);   /* region <16;16,1> */
+}
+
+static __inline struct brw_reg vec8( struct brw_reg reg )
+{
+   return stride(reg, 8, 8, 1);   /* region <8;8,1> */
+}
+
+static __inline struct brw_reg vec4( struct brw_reg reg )
+{
+   return stride(reg, 4, 4, 1);   /* region <4;4,1> */
+}
+
+static __inline struct brw_reg vec2( struct brw_reg reg )
+{
+   return stride(reg, 2, 2, 1);   /* region <2;2,1> */
+}
+
+static __inline struct brw_reg vec1( struct brw_reg reg )
+{
+   return stride(reg, 0, 1, 0);   /* scalar region <0;1,0> */
+}
+
+static __inline struct brw_reg get_element( struct brw_reg reg, GLuint elt )
+{
+   return vec1(suboffset(reg, elt));   /* scalar view of element elt, in reg's own type */
+}
+
+static __inline struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt )
+{
+   return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt));   /* retype first, so elt counts UD elements */
+}
+
+
+/* Apply a swizzle on top of reg's existing one: each of x/y/z/w is
+ * looked up through the current swizzle with BRW_GET_SWZ and the four
+ * results are repacked with BRW_SWIZZLE4. */
+static __inline struct brw_reg brw_swizzle( struct brw_reg reg, GLuint x, GLuint y,
+                                            GLuint z, GLuint w)
+{
+   const GLuint old = reg.dw1.bits.swizzle;
+
+   reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(old, x), BRW_GET_SWZ(old, y),
+                                       BRW_GET_SWZ(old, z), BRW_GET_SWZ(old, w));
+   return reg;
+}
+
+
+/* Replicate channel x into all four swizzle positions. */
+static __inline struct brw_reg brw_swizzle1( struct brw_reg reg, GLuint x )
+{
+   return brw_swizzle(reg, x, x, x, x);
+}
+
+/* Narrow reg's writemask: the result is the old mask AND-ed with mask. */
+static __inline struct brw_reg brw_writemask( struct brw_reg reg, GLuint mask )
+{
+   reg.dw1.bits.writemask = reg.dw1.bits.writemask & mask;
+   return reg;
+}
+
+/* Replace reg's writemask outright (compare brw_writemask(), which ANDs). */
+static __inline struct brw_reg brw_set_writemask( struct brw_reg reg, GLuint mask )
+{
+   reg.dw1.bits.writemask = mask;
+   return reg;
+}
+
+static __inline struct brw_reg negate( struct brw_reg reg )
+{
+   reg.negate = reg.negate ^ 1;   /* toggles the flag rather than setting it */
+   return reg;
+}
+
+static __inline struct brw_reg brw_abs( struct brw_reg reg )
+{
+   reg.abs = 1;   /* sets the abs flag; any negate flag on reg is left as it was */
+   return reg;
+}
+
+/***********************************************************************
+ */
+/* Four-float GRF operand switched to register-indirect addressing (subnr, offset). */
+static __inline struct brw_reg brw_vec4_indirect( GLuint subnr, GLint offset )
+{
+   struct brw_reg ind = brw_vec4_grf(0, 0);
+   ind.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+   ind.dw1.bits.indirect_offset = offset;
+   ind.subnr = subnr;
+   return ind;
+}
+
+/* Single-float GRF operand switched to register-indirect addressing (subnr, offset). */
+static __inline struct brw_reg brw_vec1_indirect( GLuint subnr, GLint offset )
+{
+   struct brw_reg ind = brw_vec1_grf(0, 0);
+   ind.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+   ind.dw1.bits.indirect_offset = offset;
+   ind.subnr = subnr;
+   return ind;
+}
+
+static __inline struct brw_reg deref_4f(struct brw_indirect ptr, GLint offset)
+{
+   return brw_vec4_indirect(ptr.addr_subnr, offset + ptr.addr_offset);   /* offset is added to ptr's own */
+}
+
+static __inline struct brw_reg deref_1f(struct brw_indirect ptr, GLint offset)
+{
+   return brw_vec1_indirect(ptr.addr_subnr, offset + ptr.addr_offset);   /* offset is added to ptr's own */
+}
+
+static __inline struct brw_reg deref_4b(struct brw_indirect ptr, GLint offset)
+{
+   return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);   /* the vec4 region is kept, only the type becomes B */
+}
+
+static __inline struct brw_reg deref_1uw(struct brw_indirect ptr, GLint offset)
+{
+   return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);   /* scalar region kept, only the type becomes UW */
+}
+
+static __inline struct brw_reg get_addr_reg(struct brw_indirect ptr)
+{
+   return brw_address_reg(ptr.addr_subnr);   /* ptr.addr_offset plays no part here */
+}
+
+static __inline struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, GLint offset )
+{
+   ptr.addr_offset = ptr.addr_offset + offset;   /* same address sub-register, shifted offset */
+   return ptr;
+}
+
+static __inline struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset )
+{
+   struct brw_indirect handle;
+   handle.pad = 0;   /* pad is zeroed explicitly */
+   handle.addr_offset = offset;
+   handle.addr_subnr = addr_subnr;
+   return handle;
+}
+
+
+/* Instruction-state push/pop and per-field setters, then compile setup and program retrieval (prototypes only). */
+void brw_pop_insn_state(struct brw_compile *p);
+void brw_push_insn_state(struct brw_compile *p);
+void brw_set_mask_control(struct brw_compile *p, GLuint value);
+void brw_set_saturate(struct brw_compile *p, GLuint value);
+void brw_set_access_mode(struct brw_compile *p, GLuint access_mode);
+void brw_set_compression_control(struct brw_compile *p, GLboolean control);
+void brw_set_predicate_control_flag_value(struct brw_compile *p, GLuint value);
+void brw_set_predicate_control(struct brw_compile *p, GLuint pc);
+void brw_set_conditionalmod(struct brw_compile *p, GLuint conditional);
+
+void brw_init_compile(struct brw_context *, struct brw_compile *p);
+const GLuint *brw_get_program(struct brw_compile *p, GLuint *sz);
+
+
+/* Helpers for regular instructions.
+ *
+ * ALU1(OP) declares brw_OP(p, dest, src0) and ALU2(OP) declares
+ * brw_OP(p, dest, src0, src1); each returns a struct brw_instruction
+ * pointer.  Both macros are #undef'd again right after the list, so
+ * they do not leak out of this header.
+ */
+#define ALU1(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, struct brw_reg dest, struct brw_reg src0);
+
+#define ALU2(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, struct brw_reg dest, struct brw_reg src0, struct brw_reg src1);
+
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU2(JMPI)
+ALU2(ADD)
+ALU2(MUL)
+ALU1(FRC)
+ALU1(RNDD)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+
+#undef ALU1
+#undef ALU2
+
+
+
+/* Helpers for SEND instruction:
+ */
+
+/* URB write.  The trailing arguments mirror the descriptor fields
+ * written by brw_set_urb_message() in brw_eu_emit.c (presumably they
+ * are forwarded there - confirm in the definition).
+ */
+void brw_urb_WRITE(struct brw_compile *p,
+                   struct brw_reg dest,
+                   GLuint msg_reg_nr, struct brw_reg src0,
+                   GLboolean allocate, GLboolean used,
+                   GLuint msg_length, GLuint response_length,
+                   GLboolean eot, GLboolean writes_complete,
+                   GLuint offset, GLuint swizzle);
+
+/* Presumably the framebuffer write (going by the name); body not visible in this header. */
+void brw_fb_WRITE(struct brw_compile *p,
+                  struct brw_reg dest,
+                  GLuint msg_reg_nr,
+                  struct brw_reg src0,
+                  GLuint binding_table_index,
+                  GLuint msg_length, GLuint response_length,
+                  GLboolean eot);
+
+/* Sampler message.  NOTE: response_length comes BEFORE msg_length
+ * here, the reverse of brw_urb_WRITE() and brw_fb_WRITE().
+ */
+void brw_SAMPLE(struct brw_compile *p,
+                struct brw_reg dest, GLuint msg_reg_nr, struct brw_reg src0,
+                GLuint binding_table_index,
+                GLuint sampler,
+                GLuint writemask,
+                GLuint msg_type,
+                GLuint response_length, GLuint msg_length,
+                GLboolean eot);
+
+/* Same parameter list as brw_math() minus the data_type argument. */
+void brw_math_16( struct brw_compile *p,
+                  struct brw_reg dest,
+                  GLuint function, GLuint saturate,
+                  GLuint msg_reg_nr,
+                  struct brw_reg src,
+                  GLuint precision );
+
+/* Math message helper; unlike brw_math_16() it takes an explicit data_type. */
+void brw_math( struct brw_compile *p,
+               struct brw_reg dest,
+               GLuint function, GLuint saturate,
+               GLuint msg_reg_nr,
+               struct brw_reg src,
+               GLuint data_type,
+               GLuint precision );
+
+/* Data-port read into dest at scratch_offset (units not visible here - confirm). */
+void brw_dp_READ_16( struct brw_compile *p,
+                     struct brw_reg dest,
+                     GLuint msg_reg_nr, GLuint scratch_offset );
+
+/* Data-port write of src at scratch_offset (units not visible here - confirm). */
+void brw_dp_WRITE_16( struct brw_compile *p,
+                      struct brw_reg src,
+                      GLuint msg_reg_nr, GLuint scratch_offset );
+
+/* If/else/endif.  Works by manipulating the execution flags on each
+ * channel.  brw_IF() and brw_ELSE() return an instruction pointer;
+ * going by the parameter names, that pointer is what the following
+ * brw_ELSE() / brw_ENDIF() call expects.
+ */
+struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size);
+
+struct brw_instruction *brw_ELSE(struct brw_compile *p,
+                                 struct brw_instruction *if_insn);
+
+void brw_ENDIF(struct brw_compile *p, struct brw_instruction *if_or_else_insn);
+
+
+/* DO/WHILE loops: brw_DO() returns an instruction pointer and
+ * brw_WHILE() takes one named patch_insn (presumably that same handle).
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p,
+                               GLuint execute_size);
+
+void brw_WHILE(struct brw_compile *p, struct brw_instruction *patch_insn);
+
+/* Forward jumps: jmp_insn is a previously emitted instruction
+ * (presumably the jump that should land at the current position). */
+void brw_land_fwd_jump(struct brw_compile *p,
+                       struct brw_instruction *jmp_insn);
+
+
+
+void brw_NOP(struct brw_compile *p);	/* emits one BRW_OPCODE_NOP instruction; defined in brw_eu_emit.c */
+
+/* Special case: there is never a destination, execution size will be
+ * taken from src0.  NOTE(review): a dest argument is still declared;
+ * check the definition for how it is used.
+ */
+void brw_CMP(struct brw_compile *p,
+             struct brw_reg dest,
+             GLuint conditional,
+             struct brw_reg src0, struct brw_reg src1);
+
+void brw_print_reg( struct brw_reg reg );	/* debug print via _mesa_printf; defined in brw_eu_debug.c */
+
+
+/*********************************************************************** 
+ * brw_eu_util.c:
+ */
+
+/* Declarations for the helpers in brw_eu_util.c.  The unit of count
+ * is not visible from these prototypes - check the definitions before
+ * relying on it.
+ */
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+                                   struct brw_indirect dst_ptr, struct brw_indirect src_ptr,
+                                   GLuint count);
+
+void brw_copy_from_indirect(struct brw_compile *p,
+                            struct brw_reg dst, struct brw_indirect ptr,
+                            GLuint count);
+
+void brw_copy4(struct brw_compile *p,
+               struct brw_reg dst, struct brw_reg src,
+               GLuint count);
+
+void brw_copy8(struct brw_compile *p,
+               struct brw_reg dst, struct brw_reg src,
+               GLuint count);
+
+void brw_math_invert( struct brw_compile *p,
+                      struct brw_reg dst,
+                      struct brw_reg src);
+
+
+#endif
diff --git a/i965/brw_eu_debug.c b/i965/brw_eu_debug.c
new file mode 100644
index 0000000..2dff1ad
--- /dev/null
+++ b/i965/brw_eu_debug.c
@@ -0,0 +1,90 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+    
+
+#include "mtypes.h"
+#include "brw_eu.h"
+#include "imports.h"
+
+/* Print a compact description of hwreg through _mesa_printf(); no
+ * newline is emitted.  An "abs/" and/or "-" prefix comes first when
+ * the corresponding modifier is set, followed by one of three forms:
+ *
+ *   vecN    GRF float <8;8,1> region at subnr 0 of an even register
+ *   sclN.M  GRF float <0;1,0> scalar, M being subnr / 4
+ *   file nr.subnr<v;w,h>:type for everything else
+ */
+void brw_print_reg( struct brw_reg hwreg )
+{
+   static const char *file[] = { "arf", "grf", "msg", "imm" };
+   static const char *type[] = {
+      "ud", "d", "uw", "w", "ub", "vf", "hf", "f"
+   };
+   const int grf_float = (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
+                          hwreg.type == BRW_REGISTER_TYPE_F);
+
+   _mesa_printf("%s%s",
+                hwreg.abs ? "abs/" : "",
+                hwreg.negate ? "-" : "");
+
+   if (grf_float &&
+       hwreg.nr % 2 == 0 &&
+       hwreg.subnr == 0 &&
+       hwreg.vstride == BRW_VERTICAL_STRIDE_8 &&
+       hwreg.width == BRW_WIDTH_8 &&
+       hwreg.hstride == BRW_HORIZONTAL_STRIDE_1) {
+      _mesa_printf("vec%d", hwreg.nr);
+      return;
+   }
+
+   if (grf_float &&
+       hwreg.vstride == BRW_VERTICAL_STRIDE_0 &&
+       hwreg.width == BRW_WIDTH_1 &&
+       hwreg.hstride == BRW_HORIZONTAL_STRIDE_0) {
+      _mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
+      return;
+   }
+
+   /* Generic form: a non-zero stride field f prints as 1 << (f - 1),
+    * the width field w as 1 << w, and subnr is divided by the type size.
+    */
+   _mesa_printf("%s%d.%d<%d;%d,%d>:%s",
+                file[hwreg.file],
+                hwreg.nr,
+                hwreg.subnr / type_sz(hwreg.type),
+                hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0,
+                1<<hwreg.width,
+                hwreg.hstride ? (1<<(hwreg.hstride-1)) : 0,
+                type[hwreg.type]);
+}
+
+
+
diff --git a/i965/brw_eu_emit.c b/i965/brw_eu_emit.c
new file mode 100644
index 0000000..1c717e4
--- /dev/null
+++ b/i965/brw_eu_emit.c
@@ -0,0 +1,1059 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+     
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+
+
+/***********************************************************************
+ * Internal helper for constructing instructions
+ */
+
+/* Derive execution_size from reg.width (the BRW_WIDTH_* and BRW_EXECUTE_*
+ * definitions are compatible); width 8 under compression becomes 16. */
+static void guess_execution_size( struct brw_instruction *insn, struct brw_reg reg )
+{
+   const int compressed =
+      insn->header.compression_control == BRW_COMPRESSION_COMPRESSED;
+   insn->header.execution_size =
+      (compressed && reg.width == BRW_WIDTH_8) ? BRW_EXECUTE_16 : reg.width;
+}
+
+
+/* Encode dest into the destination fields of insn (bits1), then set
+ * insn's execution size from dest.width via guess_execution_size().
+ *
+ * Direct operands store nr plus, in align1, subnr with a horizontal
+ * stride of 1, or, in align16, subnr / 16 and the writemask.  Indirect
+ * operands store subnr and the indirect offset; the offset field
+ * differs in size between align1 and align16, hence the separate
+ * ia1 / ia16 assignments.
+ */
+static void brw_set_dest( struct brw_instruction *insn, struct brw_reg dest )
+{
+   const int align1 = (insn->header.access_mode == BRW_ALIGN_1);
+   const int direct = (dest.address_mode == BRW_ADDRESS_DIRECT);
+
+   insn->bits1.da1.dest_reg_file = dest.file;
+   insn->bits1.da1.dest_reg_type = dest.type;
+   insn->bits1.da1.dest_address_mode = dest.address_mode;
+
+   if (direct)
+      insn->bits1.da1.dest_reg_nr = dest.nr;
+   else
+      insn->bits1.ia1.dest_subreg_nr = dest.subnr;
+
+   if (direct && align1) {
+      insn->bits1.da1.dest_subreg_nr = dest.subnr;
+      insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
+   } else if (direct) {
+      insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
+      insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
+   } else if (align1) {
+      insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+      insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
+   } else {
+      insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+   }
+
+   guess_execution_size(insn, dest);
+}
+
+/* Encode reg as source operand 0 of insn.  An immediate stores its
+ * value in bits3 and mirrors its type into the src1 type field; a
+ * register operand fills bits2 according to its address mode and the
+ * instruction's access mode.  reg must not be a message register.
+ */
+static void brw_set_src0( struct brw_instruction *insn, struct brw_reg reg )
+{
+   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+
+   insn->bits1.da1.src0_reg_file = reg.file;
+   insn->bits1.da1.src0_reg_type = reg.type;
+   insn->bits2.da1.src0_abs = reg.abs;
+   insn->bits2.da1.src0_negate = reg.negate;
+   insn->bits2.da1.src0_address_mode = reg.address_mode;
+
+   if (reg.file == BRW_IMMEDIATE_VALUE) {
+      insn->bits3.ud = reg.dw1.ud;
+
+      /* Required to set some fields in src1 as well:
+       */
+      insn->bits1.da1.src1_reg_file = 0; /* arf */
+      insn->bits1.da1.src1_reg_type = reg.type;
+   } else {
+      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
+         if (insn->header.access_mode == BRW_ALIGN_1) {
+            insn->bits2.da1.src0_subreg_nr = reg.subnr;
+            insn->bits2.da1.src0_reg_nr = reg.nr;
+         } else {
+            insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
+            insn->bits2.da16.src0_reg_nr = reg.nr;
+         }
+      } else {
+         insn->bits2.ia1.src0_subreg_nr = reg.subnr;
+
+         /* The offset field differs in size between align1 and align16.
+          * It belongs in src0_indirect_offset in both modes (as in
+          * brw_set_dest), never in a subreg_nr field. */
+         if (insn->header.access_mode == BRW_ALIGN_1) {
+            insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
+         } else {
+            insn->bits2.ia16.src0_indirect_offset = reg.dw1.bits.indirect_offset;
+         }
+      }
+
+      if (insn->header.access_mode == BRW_ALIGN_1) {
+         if (reg.width == BRW_WIDTH_1 &&
+             insn->header.execution_size == BRW_EXECUTE_1) {
+            insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+            insn->bits2.da1.src0_width = BRW_WIDTH_1;
+            insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
+         } else {
+            insn->bits2.da1.src0_horiz_stride = reg.hstride;
+            insn->bits2.da1.src0_width = reg.width;
+            insn->bits2.da1.src0_vert_stride = reg.vstride;
+         }
+      } else {
+         insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+         insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+         insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+         insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+         /* This is an oddity of the fact we're using the same
+          * descriptions for registers in align_16 as align_1:
+          */
+         if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+            insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
+         else
+            insn->bits2.da16.src0_vert_stride = reg.vstride;
+      }
+   }
+}
+
+
+/* Encode reg as source operand 1 of insn: file and type go to bits1,
+ * everything else to bits3.
+ *
+ * An immediate just fills bits3.ud.  A register operand must be a
+ * directly addressed GRF (asserted).  In align1 its region is written,
+ * collapsed to <0;1,0> when both reg.width and the execution size are
+ * 1; in align16 its swizzle and vertical stride are written instead.
+ */
+static void brw_set_src1( struct brw_instruction *insn, struct brw_reg reg )
+{
+   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+
+   insn->bits1.da1.src1_reg_file = reg.file;
+   insn->bits1.da1.src1_reg_type = reg.type;
+   insn->bits3.da1.src1_abs = reg.abs;
+   insn->bits3.da1.src1_negate = reg.negate;
+
+   /* Only src1 can be immediate in two-argument instructions.
+    */
+   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
+
+   if (reg.file == BRW_IMMEDIATE_VALUE) {
+      insn->bits3.ud = reg.dw1.ud;
+      return;
+   }
+
+   /* This is a hardware restriction, which may or may not be lifted
+    * in the future:
+    */
+   assert (reg.address_mode == BRW_ADDRESS_DIRECT);
+   assert (reg.file == BRW_GENERAL_REGISTER_FILE);
+
+   if (insn->header.access_mode == BRW_ALIGN_1) {
+      const int scalar = (reg.width == BRW_WIDTH_1 &&
+                          insn->header.execution_size == BRW_EXECUTE_1);
+
+      insn->bits3.da1.src1_subreg_nr = reg.subnr;
+      insn->bits3.da1.src1_reg_nr = reg.nr;
+
+      insn->bits3.da1.src1_horiz_stride =
+         scalar ? BRW_HORIZONTAL_STRIDE_0 : reg.hstride;
+      insn->bits3.da1.src1_width =
+         scalar ? BRW_WIDTH_1 : reg.width;
+      insn->bits3.da1.src1_vert_stride =
+         scalar ? BRW_VERTICAL_STRIDE_0 : reg.vstride;
+      return;
+   }
+
+   insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
+   insn->bits3.da16.src1_reg_nr = reg.nr;
+
+   insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+   insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+   insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+   insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+   /* This is an oddity of the fact we're using the same
+    * descriptions for registers in align_16 as align_1:
+    */
+   insn->bits3.da16.src1_vert_stride =
+      (reg.vstride == BRW_VERTICAL_STRIDE_8) ? BRW_VERTICAL_STRIDE_4 : reg.vstride;
+}
+
+
+
+/* Fill bits3 of insn with a math message descriptor.  src1 is first
+ * set to a zero D immediate; end_of_thread is always written as 0.
+ */
+static void brw_set_math_message( struct brw_instruction *insn,
+                                  GLuint msg_length, GLuint response_length,
+                                  GLuint function, GLuint integer_type,
+                                  GLboolean low_precision, GLboolean saturate,
+                                  GLuint dataType )
+{
+   brw_set_src1(insn, brw_imm_d(0));
+
+   insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
+   insn->bits3.math.msg_length = msg_length;
+   insn->bits3.math.response_length = response_length;
+   insn->bits3.math.end_of_thread = 0;
+   insn->bits3.math.function = function;
+   insn->bits3.math.int_type = integer_type;
+   insn->bits3.math.precision = low_precision;
+   insn->bits3.math.saturate = saturate;
+   insn->bits3.math.data_type = dataType;
+}
+
+/* Fill bits3 of insn with a URB message descriptor.  src1 is first
+ * set to a zero D immediate; the opcode field is always written as 0
+ * (marked "?" by the original author, as is the used field).
+ */
+static void brw_set_urb_message( struct brw_instruction *insn,
+                                 GLboolean allocate, GLboolean used,
+                                 GLuint msg_length, GLuint response_length,
+                                 GLboolean end_of_thread, GLboolean complete,
+                                 GLuint offset, GLuint swizzle_control )
+{
+   brw_set_src1(insn, brw_imm_d(0));
+
+   insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
+   insn->bits3.urb.msg_length = msg_length;
+   insn->bits3.urb.response_length = response_length;
+   insn->bits3.urb.end_of_thread = end_of_thread;
+   insn->bits3.urb.opcode = 0;	/* ? */
+   insn->bits3.urb.offset = offset;
+   insn->bits3.urb.swizzle_control = swizzle_control;
+   insn->bits3.urb.allocate = allocate;
+   insn->bits3.urb.used = used;	/* ? */
+   insn->bits3.urb.complete = complete;
+}
+
+/* Fill bits3 of insn with a data-port write message descriptor.  src1
+ * is first set to a zero D immediate; send_commit_msg is always 0.
+ * Every field, end_of_thread included, goes through the dp_write view.
+ */
+static void brw_set_dp_write_message( struct brw_instruction *insn,
+                                      GLuint binding_table_index, GLuint msg_control, GLuint msg_type,
+                                      GLuint msg_length, GLuint pixel_scoreboard_clear,
+                                      GLuint response_length, GLuint end_of_thread )
+{
+   brw_set_src1(insn, brw_imm_d(0));
+
+   insn->bits3.dp_write.binding_table_index = binding_table_index;
+   insn->bits3.dp_write.msg_control = msg_control;
+   insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
+   insn->bits3.dp_write.msg_type = msg_type;
+   insn->bits3.dp_write.send_commit_msg = 0;
+   insn->bits3.dp_write.response_length = response_length;
+   insn->bits3.dp_write.msg_length = msg_length;
+   insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+   insn->bits3.dp_write.end_of_thread = end_of_thread;
+}
+
+/* Fill bits3 of insn with a data-port read message descriptor.  src1
+ * is first set to a zero D immediate.
+ */
+static void brw_set_dp_read_message( struct brw_instruction *insn,
+                                     GLuint binding_table_index, GLuint msg_control,
+                                     GLuint msg_type, GLuint target_cache,
+                                     GLuint msg_length, GLuint response_length,
+                                     GLuint end_of_thread )
+{
+   brw_set_src1(insn, brw_imm_d(0));
+
+   insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ;
+   insn->bits3.dp_read.msg_length = msg_length;
+   insn->bits3.dp_read.response_length = response_length;
+   insn->bits3.dp_read.end_of_thread = end_of_thread;
+   insn->bits3.dp_read.binding_table_index = binding_table_index;
+   insn->bits3.dp_read.msg_control = msg_control;
+   insn->bits3.dp_read.msg_type = msg_type;
+   insn->bits3.dp_read.target_cache = target_cache;
+}
+
+/* Fill bits3 of insn with a sampler message descriptor.  BRW_IS_IGD(brw)
+ * selects the sampler_igd layout, which gets no return_format; the
+ * other layout always asks for BRW_SAMPLER_RETURN_FORMAT_FLOAT32. */
+static void brw_set_sampler_message(struct brw_context *brw,
+                                    struct brw_instruction *insn,
+                                    GLuint binding_table_index, GLuint sampler, GLuint msg_type,
+                                    GLuint response_length, GLuint msg_length, GLboolean eot)
+{
+   brw_set_src1(insn, brw_imm_d(0));
+
+   if (BRW_IS_IGD(brw)) {
+      insn->bits3.sampler_igd.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
+      insn->bits3.sampler_igd.msg_length = msg_length;
+      insn->bits3.sampler_igd.response_length = response_length;
+      insn->bits3.sampler_igd.end_of_thread = eot;
+      insn->bits3.sampler_igd.binding_table_index = binding_table_index;
+      insn->bits3.sampler_igd.sampler = sampler;
+      insn->bits3.sampler_igd.msg_type = msg_type;
+      return;
+   }
+
+   insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
+   insn->bits3.sampler.msg_length = msg_length;
+   insn->bits3.sampler.response_length = response_length;
+   insn->bits3.sampler.end_of_thread = eot;
+   insn->bits3.sampler.binding_table_index = binding_table_index;
+   insn->bits3.sampler.sampler = sampler;
+   insn->bits3.sampler.msg_type = msg_type;
+   insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
+}
+
+
+
+/* Claim the next slot of p->store, fill it with a copy of p->current
+ * and stamp opcode into it.  A conditional modifier set on p->current
+ * is one-shot: after being copied it is cleared and p->current's
+ * predicate_control is set to BRW_PREDICATE_NORMAL.
+ */
+static struct brw_instruction *next_insn( struct brw_compile *p, GLuint opcode )
+{
+   struct brw_instruction *slot;
+
+   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
+
+   slot = &p->store[p->nr_insn++];
+   memcpy(slot, p->current, sizeof(*slot));
+
+   if (p->current->header.destreg__conditonalmod) {
+      p->current->header.destreg__conditonalmod = 0;
+      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+   }
+   slot->header.opcode = opcode;
+   return slot;
+}
+
+
+/* Emit a one-source instruction: take a slot from next_insn(), then
+ * encode dest (which also sets the execution size) and src. */
+static struct brw_instruction *brw_alu1( struct brw_compile *p, GLuint opcode,
+                                         struct brw_reg dest, struct brw_reg src )
+{
+   struct brw_instruction *emitted = next_insn(p, opcode);
+   brw_set_dest(emitted, dest);
+   brw_set_src0(emitted, src);
+   return emitted;
+}
+
+/* Emit a two-source instruction: take a slot from next_insn(), then
+ * encode dest (which also sets the execution size), src0 and src1, in
+ * that order. */
+static struct brw_instruction *brw_alu2(struct brw_compile *p, GLuint opcode,
+                                        struct brw_reg dest, struct brw_reg src0, struct brw_reg src1 )
+{
+   struct brw_instruction *emitted = next_insn(p, opcode);
+   brw_set_dest(emitted, dest);
+   brw_set_src0(emitted, src0);
+   brw_set_src1(emitted, src1);
+   return emitted;
+}
+
+
+/***********************************************************************
+ * Convenience routines.
+ *
+ * ALU1(OP) / ALU2(OP) define the public brw_OP() emitter as a thin
+ * wrapper around brw_alu1() / brw_alu2() with BRW_OPCODE_OP.
+ */
+#define ALU1(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+                                 struct brw_reg dest, struct brw_reg src0) \
+{ \
+   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
+}
+
+#define ALU2(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+                                 struct brw_reg dest, struct brw_reg src0, struct brw_reg src1) \
+{ \
+   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
+}
+
+
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU2(ADD)
+ALU2(MUL)
+ALU1(FRC)
+ALU1(RNDD)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+
+
+
+
+void brw_NOP(struct brw_compile *p)
+{
+   struct brw_instruction *nop = next_insn(p, BRW_OPCODE_NOP);
+   brw_set_dest(nop, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));   /* GRF 0 viewed as UD */
+   brw_set_src0(nop, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));   /* same register as dest */
+   brw_set_src1(nop, brw_imm_ud(0x0));                                   /* zero UD immediate */
+}
+
+
+
+
+
+/***********************************************************************
+ * Comparisons, if/else/endif
+ */
+
+/* Emit JMPI through brw_alu2(), then switch predicate_control off in
+ * p->current, the state next_insn() copies into each new instruction.
+ * The JMPI itself was copied before that reset.
+ */
+struct brw_instruction *brw_JMPI(struct brw_compile *p, struct brw_reg dest,
+                                 struct brw_reg src0, struct brw_reg src1)
+{
+   struct brw_instruction *jmp = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+   return jmp;
+}
+
+/* EU takes the value from the flag register and pushes it onto some
+ * sort of a stack (presumably merging with any flag value already on
+ * the stack).  Within an if block, the flags at the top of the stack
+ * control execution on each channel of the unit, eg. on each of the
+ * 16 pixel values in our wm programs.
+ *
+ * When the matching 'else' instruction is reached (presumably by
+ * countdown of the instruction count patched in by our ELSE/ENDIF
+ * functions), the relevant flags are inverted.
+ *
+ * When the matching 'endif' instruction is reached, the flags are
+ * popped off.  If the stack is now empty, normal execution resumes.
+ *
+ * No attempt is made to deal with stack overflow (14 elements?).
+ */
+struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
+{
+   struct brw_instruction *if_insn;
+
+   if (!p->single_program_flow) {
+      if_insn = next_insn(p, BRW_OPCODE_IF);
+   } else {
+      /* Single program flow: a predicate-inverted ADD on IP stands in for
+       * the IF, and only BRW_EXECUTE_1 is accepted. */
+      assert(execute_size == BRW_EXECUTE_1);
+      if_insn = next_insn(p, BRW_OPCODE_ADD);
+      if_insn->header.predicate_inverse = 1;
+   }
+
+   /* ip = ip + 0 for now; brw_ELSE()/brw_ENDIF() patch the real distance. */
+   brw_set_dest(if_insn, brw_ip_reg());
+   brw_set_src0(if_insn, brw_ip_reg());
+   brw_set_src1(if_insn, brw_imm_d(0x0));
+
+   if_insn->header.mask_control = BRW_MASK_ENABLE;
+   if_insn->header.predicate_control = BRW_PREDICATE_NORMAL;
+   if_insn->header.compression_control = BRW_COMPRESSION_NONE;
+   if_insn->header.execution_size = execute_size;
+
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+   return if_insn;
+}
+
+
+/* Emit the ELSE (or its ADD-on-IP stand-in in single program flow mode)
+ * and patch if_insn so that it targets it.  Returns the new instruction. */
+struct brw_instruction *brw_ELSE(struct brw_compile *p, 
+				 struct brw_instruction *if_insn)
+{
+   const GLuint else_opcode =
+      p->single_program_flow ? BRW_OPCODE_ADD : BRW_OPCODE_ELSE;
+   struct brw_instruction *else_insn = next_insn(p, else_opcode);
+
+   brw_set_dest(else_insn, brw_ip_reg());
+   brw_set_src0(else_insn, brw_ip_reg());
+   brw_set_src1(else_insn, brw_imm_d(0x0));
+
+   else_insn->header.mask_control = BRW_MASK_ENABLE;
+   else_insn->header.execution_size = if_insn->header.execution_size;
+   else_insn->header.compression_control = BRW_COMPRESSION_NONE;
+
+   if (!p->single_program_flow) {
+      /* Real IF: jump count is the distance to the ELSE, pop count is 1. */
+      assert(if_insn->header.opcode == BRW_OPCODE_IF);
+
+      if_insn->bits3.if_else.jump_count = else_insn - if_insn;
+      if_insn->bits3.if_else.pop_count = 1;
+      if_insn->bits3.if_else.pad0 = 0;
+   } else {
+      /* ADD-on-IP form: the target is one past the ELSE, scaled by 16
+       * (presumably the size of one instruction in bytes).
+       */
+      assert(if_insn->header.opcode == BRW_OPCODE_ADD);
+
+      if_insn->bits3.ud = (else_insn - if_insn + 1) * 16;
+   }
+
+   return else_insn;
+}
+
+void brw_ENDIF(struct brw_compile *p, 
+	       struct brw_instruction *patch_insn)
+{
+   struct brw_instruction *endif_insn;
+
+   if (p->single_program_flow) {
+      /* No ENDIF is emitted in single program flow mode: there are no
+       * stack operations to do, so the pending ADD is simply aimed at
+       * whatever instruction gets emitted next.
+       */
+      struct brw_instruction *landing = &p->store[p->nr_insn];
+
+      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
+      patch_insn->bits3.ud = (landing - patch_insn) * 16;
+      return;
+   }
+
+   endif_insn = next_insn(p, BRW_OPCODE_ENDIF);
+
+   brw_set_dest(endif_insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+   brw_set_src0(endif_insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+   brw_set_src1(endif_insn, brw_imm_d(0x0));
+
+   endif_insn->header.mask_control = BRW_MASK_ENABLE;
+   endif_insn->header.execution_size = patch_insn->header.execution_size;
+   endif_insn->header.compression_control = BRW_COMPRESSION_NONE;
+
+   /* The instruction being closed must not have been patched before. */
+   assert(patch_insn->bits3.if_else.jump_count == 0);
+
+   /* Aim the open IF or ELSE one instruction past the ENDIF. */
+   if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
+      patch_insn->bits3.if_else.jump_count = endif_insn - patch_insn + 1;
+      patch_insn->bits3.if_else.pop_count = 1;
+      patch_insn->bits3.if_else.pad0 = 0;
+   } else if (patch_insn->header.opcode == BRW_OPCODE_IF) {
+      /* An IF that never got an ELSE is rewritten as an IFF, with a
+       * pop count of zero.
+       */
+      patch_insn->header.opcode = BRW_OPCODE_IFF;
+      patch_insn->bits3.if_else.jump_count = endif_insn - patch_insn + 1;
+      patch_insn->bits3.if_else.pop_count = 0;
+      patch_insn->bits3.if_else.pad0 = 0;
+   } else {
+      assert(0);
+   }
+
+   /* The ENDIF itself pops one item off the stack and does not jump. */
+   endif_insn->bits3.if_else.jump_count = 0;
+   endif_insn->bits3.if_else.pop_count = 1;
+   endif_insn->bits3.if_else.pad0 = 0;
+}
+
+/* DO/WHILE loop:
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
+{
+   struct brw_instruction *do_insn;
+
+   /* Single program flow emits no DO: the loop head is just the slot the
+    * next instruction will occupy. */
+   if (p->single_program_flow)
+      return &p->store[p->nr_insn];
+
+   do_insn = next_insn(p, BRW_OPCODE_DO);
+   /* All three operands are brw_vec1_grf(0,0) retyped to UD. */
+   brw_set_dest(do_insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
+   brw_set_src0(do_insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
+   brw_set_src1(do_insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
+
+   do_insn->header.execution_size = execute_size;
+   do_insn->header.compression_control = BRW_COMPRESSION_NONE;
+
+   return do_insn;
+}
+
+
+
+/* Close the loop opened by brw_DO(); do_insn is what brw_DO() returned. */
+void brw_WHILE(struct brw_compile *p, 
+	       struct brw_instruction *do_insn)
+{
+   const GLuint opcode =
+      p->single_program_flow ? BRW_OPCODE_ADD : BRW_OPCODE_WHILE;
+   struct brw_instruction *while_insn = next_insn(p, opcode);
+
+   brw_set_dest(while_insn, brw_ip_reg());
+   brw_set_src0(while_insn, brw_ip_reg());
+   brw_set_src1(while_insn, brw_imm_d(0x0));
+
+   while_insn->header.compression_control = BRW_COMPRESSION_NONE;
+
+   if (!p->single_program_flow) {
+      while_insn->header.execution_size = do_insn->header.execution_size;
+
+      /* Jump count is the (backward) instruction distance to the DO. */
+      assert(do_insn->header.opcode == BRW_OPCODE_DO);
+      while_insn->bits3.if_else.jump_count = do_insn - while_insn;
+      while_insn->bits3.if_else.pop_count = 0;
+      while_insn->bits3.if_else.pad0 = 0;
+   } else {
+      /* ADD-on-IP form: BRW_EXECUTE_1, with the distance back to the
+       * loop head scaled by 16.
+       */
+      while_insn->header.execution_size = BRW_EXECUTE_1;
+      while_insn->bits3.d = (do_insn - while_insn) * 16;
+   }
+
+   /* Predication is not carried past the end of the loop. */
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+}
+
+
+/* FORWARD JUMPS: patch an earlier JMPI so that it lands on the instruction
+ * emitted next.  jmp_insn must be a JMPI whose src1 is an immediate. */
+void brw_land_fwd_jump(struct brw_compile *p, 
+		       struct brw_instruction *jmp_insn)
+{
+   struct brw_instruction *landing = &p->store[p->nr_insn];
+
+   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
+   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
+
+   jmp_insn->bits3.ud = (landing - jmp_insn) - 1; 
+}
+
+
+
+/* To integrate with the above, it makes sense that the comparison
+ * instruction should populate the flag register.  It might be simpler
+ * just to use the flag reg for most WM tasks?
+ */
+void brw_CMP(struct brw_compile *p,
+	     struct brw_reg dest, GLuint conditional,
+	     struct brw_reg src0, struct brw_reg src1)
+{
+   struct brw_instruction *cmp = next_insn(p, BRW_OPCODE_CMP);
+
+   /* The comparison condition is stored in the header field shared
+    * between the destination register and the conditional modifier.
+    */
+   cmp->header.destreg__conditonalmod = conditional;
+   brw_set_dest(cmp, dest);
+   brw_set_src0(cmp, src0);
+   brw_set_src1(cmp, src1);
+
+   /* Only a compare whose destination is architecture register 0 changes
+    * the compile state; any other destination leaves it alone.
+    */
+   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE || dest.nr != 0)
+      return;
+
+   /* From here on, new instructions use the computed flag value until
+    * brw_set_predicate_control_flag_value() is called again.
+    */
+   p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+   p->flag_value = 0xff;
+}
+
+
+
+/***********************************************************************
+ * Helpers for the various SEND message types:
+ */
+
+/* Emit one SEND carrying a math message that applies `function` to src.
+ */
+void brw_math( struct brw_compile *p,
+	       struct brw_reg dest,
+	       GLuint function,
+	       GLuint saturate,
+	       GLuint msg_reg_nr,
+	       struct brw_reg src,
+	       GLuint data_type,
+	       GLuint precision )
+{
+   struct brw_instruction *send = next_insn(p, BRW_OPCODE_SEND);
+   GLuint msg_length = 1;
+   GLuint response_length = 1;
+
+   /* POW uses a message length of 2; SINCOS a response length of 2. */
+   if (function == BRW_MATH_FUNCTION_POW)
+      msg_length = 2;
+   if (function == BRW_MATH_FUNCTION_SINCOS)
+      response_length = 2;
+
+   /* Example code doesn't set predicate_control for send instructions. */
+   send->header.predicate_control = 0; 
+   send->header.destreg__conditonalmod = msg_reg_nr;
+
+   brw_set_dest(send, dest);
+   brw_set_src0(send, src);
+   brw_set_math_message(send, msg_length, response_length, function,
+			BRW_MATH_INTEGER_UNSIGNED,
+			precision, saturate, data_type);
+}
+
+/* Use 2 send instructions to apply `function` to 16 elements.
+ *
+ * The second SEND is marked as the second compressed half, uses message
+ * register msg_reg_nr+1 and writes to dest offset by one.
+ */
+void brw_math_16( struct brw_compile *p,
+		  struct brw_reg dest,
+		  GLuint function,
+		  GLuint saturate,
+		  GLuint msg_reg_nr,
+		  struct brw_reg src,
+		  GLuint precision )
+{
+   GLuint msg_length = 1;
+   GLuint response_length = 1;
+   GLuint half;
+
+   /* POW uses a message length of 2; SINCOS a response length of 2. */
+   if (function == BRW_MATH_FUNCTION_POW)
+      msg_length = 2;
+   if (function == BRW_MATH_FUNCTION_SINCOS)
+      response_length = 2;
+
+   /* Both SENDs are emitted under a temporary instruction state with the
+    * predicate flag value set to 0xff and compression turned off.
+    */
+   brw_push_insn_state(p);
+   brw_set_predicate_control_flag_value(p, 0xff);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+   for (half = 0; half < 2; half++) {
+      struct brw_instruction *send = next_insn(p, BRW_OPCODE_SEND);
+
+      /* Only the second instruction overrides the compression field. */
+      if (half == 1)
+	 send->header.compression_control = BRW_COMPRESSION_2NDHALF;
+      send->header.destreg__conditonalmod = msg_reg_nr + half;
+
+      brw_set_dest(send, half == 1 ? offset(dest,1) : dest);
+      brw_set_src0(send, src);
+      brw_set_math_message(send, 
+			   msg_length, response_length, 
+			   function,
+			   BRW_MATH_INTEGER_UNSIGNED,
+			   precision,
+			   saturate,
+			   BRW_MATH_DATA_VECTOR);
+   }
+
+   brw_pop_insn_state(p);
+}
+
+
+
+
+/* Emit an OWORD block write of src (4 OWORDs, binding table index 255),
+ * after storing scratch_offset into brw_vec1_grf(0, 2).
+ */
+void brw_dp_WRITE_16( struct brw_compile *p,
+		      struct brw_reg src,
+		      GLuint msg_reg_nr,
+		      GLuint scratch_offset )
+{
+   struct brw_instruction *send;
+   struct brw_reg null_dest;
+
+   /* The offset MOV runs with masking disabled and compression off; the
+    * previous instruction state is restored right after it.
+    */
+   brw_push_insn_state(p);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_MOV(p,
+	   retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
+	   brw_imm_d(scratch_offset));
+   brw_pop_insn_state(p);
+
+   null_dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
+   send = next_insn(p, BRW_OPCODE_SEND);
+
+   send->header.predicate_control = 0; /* XXX */
+   send->header.compression_control = BRW_COMPRESSION_NONE; 
+   send->header.destreg__conditonalmod = msg_reg_nr;
+
+   brw_set_dest(send, null_dest);
+   brw_set_src0(send, src);
+   brw_set_dp_write_message(send,
+			    255, /* bti */
+			    BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
+			    BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
+			    3, /* msg_length */
+			    0, /* pixel scoreboard */
+			    0, /* response_length */
+			    0); /* eot */
+}
+
+
+/* Emit an OWORD block read into dest (binding table index 255, response
+ * length 2), after storing scratch_offset into brw_vec1_grf(0, 2). */
+void brw_dp_READ_16( struct brw_compile *p,
+		      struct brw_reg dest,
+		      GLuint msg_reg_nr,
+		      GLuint scratch_offset )
+{
+   struct brw_instruction *send;
+
+   /* The offset MOV runs with compression off and masking disabled; the
+    * previous instruction state is restored right after it. */
+   brw_push_insn_state(p);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_MOV(p,
+	   retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
+	   brw_imm_d(scratch_offset));
+   brw_pop_insn_state(p);
+
+   send = next_insn(p, BRW_OPCODE_SEND);
+
+   send->header.predicate_control = 0; /* XXX */
+   send->header.compression_control = BRW_COMPRESSION_NONE; 
+   send->header.destreg__conditonalmod = msg_reg_nr;
+
+   brw_set_dest(send, dest);	/* UW? */
+   brw_set_src0(send, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
+
+   brw_set_dp_read_message(send,
+			   255, /* bti */
+			   3,  /* msg_control */
+			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+			   1, /* target cache */
+			   1, /* msg_length */
+			   2, /* response_length */
+			   0); /* eot */
+}
+
+
+/* Emit a SIMD16 single-source render target write SEND. */
+void brw_fb_WRITE(struct brw_compile *p,
+		   struct brw_reg dest,
+		   GLuint msg_reg_nr,
+		   struct brw_reg src0,
+		   GLuint binding_table_index,
+		   GLuint msg_length, GLuint response_length,
+		   GLboolean eot)
+{
+   struct brw_instruction *send = next_insn(p, BRW_OPCODE_SEND);
+
+   send->header.destreg__conditonalmod = msg_reg_nr;
+   send->header.compression_control = BRW_COMPRESSION_NONE; 
+   send->header.predicate_control = 0; /* XXX */
+
+   brw_set_dest(send, dest);
+   brw_set_src0(send, src0);
+   brw_set_dp_write_message(send,
+			    binding_table_index,
+			    BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
+			    BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
+			    msg_length,
+			    1,	/* pixel scoreboard */
+			    response_length, 
+			    eot);
+}
+
+
+
+/* Emit a sampler SEND.  Nothing is emitted for a zero writemask.  With a
+ * partial writemask the response is either narrowed to the contiguous run
+ * of enabled channels, or a dependent MOV is emitted after the SEND.
+ */
+void brw_SAMPLE(struct brw_compile *p,
+		struct brw_reg dest,
+		GLuint msg_reg_nr,
+		struct brw_reg src0,
+		GLuint binding_table_index, GLuint sampler,
+		GLuint writemask, GLuint msg_type,
+		GLuint response_length, GLuint msg_length,
+		GLboolean eot)
+{
+   GLboolean need_stall = 0;
+
+   if (writemask == 0)
+      return;
+
+   /* Hardware doesn't do destination dependency checking on send
+    * instructions properly.  Add a workaround which generates the
+    * dependency by other means.  In practice it seems like this bug
+    * only crops up for texture samples, and only where registers are
+    * written by the send and then written again later without being
+    * read in between.  Luckily for us, we already track that
+    * information and use it to modify the writemask for the
+    * instruction, so that is a guide for whether a workaround is
+    * needed.
+    */
+   if (writemask != WRITEMASK_XYZW) {
+      GLuint dst_offset = 0;
+      GLuint i, newmask = 0, len = 0;
+
+      /* Skip leading disabled channels, moving dest on by 2 for each. */
+      for (i = 0; i < 4; i++) {
+	 if (writemask & (1<<i))
+	    break;
+	 dst_offset += 2;
+      }
+      /* Collect the first contiguous run of enabled channels. */
+      for (; i < 4; i++) {
+	 if (!(writemask & (1<<i)))
+	    break;
+	 newmask |= 1<<i;
+	 len++;
+      }
+
+      if (newmask != writemask) {
+	 /* Enabled channels are not one contiguous run: stall instead. */
+	 need_stall = 1;
+      }
+      else {
+	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
+
+	 /* Complement within XYZW, i.e. the channels that are not written. */
+	 newmask = ~newmask & WRITEMASK_XYZW;
+
+	 brw_push_insn_state(p);
+	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+	 brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+	 /* Copy brw_vec8_grf(0,0) to m1, then put the mask in UD element 2. */
+	 brw_MOV(p, m1, brw_vec8_grf(0,0));
+	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
+	 brw_pop_insn_state(p);
+
+	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
+	 dest = offset(dest, dst_offset);
+	 response_length = len * 2;
+      }
+   }
+
+   {
+      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+   
+      insn->header.predicate_control = 0; /* XXX */
+      insn->header.compression_control = BRW_COMPRESSION_NONE;
+      insn->header.destreg__conditonalmod = msg_reg_nr;
+
+      brw_set_dest(insn, dest);
+      brw_set_src0(insn, src0);
+      brw_set_sampler_message(p->brw, insn,
+			      binding_table_index,
+			      sampler,
+			      msg_type,
+			      response_length, 
+			      msg_length,
+			      eot);
+   }
+
+   if (need_stall)
+   {
+      struct brw_reg reg = vec8(offset(dest, response_length-1));
+
+      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
+       */
+      brw_push_insn_state(p);
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_MOV(p, reg, reg);
+      brw_pop_insn_state(p);
+   }
+}
+
+/* Emit a SEND carrying a URB write message.
+ *
+ * All these variables are pretty confusing - we might be better off
+ * using bitmasks and macros for this, in the old style.  Or perhaps
+ * just having the caller instantiate the fields in dword3 itself.
+ */
+void brw_urb_WRITE(struct brw_compile *p,
+		   struct brw_reg dest,
+		   GLuint msg_reg_nr,
+		   struct brw_reg src0,
+		   GLboolean allocate, GLboolean used,
+		   GLuint msg_length, GLuint response_length,
+		   GLboolean eot, GLboolean writes_complete,
+		   GLuint offset, GLuint swizzle)
+{
+   struct brw_instruction *send = next_insn(p, BRW_OPCODE_SEND);
+
+   /* Message lengths of 16 or more are rejected. */
+   assert(msg_length < 16);
+
+   brw_set_dest(send, dest);
+   brw_set_src0(send, src0);
+   brw_set_src1(send, brw_imm_d(0));
+
+   send->header.destreg__conditonalmod = msg_reg_nr;
+
+   /* The remaining arguments are forwarded, in order, to the URB
+    * message helper.
+    */
+   brw_set_urb_message(send,
+		       allocate,
+		       used,
+		       msg_length, response_length, 
+		       eot,
+		       writes_complete, 
+		       offset, swizzle);
+}
+
diff --git a/i965/brw_eu_util.c b/i965/brw_eu_util.c
new file mode 100644
index 0000000..5405cf1
--- /dev/null
+++ b/i965/brw_eu_util.c
@@ -0,0 +1,126 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+      
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+/* Emit a full-precision, non-saturating vector INV math message: src -> dst. */
+void brw_math_invert( struct brw_compile *p, 
+			     struct brw_reg dst, struct brw_reg src)
+{
+   brw_math( p, 
+	     dst,
+	     BRW_MATH_FUNCTION_INV, 
+	     BRW_MATH_SATURATE_NONE,
+	     0,				/* msg_reg_nr */
+	     src,
+	     BRW_MATH_DATA_VECTOR,	/* data_type */
+	     BRW_MATH_PRECISION_FULL );	/* precision */
+}
+
+
+
+/* Copy `count` rows of 32 from src to dst, two vec4 MOVs per row. */
+void brw_copy4(struct brw_compile *p,
+	       struct brw_reg dst, struct brw_reg src, GLuint count)
+{
+   GLuint lo, row;
+
+   dst = vec4(dst);
+   src = vec4(src);
+
+   /* lo is the offset of the first half of the row, hi of the second. */
+   for (row = 0, lo = 0; row < count; row++, lo += 32) {
+      const GLuint hi = lo + 16;
+
+      brw_MOV(p, byte_offset(dst, lo), byte_offset(src, lo));
+      brw_MOV(p, byte_offset(dst, hi), byte_offset(src, hi));
+   }
+}
+
+
+/* Copy `count` rows of 32 from src to dst, one vec8 MOV per row. */
+void brw_copy8(struct brw_compile *p,
+	       struct brw_reg dst, struct brw_reg src, GLuint count)
+{
+   GLuint row;
+
+   dst = vec8(dst);
+   src = vec8(src);
+
+   /* Each row sits 32 further along than the previous one. */
+   for (row = 0; row < count; row++) {
+      const GLuint delta = 32 * row;
+
+      brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta));
+   }
+}
+
+
+/* Copy `count` rows of 32 between two indirect pointers, two MOVs per row. */
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+				   struct brw_indirect dst_ptr,
+				   struct brw_indirect src_ptr, GLuint count)
+{
+   GLuint lo, row;
+
+   for (row = 0, lo = 0; row < count; row++, lo += 32) {
+      const GLuint hi = lo + 16;
+
+      brw_MOV(p, deref_4f(dst_ptr, lo), deref_4f(src_ptr, lo));
+      brw_MOV(p, deref_4f(dst_ptr, hi), deref_4f(src_ptr, hi));
+   }
+}
+
+
+/* Copy `count` rows of 32 from an indirect pointer into dst, two MOVs
+ * per row. */
+void brw_copy_from_indirect(struct brw_compile *p,
+			    struct brw_reg dst, struct brw_indirect ptr, GLuint count)
+{
+   GLuint lo, row;
+
+   dst = vec4(dst);
+
+   for (row = 0, lo = 0; row < count; row++, lo += 32) {
+      const GLuint hi = lo + 16;
+
+      brw_MOV(p, byte_offset(dst, lo), deref_4f(ptr, lo));
+      brw_MOV(p, byte_offset(dst, hi), deref_4f(ptr, hi));
+   }
+}
+
+
+
+
diff --git a/i965/brw_exec_generic.c b/i965/brw_exec_generic.c
new file mode 100644
index 0000000..11d1ef7
--- /dev/null
+++ b/i965/brw_exec_generic.c
@@ -0,0 +1,530 @@
+/**************************************************************************
+
+Copyright 2004 Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "glheader.h"
+#include "context.h"
+#include "macros.h"
+#include "vtxfmt.h"
+#include "dlist.h"
+#include "state.h"
+#include "light.h"
+#include "api_arrayelt.h"
+#include "api_noop.h"
+
+#include "brw_exec.h"
+
+
+/* Versions of all the entrypoints for situations where codegen isn't
+ * available.  
+ *
+ * Note: Only one size for each attribute may be active at once.
+ * Eg. if Color3f is installed/active, then Color4f may not be, even
+ * if the vertex actually contains 4 color coordinates.  This is
+ * because the 3f version won't otherwise set color[3] to 1.0 -- this
+ * is the job of the chooser function when switching between Color4f
+ * and Color3f.
+ */
+#define ATTRFV( ATTR, N )						\
+static void attrib_##ATTR##_##N( const GLfloat *v )			\
+{									\
+   GET_CURRENT_CONTEXT( ctx );						\
+   struct brw_exec_context *exec = IMM_CONTEXT(ctx)->exec;		\
+									\
+   if ((ATTR) != 0) {							\
+      /* Any attribute other than 0 only stores its new value. */	\
+      GLfloat *cur = exec->vtx.attrptr[ATTR];				\
+      if (N>0) cur[0] = v[0];						\
+      if (N>1) cur[1] = v[1];						\
+      if (N>2) cur[2] = v[2];						\
+      if (N>3) cur[3] = v[3];						\
+   }									\
+   else {								\
+      GLuint k;								\
+      /* Attribute 0 emits a vertex: N floats from v, rest from vtx.vertex. */ \
+      if (N>0) exec->vtx.vbptr[0] = v[0];				\
+      if (N>1) exec->vtx.vbptr[1] = v[1];				\
+      if (N>2) exec->vtx.vbptr[2] = v[2];				\
+      if (N>3) exec->vtx.vbptr[3] = v[3];				\
+      for (k = N; k < exec->vtx.vertex_size; k++)			\
+	 exec->vtx.vbptr[k] = exec->vtx.vertex[k];			\
+									\
+      exec->vtx.vbptr += exec->vtx.vertex_size;				\
+      exec->ctx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;		\
+									\
+      if (++exec->vtx.vert_count >= exec->vtx.max_vert)			\
+	 brw_exec_vtx_wrap( exec );					\
+   }									\
+}
+
+#define INIT(TAB, ATTR)						\
+   TAB[ATTR][0] = attrib_##ATTR##_1;				\
+   TAB[ATTR][1] = attrib_##ATTR##_2;				\
+   TAB[ATTR][2] = attrib_##ATTR##_3;				\
+   TAB[ATTR][3] = attrib_##ATTR##_4;
+
+
+#define ATTRS( ATTRIB )	/* instantiate ATTRFV for sizes 1..4 of ATTRIB */	\
+   ATTRFV( ATTRIB, 1 )				\
+   ATTRFV( ATTRIB, 2 )				\
+   ATTRFV( ATTRIB, 3 )				\
+   ATTRFV( ATTRIB, 4 )			
+
+ATTRS( 0 )	/* defines attrib_0_1 .. attrib_0_4; likewise for each line below */
+ATTRS( 1 )
+ATTRS( 2 )
+ATTRS( 3 )
+ATTRS( 4 )
+ATTRS( 5 )
+ATTRS( 6 )
+ATTRS( 7 )
+ATTRS( 8 )
+ATTRS( 9 )
+ATTRS( 10 )
+ATTRS( 11 )
+ATTRS( 12 )
+ATTRS( 13 )
+ATTRS( 14 )
+ATTRS( 15 )
+
+void brw_exec_generic_attr_table_init( brw_attrfv_func (*tab)[4] )
+{  /* Sets tab[A][N-1] = attrib_<A>_<N> for A in 0..15 and N in 1..4. */
+   INIT( tab, 0 );
+   INIT( tab, 1 );
+   INIT( tab, 2 );
+   INIT( tab, 3 );
+   INIT( tab, 4 );
+   INIT( tab, 5 );
+   INIT( tab, 6 );
+   INIT( tab, 7 );
+   INIT( tab, 8 );
+   INIT( tab, 9 );
+   INIT( tab, 10 );
+   INIT( tab, 11 );
+   INIT( tab, 12 );
+   INIT( tab, 13 );
+   INIT( tab, 14 );
+   INIT( tab, 15 );
+}
+
+/* These can be made efficient with codegen.  Further, by adding more
+ * logic to do_choose(), the double-dispatch for legacy entrypoints
+ * like glVertex3f() can be removed.
+ */
+#define DISPATCH_ATTRFV( ATTR, COUNT, P )				\
+do {									\
+   /* Call the current context's tabfv entry for (ATTR, COUNT) on P. */ \
+   GET_CURRENT_CONTEXT( ctx );						\
+   IMM_CONTEXT(ctx)->exec->vtx.tabfv[ATTR][COUNT-1]( P );		\
+} while (0)
+
+#define DISPATCH_ATTR1FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 1, V )	/* fixed-count shorthands for DISPATCH_ATTRFV */
+#define DISPATCH_ATTR2FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 2, V )
+#define DISPATCH_ATTR3FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 3, V )
+#define DISPATCH_ATTR4FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 4, V )
+
+#define DISPATCH_ATTR1F( ATTR, S ) DISPATCH_ATTRFV( ATTR, 1, &(S) )	/* scalar form: passes the address of S */
+
+#define DISPATCH_ATTR2F( ATTR, S,T ) 					\
+do { 	/* pack the scalars, then dispatch them as a 2-float array */	\
+   GLfloat packed[2]; 							\
+   packed[0] = S; packed[1] = T;					\
+   DISPATCH_ATTRFV( ATTR, 2, packed );					\
+} while (0)
+#define DISPATCH_ATTR3F( ATTR, S,T,R ) 					\
+do { 	/* pack the scalars, then dispatch them as a 3-float array */	\
+   GLfloat packed[3]; 							\
+   packed[0] = S; packed[1] = T; packed[2] = R;				\
+   DISPATCH_ATTRFV( ATTR, 3, packed );					\
+} while (0)
+#define DISPATCH_ATTR4F( ATTR, S,T,R,Q )				\
+do { 	/* pack the scalars, then dispatch them as a 4-float array */	\
+   GLfloat packed[4]; 							\
+   packed[0] = S; packed[1] = T; packed[2] = R; packed[3] = Q;		\
+   DISPATCH_ATTRFV( ATTR, 4, packed );					\
+} while (0)
+
+
+static void GLAPIENTRY brw_Vertex2f( GLfloat x, GLfloat y )
+{  /* Packs x, y into a GLfloat[2] and calls tabfv[BRW_ATTRIB_POS][1]. */
+   DISPATCH_ATTR2F( BRW_ATTRIB_POS, x, y );
+}
+
+static void GLAPIENTRY brw_Vertex2fv( const GLfloat *v )
+{  /* Hands v directly to tabfv[BRW_ATTRIB_POS][1]. */
+   DISPATCH_ATTR2FV( BRW_ATTRIB_POS, v );
+}
+
+static void GLAPIENTRY brw_Vertex3f( GLfloat x, GLfloat y, GLfloat z )
+{  /* Packs x, y, z into a GLfloat[3] and calls tabfv[BRW_ATTRIB_POS][2]. */
+   DISPATCH_ATTR3F( BRW_ATTRIB_POS, x, y, z );
+}
+
+static void GLAPIENTRY brw_Vertex3fv( const GLfloat *v )
+{  /* Hands v directly to tabfv[BRW_ATTRIB_POS][2]. */
+   DISPATCH_ATTR3FV( BRW_ATTRIB_POS, v );
+}
+
+static void GLAPIENTRY brw_Vertex4f( GLfloat x, GLfloat y, GLfloat z, 
+				      GLfloat w )
+{  /* Packs x, y, z, w into a GLfloat[4] and calls tabfv[BRW_ATTRIB_POS][3]. */
+   DISPATCH_ATTR4F( BRW_ATTRIB_POS, x, y, z, w );
+}
+
+static void GLAPIENTRY brw_Vertex4fv( const GLfloat *v )
+{  /* Hands v directly to tabfv[BRW_ATTRIB_POS][3]. */
+   DISPATCH_ATTR4FV( BRW_ATTRIB_POS, v );
+}
+
+static void GLAPIENTRY brw_TexCoord1f( GLfloat x )
+{  /* Passes &x to tabfv[BRW_ATTRIB_TEX0][0]. */
+   DISPATCH_ATTR1F( BRW_ATTRIB_TEX0, x );
+}
+
+static void GLAPIENTRY brw_TexCoord1fv( const GLfloat *v )
+{  /* Hands v directly to tabfv[BRW_ATTRIB_TEX0][0]. */
+   DISPATCH_ATTR1FV( BRW_ATTRIB_TEX0, v );
+}
+
+static void GLAPIENTRY brw_TexCoord2f( GLfloat x, GLfloat y )
+{  /* Packs x, y into a GLfloat[2] and calls tabfv[BRW_ATTRIB_TEX0][1]. */
+   DISPATCH_ATTR2F( BRW_ATTRIB_TEX0, x, y );
+}
+
+static void GLAPIENTRY brw_TexCoord2fv( const GLfloat *v )
+{  /* Hands v directly to tabfv[BRW_ATTRIB_TEX0][1]. */
+   DISPATCH_ATTR2FV( BRW_ATTRIB_TEX0, v );
+}
+
+static void GLAPIENTRY brw_TexCoord3f( GLfloat x, GLfloat y, GLfloat z )
+{  /* Packs x, y, z into a GLfloat[3] and calls tabfv[BRW_ATTRIB_TEX0][2]. */
+   DISPATCH_ATTR3F( BRW_ATTRIB_TEX0, x, y, z );
+}
+
+static void GLAPIENTRY brw_TexCoord3fv( const GLfloat *v )
+{  /* Hands v directly to tabfv[BRW_ATTRIB_TEX0][2]. */
+   DISPATCH_ATTR3FV( BRW_ATTRIB_TEX0, v );
+}
+
+static void GLAPIENTRY brw_TexCoord4f( GLfloat x, GLfloat y, GLfloat z,
+					GLfloat w )
+{  /* Packs x, y, z, w into a GLfloat[4] and calls tabfv[BRW_ATTRIB_TEX0][3]. */
+   DISPATCH_ATTR4F( BRW_ATTRIB_TEX0, x, y, z, w );
+}
+
+static void GLAPIENTRY brw_TexCoord4fv( const GLfloat *v )
+{  /* Hands v directly to tabfv[BRW_ATTRIB_TEX0][3]. */
+   DISPATCH_ATTR4FV( BRW_ATTRIB_TEX0, v );
+}
+
+static void GLAPIENTRY brw_Normal3f( GLfloat x, GLfloat y, GLfloat z )
+{  /* Packs x, y, z into a GLfloat[3] and calls tabfv[BRW_ATTRIB_NORMAL][2]. */
+   DISPATCH_ATTR3F( BRW_ATTRIB_NORMAL, x, y, z );
+}
+
+static void GLAPIENTRY brw_Normal3fv( const GLfloat *v )
+{  /* Hands v directly to tabfv[BRW_ATTRIB_NORMAL][2]. */
+   DISPATCH_ATTR3FV( BRW_ATTRIB_NORMAL, v );
+}
+
+static void GLAPIENTRY brw_FogCoordfEXT( GLfloat x )
+{  /* Passes &x to tabfv[BRW_ATTRIB_FOG][0]. */
+   DISPATCH_ATTR1F( BRW_ATTRIB_FOG, x );
+}
+
+static void GLAPIENTRY brw_FogCoordfvEXT( const GLfloat *v )
+{  /* Hands v directly to tabfv[BRW_ATTRIB_FOG][0]. */
+   DISPATCH_ATTR1FV( BRW_ATTRIB_FOG, v );
+}
+
+static void GLAPIENTRY brw_Color3f( GLfloat x, GLfloat y, GLfloat z )
+{  /* Packs x, y, z into a GLfloat[3] and calls tabfv[BRW_ATTRIB_COLOR0][2]. */
+   DISPATCH_ATTR3F( BRW_ATTRIB_COLOR0, x, y, z );
+}
+
+static void GLAPIENTRY brw_Color3fv( const GLfloat *v )
+{  /* Hands v directly to tabfv[BRW_ATTRIB_COLOR0][2]. */
+   DISPATCH_ATTR3FV( BRW_ATTRIB_COLOR0, v );
+}
+
+static void GLAPIENTRY brw_Color4f( GLfloat x, GLfloat y, GLfloat z, 
+				     GLfloat w )
+{  /* Packs x, y, z, w into a GLfloat[4] and calls tabfv[BRW_ATTRIB_COLOR0][3]. */
+   DISPATCH_ATTR4F( BRW_ATTRIB_COLOR0, x, y, z, w );
+}
+
+static void GLAPIENTRY brw_Color4fv( const GLfloat *v )
+{  /* Hands v directly to tabfv[BRW_ATTRIB_COLOR0][3]. */
+   DISPATCH_ATTR4FV( BRW_ATTRIB_COLOR0, v );
+}
+
+static void GLAPIENTRY brw_SecondaryColor3fEXT( GLfloat x, GLfloat y, 
+						 GLfloat z )
+{  /* Packs x, y, z into a GLfloat[3] and calls tabfv[BRW_ATTRIB_COLOR1][2]. */
+   DISPATCH_ATTR3F( BRW_ATTRIB_COLOR1, x, y, z );
+}
+
+static void GLAPIENTRY brw_SecondaryColor3fvEXT( const GLfloat *v )
+{  /* Hands v directly to tabfv[BRW_ATTRIB_COLOR1][2]. */
+   DISPATCH_ATTR3FV( BRW_ATTRIB_COLOR1, v );
+}
+
+static void GLAPIENTRY brw_MultiTexCoord1f( GLenum target, GLfloat x  )
+{
+   /* The low three bits of target pick the slot, counted from TEX0. */
+   DISPATCH_ATTR1F( (target & 0x7) + BRW_ATTRIB_TEX0, x );
+}
+
+static void GLAPIENTRY brw_MultiTexCoord1fv( GLenum target,
+					      const GLfloat *v )
+{
+   /* The low three bits of target pick the slot, counted from TEX0. */
+   DISPATCH_ATTR1FV( (target & 0x7) + BRW_ATTRIB_TEX0, v );
+}
+
+static void GLAPIENTRY brw_MultiTexCoord2f( GLenum target, GLfloat x, 
+					     GLfloat y )
+{
+   /* The low three bits of target pick the slot, counted from TEX0. */
+   DISPATCH_ATTR2F( (target & 0x7) + BRW_ATTRIB_TEX0, x, y );
+}
+
+static void GLAPIENTRY brw_MultiTexCoord2fv( GLenum target, 
+					      const GLfloat *v )
+{
+   /* The low three bits of target pick the slot, counted from TEX0. */
+   DISPATCH_ATTR2FV( (target & 0x7) + BRW_ATTRIB_TEX0, v );
+}
+
+static void GLAPIENTRY brw_MultiTexCoord3f( GLenum target, GLfloat x, 
+					     GLfloat y, GLfloat z)
+{
+   /* The low three bits of target pick the slot, counted from TEX0. */
+   DISPATCH_ATTR3F( (target & 0x7) + BRW_ATTRIB_TEX0, x, y, z );
+}
+
+static void GLAPIENTRY brw_MultiTexCoord3fv( GLenum target, 
+					      const GLfloat *v )
+{
+   /* The low three bits of target pick the slot, counted from TEX0. */
+   DISPATCH_ATTR3FV( (target & 0x7) + BRW_ATTRIB_TEX0, v );
+}
+
+static void GLAPIENTRY brw_MultiTexCoord4f( GLenum target, GLfloat x, 
+					     GLfloat y, GLfloat z,
+					     GLfloat w )
+{
+   /* The low three bits of target pick the slot, counted from TEX0. */
+   DISPATCH_ATTR4F( (target & 0x7) + BRW_ATTRIB_TEX0, x, y, z, w );
+}
+
+static void GLAPIENTRY brw_MultiTexCoord4fv( GLenum target, 
+					      const GLfloat *v )
+{
+   /* The low three bits of target pick the slot, counted from TEX0. */
+   DISPATCH_ATTR4FV( (target & 0x7) + BRW_ATTRIB_TEX0, v );
+}
+
+
+static void GLAPIENTRY brw_VertexAttrib1fNV( GLuint index, GLfloat x )
+{
+   /* Any index at or past BRW_ATTRIB_FIRST_MATERIAL goes to ERROR_ATTRIB. */
+   DISPATCH_ATTR1F( index < BRW_ATTRIB_FIRST_MATERIAL ? index : ERROR_ATTRIB, x );
+}
+
+static void GLAPIENTRY brw_VertexAttrib1fvNV( GLuint index, 
+					       const GLfloat *v )
+{
+   /* Any index at or past BRW_ATTRIB_FIRST_MATERIAL goes to ERROR_ATTRIB. */
+   DISPATCH_ATTR1FV( index < BRW_ATTRIB_FIRST_MATERIAL ? index : ERROR_ATTRIB, v );
+}
+
+static void GLAPIENTRY brw_VertexAttrib2fNV( GLuint index, GLfloat x, 
+					      GLfloat y )
+{
+   /* Any index at or past BRW_ATTRIB_FIRST_MATERIAL goes to ERROR_ATTRIB. */
+   DISPATCH_ATTR2F( index < BRW_ATTRIB_FIRST_MATERIAL ? index : ERROR_ATTRIB, x, y );
+}
+
+static void GLAPIENTRY brw_VertexAttrib2fvNV( GLuint index,
+					       const GLfloat *v )
+{
+   /* Any index at or past BRW_ATTRIB_FIRST_MATERIAL goes to ERROR_ATTRIB. */
+   DISPATCH_ATTR2FV( index < BRW_ATTRIB_FIRST_MATERIAL ? index : ERROR_ATTRIB, v );
+}
+
+static void GLAPIENTRY brw_VertexAttrib3fNV( GLuint index, GLfloat x,
+					      GLfloat y, GLfloat z )
+{
+   /* Any index at or past BRW_ATTRIB_FIRST_MATERIAL goes to ERROR_ATTRIB. */
+   DISPATCH_ATTR3F( index < BRW_ATTRIB_FIRST_MATERIAL ? index : ERROR_ATTRIB, x, y, z );
+}
+
+static void GLAPIENTRY brw_VertexAttrib3fvNV( GLuint index,
+					       const GLfloat *v )
+{
+   /* Any index at or past BRW_ATTRIB_FIRST_MATERIAL goes to ERROR_ATTRIB. */
+   DISPATCH_ATTR3FV( index < BRW_ATTRIB_FIRST_MATERIAL ? index : ERROR_ATTRIB, v );
+}
+
+static void GLAPIENTRY brw_VertexAttrib4fNV( GLuint index, GLfloat x,
+					      GLfloat y, GLfloat z,
+					      GLfloat w )
+{
+   /* Any index at or past BRW_ATTRIB_FIRST_MATERIAL goes to ERROR_ATTRIB. */
+   DISPATCH_ATTR4F( index < BRW_ATTRIB_FIRST_MATERIAL ? index : ERROR_ATTRIB, x, y, z, w );
+}
+
+/* An index >= BRW_ATTRIB_FIRST_MATERIAL is dispatched as ERROR_ATTRIB instead. */
+static void GLAPIENTRY brw_VertexAttrib4fvNV( GLuint index, const GLfloat *v )
+{
+   index = (index < BRW_ATTRIB_FIRST_MATERIAL) ? index : ERROR_ATTRIB;
+   DISPATCH_ATTR4FV( index, v );
+}
+
+
+/*
+ * XXX adjust index: the ARB entry points below use index unmodified.
+ */
+
+static void GLAPIENTRY brw_VertexAttrib1fARB( GLuint index, GLfloat x )
+{
+   index = (index < BRW_ATTRIB_FIRST_MATERIAL) ? index : ERROR_ATTRIB; /* range check */
+   DISPATCH_ATTR1F( index, x );
+}
+
+/* An index >= BRW_ATTRIB_FIRST_MATERIAL is dispatched as ERROR_ATTRIB instead. */
+static void GLAPIENTRY brw_VertexAttrib1fvARB( GLuint index, const GLfloat *v )
+{
+   index = (index < BRW_ATTRIB_FIRST_MATERIAL) ? index : ERROR_ATTRIB;
+   DISPATCH_ATTR1FV( index, v );
+}
+
+/* An index >= BRW_ATTRIB_FIRST_MATERIAL is dispatched as ERROR_ATTRIB instead. */
+static void GLAPIENTRY brw_VertexAttrib2fARB( GLuint index, GLfloat x, GLfloat y )
+{
+   index = (index < BRW_ATTRIB_FIRST_MATERIAL) ? index : ERROR_ATTRIB;
+   DISPATCH_ATTR2F( index, x, y );
+}
+
+/* An index >= BRW_ATTRIB_FIRST_MATERIAL is dispatched as ERROR_ATTRIB instead. */
+static void GLAPIENTRY brw_VertexAttrib2fvARB( GLuint index, const GLfloat *v )
+{
+   index = (index < BRW_ATTRIB_FIRST_MATERIAL) ? index : ERROR_ATTRIB;
+   DISPATCH_ATTR2FV( index, v );
+}
+
+static void GLAPIENTRY
+brw_VertexAttrib3fARB( GLuint index, GLfloat x, GLfloat y, GLfloat z )
+{
+   index = (index < BRW_ATTRIB_FIRST_MATERIAL) ? index : ERROR_ATTRIB; /* range check */
+   DISPATCH_ATTR3F( index, x, y, z );
+}
+
+/* An index >= BRW_ATTRIB_FIRST_MATERIAL is dispatched as ERROR_ATTRIB instead. */
+static void GLAPIENTRY brw_VertexAttrib3fvARB( GLuint index, const GLfloat *v )
+{
+   index = (index < BRW_ATTRIB_FIRST_MATERIAL) ? index : ERROR_ATTRIB;
+   DISPATCH_ATTR3FV( index, v );
+}
+
+/* An index >= BRW_ATTRIB_FIRST_MATERIAL is dispatched as ERROR_ATTRIB instead. */
+static void GLAPIENTRY
+brw_VertexAttrib4fARB( GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w )
+{
+   index = (index < BRW_ATTRIB_FIRST_MATERIAL) ? index : ERROR_ATTRIB;
+   DISPATCH_ATTR4F( index, x, y, z, w );
+}
+
+/* An index >= BRW_ATTRIB_FIRST_MATERIAL is dispatched as ERROR_ATTRIB instead. */
+static void GLAPIENTRY brw_VertexAttrib4fvARB( GLuint index, const GLfloat *v )
+{
+   index = (index < BRW_ATTRIB_FIRST_MATERIAL) ? index : ERROR_ATTRIB;
+   DISPATCH_ATTR4FV( index, v );
+}
+
+
+/* Plug the generic versions of the 2nd level dispatch functions into
+ * exec->vtxfmt.  Some of these have a codegen alternative. */
+void brw_exec_vtx_generic_init( struct brw_exec_context *exec )
+{
+   GLvertexformat *fmt = &exec->vtxfmt;
+   /* Conventional attributes: position, normal, colours, fog, texcoords. */
+   fmt->Vertex2f = brw_Vertex2f;
+   fmt->Vertex2fv = brw_Vertex2fv;
+   fmt->Vertex3f = brw_Vertex3f;
+   fmt->Vertex3fv = brw_Vertex3fv;
+   fmt->Vertex4f = brw_Vertex4f;
+   fmt->Vertex4fv = brw_Vertex4fv;
+   fmt->Normal3f = brw_Normal3f;
+   fmt->Normal3fv = brw_Normal3fv;
+   fmt->Color3f = brw_Color3f;
+   fmt->Color3fv = brw_Color3fv;
+   fmt->Color4f = brw_Color4f;
+   fmt->Color4fv = brw_Color4fv;
+   fmt->SecondaryColor3fEXT = brw_SecondaryColor3fEXT;
+   fmt->SecondaryColor3fvEXT = brw_SecondaryColor3fvEXT;
+   fmt->FogCoordfEXT = brw_FogCoordfEXT;
+   fmt->FogCoordfvEXT = brw_FogCoordfvEXT;
+   fmt->TexCoord1f = brw_TexCoord1f;
+   fmt->TexCoord1fv = brw_TexCoord1fv;
+   fmt->TexCoord2f = brw_TexCoord2f;
+   fmt->TexCoord2fv = brw_TexCoord2fv;
+   fmt->TexCoord3f = brw_TexCoord3f;
+   fmt->TexCoord3fv = brw_TexCoord3fv;
+   fmt->TexCoord4f = brw_TexCoord4f;
+   fmt->TexCoord4fv = brw_TexCoord4fv;
+   fmt->MultiTexCoord1fARB = brw_MultiTexCoord1f;
+   fmt->MultiTexCoord1fvARB = brw_MultiTexCoord1fv;
+   fmt->MultiTexCoord2fARB = brw_MultiTexCoord2f;
+   fmt->MultiTexCoord2fvARB = brw_MultiTexCoord2fv;
+   fmt->MultiTexCoord3fARB = brw_MultiTexCoord3f;
+   fmt->MultiTexCoord3fvARB = brw_MultiTexCoord3fv;
+   fmt->MultiTexCoord4fARB = brw_MultiTexCoord4f;
+   fmt->MultiTexCoord4fvARB = brw_MultiTexCoord4fv;
+   /* Indexed vertex attributes, NV and ARB entry points. */
+   fmt->VertexAttrib1fNV = brw_VertexAttrib1fNV;
+   fmt->VertexAttrib1fvNV = brw_VertexAttrib1fvNV;
+   fmt->VertexAttrib2fNV = brw_VertexAttrib2fNV;
+   fmt->VertexAttrib2fvNV = brw_VertexAttrib2fvNV;
+   fmt->VertexAttrib3fNV = brw_VertexAttrib3fNV;
+   fmt->VertexAttrib3fvNV = brw_VertexAttrib3fvNV;
+   fmt->VertexAttrib4fNV = brw_VertexAttrib4fNV;
+   fmt->VertexAttrib4fvNV = brw_VertexAttrib4fvNV;
+   fmt->VertexAttrib1fARB = brw_VertexAttrib1fARB;
+   fmt->VertexAttrib1fvARB = brw_VertexAttrib1fvARB;
+   fmt->VertexAttrib2fARB = brw_VertexAttrib2fARB;
+   fmt->VertexAttrib2fvARB = brw_VertexAttrib2fvARB;
+   fmt->VertexAttrib3fARB = brw_VertexAttrib3fARB;
+   fmt->VertexAttrib3fvARB = brw_VertexAttrib3fvARB;
+   fmt->VertexAttrib4fARB = brw_VertexAttrib4fARB;
+   fmt->VertexAttrib4fvARB = brw_VertexAttrib4fvARB;
+}
diff --git a/i965/brw_fallback.c b/i965/brw_fallback.c
new file mode 100644
index 0000000..86464b2
--- /dev/null
+++ b/i965/brw_fallback.c
@@ -0,0 +1,122 @@
+/**************************************************************************
+ * 
+ * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "swrast_setup/swrast_setup.h"
+#include "swrast/swrast.h"
+#include "tnl/tnl.h"
+#include "context.h"
+#include "brw_context.h"
+#include "brw_fallback.h"
+
+#include "glheader.h"
+#include "enums.h"
+#include "glapi.h"
+#include "imports.h"
+#include "macros.h"
+#include "mtypes.h"
+
+
+
+
+
+
+
+/* Evaluate the conditions under which a fallback is flagged; the caller,
+ * check_fallback(), stores the result in brw->intel.Fallback.  Each test
+ * is tagged with the dirty flag listed for it in brw_check_fallback.
+ * Returns GL_TRUE when any fallback condition holds.
+ */
+static GLboolean do_check_fallback(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   GLuint unit;
+
+   /* BRW_NEW_METAOPS: an active metaop wins over every check below. */
+   if (brw->metaops.active)
+      return GL_FALSE;
+
+   if (brw->intel.no_rast)
+      return GL_TRUE;
+
+   /* _NEW_BUFFERS: only front-left alone or back-left alone is accepted. */
+   if (!(ctx->DrawBuffer->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT ||
+         ctx->DrawBuffer->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT))
+      return GL_TRUE;
+
+   /* _NEW_RENDERMODE
+    *
+    * XXX: need to save/restore RenderMode in metaops state, or
+    * somehow move to a new attribs pointer:
+    */
+   if (ctx->RenderMode != GL_RENDER)
+      return GL_TRUE;
+
+   /* _NEW_TEXTURE: any enabled unit whose first-level image has a border. */
+   for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
+      struct gl_texture_unit *tu = &brw->attribs.Texture->Unit[unit];
+      struct intel_texture_object *tex;
+
+      if (!tu->_ReallyEnabled)
+         continue;
+
+      tex = intel_texture_object(tu->_Current);
+      if (tex->base.Image[0][tex->firstLevel]->Border)
+         return GL_TRUE;
+   }
+
+   /* _NEW_STENCIL: stencil enabled while intel.hw_stencil is clear. */
+   if (brw->attribs.Stencil->Enabled && !brw->intel.hw_stencil)
+      return GL_TRUE;
+
+   return GL_FALSE;
+}
+
+static void check_fallback(struct brw_context *brw)   /* .update hook of brw_check_fallback */
+{
+   brw->intel.Fallback = do_check_fallback(brw);   /* overwritten with a fresh verdict on every call */
+}
+
+/* Tracked-state entry for check_fallback; .dirty.cache stays at its implicit 0. */
+const struct brw_tracked_state brw_check_fallback = {
+   .update = check_fallback,
+   .dirty = {
+      .mesa = _NEW_STENCIL | _NEW_TEXTURE | _NEW_RENDERMODE | _NEW_BUFFERS,
+      .brw  = BRW_NEW_METAOPS,
+   },
+};
+
+
+
+
+/* Not used: empty stub that ignores intel, bit and mode.  In this file
+ * brw->intel.Fallback is assigned by check_fallback() instead. */
+void intelFallback( struct intel_context *intel, GLuint bit, GLboolean mode )
+{
+}
+
+
+
diff --git a/i965/brw_fallback.h b/i965/brw_fallback.h
new file mode 100644
index 0000000..684a46c
--- /dev/null
+++ b/i965/brw_fallback.h
@@ -0,0 +1,47 @@
+ /**************************************************************************
+ * 
+ * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef BRW_FALLBACK_H
+#define BRW_FALLBACK_H
+
+#include "mtypes.h"		/* for GLcontext... */
+
+struct brw_context;
+struct vbo_prim;
+
+void brw_fallback( GLcontext *ctx );
+void brw_unfallback( GLcontext *ctx );
+
+void brw_loopback_vertex_list( GLcontext *ctx,
+			       const GLfloat *buffer,
+			       const GLubyte *attrsz,
+			       const struct vbo_prim *prim,
+			       GLuint prim_count,
+			       GLuint wrap_count,
+			       GLuint vertex_size);
+
+#endif
diff --git a/i965/brw_gs.c b/i965/brw_gs.c
new file mode 100644
index 0000000..119d07d
--- /dev/null
+++ b/i965/brw_gs.c
@@ -0,0 +1,203 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+      
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "brw_gs.h"
+
+
+
+/* Build the GS program described by key and upload it to the GS program
+ * cache.  brw_upload_cache()'s return value is kept in
+ * brw->gs.prog_gs_offset, and it is also passed &brw->gs.prog_data.
+ * Returns early, uploading nothing, when the switch below selects no
+ * emitter for the key's primitive.
+ */
+static void compile_gs_prog( struct brw_context *brw,
+                             struct brw_gs_prog_key *key )
+{
+   struct brw_gs_compile c;
+   const GLuint *program;
+   GLuint program_size;
+
+   memset(&c, 0, sizeof(c));
+   c.key = *key;
+
+   /* Need to locate the two positions present in vertex + header.
+    * These are currently hardcoded.  The vertex size is recorded in
+    * three units: attributes, registers and bytes.
+    */
+   c.nr_attrs = brw_count_bits(c.key.attrs);
+   c.nr_regs = 1 + (c.nr_attrs + 1) / 2;  /* are vertices packed, or reg-aligned? */
+   c.nr_bytes = c.nr_regs * REG_SIZE;
+
+   /* Begin the compilation:
+    */
+   brw_init_compile(brw, &c.func);
+   c.func.single_program_flow = 1;
+
+   /* For some reason the thread is spawned with only 4 channels
+    * unmasked.
+    */
+   brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
+
+   /* Note that primitives which don't require a GS program have
+    * already been weeded out by this stage.  Lines, triangles and
+    * points only get an emitter when hint_gs_always is set.
+    */
+   switch (key->primitive) {
+   case GL_QUADS:
+      brw_gs_quads( &c );
+      break;
+
+   case GL_QUAD_STRIP:
+      brw_gs_quad_strip( &c );
+      break;
+
+   case GL_LINE_LOOP:
+      brw_gs_lines( &c );
+      break;
+
+   case GL_LINES:
+      if (!key->hint_gs_always)
+         return;
+      brw_gs_lines( &c );
+      break;
+
+   case GL_TRIANGLES:
+      if (!key->hint_gs_always)
+         return;
+      brw_gs_tris( &c );
+      break;
+
+   case GL_POINTS:
+      if (!key->hint_gs_always)
+         return;
+      brw_gs_points( &c );
+      break;
+
+   default:
+      return;
+   }
+
+   /* Fetch the generated instructions... */
+   program = brw_get_program(&c.func, &program_size);
+
+   /* ...and upload them, keyed on the copy of the key held in c. */
+   brw->gs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_GS_PROG],
+                                              &c.key, sizeof(c.key),
+                                              program, program_size,
+                                              &c.prog_data,
+                                              &brw->gs.prog_data );
+}
+
+
+/* Probe the GS program cache for key.  brw_search_cache() is also given the
+ * addresses of brw->gs.prog_data and brw->gs.prog_gs_offset. */
+static GLboolean search_cache( struct brw_context *brw,
+                               struct brw_gs_prog_key *key )
+{
+   return brw_search_cache(&brw->cache[BRW_GS_PROG], key, sizeof *key,
+                           &brw->gs.prog_data, &brw->gs.prog_gs_offset);
+}
+
+
+static const GLenum gs_prim[GL_POLYGON+1] = {  /* GL primitive -> primitive stored in the GS key */
+   [GL_POINTS]         = GL_POINTS,
+   [GL_LINES]          = GL_LINES,
+   [GL_LINE_LOOP]      = GL_LINE_LOOP,
+   [GL_LINE_STRIP]     = GL_LINES,
+   [GL_TRIANGLES]      = GL_TRIANGLES,
+   [GL_TRIANGLE_STRIP] = GL_TRIANGLES,
+   [GL_TRIANGLE_FAN]   = GL_TRIANGLES,
+   [GL_QUADS]          = GL_QUADS,
+   [GL_QUAD_STRIP]     = GL_QUAD_STRIP,
+   [GL_POLYGON]        = GL_TRIANGLES
+};
+
+/* Fill in the GS program key from current state (zeroed first, pad included). */
+static void populate_key( struct brw_context *brw,
+                          struct brw_gs_prog_key *key )
+{
+   memset(key, 0, sizeof(*key));
+   key->attrs = brw->vs.prog_data->outputs_written;   /* CACHE_NEW_VS_PROG */
+   key->primitive = gs_prim[brw->primitive];          /* BRW_NEW_PRIMITIVE */
+   key->hint_gs_always = 0;                            /* debug code? */
+
+   /* Quads, quad strips and line loops always need a GS program. */
+   switch (brw->primitive) {
+   case GL_QUADS: case GL_QUAD_STRIP: case GL_LINE_LOOP:
+      key->need_gs_prog = 1;
+      break;
+   default:
+      key->need_gs_prog = key->hint_gs_always;
+   }
+}
+
+/* Pick, and on a cache miss compile, the GS program for the current state. */
+static void upload_gs_prog( struct brw_context *brw )
+{
+   struct brw_gs_prog_key key;
+
+   populate_key(brw, &key);
+
+   /* Toggling the GS stage on or off raises CACHE_NEW_GS_PROG. */
+   if (brw->gs.prog_active != key.need_gs_prog) {
+      brw->gs.prog_active = key.need_gs_prog;
+      brw->state.dirty.cache |= CACHE_NEW_GS_PROG;
+   }
+
+   if (!brw->gs.prog_active)
+      return;
+
+   /* Only build a program when the cache has none for this key. */
+   if (!search_cache(brw, &key))
+      compile_gs_prog( brw, &key );
+}
+
+
+/* Tracked-state entry for upload_gs_prog; .dirty.mesa stays at its implicit 0. */
+const struct brw_tracked_state brw_gs_prog = {
+   .update = upload_gs_prog,
+   .dirty = {
+      .brw   = BRW_NEW_PRIMITIVE,
+      .cache = CACHE_NEW_VS_PROG,
+   },
+};
diff --git a/i965/brw_gs.h b/i965/brw_gs.h
new file mode 100644
index 0000000..29a4e80
--- /dev/null
+++ b/i965/brw_gs.h
@@ -0,0 +1,75 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+ 
+
+#ifndef BRW_GS_H
+#define BRW_GS_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+#define MAX_GS_VERTS (4)	     /* capacity of brw_gs_compile.reg.vertex[] */
+
+struct brw_gs_prog_key {		/* 4+16+1+1+10 = 32 bits; passed as the key to the GS program cache */
+   GLuint primitive:4;			/* gs_prim[] result, see populate_key() */
+   GLuint attrs:16;		/* vs outputs_written; NOTE(review): only 16 bits kept -- confirm that suffices */
+   GLuint hint_gs_always:1;		/* populate_key() always stores 0 */
+   GLuint need_gs_prog:1;		/* copied to brw->gs.prog_active by upload_gs_prog() */
+   GLuint pad:10;			/* zeroed by the memset in populate_key() */
+};
+
+struct brw_gs_compile {
+   struct brw_compile func;		/* passed to brw_init_compile() and brw_get_program() */
+   struct brw_gs_prog_key key;		/* copy of the key being compiled */
+   struct brw_gs_prog_data prog_data;	/* urb_read_length and total_grf are set in brw_gs_alloc_regs() */
+   
+   struct {
+      struct brw_reg R0;		/* grf 0, retyped to UD */
+      struct brw_reg vertex[MAX_GS_VERTS];	/* first register of each vertex */
+   } reg;
+
+   /* 3 different ways of expressing vertex size:
+    */
+   GLuint nr_attrs;			/* number of bits set in key.attrs */
+   GLuint nr_regs;			/* (nr_attrs + 1) / 2 + 1 */
+   GLuint nr_bytes;			/* nr_regs * REG_SIZE */
+};
+
+#define ATTR_SIZE  (4*4)	/* presumably bytes per attribute (4 floats x 4 bytes) -- TODO confirm */
+
+void brw_gs_quads( struct brw_gs_compile *c );
+void brw_gs_quad_strip( struct brw_gs_compile *c );
+void brw_gs_tris( struct brw_gs_compile *c );
+void brw_gs_lines( struct brw_gs_compile *c );
+void brw_gs_points( struct brw_gs_compile *c );
+
+#endif
diff --git a/i965/brw_gs_emit.c b/i965/brw_gs_emit.c
new file mode 100644
index 0000000..9abb94d
--- /dev/null
+++ b/i965/brw_gs_emit.c
@@ -0,0 +1,156 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+ 
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+
+#include "shader/program.h"
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_gs.h"
+
+/* Lay out the static register usage: R0 in grf 0, then nr_verts vertices
+ * of c->nr_regs registers each.  Also records the URB read length and the
+ * total register count in c->prog_data.
+ */
+static void brw_gs_alloc_regs( struct brw_gs_compile *c,
+                               GLuint nr_verts )
+{
+   GLuint next_grf = 1;         /* grf 0 is taken by R0 */
+   GLuint v;
+
+   c->reg.R0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD);
+
+   /* Payload vertices plus space for more generated vertices: */
+   for (v = 0; v < nr_verts; v++, next_grf += c->nr_regs)
+      c->reg.vertex[v] = brw_vec4_grf(next_grf, 0);
+
+   c->prog_data.total_grf = next_grf;
+   c->prog_data.urb_read_length = c->nr_regs;
+}
+
+
+/* Emit code that writes one vertex to the URB.
+ *   vert   - first register of the vertex to send
+ *   last   - set for the final vertex; no further URB entry is requested
+ *   header - value moved into dword 2 of R0 (PrimType / PrimStart bits)
+ */
+static void brw_gs_emit_vue(struct brw_gs_compile *c, struct brw_reg vert,
+                            GLboolean last, GLuint header)
+{
+   struct brw_compile *p = &c->func;
+   const GLboolean allocate = !last;
+   struct brw_reg dest;
+
+   /* Overwrite PrimType and PrimStart in the message header: */
+   brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
+
+   /* Copy the vertex into m1..mN+1: */
+   brw_copy8(p, brw_message_reg(1), vert, c->nr_regs);
+
+   /* Every vertex is sent as a separate urb write that instantiates its
+    * own urb entry (unlike brw_sf_emit.c, where successive writes build up
+    * a single entry), so all but the last write also allocate a new one.
+    */
+   if (allocate)
+      dest = c->reg.R0;
+   else
+      dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UD);
+
+   brw_urb_WRITE(p, dest, 0, c->reg.R0, allocate,
+                 1,                     /* used */
+                 c->nr_regs + 1,        /* msg length */
+                 allocate ? 1 : 0,      /* response length */
+                 allocate ? 0 : 1,      /* eot */
+                 1,                     /* writes_complete */
+                 0,                     /* urb offset */
+                 BRW_URB_SWIZZLE_NONE);
+}
+
+
+
+void brw_gs_quads( struct brw_gs_compile *c )
+{
+   const GLuint poly = _3DPRIM_POLYGON << 2;
+   brw_gs_alloc_regs(c, 4);
+
+   /* Use polygons for correct edgeflag behaviour. Note that vertex 3
+    * is the PV for quads, but vertex 0 for polygons: */
+   brw_gs_emit_vue(c, c->reg.vertex[3], GL_FALSE, poly | R02_PRIM_START);
+   brw_gs_emit_vue(c, c->reg.vertex[0], GL_FALSE, poly);
+   brw_gs_emit_vue(c, c->reg.vertex[1], GL_FALSE, poly);
+   brw_gs_emit_vue(c, c->reg.vertex[2], GL_TRUE,  poly | R02_PRIM_END);
+}
+
+void brw_gs_quad_strip( struct brw_gs_compile *c )
+{
+   const GLuint poly = _3DPRIM_POLYGON << 2;   /* emitted in the order 2, 3, 0, 1 */
+   brw_gs_alloc_regs(c, 4);
+   brw_gs_emit_vue(c, c->reg.vertex[2], GL_FALSE, poly | R02_PRIM_START);
+   brw_gs_emit_vue(c, c->reg.vertex[3], GL_FALSE, poly);
+   brw_gs_emit_vue(c, c->reg.vertex[0], GL_FALSE, poly);
+   brw_gs_emit_vue(c, c->reg.vertex[1], GL_TRUE,  poly | R02_PRIM_END);
+}
+
+void brw_gs_tris( struct brw_gs_compile *c )   /* three vertices, TRILIST header */
+{
+   brw_gs_alloc_regs(c, 3);
+   brw_gs_emit_vue(c, c->reg.vertex[0], GL_FALSE, (_3DPRIM_TRILIST << 2) | R02_PRIM_START);
+   brw_gs_emit_vue(c, c->reg.vertex[1], GL_FALSE, _3DPRIM_TRILIST << 2);
+   brw_gs_emit_vue(c, c->reg.vertex[2], GL_TRUE,  (_3DPRIM_TRILIST << 2) | R02_PRIM_END);
+}
+
+void brw_gs_lines( struct brw_gs_compile *c )   /* two vertices, LINESTRIP header */
+{
+   brw_gs_alloc_regs(c, 2);
+   brw_gs_emit_vue(c, c->reg.vertex[0], GL_FALSE, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
+   brw_gs_emit_vue(c, c->reg.vertex[1], GL_TRUE,  (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END);
+}
+
+void brw_gs_points( struct brw_gs_compile *c )   /* one vertex: both start and end */
+{
+   brw_gs_alloc_regs(c, 1);
+   brw_gs_emit_vue(c, c->reg.vertex[0], GL_TRUE, (_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END);
+}
+
+
+
+
+
+
+
+
diff --git a/i965/brw_gs_state.c b/i965/brw_gs_state.c
new file mode 100644
index 0000000..5826c01
--- /dev/null
+++ b/i965/brw_gs_state.c
@@ -0,0 +1,89 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+ 
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "macros.h"
+
+
+
+/* Assemble the GS unit state and hand it to brw_cache_data(); the value
+ * returned is kept in brw->gs.state_gs_offset. */
+static void upload_gs_unit( struct brw_context *brw )
+{
+   struct brw_gs_unit_state gs;
+
+   memset(&gs, 0, sizeof(gs));
+
+   /* CONSTANT */
+   gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+   gs.thread1.single_program_flow = 1;
+   gs.thread3.dispatch_grf_start_reg = 1;
+   gs.thread3.const_urb_entry_read_offset = 0;
+   gs.thread3.const_urb_entry_read_length = 0;
+   gs.thread3.urb_entry_read_offset = 0;
+   gs.thread4.max_threads = 0; /* Hardware requirement */
+
+   /* CACHE_NEW_GS_PROG: without an active program the kernel pointer and
+    * register count keep their zeroed values and the read length is 1.
+    */
+   if (!brw->gs.prog_active) {
+      gs.thread3.urb_entry_read_length = 1;
+   }
+   else {
+      gs.thread0.grf_reg_count = ((brw->gs.prog_data->total_grf-1) & ~15) / 16;
+      gs.thread0.kernel_start_pointer = brw->gs.prog_gs_offset >> 6;
+      gs.thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length;
+   }
+
+   /* BRW_NEW_URB_FENCE */
+   gs.thread4.nr_urb_entries = brw->urb.nr_gs_entries;
+   gs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
+
+   if (INTEL_DEBUG & DEBUG_STATS)
+      gs.thread4.stats_enable = 1;
+
+   brw->gs.state_gs_offset = brw_cache_data( &brw->cache[BRW_GS_UNIT], &gs );
+}
+
+
+/* Tracked-state entry for upload_gs_unit.  NOTE(review): nothing CURBE-related
+ * is read in upload_gs_unit(); confirm BRW_NEW_CURBE_OFFSETS is still needed. */
+const struct brw_tracked_state brw_gs_unit = {
+   .update = upload_gs_unit,
+   .dirty = {
+      .brw   = BRW_NEW_CURBE_OFFSETS | BRW_NEW_URB_FENCE,
+      .cache = CACHE_NEW_GS_PROG,
+   },
+};
diff --git a/i965/brw_hal.c b/i965/brw_hal.c
new file mode 100644
index 0000000..3126102
--- /dev/null
+++ b/i965/brw_hal.c
@@ -0,0 +1,52 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+
+#include "intel_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_hal.h"
+#include <dlfcn.h>
+
+static void *brw_hal_lib;	/* dlopen() result; NULL until a load succeeds */
+static GLboolean brw_hal_tried;	/* set once the single dlopen() attempt has been made */
+
+/* Resolve symbol in the optional HAL library, loading the library on first use. */
+void *
+brw_hal_find_symbol (char *symbol)
+{
+    if (!brw_hal_tried)
+    {
+	/* Read-only: points into the environment or at a string literal. */
+	const char *brw_hal_name = getenv ("INTEL_HAL");
+	if (!brw_hal_name)
+	    brw_hal_name = "/usr/lib/xorg/modules/drivers/intel_hal.so";
+
+	/* Attempted once only; a failed dlopen leaves brw_hal_lib NULL for good. */
+	brw_hal_lib = dlopen (brw_hal_name, RTLD_LAZY|RTLD_LOCAL);
+	brw_hal_tried = GL_TRUE;
+    }
+    return brw_hal_lib ? dlsym (brw_hal_lib, symbol) : NULL;
+}
diff --git a/i965/brw_hal.h b/i965/brw_hal.h
new file mode 100644
index 0000000..cd86e39
--- /dev/null
+++ b/i965/brw_hal.h
@@ -0,0 +1,27 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+
+void *
+brw_hal_find_symbol (char *symbol);
diff --git a/i965/brw_metaops.c b/i965/brw_metaops.c
new file mode 100644
index 0000000..6e030f1
--- /dev/null
+++ b/i965/brw_metaops.c
@@ -0,0 +1,564 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  *   frame buffer texture by Gary Wong <gtw@gnu.org>
+  */
+ 
+
+
+#include "glheader.h"
+#include "context.h"
+#include "macros.h"
+
+#include "shader/arbprogparse.h"
+
+#include "intel_screen.h"
+#include "intel_batchbuffer.h"
+#include "intel_regions.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_draw.h"
+#include "brw_fallback.h"
+
+/* Point brw->attribs.ATTRIB at the ATTRIB member of a GL context.
+ * The expansion uses a variable named `ctx' that must be visible where
+ * the macro is invoked; the STRUCT argument is not used.
+ */
+#define INIT(brw, STRUCT, ATTRIB) 		\
+do {						\
+   brw->attribs.ATTRIB = &ctx->ATTRIB;		\
+} while (0)
+
+/* Allocate a `struct STRUCT' with MALLOC_STRUCT, store it in
+ * brw->metaops.attribs.ATTRIB and fill it with a byte copy of whatever
+ * brw->attribs.ATTRIB currently points at.  The allocation result is
+ * not checked before the memcpy.
+ */
+#define DUP(brw, STRUCT, ATTRIB) 		\
+do {						\
+   brw->metaops.attribs.ATTRIB = MALLOC_STRUCT(STRUCT);	\
+   memcpy(brw->metaops.attribs.ATTRIB, 			\
+	  brw->attribs.ATTRIB,			\
+	  sizeof(struct STRUCT));		\
+} while (0)
+
+
+/* Make brw->attribs.ATTRIB refer to the metaops copy and flag STATE in
+ * brw->state.dirty.mesa.
+ */
+#define INSTALL(brw, ATTRIB, STATE)		\
+do {						\
+   brw->attribs.ATTRIB = brw->metaops.attribs.ATTRIB;	\
+   brw->state.dirty.mesa |= STATE;		\
+} while (0)
+
+/* Point brw->attribs.ATTRIB back at the member embedded in
+ * brw->intel.ctx and flag STATE in brw->state.dirty.mesa.
+ */
+#define RESTORE(brw, ATTRIB, STATE)			\
+do {							\
+   brw->attribs.ATTRIB = &brw->intel.ctx.ATTRIB;	\
+   brw->state.dirty.mesa |= STATE;			\
+} while (0)
+
+/* Create the metaops-private copy of each attribute group.  Every DUP
+ * allocates a new struct and initializes it from the group that
+ * brw->attribs currently points at, so brw->attribs must already be
+ * set up when this runs.  Called from brw_init_metaops().
+ */
+static void init_attribs( struct brw_context *brw )
+{
+   DUP(brw, gl_colorbuffer_attrib, Color);
+   DUP(brw, gl_depthbuffer_attrib, Depth);
+   DUP(brw, gl_fog_attrib, Fog);
+   DUP(brw, gl_hint_attrib, Hint);
+   DUP(brw, gl_light_attrib, Light);
+   DUP(brw, gl_line_attrib, Line);
+   DUP(brw, gl_point_attrib, Point);
+   DUP(brw, gl_polygon_attrib, Polygon);
+   DUP(brw, gl_scissor_attrib, Scissor);
+   DUP(brw, gl_stencil_attrib, Stencil);
+   DUP(brw, gl_texture_attrib, Texture);
+   DUP(brw, gl_transform_attrib, Transform);
+   DUP(brw, gl_viewport_attrib, Viewport);
+   DUP(brw, gl_vertex_program_state, VertexProgram);
+   DUP(brw, gl_fragment_program_state, FragmentProgram);
+}
+
+/* Switch every brw->attribs pointer over to its metaops copy and flag
+ * the corresponding _NEW_* bit in brw->state.dirty.mesa for each group.
+ */
+static void install_attribs( struct brw_context *brw )
+{
+   INSTALL(brw, Color, _NEW_COLOR);
+   INSTALL(brw, Depth, _NEW_DEPTH);
+   INSTALL(brw, Fog, _NEW_FOG);
+   INSTALL(brw, Hint, _NEW_HINT);
+   INSTALL(brw, Light, _NEW_LIGHT);
+   INSTALL(brw, Line, _NEW_LINE);
+   INSTALL(brw, Point, _NEW_POINT);
+   INSTALL(brw, Polygon, _NEW_POLYGON);
+   INSTALL(brw, Scissor, _NEW_SCISSOR);
+   INSTALL(brw, Stencil, _NEW_STENCIL);
+   INSTALL(brw, Texture, _NEW_TEXTURE);
+   INSTALL(brw, Transform, _NEW_TRANSFORM);
+   INSTALL(brw, Viewport, _NEW_VIEWPORT);
+   INSTALL(brw, VertexProgram, _NEW_PROGRAM);
+   INSTALL(brw, FragmentProgram, _NEW_PROGRAM);
+}
+
+/* Inverse of install_attribs(): point every brw->attribs pointer back
+ * at the state embedded in brw->intel.ctx and flag the corresponding
+ * _NEW_* bit in brw->state.dirty.mesa for each group.
+ */
+static void restore_attribs( struct brw_context *brw )
+{
+   RESTORE(brw, Color, _NEW_COLOR);
+   RESTORE(brw, Depth, _NEW_DEPTH);
+   RESTORE(brw, Fog, _NEW_FOG);
+   RESTORE(brw, Hint, _NEW_HINT);
+   RESTORE(brw, Light, _NEW_LIGHT);
+   RESTORE(brw, Line, _NEW_LINE);
+   RESTORE(brw, Point, _NEW_POINT);
+   RESTORE(brw, Polygon, _NEW_POLYGON);
+   RESTORE(brw, Scissor, _NEW_SCISSOR);
+   RESTORE(brw, Stencil, _NEW_STENCIL);
+   RESTORE(brw, Texture, _NEW_TEXTURE);
+   RESTORE(brw, Transform, _NEW_TRANSFORM);
+   RESTORE(brw, Viewport, _NEW_VIEWPORT);
+   RESTORE(brw, VertexProgram, _NEW_PROGRAM);
+   RESTORE(brw, FragmentProgram, _NEW_PROGRAM);
+}
+
+
+/* Vertex program used for meta operations: copies the incoming vertex
+ * color and position straight to the results.
+ */
+static const char *vp_prog =
+      "!!ARBvp1.0\n"
+      "MOV  result.color, vertex.color;\n"
+      "MOV  result.position, vertex.position;\n"
+      "END\n";
+
+/* Untextured fragment program: outputs the fragment color unchanged. */
+static const char *fp_prog =
+      "!!ARBfp1.0\n"
+      "MOV result.color, fragment.color;\n"
+      "END\n";
+
+/* Textured fragment program: computes
+ *    a = (fragment.position + program.local[0]) * program.local[1]
+ * samples 2D texture unit 0 at `a' for the output color, and writes the
+ * output depth from fragment.position.  The two local parameters are
+ * filled in by meta_frame_buffer_texture().
+ */
+static const char *fp_tex_prog =
+      "!!ARBfp1.0\n"
+      "TEMP a;\n"
+      "ADD a, fragment.position, program.local[0];\n"
+      "MUL a, a, program.local[1];\n"
+      "TEX result.color, a, texture[0], 2D;\n"
+      "MOV result.depth.z, fragment.position;\n"
+      "END\n";
+
+/* One-time creation of the objects used while meta state is installed:
+ * a vertex buffer object plus the vertex program and the two fragment
+ * programs parsed from the vp_prog / fp_prog / fp_tex_prog strings.
+ * Invoked from install_meta_state() when brw->metaops.vbo is not yet
+ * set.  None of the driver-callback results are checked here.
+ *
+ * Derived values of importance:
+ *
+ *   FragmentProgram->_Current
+ *   VertexProgram->_Enabled
+ *   brw->vertex_program
+ *   DrawBuffer->_ColorDrawBufferMask[0]
+ * 
+ *
+ * More if drawpixels-through-texture is added.  
+ */
+static void init_metaops_state( struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+
+   brw->metaops.vbo = ctx->Driver.NewBufferObject(ctx, 1, GL_ARRAY_BUFFER_ARB);
+
+   /* Give the buffer an initial 4096-byte store with no data;
+    * meta_draw_quad() re-specifies the store before each draw.
+    */
+   ctx->Driver.BufferData(ctx,
+			  GL_ARRAY_BUFFER_ARB,
+			  4096,
+			  NULL,
+			  GL_DYNAMIC_DRAW_ARB,
+			  brw->metaops.vbo);
+
+   /* Create the three program objects ... */
+   brw->metaops.fp = (struct gl_fragment_program *)
+      ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 1 );
+
+   brw->metaops.fp_tex = (struct gl_fragment_program *)
+      ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 1 );
+
+   brw->metaops.vp = (struct gl_vertex_program *)
+      ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 1 );
+
+   /* ... and fill each one in by parsing its ARB program text. */
+   _mesa_parse_arb_fragment_program(ctx, GL_FRAGMENT_PROGRAM_ARB, 
+				    fp_prog, strlen(fp_prog),
+				    brw->metaops.fp);
+
+   _mesa_parse_arb_fragment_program(ctx, GL_FRAGMENT_PROGRAM_ARB, 
+				    fp_tex_prog, strlen(fp_tex_prog),
+				    brw->metaops.fp_tex);
+
+   _mesa_parse_arb_vertex_program(ctx, GL_VERTEX_PROGRAM_ARB, 
+				  vp_prog, strlen(vp_prog),
+				  brw->metaops.vp);
+
+   /* Select the programs in the metaops copies of the program state.
+    * NOTE(review): the vertex program is stored in ->Current while the
+    * fragment program is stored in ->_Current -- confirm this asymmetry
+    * is intended.
+    */
+   brw->metaops.attribs.VertexProgram->Current = brw->metaops.vp;
+   brw->metaops.attribs.VertexProgram->_Enabled = GL_TRUE;
+
+   brw->metaops.attribs.FragmentProgram->_Current = brw->metaops.fp;
+}
+
+/* Select GL_FLAT shading in the metaops copy of the light state and
+ * flag _NEW_LIGHT.
+ */
+static void meta_flat_shade( struct intel_context *intel )
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   struct gl_light_attrib *light = brw->metaops.attribs.Light;
+
+   light->ShadeModel = GL_FLAT;
+
+   brw->state.dirty.mesa |= _NEW_LIGHT;
+}
+
+
+/* Disable stencil in the metaops copy of the stencil state: clears the
+ * Enabled flag and element 0 of the write mask, then flags _NEW_STENCIL.
+ */
+static void meta_no_stencil_write( struct intel_context *intel )
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   struct gl_stencil_attrib *stencil = brw->metaops.attribs.Stencil;
+
+   stencil->WriteMask[0] = GL_FALSE;
+   stencil->Enabled = GL_FALSE;
+
+   brw->state.dirty.mesa |= _NEW_STENCIL;
+}
+
+/* Turn off both the depth test and depth writes in the metaops copy of
+ * the depth state, then flag _NEW_DEPTH.
+ */
+static void meta_no_depth_write( struct intel_context *intel )
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   struct gl_depthbuffer_attrib *depth = brw->metaops.attribs.Depth;
+
+   depth->Mask = GL_FALSE;
+   depth->Test = GL_FALSE;
+
+   brw->state.dirty.mesa |= _NEW_DEPTH;
+}
+
+
+/* Configure the metaops copy of the depth state so that every fragment
+ * passes the depth test and is written: test and mask enabled, compare
+ * function GL_ALWAYS.  Flags _NEW_DEPTH.
+ */
+static void meta_depth_replace( struct intel_context *intel )
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   struct gl_depthbuffer_attrib *depth = brw->metaops.attribs.Depth;
+
+   /* Equivalent of:
+    *    ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_TRUE )
+    *    ctx->Driver.DepthMask( ctx, GL_TRUE )
+    *    ctx->Driver.DepthFunc( ctx, GL_ALWAYS )
+    */
+   depth->Test = GL_TRUE;
+   depth->Mask = GL_TRUE;
+   depth->Func = GL_ALWAYS;
+
+   brw->state.dirty.mesa |= _NEW_DEPTH;
+}
+
+
+/* Configure element 0 of the metaops stencil state so that the stencil
+ * function always passes and every outcome (fail, z-fail, z-pass) uses
+ * GL_REPLACE with reference value s_clear.  s_mask becomes the write
+ * mask; the value mask is set to 0xff.  Flags _NEW_STENCIL.
+ */
+static void meta_stencil_replace( struct intel_context *intel,
+				 GLuint s_mask,
+				 GLuint s_clear)
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   struct gl_stencil_attrib *stencil = brw->metaops.attribs.Stencil;
+
+   stencil->Enabled = GL_TRUE;
+
+   stencil->Function[0] = GL_ALWAYS;
+   stencil->Ref[0] = s_clear;
+   stencil->ValueMask[0] = 0xff;
+   stencil->WriteMask[0] = s_mask;
+
+   stencil->FailFunc[0] = GL_REPLACE;
+   stencil->ZFailFunc[0] = GL_REPLACE;
+   stencil->ZPassFunc[0] = GL_REPLACE;
+
+   brw->state.dirty.mesa |= _NEW_STENCIL;
+}
+
+
+/* Set the color write mask in the metaops copy of the color state.
+ * A false `state' clears all four mask components; otherwise the mask
+ * is copied from the context's own color state.  Flags _NEW_COLOR.
+ */
+static void meta_color_mask( struct intel_context *intel, GLboolean state )
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   struct gl_colorbuffer_attrib *color = brw->metaops.attribs.Color;
+
+   if (!state) {
+      ASSIGN_4V(color->ColorMask, 0, 0, 0, 0);
+   }
+   else {
+      COPY_4V(color->ColorMask, 
+	      brw->intel.ctx.Color.ColorMask); 
+   }
+
+   brw->state.dirty.mesa |= _NEW_COLOR;
+}
+
+/* Select the untextured fragment program and mark texture unit 0 (and
+ * the aggregate enabled-unit masks) as disabled in the metaops copies
+ * of the program and texture state.  Flags _NEW_TEXTURE and
+ * _NEW_PROGRAM.
+ */
+static void meta_no_texture( struct intel_context *intel )
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   struct gl_texture_attrib *tex = brw->metaops.attribs.Texture;
+   struct gl_texture_unit *unit0 = &tex->Unit[ 0 ];
+
+   brw->metaops.attribs.FragmentProgram->_Current = brw->metaops.fp;
+
+   tex->CurrentUnit = 0;
+   tex->_EnabledUnits = 0;
+   tex->_EnabledCoordUnits = 0;
+
+   unit0->Enabled = 0;
+   unit0->_ReallyEnabled = 0;
+
+   brw->state.dirty.mesa |= _NEW_TEXTURE | _NEW_PROGRAM;
+}
+
+/* Enable 2D texturing on unit 0 of the metaops texture state and bind
+ * intel->frame_buffer_texobj to it.  Flags _NEW_TEXTURE and
+ * _NEW_PROGRAM.
+ */
+static void meta_texture_blend_replace(struct intel_context *intel)
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   struct gl_texture_attrib *tex = brw->metaops.attribs.Texture;
+   struct gl_texture_unit *unit0 = &tex->Unit[ 0 ];
+
+   tex->CurrentUnit = 0;
+   tex->_EnabledUnits = 1;
+   tex->_EnabledCoordUnits = 1;
+
+   unit0->Enabled = TEXTURE_2D_BIT;
+   unit0->_ReallyEnabled = TEXTURE_2D_BIT;
+   unit0->Current2D = intel->frame_buffer_texobj;
+   unit0->_Current = intel->frame_buffer_texobj;
+
+   brw->state.dirty.mesa |= _NEW_TEXTURE | _NEW_PROGRAM;
+}
+
+/* Point a subset of brw->attribs (Color, Depth, Fog, Scissor, Stencil,
+ * Texture and FragmentProgram) back at the state embedded in the GL
+ * context instead of the metaops copies, flagging the matching _NEW_*
+ * bit for each.  The remaining groups are left as they are.
+ */
+static void meta_import_pixel_state(struct intel_context *intel)
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   
+   RESTORE(brw, Color, _NEW_COLOR);
+   RESTORE(brw, Depth, _NEW_DEPTH);
+   RESTORE(brw, Fog, _NEW_FOG);
+   RESTORE(brw, Scissor, _NEW_SCISSOR);
+   RESTORE(brw, Stencil, _NEW_STENCIL);
+   RESTORE(brw, Texture, _NEW_TEXTURE);
+   RESTORE(brw, FragmentProgram, _NEW_PROGRAM);
+}
+
+/* Select the textured fragment program for meta drawing and load its
+ * two local parameters:
+ *
+ *    local[0] = (xoff, yoff, 0, 0)
+ *    local[1] = (1 / region->pitch, -1 / region->height, 0, 1)
+ *
+ * where `region' is the current draw buffer region.  Flags
+ * _NEW_PROGRAM.
+ */
+static void meta_frame_buffer_texture( struct intel_context *intel,
+				       GLint xoff, GLint yoff )
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   struct intel_region *region = intel_drawbuf_region( intel );
+   struct gl_fragment_program *tex_fp = brw->metaops.fp_tex;
+
+   INSTALL(brw, FragmentProgram, _NEW_PROGRAM);
+   brw->metaops.attribs.FragmentProgram->_Current = tex_fp;
+
+   /* Unfortunate but apparently required: _mesa_load_state_parameters
+    * is called later to look up the local params set below, and it
+    * consults ctx.FragmentProgram rather than
+    * brw->attribs.FragmentProgram.
+    */
+   intel->ctx.FragmentProgram.Current = tex_fp;
+
+   ASSIGN_4V(tex_fp->Base.LocalParams[ 0 ], xoff, yoff, 0.0, 0.0);
+   ASSIGN_4V(tex_fp->Base.LocalParams[ 1 ],
+	     1.0 / region->pitch, -1.0 / region->height, 0.0, 1.0);
+
+   brw->state.dirty.mesa |= _NEW_PROGRAM;
+}
+
+
+/* Redirect rendering to the given color and depth regions and flag
+ * _NEW_BUFFERS.  The regions previously in brw->state are stashed in
+ * brw->metaops.saved_* only when no draw region is currently saved;
+ * install_meta_state() already saves them unconditionally, and
+ * leave_meta_state() puts the saved ones back.
+ */
+static void meta_draw_region( struct intel_context *intel,
+			     struct intel_region *draw_region,
+			     struct intel_region *depth_region )
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+
+   if (!brw->metaops.saved_draw_region) {
+      brw->metaops.saved_draw_region = brw->state.draw_region;
+      brw->metaops.saved_depth_region = brw->state.depth_region;
+   }
+
+   brw->state.draw_region = draw_region;
+   brw->state.depth_region = depth_region;
+
+   brw->state.dirty.mesa |= _NEW_BUFFERS;
+}
+
+
+/* Draw a single-colored quad with corners (x0,y0), (x1,y0), (x1,y1),
+ * (x0,y1) at depth z, submitted as a 4-vertex GL_TRIANGLE_FAN through
+ * brw_draw_prims().
+ *
+ * The four positions followed by one RGBA color are uploaded into
+ * brw->metaops.vbo, and two client arrays are built that reference the
+ * buffer by offset.  The texture coordinate arguments s0, s1, t0 and t1
+ * are accepted but not used.
+ */
+static void meta_draw_quad(struct intel_context *intel, 
+			   GLfloat x0, GLfloat x1,
+			   GLfloat y0, GLfloat y1, 
+			   GLfloat z,
+			   GLubyte red, GLubyte green,
+			   GLubyte blue, GLubyte alpha,
+			   GLfloat s0, GLfloat s1,
+			   GLfloat t0, GLfloat t1)
+{
+   GLcontext *ctx = &intel->ctx;
+   struct brw_context *brw = brw_context(&intel->ctx);
+   struct gl_client_array pos_array;
+   struct gl_client_array color_array;
+   struct gl_client_array *attribs[VERT_ATTRIB_MAX];
+   struct _mesa_prim prim[1];
+   GLfloat pos[4][3];
+   GLubyte color[4];
+
+   /* Re-specify the buffer store, sized for the positions plus the
+    * color, before filling it with BufferSubData below.
+    */
+   ctx->Driver.BufferData(ctx,
+			  GL_ARRAY_BUFFER_ARB,
+			  sizeof(pos) + sizeof(color),
+			  NULL,
+			  GL_DYNAMIC_DRAW_ARB,
+			  brw->metaops.vbo);
+
+   pos[0][0] = x0;
+   pos[0][1] = y0;
+   pos[0][2] = z;
+
+   pos[1][0] = x1;
+   pos[1][1] = y0;
+   pos[1][2] = z;
+
+   pos[2][0] = x1;
+   pos[2][1] = y1;
+   pos[2][2] = z;
+
+   pos[3][0] = x0;
+   pos[3][1] = y1;
+   pos[3][2] = z;
+
+
+   /* Positions occupy the start of the buffer ... */
+   ctx->Driver.BufferSubData(ctx,
+			     GL_ARRAY_BUFFER_ARB,
+			     0,
+			     sizeof(pos),
+			     pos,
+			     brw->metaops.vbo);
+
+   color[0] = red;
+   color[1] = green;
+   color[2] = blue;
+   color[3] = alpha;
+
+   /* ... and the color is stored immediately after them. */
+   ctx->Driver.BufferSubData(ctx,
+			     GL_ARRAY_BUFFER_ARB,
+			     sizeof(pos),
+			     sizeof(color),
+			     color,
+			     brw->metaops.vbo);
+
+   /* Ignoring texture coords. 
+    */
+
+   /* Start with every attribute slot NULL; only position and color0
+    * are supplied.
+    */
+   memset(attribs, 0, VERT_ATTRIB_MAX * sizeof(*attribs));
+
+   /* Position: three floats per vertex, at offset 0 within the vbo. */
+   attribs[VERT_ATTRIB_POS] = &pos_array;
+   attribs[VERT_ATTRIB_POS]->Ptr = 0;
+   attribs[VERT_ATTRIB_POS]->Type = GL_FLOAT;
+   attribs[VERT_ATTRIB_POS]->Enabled = 1;
+   attribs[VERT_ATTRIB_POS]->Size = 3;
+   attribs[VERT_ATTRIB_POS]->StrideB = 3 * sizeof(GLfloat);
+   attribs[VERT_ATTRIB_POS]->Stride = 3 * sizeof(GLfloat);
+   attribs[VERT_ATTRIB_POS]->_MaxElement = 4;
+   attribs[VERT_ATTRIB_POS]->Normalized = 0;
+   attribs[VERT_ATTRIB_POS]->BufferObj = brw->metaops.vbo;
+
+   /* Color: four normalized unsigned bytes at offset sizeof(pos) within
+    * the vbo.  Stride is 0 and _MaxElement is 1 -- presumably so that
+    * all four vertices read the same single color; confirm against the
+    * draw code.
+    */
+   attribs[VERT_ATTRIB_COLOR0] = &color_array;
+   attribs[VERT_ATTRIB_COLOR0]->Ptr = (const GLubyte *)sizeof(pos);
+   attribs[VERT_ATTRIB_COLOR0]->Type = GL_UNSIGNED_BYTE;
+   attribs[VERT_ATTRIB_COLOR0]->Enabled = 1;
+   attribs[VERT_ATTRIB_COLOR0]->Size = 4;
+   attribs[VERT_ATTRIB_COLOR0]->StrideB = 0;
+   attribs[VERT_ATTRIB_COLOR0]->Stride = 0;
+   attribs[VERT_ATTRIB_COLOR0]->_MaxElement = 1;
+   attribs[VERT_ATTRIB_COLOR0]->Normalized = 1;
+   attribs[VERT_ATTRIB_COLOR0]->BufferObj = brw->metaops.vbo;
+   
+   /* Just ignoring texture coordinates for now. 
+    */
+
+   memset(prim, 0, sizeof(*prim));
+
+   prim[0].mode = GL_TRIANGLE_FAN;
+   prim[0].begin = 1;
+   prim[0].end = 1;
+   prim[0].weak = 0;
+   prim[0].pad = 0;
+   prim[0].start = 0;
+   prim[0].count = 4;
+
+   /* One primitive; the trailing NULL, 0, 3 are presumably "no index
+    * buffer" and the min/max vertex index -- confirm against the
+    * brw_draw_prims() prototype.
+    */
+   brw_draw_prims(&brw->intel.ctx, 
+		  (const struct gl_client_array **)attribs,
+		  prim, 1,
+		  NULL,
+		  0,
+		  3 );
+}
+
+
+/* Enter meta-operation mode.
+ *
+ * Creates the metaops objects on first use, switches brw->attribs to
+ * the metaops copies, selects the untextured, flat-shaded setup, and
+ * records what leave_meta_state() needs to put back: the draw buffer
+ * mask, the context's current fragment program and the draw/depth
+ * regions.  Finally marks metaops active and flags BRW_NEW_METAOPS.
+ */
+static void install_meta_state( struct intel_context *intel )
+{
+   GLcontext *ctx = &intel->ctx;
+   struct brw_context *brw = brw_context(ctx);
+
+   /* Lazy one-time initialization, keyed on the vbo being unset. */
+   if (!brw->metaops.vbo) {
+      init_metaops_state(brw);
+   }
+
+   install_attribs(brw);
+   
+   meta_no_texture(&brw->intel);
+   meta_flat_shade(&brw->intel);
+   brw->metaops.restore_draw_mask = ctx->DrawBuffer->_ColorDrawBufferMask[0];
+   brw->metaops.restore_fp = ctx->FragmentProgram.Current;
+
+   /* This works without adjusting refcounts.  Fix later? 
+    */
+   brw->metaops.saved_draw_region = brw->state.draw_region;
+   brw->metaops.saved_depth_region = brw->state.depth_region;
+   brw->metaops.active = 1;
+   
+   brw->state.dirty.brw |= BRW_NEW_METAOPS;
+}
+
+/* Leave meta-operation mode: undo what install_meta_state() (and any
+ * meta_* hooks called since) changed.
+ *
+ * Points brw->attribs back at the context state, restores the draw
+ * buffer mask and the context's current fragment program, reinstates
+ * the saved draw/depth regions and clears the saved pointers, marks
+ * metaops inactive, and flags _NEW_BUFFERS and BRW_NEW_METAOPS.
+ */
+static void leave_meta_state( struct intel_context *intel )
+{
+   GLcontext *ctx = &intel->ctx;
+   struct brw_context *brw = brw_context(ctx);
+
+   restore_attribs(brw);
+
+   ctx->DrawBuffer->_ColorDrawBufferMask[0] = brw->metaops.restore_draw_mask;
+   ctx->FragmentProgram.Current = brw->metaops.restore_fp;
+
+   brw->state.draw_region = brw->metaops.saved_draw_region;
+   brw->state.depth_region = brw->metaops.saved_depth_region;
+   brw->metaops.saved_draw_region = NULL;
+   brw->metaops.saved_depth_region = NULL;
+   brw->metaops.active = 0;
+
+   brw->state.dirty.mesa |= _NEW_BUFFERS;
+   brw->state.dirty.brw |= BRW_NEW_METAOPS;
+}
+
+
+
+/* Set up meta-operation support for a context: allocate the metaops
+ * copies of the attribute groups and plug this file's meta_* hooks into
+ * brw->intel.vtbl.  The vbo and programs are not created here; that is
+ * deferred to the first install_meta_state() call.
+ */
+void brw_init_metaops( struct brw_context *brw )
+{
+   init_attribs(brw);
+
+
+   brw->intel.vtbl.install_meta_state = install_meta_state;
+   brw->intel.vtbl.leave_meta_state = leave_meta_state;
+   brw->intel.vtbl.meta_no_depth_write = meta_no_depth_write;
+   brw->intel.vtbl.meta_no_stencil_write = meta_no_stencil_write;
+   brw->intel.vtbl.meta_stencil_replace = meta_stencil_replace;
+   brw->intel.vtbl.meta_depth_replace = meta_depth_replace;
+   brw->intel.vtbl.meta_color_mask = meta_color_mask;
+   brw->intel.vtbl.meta_no_texture = meta_no_texture;
+   brw->intel.vtbl.meta_import_pixel_state = meta_import_pixel_state;
+   brw->intel.vtbl.meta_frame_buffer_texture = meta_frame_buffer_texture;
+   brw->intel.vtbl.meta_draw_region = meta_draw_region;
+   brw->intel.vtbl.meta_draw_quad = meta_draw_quad;
+   brw->intel.vtbl.meta_texture_blend_replace = meta_texture_blend_replace;
+   /* The following two hooks are not provided by this driver. */
+/*    brw->intel.vtbl.meta_tex_rect_source = meta_tex_rect_source; */
+/*    brw->intel.vtbl.meta_draw_format = set_draw_format; */
+}
+
+/* Release metaops resources for a context.  Only the vertex buffer
+ * object is deleted, and only if it was ever created.
+ *
+ * NOTE(review): the three programs (deletion is commented out below)
+ * and the attribute copies allocated by init_attribs() are not released
+ * here -- confirm whether they are freed elsewhere or leak.
+ */
+void brw_destroy_metaops( struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+
+   if (brw->metaops.vbo)
+      ctx->Driver.DeleteBuffer( ctx, brw->metaops.vbo );
+
+/*    ctx->Driver.DeleteProgram( ctx, brw->metaops.fp ); */
+/*    ctx->Driver.DeleteProgram( ctx, brw->metaops.fp_tex ); */
+/*    ctx->Driver.DeleteProgram( ctx, brw->metaops.vp ); */
+}
diff --git a/i965/brw_misc_state.c b/i965/brw_misc_state.c
new file mode 100644
index 0000000..fe476c9
--- /dev/null
+++ b/i965/brw_misc_state.c
@@ -0,0 +1,560 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+ 
+
+
+#include "intel_batchbuffer.h"
+#include "intel_regions.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+
+
+
+
+/***********************************************************************
+ * Blend color
+ */
+
+/* Build the blend constant color packet from the four components of
+ * brw->attribs.Color->BlendColor and hand it to
+ * BRW_CACHED_BATCH_STRUCT.
+ */
+static void upload_blend_constant_color(struct brw_context *brw)
+{
+   struct brw_blend_constant_color bcc;
+   GLuint i;
+
+   memset(&bcc, 0, sizeof(bcc));
+   bcc.header.length = sizeof(bcc)/4-2;
+   bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR;
+
+   for (i = 0; i < 4; i++)
+      bcc.blend_constant_color[i] = brw->attribs.Color->BlendColor[i];
+
+   BRW_CACHED_BATCH_STRUCT(brw, &bcc);
+}
+
+
+/* State atom tying the update function above to the _NEW_COLOR bit. */
+const struct brw_tracked_state brw_blend_constant_color = {
+   .dirty = {
+      .mesa = _NEW_COLOR,
+      .brw = 0,
+      .cache = 0
+   },
+   .update = upload_blend_constant_color
+};
+
+/***********************************************************************
+ * Drawing rectangle -- Needed for AUB file only.
+ */
+/* Emit the drawing rectangle packet for the single-cliprect case.
+ *
+ * The rectangle is cliprect 0 clamped to [0, screen width] x
+ * [0, screen height]; the origin is the drawable's x/y position.
+ * Nothing is emitted when there is more than one cliprect.
+ *
+ * NOTE(review): when numClipRects is 0 this still reads pClipRects[0]
+ * -- confirm the array is valid in that case.
+ */
+static void upload_drawing_rect(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;
+   struct brw_drawrect bdr;
+   int x1, y1;
+   int x2, y2;
+
+   /* If there is a single cliprect, set it here.  Otherwise iterate
+    * over them in brw_draw_prim().
+    */
+   if (brw->intel.numClipRects > 1) 
+      return; 
+ 
+   x1 = brw->intel.pClipRects[0].x1;
+   y1 = brw->intel.pClipRects[0].y1;
+   x2 = brw->intel.pClipRects[0].x2;
+   y2 = brw->intel.pClipRects[0].y2;
+	 
+   /* Clamp the rectangle to the screen. */
+   if (x1 < 0) x1 = 0;
+   if (y1 < 0) y1 = 0;
+   if (x2 > intel->intelScreen->width) x2 = intel->intelScreen->width;
+   if (y2 > intel->intelScreen->height) y2 = intel->intelScreen->height;
+
+   memset(&bdr, 0, sizeof(bdr));
+   bdr.header.opcode = CMD_DRAW_RECT;
+   bdr.header.length = sizeof(bdr)/4 - 2;
+   bdr.xmin = x1;
+   bdr.ymin = y1;
+   bdr.xmax = x2;
+   bdr.ymax = y2;
+   bdr.xorg = dPriv->x;
+   bdr.yorg = dPriv->y;
+
+   /* Can't use BRW_CACHED_BATCH_STRUCT because this is also emitted
+    * uncached in brw_draw.c:
+    */
+   BRW_BATCH_STRUCT(brw, &bdr);
+}
+
+/* State atom tying the update function above to _NEW_WINDOW_POS. */
+const struct brw_tracked_state brw_drawing_rect = {
+   .dirty = {
+      .mesa = _NEW_WINDOW_POS,
+      .brw = 0,
+      .cache = 0
+   },
+   .update = upload_drawing_rect
+};
+
+/***********************************************************************
+ * Binding table pointers
+ */
+
+/* Emit the binding table pointers packet.  Only the WM entry is
+ * non-zero: it is set to brw->wm.bind_ss_offset.  The VS, GS, CLIP and
+ * SF entries are written as 0.
+ */
+static void upload_binding_table_pointers(struct brw_context *brw)
+{
+   struct brw_binding_table_pointers btp;
+   memset(&btp, 0, sizeof(btp));
+
+   /* The binding table has been emitted to the SS pool already, so we
+    * know what its offset is.  When the batch buffer is fired, the
+    * binding table and surface structs will get fixed up to point to
+    * where the textures actually landed, but that won't change the
+    * value of the offsets here:
+    */
+   btp.header.opcode = CMD_BINDING_TABLE_PTRS;
+   btp.header.length = sizeof(btp)/4 - 2;
+   btp.vs = 0;
+   btp.gs = 0;
+   btp.clp = 0;
+   btp.sf = 0;
+   btp.wm = brw->wm.bind_ss_offset;
+
+   BRW_CACHED_BATCH_STRUCT(brw, &btp);
+}
+
+/* State atom tying the update function above to CACHE_NEW_SURF_BIND. */
+const struct brw_tracked_state brw_binding_table_pointers = {
+   .dirty = {
+      .mesa = 0,
+      .brw = 0,
+      .cache = CACHE_NEW_SURF_BIND 
+   },
+   .update = upload_binding_table_pointers
+};
+
+
+/***********************************************************************
+ * Pipelined state pointers.  This is the key state packet from which
+ * the hardware chases pointers to all the uploaded state in VRAM.
+ */
+   
+/* Emit the pipelined state pointers packet.
+ *
+ * The VS, SF, WM and CC offsets are always filled in from the
+ * corresponding state_gs_offset values, shifted right by 5.  The GS
+ * entry is filled in and enabled only while brw->gs.prog_active is set,
+ * and the CLIP entry only while metaops are not active.
+ *
+ * BRW_NEW_PSP is flagged when BRW_CACHED_BATCH_STRUCT returns non-zero
+ * (presumably meaning the packet was actually emitted rather than
+ * matched by the cache -- confirm against that macro).
+ */
+static void upload_pipelined_state_pointers(struct brw_context *brw )
+{
+   struct brw_pipelined_state_pointers psp;
+   memset(&psp, 0, sizeof(psp));
+
+   psp.header.opcode = CMD_PIPELINED_STATE_POINTERS;
+   psp.header.length = sizeof(psp)/4 - 2;
+
+   psp.vs.offset = brw->vs.state_gs_offset >> 5;
+   psp.sf.offset = brw->sf.state_gs_offset >> 5;
+   psp.wm.offset = brw->wm.state_gs_offset >> 5;
+   psp.cc.offset = brw->cc.state_gs_offset >> 5;
+
+   /* GS gets turned on and off regularly.  Need to re-emit URB fence
+    * after this occurs.  
+    */
+   if (brw->gs.prog_active) {
+      psp.gs.offset = brw->gs.state_gs_offset >> 5;
+      psp.gs.enable = 1;
+   }
+
+   /* The clip unit is left disabled during meta operations. */
+   if (!brw->metaops.active) {
+      psp.clp.offset = brw->clip.state_gs_offset >> 5;
+      psp.clp.enable = 1;
+   }
+
+
+   if (BRW_CACHED_BATCH_STRUCT(brw, &psp))
+      brw->state.dirty.brw |= BRW_NEW_PSP;
+}
+
+/* State atom tying the update function above to BRW_NEW_METAOPS and to
+ * the cache bits of every fixed-function unit plus the GS program.
+ */
+const struct brw_tracked_state brw_pipelined_state_pointers = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_METAOPS,
+      .cache = (CACHE_NEW_VS_UNIT | 
+		CACHE_NEW_GS_UNIT | 
+		CACHE_NEW_GS_PROG | 
+		CACHE_NEW_CLIP_UNIT | 
+		CACHE_NEW_SF_UNIT | 
+		CACHE_NEW_WM_UNIT | 
+		CACHE_NEW_CC_UNIT)
+   },
+   .update = upload_pipelined_state_pointers
+};
+
+/* Emit, in this order, the pipelined state pointers, the URB fence and
+ * the constant buffer state as one combined update.
+ */
+static void upload_psp_urb_cbs(struct brw_context *brw )
+{
+   upload_pipelined_state_pointers(brw);
+   brw_upload_urb_fence(brw);
+   brw_upload_constant_buffer_state(brw);
+}
+
+
+/* State atom for the combined update above.  Its dirty bits are those
+ * of brw_pipelined_state_pointers plus BRW_NEW_URB_FENCE.
+ */
+const struct brw_tracked_state brw_psp_urb_cbs = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_URB_FENCE | BRW_NEW_METAOPS,
+      .cache = (CACHE_NEW_VS_UNIT | 
+		CACHE_NEW_GS_UNIT | 
+		CACHE_NEW_GS_PROG | 
+		CACHE_NEW_CLIP_UNIT | 
+		CACHE_NEW_SF_UNIT | 
+		CACHE_NEW_WM_UNIT | 
+		CACHE_NEW_CC_UNIT)
+   },
+   .update = upload_psp_urb_cbs
+};
+
+
+
+
+/***********************************************************************
+ * Depthbuffer - currently constant, but rotation would change that.
+ */
+
+/* Emit the depth buffer packet describing brw->state.depth_region.
+ *
+ * Pitch, format, tiling flag, base address and dimensions are all taken
+ * from that one region, so the packet stays self-consistent when
+ * meta_draw_region() has pointed brw->state.depth_region at a region
+ * other than intel->depth_region.
+ *
+ * The format is chosen from the region's bytes per pixel: 2 selects
+ * D16_UNORM; 4 selects D32_FLOAT when intel->depth_buffer_is_float is
+ * set and D24_UNORM_S8_UINT otherwise.  Any other value asserts and
+ * emits nothing.
+ */
+static void upload_depthbuffer(struct brw_context *brw)
+{
+   /* 0x79050003  Depth Buffer */
+   struct intel_context *intel = &brw->intel;
+   struct intel_region *region = brw->state.depth_region;
+   struct brw_depthbuffer bd;
+   memset(&bd, 0, sizeof(bd));
+
+   bd.header.bits.opcode = CMD_DEPTH_BUFFER;
+   /* The packet is one dword shorter when BRW_IS_IGD() is false. */
+   bd.header.bits.length = BRW_IS_IGD(brw) ? (sizeof(bd)/4-2) : (sizeof(bd)/4-3);
+   bd.dword1.bits.pitch = (region->pitch * region->cpp) - 1;
+   
+   switch (region->cpp) {
+   case 2:
+      bd.dword1.bits.format = BRW_DEPTHFORMAT_D16_UNORM;
+      break;
+   case 4:
+      if (intel->depth_buffer_is_float)
+	 bd.dword1.bits.format = BRW_DEPTHFORMAT_D32_FLOAT;
+      else
+	 bd.dword1.bits.format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+      break;
+   default:
+      assert(0);
+      return;
+   }
+
+   bd.dword1.bits.depth_offset_disable = 0; /* coordinate offset */
+
+   /* The depthbuffer can only use YMAJOR tiling...  This is a bit of
+    * a shame as it clashes with the 2d blitter which only supports
+    * XMAJOR tiling...  
+    */
+   bd.dword1.bits.tile_walk = BRW_TILEWALK_YMAJOR;
+   /* Use the tiling flag of the region actually being described,
+    * not that of intel->depth_region.
+    */
+   bd.dword1.bits.tiled_surface = region->tiled;
+   bd.dword1.bits.surface_type = BRW_SURFACE_2D;
+
+   /* BRW_NEW_LOCK */
+   bd.dword2_base_addr = bmBufferOffset(intel, region->buffer);    
+
+   bd.dword3.bits.mipmap_layout = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+   bd.dword3.bits.lod = 0;
+   bd.dword3.bits.width = region->pitch - 1; /* XXX: width ? */
+   bd.dword3.bits.height = region->height - 1;
+
+   bd.dword4.bits.min_array_element = 0;
+   bd.dword4.bits.depth = 0;
+      
+   BRW_CACHED_BATCH_STRUCT(brw, &bd);
+}
+
+/* State atom tying the update function above to BRW_NEW_CONTEXT and
+ * BRW_NEW_LOCK.
+ */
+const struct brw_tracked_state brw_depthbuffer = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CONTEXT | BRW_NEW_LOCK,
+      .cache = 0
+   },
+   .update = upload_depthbuffer
+};
+
+
+
+/***********************************************************************
+ * Polygon stipple packet
+ */
+
+/* Build the polygon stipple pattern packet and hand it to
+ * BRW_CACHED_BATCH_STRUCT.  The 32 pattern words are taken from
+ * brw->attribs.PolygonStipple in reverse order.
+ */
+static void upload_polygon_stipple(struct brw_context *brw)
+{
+   struct brw_polygon_stipple bps;
+   GLuint row;
+
+   memset(&bps, 0, sizeof(bps));
+   bps.header.length = sizeof(bps)/4-2;
+   bps.header.opcode = CMD_POLY_STIPPLE_PATTERN;
+
+   /* Source row N lands in packet slot 31 - N (inverted). */
+   for (row = 0; row < 32; row++)
+      bps.stipple[31 - row] = brw->attribs.PolygonStipple[row];
+
+   BRW_CACHED_BATCH_STRUCT(brw, &bps);
+}
+
+/* State atom tying the update function above to _NEW_POLYGONSTIPPLE. */
+const struct brw_tracked_state brw_polygon_stipple = {
+   .dirty = {
+      .mesa = _NEW_POLYGONSTIPPLE,
+      .brw = 0,
+      .cache = 0
+   },
+   .update = upload_polygon_stipple
+};
+
+
+/***********************************************************************
+ * Polygon stipple offset packet
+ */
+
+/* Emit the polygon stipple offset packet.  Both offsets are derived
+ * from the drawable's position, reduced modulo 32: x from the
+ * drawable's x, y from the drawable's y plus its height.
+ */
+static void upload_polygon_stipple_offset(struct brw_context *brw)
+{
+   __DRIdrawablePrivate *dPriv = brw->intel.driDrawable;
+   struct brw_polygon_stipple_offset bpso;
+
+   memset(&bpso, 0, sizeof(bpso));
+   bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
+   bpso.header.length = sizeof(bpso)/4-2;
+
+   /* (32 - (v & 31)) & 31 yields a value in 0..31; it is 0 when v is a
+    * multiple of 32.
+    */
+   bpso.bits0.x_offset = (32 - (dPriv->x & 31)) & 31;
+   bpso.bits0.y_offset = (32 - ((dPriv->y + dPriv->h) & 31)) & 31;
+
+   BRW_CACHED_BATCH_STRUCT(brw, &bpso);
+}
+
+/* State atom tying the update function above to _NEW_WINDOW_POS. */
+const struct brw_tracked_state brw_polygon_stipple_offset = {
+   .dirty = {
+      .mesa = _NEW_WINDOW_POS,
+      .brw = 0,
+      .cache = 0
+   },
+   .update = upload_polygon_stipple_offset
+};
+
+/**********************************************************************
+ * AA Line parameters
+ */
+/* Emit an all-zero AA line parameters packet (only opcode and length
+ * are set).  Does nothing unless BRW_IS_IGD(brw) is true.
+ */
+static void upload_aa_line_parameters(struct brw_context *brw)
+{
+   struct brw_aa_line_parameters balp;
+   
+   if (!BRW_IS_IGD(brw))
+      return;
+
+   /* use legacy aa line coverage computation */
+   memset(&balp, 0, sizeof(balp));
+   balp.header.opcode = CMD_AA_LINE_PARAMETERS;
+   balp.header.length = sizeof(balp) / 4 - 2;
+   
+   BRW_CACHED_BATCH_STRUCT(brw, &balp);
+}
+
+/* State atom tying the update function above to BRW_NEW_CONTEXT. */
+const struct brw_tracked_state brw_aa_line_parameters = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CONTEXT,
+      .cache = 0
+   },
+   .update = upload_aa_line_parameters
+};
+
+/***********************************************************************
+ * Line stipple packet
+ */
+
+/* Emit the line stipple packet: the pattern and repeat count come
+ * straight from brw->attribs.Line, and the inverse repeat count is
+ * 1/StippleFactor scaled by 2^13 and truncated to an integer.
+ */
+static void upload_line_stipple(struct brw_context *brw)
+{
+   struct brw_line_stipple bls;
+   GLfloat tmp;
+   GLint tmpi;
+
+   memset(&bls, 0, sizeof(bls));
+   bls.header.opcode = CMD_LINE_STIPPLE_PATTERN;
+   bls.header.length = sizeof(bls)/4 - 2;
+
+   bls.bits0.pattern = brw->attribs.Line->StipplePattern;
+   bls.bits1.repeat_count = brw->attribs.Line->StippleFactor;
+
+   /* The reciprocal is deliberately kept in a GLfloat before scaling;
+    * folding these two statements together would change the rounding.
+    * A StippleFactor of 0 is not guarded against here.
+    */
+   tmp = 1.0 / (GLfloat) brw->attribs.Line->StippleFactor;
+   tmpi = tmp * (1<<13);
+
+
+   bls.bits1.inverse_repeat_count = tmpi;
+
+   BRW_CACHED_BATCH_STRUCT(brw, &bls);
+}
+
+/* State atom tying the update function above to _NEW_LINE. */
+const struct brw_tracked_state brw_line_stipple = {
+   .dirty = {
+      .mesa = _NEW_LINE,
+      .brw = 0,
+      .cache = 0
+   },
+   .update = upload_line_stipple
+};
+
+
+
+/***********************************************************************
+ * Misc constant state packets
+ */
+
+/* Currently a no-op: the unconditional return below makes the rest of
+ * the body unreachable.  The disabled code would build a pipe control
+ * packet with the instruction state cache flush bit set and emit it
+ * uncached.
+ */
+static void upload_pipe_control(struct brw_context *brw)
+{
+   struct brw_pipe_control pc;
+
+   return;
+
+   memset(&pc, 0, sizeof(pc));
+
+   pc.header.opcode = CMD_PIPE_CONTROL;
+   pc.header.length = sizeof(pc)/4 - 2;
+   pc.header.post_sync_operation = PIPE_CONTROL_NOWRITE;
+
+   pc.header.instruction_state_cache_flush_enable = 1;
+
+   pc.bits1.dest_addr_type = PIPE_CONTROL_GTTWRITE_GLOBAL;
+
+   BRW_BATCH_STRUCT(brw, &pc);
+}
+
+/* State atom tying the update function above to BRW_NEW_CONTEXT. */
+const struct brw_tracked_state brw_pipe_control = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CONTEXT,
+      .cache = 0
+   },
+   .update = upload_pipe_control
+};
+
+
+/***********************************************************************
+ * Misc invariant state packets
+ */
+
+/* Emit four packets, each uncached via BRW_BATCH_STRUCT, in this order:
+ * pipeline select (0), global depth offset clamp (0.0), system
+ * instruction pointer (0), and VF statistics (enabled only when
+ * INTEL_DEBUG has DEBUG_STATS set).
+ */
+static void upload_invarient_state( struct brw_context *brw )
+{
+   {
+      /* 0x61040000  Pipeline Select */
+      /*     PipelineSelect            : 0 */
+      struct brw_pipeline_select ps;
+
+      memset(&ps, 0, sizeof(ps));
+      ps.header.opcode = CMD_PIPELINE_SELECT(brw);
+      ps.header.pipeline_select = 0;
+      BRW_BATCH_STRUCT(brw, &ps);
+   }
+
+   {
+      struct brw_global_depth_offset_clamp gdo;
+      memset(&gdo, 0, sizeof(gdo));
+
+      /* Disable depth offset clamping. 
+       */
+      gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP;
+      gdo.header.length = sizeof(gdo)/4 - 2;
+      gdo.depth_offset_clamp = 0.0;
+
+      BRW_BATCH_STRUCT(brw, &gdo);
+   }
+
+
+   /* 0x61020000  State Instruction Pointer */
+   {
+      struct brw_system_instruction_pointer sip;
+      memset(&sip, 0, sizeof(sip));
+
+      sip.header.opcode = CMD_STATE_INSN_POINTER;
+      sip.header.length = 0;
+      sip.bits0.pad = 0;
+      sip.bits0.system_instruction_pointer = 0;
+      BRW_BATCH_STRUCT(brw, &sip);
+   }
+
+
+   {
+      struct brw_vf_statistics vfs;
+      memset(&vfs, 0, sizeof(vfs));
+
+      vfs.opcode = CMD_VF_STATISTICS(brw);
+      if (INTEL_DEBUG & DEBUG_STATS)
+	 vfs.statistics_enable = 1; 
+
+      BRW_BATCH_STRUCT(brw, &vfs);
+   }
+}
+
+/* State atom tying the update function above to BRW_NEW_CONTEXT. */
+const struct brw_tracked_state brw_invarient_state = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CONTEXT,
+      .cache = 0
+   },
+   .update = upload_invarient_state
+};
+
+
+/* State pool addresses:
+ */
+/* Emit the state base address packet.  The general state and surface
+ * state base addresses are the buffer offsets of the GS and SS pools,
+ * shifted right by 5.  The remaining three entries are left at zero
+ * with only their modify-enable bit set.
+ */
+static void upload_state_base_address( struct brw_context *brw )
+{
+   struct intel_context *intel = &brw->intel;
+   struct brw_state_base_address sba;
+      
+   memset(&sba, 0, sizeof(sba));
+
+   sba.header.opcode = CMD_STATE_BASE_ADDRESS;
+   /* Hard-coded, unlike the sizeof-derived lengths used for the other
+    * packets in this file.
+    */
+   sba.header.length = 0x4;
+
+   /* BRW_NEW_LOCK */
+   sba.bits0.general_state_address = bmBufferOffset(intel, brw->pool[BRW_GS_POOL].buffer) >> 5;
+   sba.bits0.modify_enable = 1;
+
+   /* BRW_NEW_LOCK */
+   sba.bits1.surface_state_address = bmBufferOffset(intel, brw->pool[BRW_SS_POOL].buffer) >> 5;
+   sba.bits1.modify_enable = 1;
+
+   sba.bits2.modify_enable = 1;
+   sba.bits3.modify_enable = 1;
+   sba.bits4.modify_enable = 1;
+
+   BRW_CACHED_BATCH_STRUCT(brw, &sba);
+}
+
+
+/* State atom tying the update function above to BRW_NEW_CONTEXT and
+ * BRW_NEW_LOCK.
+ */
+const struct brw_tracked_state brw_state_base_address = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CONTEXT | BRW_NEW_LOCK,
+      .cache = 0
+   },
+   .update = upload_state_base_address
+};
diff --git a/i965/brw_program.c b/i965/brw_program.c
new file mode 100644
index 0000000..752fe49
--- /dev/null
+++ b/i965/brw_program.c
@@ -0,0 +1,147 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+  
+#include "shader/prog_parameter.h"
+#include "brw_context.h"
+#include "brw_aub.h"
+#include "brw_util.h"
+#include "program.h"
+#include "imports.h"
+#include "enums.h"
+#include "tnl/tnl.h"
+
+
+/* BindProgram driver hook: flag the dirty bit matching the bound target. */
+static void brwBindProgram( GLcontext *ctx, GLenum target,
+			    struct gl_program *prog )
+{
+   struct brw_context *brw = brw_context(ctx);
+   GLuint new_state = 0;
+
+   /* Targets other than vertex/fragment programs leave the dirty bits as they are. */
+   if (target == GL_VERTEX_PROGRAM_ARB)
+      new_state = BRW_NEW_VERTEX_PROGRAM;
+   else if (target == GL_FRAGMENT_PROGRAM_ARB)
+      new_state = BRW_NEW_FRAGMENT_PROGRAM;
+
+   brw->state.dirty.brw |= new_state;
+}
+/* NewProgram hook: allocate the brw wrapper struct for vertex/fragment programs. */
+static struct gl_program *brwNewProgram( GLcontext *ctx,
+				      GLenum target, 
+				      GLuint id )
+{
+   struct brw_context *brw = brw_context(ctx);
+
+   switch (target) {
+   case GL_VERTEX_PROGRAM_ARB: {
+      struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program);
+      if (prog) {
+	 prog->id = brw->program_id++;
+	 /* prog->id comes from the per-context counter; the GL `id` is passed on to Mesa. */
+	 return _mesa_init_vertex_program( ctx, &prog->program,
+					     target, id );
+      }
+      else
+	 return NULL; /* allocation failed */
+   }
+
+   case GL_FRAGMENT_PROGRAM_ARB: {
+      struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program);
+      if (prog) {
+	 prog->id = brw->program_id++;
+	 /* Same scheme as the vertex case above. */
+	 return _mesa_init_fragment_program( ctx, &prog->program,
+					     target, id );
+      }
+      else
+	 return NULL; /* allocation failed */
+   }
+   /* Any other target is handed to _mesa_new_program unchanged. */
+   default:
+      return _mesa_new_program(ctx, target, id);
+   }
+}
+/* DeleteProgram hook: no driver-side work here, just forwards to _mesa_delete_program. */
+static void brwDeleteProgram( GLcontext *ctx,
+			      struct gl_program *prog )
+{
+   
+   _mesa_delete_program( ctx, prog );
+}
+
+/* IsProgramNative hook: unconditionally returns GL_TRUE; all arguments are ignored. */
+static GLboolean brwIsProgramNative( GLcontext *ctx,
+				     GLenum target, 
+				     struct gl_program *prog )
+{
+   return GL_TRUE;
+}
+/* ProgramStringNotify hook: give the program a new id and refresh its cached StateFlags. */
+static void brwProgramStringNotify( GLcontext *ctx,
+				    GLenum target,
+				    struct gl_program *prog )
+{
+   if (target == GL_FRAGMENT_PROGRAM_ARB) {
+      struct brw_context *brw = brw_context(ctx);
+      struct brw_fragment_program *p = (struct brw_fragment_program *)prog;
+      struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program;
+      if (p == fp) /* only the program held in brw->fragment_program dirties state */
+	 brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
+      p->id = brw->program_id++;      
+      p->param_state = p->program.Base.Parameters->StateFlags;
+   }
+   else if (target == GL_VERTEX_PROGRAM_ARB) {
+      struct brw_context *brw = brw_context(ctx);
+      struct brw_vertex_program *p = (struct brw_vertex_program *)prog;
+      struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
+      if (p == vp) /* only the program held in brw->vertex_program dirties state */
+	 brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+      p->id = brw->program_id++;      
+      p->param_state = p->program.Base.Parameters->StateFlags;
+
+      /* Also tell tnl about it:
+       */
+      _tnl_program_string(ctx, target, prog);
+   }
+}
+/* Install the brw program hooks into the driver function table `functions`. */
+void brwInitFragProgFuncs( struct dd_function_table *functions )
+{
+   assert(functions->ProgramStringNotify == _tnl_program_string); 
+   /* brwProgramStringNotify calls _tnl_program_string itself for vertex programs. */
+   functions->BindProgram = brwBindProgram;
+   functions->NewProgram = brwNewProgram;
+   functions->DeleteProgram = brwDeleteProgram;
+   functions->IsProgramNative = brwIsProgramNative;
+   functions->ProgramStringNotify = brwProgramStringNotify;
+}
+
diff --git a/i965/brw_sf.c b/i965/brw_sf.c
new file mode 100644
index 0000000..6dcfa62
--- /dev/null
+++ b/i965/brw_sf.c
@@ -0,0 +1,188 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+  
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_sf.h"
+#include "brw_state.h"
+/* Mask with one bit set for every bit position below FRAG_ATTRIB_MAX. */
+#define DO_SETUP_BITS ((1<<FRAG_ATTRIB_MAX)-1)
+/* Compile the SF program described by `key` and upload it to the BRW_SF_PROG cache. */
+static void compile_sf_prog( struct brw_context *brw,
+			     struct brw_sf_prog_key *key )
+{
+   struct brw_sf_compile c;
+   const GLuint *program;
+   GLuint program_size;
+   GLuint i, idx;
+
+   memset(&c, 0, sizeof(c));
+
+   /* Begin the compilation:
+    */
+   brw_init_compile(brw, &c.func);
+
+   c.key = *key;
+   c.nr_attrs = brw_count_bits(c.key.attrs);
+   c.nr_attr_regs = (c.nr_attrs+1)/2;
+   c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS);
+   c.nr_setup_regs = (c.nr_setup_attrs+1)/2;
+   /* (the register counts above are the attribute counts halved, rounding up) */
+   c.prog_data.urb_read_length = c.nr_attr_regs;
+   c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
+
+   /* Construct map from attribute number to position in the vertex.
+    */
+   for (i = idx = 0; i < VERT_RESULT_MAX; i++) 
+      if (c.key.attrs & (1<<i)) {
+	 c.attr_to_idx[i] = idx;
+	 c.idx_to_attr[idx] = i;
+	 idx++;
+      }
+   
+   /* Which primitive?  Or all three? 
+    */
+   switch (key->primitive) {
+   case SF_TRIANGLES:
+      c.nr_verts = 3;
+      brw_emit_tri_setup( &c, GL_TRUE );
+      break;
+   case SF_LINES:
+      c.nr_verts = 2;
+      brw_emit_line_setup( &c, GL_TRUE );
+      break;
+   case SF_POINTS:
+      c.nr_verts = 1;
+      brw_emit_point_setup( &c, GL_TRUE );
+      break;
+   case SF_UNFILLED_TRIS:
+      c.nr_verts = 3;
+      brw_emit_anyprim_setup( &c );
+      break;
+   default:
+      assert(0);
+      return; /* unknown primitive: nothing is compiled or uploaded */
+   }
+	 
+
+   /* get the program
+    */
+   program = brw_get_program(&c.func, &program_size);
+
+   /* Upload
+    */
+   brw->sf.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_SF_PROG],
+					      &c.key,
+					      sizeof(c.key),
+					      program,
+					      program_size,
+					      &c.prog_data,
+					      &brw->sf.prog_data );
+}
+
+/* Look `key` up in the BRW_SF_PROG cache; outputs go to brw->sf.prog_data and prog_gs_offset. */
+static GLboolean search_cache( struct brw_context *brw, 
+			       struct brw_sf_prog_key *key )
+{
+   return brw_search_cache(&brw->cache[BRW_SF_PROG], 
+			   key, sizeof(*key),
+			   &brw->sf.prog_data,
+			   &brw->sf.prog_gs_offset);
+}
+
+
+/* Calculate interpolants for triangle and line rasterization.
+ * Builds the SF program key from current state, then looks it up or compiles it. */
+static void upload_sf_prog( struct brw_context *brw )
+{
+   struct brw_sf_prog_key key;
+
+   memset(&key, 0, sizeof(key));
+
+   /* Populate the key, noting state dependencies:
+    */
+   /* CACHE_NEW_VS_PROG */
+   key.attrs = brw->vs.prog_data->outputs_written; 
+
+   /* BRW_NEW_REDUCED_PRIMITIVE */
+   switch (brw->intel.reduced_primitive) {
+   case GL_TRIANGLES: 
+      /* NOTE: We just use the edgeflag attribute as an indicator that
+       * unfilled triangles are active.  We don't actually do the
+       * edgeflag testing here, it is already done in the clip
+       * program.
+       */
+      if (key.attrs & (1<<VERT_RESULT_EDGE))
+	 key.primitive = SF_UNFILLED_TRIS;
+      else
+	 key.primitive = SF_TRIANGLES;
+      break;
+   case GL_LINES: 
+      key.primitive = SF_LINES; 
+      break;
+   case GL_POINTS: 
+      key.primitive = SF_POINTS; 
+      break;
+   } /* no default: other values leave key.primitive at 0 from the memset */
+
+
+   /* _NEW_LIGHT */
+   key.do_flat_shading = (brw->attribs.Light->ShadeModel == GL_FLAT);
+   key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide);
+
+   /* _NEW_POLYGON */
+   if (key.do_twoside_color)
+      key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW);
+
+   /* Compile only when the cache has no program for this key. */
+   if (!search_cache(brw, &key))
+      compile_sf_prog( brw, &key );
+}
+
+/* State atom for upload_sf_prog; the flags match the dependency notes inside that function. */
+const struct brw_tracked_state brw_sf_prog = {
+   .dirty = {
+      .mesa  = (_NEW_LIGHT|_NEW_POLYGON),
+      .brw   = (BRW_NEW_REDUCED_PRIMITIVE),
+      .cache = CACHE_NEW_VS_PROG
+   },
+   .update = upload_sf_prog
+};
+
diff --git a/i965/brw_sf.h b/i965/brw_sf.h
new file mode 100644
index 0000000..b321cda
--- /dev/null
+++ b/i965/brw_sf.h
@@ -0,0 +1,105 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+   
+
+#ifndef BRW_SF_H
+#define BRW_SF_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "program.h"
+
+/* Values stored in the 2-bit brw_sf_prog_key.primitive field. */
+#define SF_POINTS    0
+#define SF_LINES     1
+#define SF_TRIANGLES 2
+#define SF_UNFILLED_TRIS   3
+/* Key identifying one SF program variant; the bitfield widths sum to 32. */
+struct brw_sf_prog_key {
+   GLuint primitive:2;		/* one of the SF_* values */
+   GLuint do_twoside_color:1;
+   GLuint do_flat_shading:1;
+   GLuint attrs:16;		/* bitmask tested with 1<<VERT_RESULT_* */
+   GLuint frontface_ccw:1;	/* only filled in when do_twoside_color is set */
+   GLuint pad:11;
+};
+
+/* Working state used while compiling one SF program. */
+struct brw_sf_compile {
+   struct brw_compile func;
+   struct brw_sf_prog_key key;
+   struct brw_sf_prog_data prog_data;
+   /* Register handles below are assigned by alloc_regs() in brw_sf_emit.c. */
+   struct brw_reg pv;
+   struct brw_reg det;
+   struct brw_reg dx0;
+   struct brw_reg dx2;
+   struct brw_reg dy0;
+   struct brw_reg dy2;
+
+   /* z and 1/w passed in separately:
+    */
+   struct brw_reg z[3];
+   struct brw_reg inv_w[3];
+   
+   /* The vertices:
+    */
+   struct brw_reg vert[3];
+
+    /* Temporaries, allocated after last vertex reg.
+    */
+   struct brw_reg inv_det;
+   struct brw_reg a1_sub_a0;
+   struct brw_reg a2_sub_a0;
+   struct brw_reg tmp;
+   /* Message registers m1..m3 that carry the program outputs: */
+   struct brw_reg m1Cx;
+   struct brw_reg m2Cy;
+   struct brw_reg m3C0;
+
+   GLuint nr_verts;
+   GLuint nr_attrs;
+   GLuint nr_attr_regs;
+   GLuint nr_setup_attrs;
+   GLuint nr_setup_regs;
+   /* Maps between attribute number and packed position in the vertex: */
+   GLubyte attr_to_idx[VERT_RESULT_MAX];   
+   GLubyte idx_to_attr[VERT_RESULT_MAX];   
+};
+
+/* Emitters defined in brw_sf_emit.c; `allocate` selects whether alloc_regs() is run first. */
+void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_anyprim_setup( struct brw_sf_compile *c );
+
+#endif
diff --git a/i965/brw_sf_emit.c b/i965/brw_sf_emit.c
new file mode 100644
index 0000000..94be815
--- /dev/null
+++ b/i965/brw_sf_emit.c
@@ -0,0 +1,631 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+   
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_sf.h"
+
+
+/* Locate `attr` inside vertex `vert`: packed slots go two per register. */
+static struct brw_reg get_vert_attr(struct brw_sf_compile *c,
+				    struct brw_reg vert, GLuint attr)
+{
+   const GLuint slot = c->attr_to_idx[attr];
+
+   /* Even slots sit at sub-register 0, odd slots at sub-register 4. */
+   return brw_vec4_grf(vert.nr + (slot >> 1), (slot & 1) * 4);
+}
+
+/* Returns 1 when bit `attr` is set in the program key attrs mask, else 0. */
+static GLboolean have_attr(struct brw_sf_compile *c, GLuint attr)
+{
+   return (c->key.attrs & (1<<attr)) != 0;
+}
+
+
+			 
+/*********************************************************************** 
+ * Twoside lighting
+ */
+static void copy_bfc( struct brw_sf_compile *c,
+		      struct brw_reg vert )
+{
+   struct brw_compile *p = &c->func;
+   GLuint i;
+   /* For i = 0,1: emit a MOV of BFCi over COLi in `vert`, when both are present. */
+   for (i = 0; i < 2; i++) {
+      if (have_attr(c, VERT_RESULT_COL0+i) &&
+	  have_attr(c, VERT_RESULT_BFC0+i))
+	 brw_MOV(p, 
+		 get_vert_attr(c, vert, VERT_RESULT_COL0+i), 
+		 get_vert_attr(c, vert, VERT_RESULT_BFC0+i));
+   }
+}
+
+/* Emit an IF block, conditioned on c->det versus 0, that copies back colors over front colors. */
+static void do_twoside_color( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *if_insn;
+   GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
+
+   /* Already done in clip program:
+    */
+   if (c->key.primitive == SF_UNFILLED_TRIS)
+      return;
+
+   /* XXX: What happens if BFC isn't present?  This could only happen
+    * for user-supplied vertex programs, as t_vp_build.c always does
+    * the right thing.
+    */
+   if (!(have_attr(c, VERT_RESULT_COL0) && have_attr(c, VERT_RESULT_BFC0)) &&
+       !(have_attr(c, VERT_RESULT_COL1) && have_attr(c, VERT_RESULT_BFC1)))
+      return;
+   
+   /* Need to use BRW_EXECUTE_4 and also do a 4-wide compare in order
+    * to get all channels active inside the IF.  In the clipping code
+    * we run with NoMask, so it's not an option and we can use
+    * BRW_EXECUTE_1 for all comparisons.
+    */
+   brw_push_insn_state(p);
+   brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
+   if_insn = brw_IF(p, BRW_EXECUTE_4); 
+   {
+      switch (c->nr_verts) {
+      case 3: copy_bfc(c, c->vert[2]); /* fallthrough */
+      case 2: copy_bfc(c, c->vert[1]); /* fallthrough */
+      case 1: copy_bfc(c, c->vert[0]);
+      }
+   }
+   brw_ENDIF(p, if_insn);
+   brw_pop_insn_state(p);
+}
+
+
+
+/***********************************************************************
+ * Flat shading
+ */
+/* Mask of the COL0 and COL1 vertex-result bits. */
+#define VERT_RESULT_COLOR_BITS ((1<<VERT_RESULT_COL0) | \
+                                 (1<<VERT_RESULT_COL1))
+/* Emit one MOV per present COL0/COL1 attribute, copying it from vertex `src` to `dst`. */
+static void copy_colors( struct brw_sf_compile *c,
+		     struct brw_reg dst,
+		     struct brw_reg src)
+{
+   struct brw_compile *p = &c->func;
+   GLuint i;
+
+   for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) {
+      if (have_attr(c,i))
+	 brw_MOV(p, 
+		 get_vert_attr(c, dst, i), 
+		 get_vert_attr(c, src, i));
+   }
+}
+
+
+
+/* Need to use a computed jump to copy flatshaded attributes as the
+ * vertices are ordered according to y-coordinate before reaching this
+ * point, so the PV could be anywhere.
+ */
+static void do_flatshade_triangle( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg ip = brw_ip_reg();
+   GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
+   if (!nr)
+      return;
+
+   /* Already done in clip program:
+    */
+   if (c->key.primitive == SF_UNFILLED_TRIS)
+      return;
+
+   brw_push_insn_state(p);
+   /* Each of the first two blocks below is nr*2 MOVs plus one JMPI, hence the nr*2+1 scale. */
+   brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr*2+1));
+   brw_JMPI(p, ip, ip, c->pv);
+   /* vert[0] supplies the colors; the jump then skips both remaining blocks. */
+   copy_colors(c, c->vert[1], c->vert[0]);
+   copy_colors(c, c->vert[2], c->vert[0]);
+   brw_JMPI(p, ip, ip, brw_imm_ud(nr*4+1));
+   /* vert[1] supplies the colors; the jump then skips the last block. */
+   copy_colors(c, c->vert[0], c->vert[1]);
+   copy_colors(c, c->vert[2], c->vert[1]);
+   brw_JMPI(p, ip, ip, brw_imm_ud(nr*2));
+   /* vert[2] supplies the colors. */
+   copy_colors(c, c->vert[0], c->vert[2]);
+   copy_colors(c, c->vert[1], c->vert[2]);
+
+   brw_pop_insn_state(p);
+}
+	
+/* Two-vertex variant of the computed-jump color copy used for flat shading. */
+static void do_flatshade_line( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg ip = brw_ip_reg();
+   GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
+   
+   if (!nr)
+      return;
+
+   /* Already done in clip program: 
+    */
+   if (c->key.primitive == SF_UNFILLED_TRIS)
+      return;
+
+   brw_push_insn_state(p);
+   /* The first block is nr MOVs plus one JMPI, hence the nr+1 scale on pv. */
+   brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr+1));
+   brw_JMPI(p, ip, ip, c->pv);
+   copy_colors(c, c->vert[1], c->vert[0]);
+   /* Skip the nr MOVs of the second block. */
+   brw_JMPI(p, ip, ip, brw_imm_ud(nr));
+   copy_colors(c, c->vert[0], c->vert[1]);
+
+   brw_pop_insn_state(p);
+}
+
+	
+
+/***********************************************************************
+ * Triangle setup.
+ */
+
+/* Assign fixed registers to the fields of `c`; reads c->nr_verts and c->nr_attr_regs. */
+static void alloc_regs( struct brw_sf_compile *c )
+{
+   GLuint reg, i;
+
+   /* Values computed by fixed function unit:
+    */
+   c->pv  = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_UD);
+   c->det = brw_vec1_grf(1, 2);
+   c->dx0 = brw_vec1_grf(1, 3);
+   c->dx2 = brw_vec1_grf(1, 4);
+   c->dy0 = brw_vec1_grf(1, 5);
+   c->dy2 = brw_vec1_grf(1, 6);
+
+   /* z and 1/w passed in separately:
+    */
+   c->z[0]     = brw_vec1_grf(2, 0);
+   c->inv_w[0] = brw_vec1_grf(2, 1);
+   c->z[1]     = brw_vec1_grf(2, 2);
+   c->inv_w[1] = brw_vec1_grf(2, 3);
+   c->z[2]     = brw_vec1_grf(2, 4);
+   c->inv_w[2] = brw_vec1_grf(2, 5);
+   
+   /* The vertices:
+    */
+   reg = 3;
+   for (i = 0; i < c->nr_verts; i++) {
+      c->vert[i] = brw_vec8_grf(reg, 0);
+      reg += c->nr_attr_regs;
+   }
+
+   /* Temporaries, allocated after last vertex reg.
+    */
+   c->inv_det = brw_vec1_grf(reg, 0);  reg++;
+   c->a1_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
+   c->a2_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
+   c->tmp = brw_vec8_grf(reg, 0);  reg++;
+
+   /* Note grf allocation:
+    */
+   c->prog_data.total_grf = reg;
+   
+
+   /* Outputs of this program - interpolation coefficients for
+    * rasterization:
+    */
+   c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
+   c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
+   c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
+}
+
+/* For each vertex, emit a 2-wide MOV from c->z[i] into the vertex at element offset 2. */
+static void copy_z_inv_w( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   GLuint i;
+
+   brw_push_insn_state(p);
+	
+   /* Copy both scalars with a single MOV:
+    */
+   for (i = 0; i < c->nr_verts; i++)
+      brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
+	 
+   brw_pop_insn_state(p);
+}
+
+/* Emit a math INV instruction with c->det as source and c->inv_det as destination. */
+static void invert_det( struct brw_sf_compile *c)
+{
+   /* Looks like we invert all 8 elements just to get 1/det in
+    * position 2 !?!
+    */
+   brw_math(&c->func, 
+	    c->inv_det, 
+	    BRW_MATH_FUNCTION_INV,
+	    BRW_MATH_SATURATE_NONE,
+	    0, 
+	    c->det,
+	    BRW_MATH_DATA_SCALAR,
+	    BRW_MATH_PRECISION_FULL);
+
+}
+/* Attribute bits removed from the perspective mask in calculate_masks(). */
+#define NON_PERPECTIVE_ATTRS  (FRAG_BIT_WPOS | \
+                               FRAG_BIT_COL0 | \
+			       FRAG_BIT_COL1)
+/* Fill the three masks for setup register `reg`; returns true when it is the last one. */
+static GLboolean calculate_masks( struct brw_sf_compile *c,
+				  GLuint reg,
+				  GLushort *pc,
+				  GLushort *pc_persp,
+				  GLushort *pc_linear)
+{
+   GLboolean is_last_attr = (reg == c->nr_setup_regs - 1);
+   GLuint persp_mask = c->key.attrs & ~NON_PERPECTIVE_ATTRS;
+   GLuint linear_mask;
+   /* NOTE(review): FRAG_BIT_* masks are applied to attrs that other code indexes by VERT_RESULT_* -- confirm they agree. */
+   if (c->key.do_flat_shading)
+      linear_mask = c->key.attrs & ~(FRAG_BIT_COL0|FRAG_BIT_COL1);
+   else
+      linear_mask = c->key.attrs;
+   /* Low nibble (0xf) stands for the first attribute of the pair, high nibble (0xf0) for the second. */
+   *pc_persp = 0;
+   *pc_linear = 0;
+   *pc = 0xf;
+      
+   if (persp_mask & (1 << c->idx_to_attr[reg*2])) 
+      *pc_persp = 0xf;
+
+   if (linear_mask & (1 << c->idx_to_attr[reg*2])) 
+      *pc_linear = 0xf;
+
+   /* Maybe only process one attribute on the final round:
+    */
+   if (reg*2+1 < c->nr_setup_attrs) {
+      *pc |= 0xf0;
+
+      if (persp_mask & (1 << c->idx_to_attr[reg*2+1])) 
+	 *pc_persp |= 0xf0;
+
+      if (linear_mask & (1 << c->idx_to_attr[reg*2+1])) 
+	 *pc_linear |= 0xf0;
+   }
+
+   return is_last_attr;
+}
+
+
+/* Emit triangle setup: per setup register compute Cx, Cy and C0, then write them to the URB. */
+void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+   struct brw_compile *p = &c->func;
+   GLuint i;
+
+   c->nr_verts = 3;
+
+   if (allocate)
+      alloc_regs(c);
+
+   invert_det(c);
+   copy_z_inv_w(c);
+
+   if (c->key.do_twoside_color) 
+      do_twoside_color(c);
+
+   if (c->key.do_flat_shading)
+      do_flatshade_triangle(c);
+      
+   /* One iteration per setup register, i.e. per pair of attributes. */
+   for (i = 0; i < c->nr_setup_regs; i++)
+   {
+      /* Pair of incoming attributes:
+       */
+      struct brw_reg a0 = offset(c->vert[0], i);
+      struct brw_reg a1 = offset(c->vert[1], i);
+      struct brw_reg a2 = offset(c->vert[2], i);
+      GLushort pc, pc_persp, pc_linear;
+      GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+      /* Channels selected by pc_persp are multiplied by the 1/w of their vertex. */
+      if (pc_persp)
+      {
+	 brw_set_predicate_control_flag_value(p, pc_persp);
+	 brw_MUL(p, a0, a0, c->inv_w[0]);
+	 brw_MUL(p, a1, a1, c->inv_w[1]);
+	 brw_MUL(p, a2, a2, c->inv_w[2]);
+      }
+      
+      
+      /* Calculate coefficients for interpolated values:
+       */      
+      if (pc_linear)
+      {
+	 brw_set_predicate_control_flag_value(p, pc_linear);
+
+	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
+	 brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
+
+	 /* calculate dA/dx
+	  */
+	 brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
+	 brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
+	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
+		
+	 /* calculate dA/dy
+	  */
+	 brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
+	 brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
+	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
+      }
+      /* The block below is emitted for every pair, whatever the two masks above were. */
+      {
+	 brw_set_predicate_control_flag_value(p, pc); 
+	 /* start point for interpolation
+	  */
+	 brw_MOV(p, c->m3C0, a0);
+      
+	 /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in
+	  * the send instruction:
+	  */	 
+	 brw_urb_WRITE(p, 
+		       brw_null_reg(),
+		       0,
+		       brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
+		       0, 	/* allocate */
+		       1,	/* used */
+		       4, 	/* msg len */
+		       0,	/* response len */
+		       last,	/* eot */
+		       last, 	/* writes complete */
+		       i*4,	/* offset */
+		       BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
+      }
+   }
+}
+
+
+/* Emit line setup: per setup register compute Cx, Cy and C0 from two vertices and write them to the URB. */
+void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+   struct brw_compile *p = &c->func;
+   GLuint i;
+
+
+   c->nr_verts = 2;
+
+   if (allocate)
+      alloc_regs(c);
+
+   invert_det(c);
+   copy_z_inv_w(c);
+
+   if (c->key.do_flat_shading)
+      do_flatshade_line(c);
+   /* One iteration per setup register, i.e. per pair of attributes. */
+   for (i = 0; i < c->nr_setup_regs; i++)
+   {
+      /* Pair of incoming attributes:
+       */
+      struct brw_reg a0 = offset(c->vert[0], i);
+      struct brw_reg a1 = offset(c->vert[1], i);
+      GLushort pc, pc_persp, pc_linear;
+      GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+      /* Channels selected by pc_persp are multiplied by the 1/w of their vertex. */
+      if (pc_persp)
+      {
+	 brw_set_predicate_control_flag_value(p, pc_persp);
+	 brw_MUL(p, a0, a0, c->inv_w[0]);
+	 brw_MUL(p, a1, a1, c->inv_w[1]);
+      }
+
+      /* Calculate coefficients for position, color:
+       */
+      if (pc_linear) {
+	 brw_set_predicate_control_flag_value(p, pc_linear); 
+
+	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
+	 /* Cx = (a1-a0) * dx0 * inv_det,  Cy = (a1-a0) * dy0 * inv_det. */
+ 	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0); 
+	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
+		
+	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
+	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
+      }
+      /* The block below is emitted for every pair, whatever the two masks above were. */
+      {
+	 brw_set_predicate_control_flag_value(p, pc); 
+
+	 /* start point for interpolation
+	  */
+	 brw_MOV(p, c->m3C0, a0);
+
+	 /* Copy m0..m3 to URB. 
+	  */
+	 brw_urb_WRITE(p, 
+		       brw_null_reg(),
+		       0,
+		       brw_vec8_grf(0, 0),
+		       0, 	/* allocate */
+		       1, 	/* used */
+		       4, 	/* msg len */
+		       0,	/* response len */
+		       last, 	/* eot */
+		       last, 	/* writes complete */
+		       i*4,	/* urb destination offset */
+		       BRW_URB_SWIZZLE_TRANSPOSE); 
+      }
+   } 
+}
+
+/* Points setup - several simplifications as all attributes are
+ * constant across the face of the point (point sprites excluded!)
+ */
+void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+   struct brw_compile *p = &c->func;
+   GLuint i;
+
+   c->nr_verts = 1;
+   /* With nr_verts == 1 only vert[0] is set up by alloc_regs() and used below. */
+   if (allocate)
+      alloc_regs(c);
+
+   copy_z_inv_w(c);
+
+   brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
+   brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
+   /* One iteration per setup register, i.e. per pair of attributes. */
+   for (i = 0; i < c->nr_setup_regs; i++)
+   {
+      struct brw_reg a0 = offset(c->vert[0], i);
+      GLushort pc, pc_persp, pc_linear;
+      GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+            
+      if (pc_persp)
+      {				
+	 /* This seems odd as the values are all constant, but the
+	  * fragment shader will be expecting it:
+	  */
+	 brw_set_predicate_control_flag_value(p, pc_persp);
+	 brw_MUL(p, a0, a0, c->inv_w[0]);
+      }
+
+      /* pc_linear is computed by calculate_masks() but not used in this function. */
+      /* The delta values are always zero, just send the starting
+       * coordinate.  Again, this is to fit in with the interpolation
+       * code in the fragment shader.
+       */
+      {
+	 brw_set_predicate_control_flag_value(p, pc); 
+
+	 brw_MOV(p, c->m3C0, a0); /* constant value */
+
+	 /* Copy m0..m3 to URB. 
+	  */
+	 brw_urb_WRITE(p, 
+		       brw_null_reg(),
+		       0,
+		       brw_vec8_grf(0, 0),
+		       0, 	/* allocate */
+		       1,	/* used */
+		       4, 	/* msg len */
+		       0,	/* response len */
+		       last, 	/* eot */
+		       last, 	/* writes complete */
+		       i*4,	/* urb destination offset */
+		       BRW_URB_SWIZZLE_TRANSPOSE);
+      }
+   }
+}
+/* Emit one program holding the triangle, line and point setup code, chosen at run time from r1.0. */
+void brw_emit_anyprim_setup( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg ip = brw_ip_reg();
+   struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
+   struct brw_reg primmask;
+   struct brw_instruction *jmp;
+   struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
+   
+   GLuint saveflag;
+
+   c->nr_verts = 3;
+   alloc_regs(c);
+   /* Registers are allocated once here for 3 vertices; the emitters below get allocate == GL_FALSE. */
+   primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
+   /* primmask = 1 << payload_prim, so it can be tested against the _3DPRIM_* bit sets below. */
+   brw_MOV(p, primmask, brw_imm_ud(1));
+   brw_SHL(p, primmask, primmask, payload_prim);
+   /* Presumably the JMPI skips the triangle code when the AND result is zero -- TODO confirm. */
+   brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
+					       (1<<_3DPRIM_TRISTRIP) |
+					       (1<<_3DPRIM_TRIFAN) |
+					       (1<<_3DPRIM_TRISTRIP_REVERSE) |
+					       (1<<_3DPRIM_POLYGON) |
+					       (1<<_3DPRIM_RECTLIST) |
+					       (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
+   jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
+   {
+      saveflag = p->flag_value;
+      brw_push_insn_state(p); 
+      brw_emit_tri_setup( c, GL_FALSE );
+      brw_pop_insn_state(p);
+      p->flag_value = saveflag;
+      /* note - thread killed in subroutine, so must
+       * restore the flag which is changed when building
+       * the subroutine. fix #13240
+       */
+   }
+   brw_land_fwd_jump(p, jmp);
+   /* Same test again, this time against the line primitive types. */
+   brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
+					       (1<<_3DPRIM_LINESTRIP) |
+					       (1<<_3DPRIM_LINELOOP) |
+					       (1<<_3DPRIM_LINESTRIP_CONT) |
+					       (1<<_3DPRIM_LINESTRIP_BF) |
+					       (1<<_3DPRIM_LINESTRIP_CONT_BF)));
+   jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
+   {
+      saveflag = p->flag_value;
+      brw_push_insn_state(p); 
+      brw_emit_line_setup( c, GL_FALSE );
+      brw_pop_insn_state(p);
+      p->flag_value = saveflag;
+      /* note - thread killed in subroutine */
+   }
+   brw_land_fwd_jump(p, jmp); 
+   /* Anything that matched neither set reaches the point setup. */
+   brw_emit_point_setup( c, GL_FALSE );
+}
+
+
+
+
diff --git a/i965/brw_sf_state.c b/i965/brw_sf_state.c
new file mode 100644
index 0000000..2fd75a0
--- /dev/null
+++ b/i965/brw_sf_state.c
@@ -0,0 +1,220 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+   
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "macros.h"
+
+static void upload_sf_vp(struct brw_context *brw)
+{
+   struct brw_sf_viewport sfv;
+   /* Fill sfv from GL viewport/scissor state, then cache it via BRW_SF_VP. */
+   memset(&sfv, 0, sizeof(sfv));
+   /* With no drawable both sections below are skipped and the zeroed struct is cached. */
+   if (brw->intel.driDrawable) 
+   {
+      /* _NEW_VIEWPORT, BRW_NEW_METAOPS */
+
+      if (!brw->metaops.active) {
+	 const GLfloat *v = brw->intel.ctx.Viewport._WindowMap.m;
+	 /* Y scale and translate are negated; the translate is offset by the drawable height. */
+	 sfv.viewport.m00 =   v[MAT_SX];
+	 sfv.viewport.m11 = - v[MAT_SY];
+	 sfv.viewport.m22 =   v[MAT_SZ] * brw->intel.depth_scale;
+	 sfv.viewport.m30 =   v[MAT_TX];
+	 sfv.viewport.m31 = - v[MAT_TY] + brw->intel.driDrawable->h;
+	 sfv.viewport.m32 =   v[MAT_TZ] * brw->intel.depth_scale;
+      }
+      else {
+	 sfv.viewport.m00 =   1;
+	 sfv.viewport.m11 = - 1;
+	 sfv.viewport.m22 =   1;
+	 sfv.viewport.m30 =   0;
+	 sfv.viewport.m31 =   brw->intel.driDrawable->h;
+	 sfv.viewport.m32 =   0;
+      }
+   }
+
+   /* XXX: what state for this? */
+   if (brw->intel.driDrawable)
+   {
+      intelScreenPrivate *screen = brw->intel.intelScreen;
+      /* _NEW_SCISSOR */
+      GLint x = brw->attribs.Scissor->X;
+      GLint y = brw->attribs.Scissor->Y;
+      GLuint w = brw->attribs.Scissor->Width;
+      GLuint h = brw->attribs.Scissor->Height;
+      /* Flip the box in Y against the drawable height; x2/y2 are inclusive maxima. */
+      GLint x1 = x;
+      GLint y1 = brw->intel.driDrawable->h - (y + h);
+      GLint x2 = x + w - 1;
+      GLint y2 = y1 + h - 1;
+      /* Clamp all four edges to [0, screen dimension - 1]. */
+      if (x1 < 0) x1 = 0;
+      if (y1 < 0) y1 = 0;
+      if (x2 < 0) x2 = 0;
+      if (y2 < 0) y2 = 0;
+
+      if (x2 >= screen->width) x2 = screen->width-1;
+      if (y2 >= screen->height) y2 = screen->height-1;
+      if (x1 >= screen->width) x1 = screen->width-1;
+      if (y1 >= screen->height) y1 = screen->height-1;
+      
+      sfv.scissor.xmin = x1;
+      sfv.scissor.xmax = x2;
+      sfv.scissor.ymin = y1;
+      sfv.scissor.ymax = y2;
+   }
+
+   brw->sf.vp_gs_offset = brw_cache_data( &brw->cache[BRW_SF_VP], &sfv );
+}
+
+/* Tracked-state atom: upload_sf_vp runs when viewport, scissor or metaops state is flagged. */
+const struct brw_tracked_state brw_sf_vp = {
+   .dirty = {
+      .mesa  = _NEW_VIEWPORT | _NEW_SCISSOR,
+      .brw   = BRW_NEW_METAOPS,
+      .cache = 0,
+   },
+   .update = upload_sf_vp,
+};
+
+
+/* Build the SF unit state from program, URB and GL state and cache it via BRW_SF_UNIT. */
+static void upload_sf_unit( struct brw_context *brw )
+{
+   struct brw_sf_unit_state sf;
+   memset(&sf, 0, sizeof(sf));
+
+   /* CACHE_NEW_SF_PROG */
+   sf.thread0.grf_reg_count = ((brw->sf.prog_data->total_grf-1) & ~15) / 16;
+   sf.thread0.kernel_start_pointer = brw->sf.prog_gs_offset >> 6;
+   sf.thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length;
+
+   sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+   sf.thread3.dispatch_grf_start_reg = 3;
+   sf.thread3.urb_entry_read_offset = 1;
+
+   /* BRW_NEW_URB_FENCE */
+   sf.thread4.nr_urb_entries = brw->urb.nr_sf_entries;
+   sf.thread4.urb_entry_allocation_size = brw->urb.sfsize - 1;
+   sf.thread4.max_threads = MIN2(12, brw->urb.nr_sf_entries / 2) - 1;
+   /* Debug overrides: single-thread mode forces max_threads to 0. */
+   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+      sf.thread4.max_threads = 0; 
+
+   if (INTEL_DEBUG & DEBUG_STATS)
+      sf.thread4.stats_enable = 1; 
+
+   /* CACHE_NEW_SF_VP */
+   sf.sf5.sf_viewport_state_offset = brw->sf.vp_gs_offset >> 5;
+   
+   sf.sf5.viewport_transform = 1;
+   
+   /* _NEW_SCISSOR */
+   if (brw->attribs.Scissor->Enabled) 
+      sf.sf6.scissor = 1;  
+
+   /* _NEW_POLYGON */
+   if (brw->attribs.Polygon->FrontFace == GL_CCW)
+      sf.sf5.front_winding = BRW_FRONTWINDING_CCW;
+   else
+      sf.sf5.front_winding = BRW_FRONTWINDING_CW;
+
+   if (brw->attribs.Polygon->CullFlag) {
+      switch (brw->attribs.Polygon->CullFaceMode) {
+      case GL_FRONT:
+	 sf.sf6.cull_mode = BRW_CULLMODE_FRONT;
+	 break;
+      case GL_BACK:
+	 sf.sf6.cull_mode = BRW_CULLMODE_BACK;
+	 break;
+      case GL_FRONT_AND_BACK:
+	 sf.sf6.cull_mode = BRW_CULLMODE_BOTH;
+	 break;
+      default:
+	 assert(0);
+	 break;
+      }
+   }
+   else
+      sf.sf6.cull_mode = BRW_CULLMODE_NONE;
+      
+
+   /* _NEW_LINE */
+   sf.sf6.line_width = brw->attribs.Line->_Width * (1<<1);
+   /* Without line smoothing, a stored width of 0x2 or less is replaced by 0. */
+   sf.sf6.line_endcap_aa_region_width = 1;
+   if (brw->attribs.Line->SmoothFlag)
+      sf.sf6.aa_enable = 1;
+   else if (sf.sf6.line_width <= 0x2) 
+       sf.sf6.line_width = 0; 
+
+   /* _NEW_POINT */
+   sf.sf6.point_rast_rule = 1;	/* opengl conventions */
+   sf.sf7.point_size = brw->attribs.Point->_Size * (1<<3);
+   sf.sf7.use_point_size_state = !brw->attribs.Point->_Attenuated;
+   sf.sf7.aa_line_distance_mode = 0;
+
+   /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
+    */
+   sf.sf7.trifan_pv = 2;
+   sf.sf7.linestrip_pv = 1;
+   sf.sf7.tristrip_pv = 2;
+   sf.sf7.line_last_pixel_enable = 0;
+
+   /* Set bias for OpenGL rasterization rules:
+    */
+   sf.sf6.dest_org_vbias = 0x8;
+   sf.sf6.dest_org_hbias = 0x8;
+
+   brw->sf.state_gs_offset = brw_cache_data( &brw->cache[BRW_SF_UNIT], &sf );
+}
+
+
+/* Tracked-state atom for the SF unit: upload_sf_unit runs when any of the
+ * flags below is raised.
+ */
+const struct brw_tracked_state brw_sf_unit = {
+   .dirty = {
+      .mesa  = _NEW_POLYGON | _NEW_LINE | _NEW_POINT | _NEW_SCISSOR,
+      /* NOTE(review): upload_sf_unit reads no metaops field itself;
+       * confirm BRW_NEW_METAOPS is needed here. */
+      .brw   = BRW_NEW_URB_FENCE | BRW_NEW_METAOPS,
+      .cache = CACHE_NEW_SF_VP | CACHE_NEW_SF_PROG,
+   },
+   .update = upload_sf_unit,
+};
+
+
diff --git a/i965/brw_state.h b/i965/brw_state.h
new file mode 100644
index 0000000..41ac095
--- /dev/null
+++ b/i965/brw_state.h
@@ -0,0 +1,147 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+    
+
+#ifndef BRW_STATE_H
+#define BRW_STATE_H
+
+#include "brw_context.h"
+
+/* Declarations only (extern): each atom is defined exactly once in a .c file. */
+extern const struct brw_tracked_state brw_blend_constant_color;
+extern const struct brw_tracked_state brw_cc_unit;
+extern const struct brw_tracked_state brw_cc_vp;
+extern const struct brw_tracked_state brw_check_fallback;
+extern const struct brw_tracked_state brw_clip_prog;
+extern const struct brw_tracked_state brw_clip_unit;
+extern const struct brw_tracked_state brw_constant_buffer_state;
+extern const struct brw_tracked_state brw_constant_buffer;
+extern const struct brw_tracked_state brw_curbe_offsets;
+extern const struct brw_tracked_state brw_invarient_state;
+extern const struct brw_tracked_state brw_gs_prog;
+extern const struct brw_tracked_state brw_gs_unit;
+extern const struct brw_tracked_state brw_drawing_rect;
+extern const struct brw_tracked_state brw_line_stipple;
+extern const struct brw_tracked_state brw_aa_line_parameters;
+extern const struct brw_tracked_state brw_pipelined_state_pointers;
+extern const struct brw_tracked_state brw_binding_table_pointers;
+extern const struct brw_tracked_state brw_depthbuffer;
+extern const struct brw_tracked_state brw_polygon_stipple_offset;
+extern const struct brw_tracked_state brw_polygon_stipple;
+extern const struct brw_tracked_state brw_program_parameters;
+extern const struct brw_tracked_state brw_recalculate_urb_fence;
+extern const struct brw_tracked_state brw_sf_prog;
+extern const struct brw_tracked_state brw_sf_unit;
+extern const struct brw_tracked_state brw_sf_vp;
+extern const struct brw_tracked_state brw_state_base_address;
+extern const struct brw_tracked_state brw_urb_fence;
+extern const struct brw_tracked_state brw_vertex_state;
+extern const struct brw_tracked_state brw_vs_prog;
+extern const struct brw_tracked_state brw_vs_unit;
+extern const struct brw_tracked_state brw_wm_input_sizes;
+extern const struct brw_tracked_state brw_wm_prog;
+extern const struct brw_tracked_state brw_wm_samplers;
+extern const struct brw_tracked_state brw_wm_surfaces;
+extern const struct brw_tracked_state brw_wm_unit;
+
+extern const struct brw_tracked_state brw_psp_urb_cbs;
+
+extern const struct brw_tracked_state brw_active_vertprog;
+extern const struct brw_tracked_state brw_tnl_vertprog;
+extern const struct brw_tracked_state brw_pipe_control;
+
+extern const struct brw_tracked_state brw_clear_surface_cache;
+extern const struct brw_tracked_state brw_clear_batch_cache;
+
+/***********************************************************************
+ * brw_state_cache.c
+ */
+GLuint brw_cache_data(struct brw_cache *cache,
+		      const void *data );
+/* Sized variant of brw_cache_data(): the same data_sz bytes act as key and data. */
+GLuint brw_cache_data_sz(struct brw_cache *cache,
+			 const void *data,
+			 GLuint data_sz);
+/* Store data under key without searching first; returns the pool offset used. */
+GLuint brw_upload_cache( struct brw_cache *cache,
+			 const void *key,
+			 GLuint key_sz,
+			 const void *data,
+			 GLuint data_sz,
+			 const void *aux,
+			 void *aux_return );
+/* Look key up; GL_TRUE on a hit, with the item's offset written to *offset_return. */
+GLboolean brw_search_cache( struct brw_cache *cache,
+			    const void *key,
+			    GLuint key_size,
+			    void *aux_return,
+			    GLuint *offset_return);
+/* Set up / tear down every cache in brw->cache[]. */
+void brw_init_caches( struct brw_context *brw );
+void brw_destroy_caches( struct brw_context *brw );
+
+/***********************************************************************
+ * brw_state_batch.c
+ * BRW_BATCH_STRUCT always emits *s; BRW_CACHED_BATCH_STRUCT filters repeats. */
+#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( (brw)->intel.batch, (s), sizeof(*(s)), 0)
+#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( (brw), (s), sizeof(*(s)) )
+
+GLboolean brw_cached_batch_struct( struct brw_context *brw,
+				   const void *data,
+				   GLuint sz );	/* GL_FALSE: identical packet, nothing emitted */
+/* Frees the remembered packets (and clears the state caches and pools). */
+void brw_destroy_batch_cache( struct brw_context *brw );
+
+
+/***********************************************************************
+ * brw_state_pool.c
+ */
+void brw_init_pools( struct brw_context *brw );
+void brw_destroy_pools( struct brw_context *brw );
+/* `alignment` is a shift count: the offset is aligned to 1 << alignment bytes. */
+GLboolean brw_pool_alloc( struct brw_mem_pool *pool,
+			  GLuint size,
+			  GLuint alignment,
+			  GLuint *offset_return);
+
+void brw_pool_fence( struct brw_context *brw,
+		     struct brw_mem_pool *pool,
+		     GLuint fence );
+
+/* Requests a wrap once more than 3/4 of the pool has been handed out. */
+void brw_pool_check_wrap( struct brw_context *brw,
+			  struct brw_mem_pool *pool );
+/* Defined in brw_state_cache.c, brw_state_pool.c and brw_state_batch.c respectively. */
+void brw_clear_all_caches( struct brw_context *brw );
+void brw_invalidate_pools( struct brw_context *brw );
+void brw_clear_batch_cache_flush( struct brw_context *brw );
+
+#endif
diff --git a/i965/brw_state_batch.c b/i965/brw_state_batch.c
new file mode 100644
index 0000000..c93d66a
--- /dev/null
+++ b/i965/brw_state_batch.c
@@ -0,0 +1,123 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+     
+
+
+#include "brw_state.h"
+#include "brw_aub.h"
+#include "intel_batchbuffer.h"
+#include "imports.h"
+
+/* Emit a state packet unless it is identical to the last one emitted with the
+ * same opcode (GL_FALSE); emits even when the cached copy cannot be allocated. */
+GLboolean brw_cached_batch_struct( struct brw_context *brw,
+				   const void *data, GLuint sz )
+{
+   struct brw_cached_batch_item *item = brw->cached_batch_items;
+   const struct header *newheader = (const struct header *)data;
+
+   if (brw->emit_state_always)
+      goto emit;
+   for (; item; item = item->next) {
+      if (item->header->opcode != newheader->opcode)
+	 continue;
+      if (item->sz == sz && memcmp(item->header, newheader, sz) == 0)
+	 return GL_FALSE;
+      if (item->sz != sz) {
+	 void *resized = _mesa_malloc(sz);
+	 item->sz = 0;	/* if we bail out, the stale copy must never match */
+	 if (!resized)
+	    goto emit;
+	 _mesa_free(item->header);
+	 item->header = resized;
+	 item->sz = sz;
+      }
+      goto remember;
+   }
+
+   item = CALLOC_STRUCT(brw_cached_batch_item);
+   if (item)
+      item->header = _mesa_malloc(sz);
+   if (!item || !item->header) {
+      _mesa_free(item);
+      goto emit;
+   }
+   item->sz = sz;
+   item->next = brw->cached_batch_items;
+   brw->cached_batch_items = item;
+
+ remember:
+   memcpy(item->header, newheader, sz);
+ emit:
+   intel_batchbuffer_data(brw->intel.batch, data, sz, 0);
+   return GL_TRUE;
+}
+
+/* Drop every remembered packet, then clear all state caches, call
+ * bmReleaseBuffers() and invalidate both pools.
+ */
+static void clear_batch_cache( struct brw_context *brw )
+{
+   struct brw_cached_batch_item *item, *next;
+
+   /* Items come from CALLOC_STRUCT/_mesa_malloc, so free via _mesa_free. */
+   for (item = brw->cached_batch_items; item; item = next) {
+      next = item->next;
+      _mesa_free((void *)item->header);
+      _mesa_free(item);
+   }
+
+   brw->cached_batch_items = NULL;
+
+   brw_clear_all_caches(brw);
+   bmReleaseBuffers(&brw->intel);
+   brw_invalidate_pools(brw);
+}
+
+void brw_clear_batch_cache_flush( struct brw_context *brw )
+{
+   bmFinishFenceLock(&(brw->intel), bmSetFenceLock(&(brw->intel)));
+   clear_batch_cache(brw);
+   /* Fence calls first, then all cached batch/state data is discarded. */
+   brw->wrap = 0;
+   
+/*    brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */
+   /* clear_batch_cache() already set these via brw_clear_all_caches(); repeated here. */
+   brw->state.dirty.mesa |= ~0;
+   brw->state.dirty.brw |= ~0;
+   brw->state.dirty.cache |= ~0;
+}
+
+
+/* Teardown entry point: same work as clear_batch_cache(), cache/pool invalidation included. */
+void brw_destroy_batch_cache( struct brw_context *brw )
+{
+   clear_batch_cache(brw);
+}
diff --git a/i965/brw_state_cache.c b/i965/brw_state_cache.c
new file mode 100644
index 0000000..71c6938
--- /dev/null
+++ b/i965/brw_state_cache.c
@@ -0,0 +1,469 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+      
+
+#include "brw_state.h"
+#include "brw_aub.h"
+#include "intel_batchbuffer.h"
+#include "imports.h"
+
+/* XXX: Fixme - have to include these to get the sizes of the prog_key
+ * structs:
+ */
+#include "brw_wm.h"
+#include "brw_vs.h"
+#include "brw_clip.h"
+#include "brw_sf.h"
+#include "brw_gs.h"
+
+
+/***********************************************************************
+ * Check cache for uploaded version of struct, else upload new one.
+ * Fail when memory is exhausted.
+ *
+ * XXX: FIXME: Currently search is so slow it would be quicker to
+ * regenerate the data every time...
+ */
+
+/* XOR together the 32-bit words of `key` (key_size must be a multiple of 4). */
+static GLuint hash_key( const void *key, GLuint key_size )
+{
+   const GLuint *words = (const GLuint *)key;
+   const GLuint nr_words = key_size / 4;
+   GLuint folded = 0;
+   GLuint w;
+
+   assert(key_size % 4 == 0);
+
+   for (w = 0; w < nr_words; w++)
+      folded ^= words[w];
+   return folded;
+}
+
+/* Return the item in bucket hash % size matching hash, key size and key bytes, or NULL. */
+static struct brw_cache_item *search_cache( struct brw_cache *cache,
+					     GLuint hash,
+					     const void *key,
+					     GLuint key_size)
+{
+   struct brw_cache_item *item = cache->items[hash % cache->size];
+   while (item) {
+      if (item->hash == hash &&
+	  item->key_size == key_size &&
+	  memcmp(item->key, key, key_size) == 0)
+	 break;
+      item = item->next;
+   }
+   return item;
+}
+
+/* Triple the bucket array and relink every item by its stored hash.  If the
+ * larger array cannot be allocated the table is left exactly as it was. */
+static void rehash( struct brw_cache *cache )
+{
+   const GLuint size = cache->size * 3;
+   struct brw_cache_item **items = _mesa_calloc(size * sizeof(*items));
+   struct brw_cache_item *c, *next;
+   GLuint i;
+
+   if (!items)
+      return;
+
+   for (i = 0; i < cache->size; i++)
+      for (c = cache->items[i]; c; c = next) {
+	 next = c->next;
+	 c->next = items[c->hash % size];
+	 items[c->hash % size] = c;
+      }
+   FREE(cache->items);
+   cache->items = items;
+   cache->size = size;
+}
+
+/* Look `key` up.  On a hit, *offset_return gets the item's pool offset and,
+ * if aux_return is non-NULL, *aux_return points at the bytes stored behind
+ * the key.  A miss, or a hit at an offset other than cache->last_addr, sets
+ * this cache's bit in state.dirty.cache.  Returns GL_TRUE on a hit. */
+GLboolean brw_search_cache( struct brw_cache *cache,
+			    const void *key,
+			    GLuint key_size,
+			    void *aux_return,
+			    GLuint *offset_return)
+{
+   struct brw_cache_item *hit =
+      search_cache(cache, hash_key(key, key_size), key, key_size);
+   GLuint offset = 0;
+
+   if (hit != NULL) {
+      if (aux_return)
+	 *(void **)aux_return = (void *)((char *)hit->key + hit->key_size);
+      offset = hit->offset;
+      *offset_return = offset;
+   }
+
+   if (hit == NULL || offset != cache->last_addr) {
+      cache->brw->state.dirty.cache |= 1<<cache->id;
+      cache->last_addr = offset;
+   }
+   return hit != NULL;
+}
+
+/* Store `data` in the cache's pool and index it under `key`; returns the
+ * pool offset.  cache->aux_size bytes of `aux` are kept directly behind
+ * the key copy and exposed through *aux_return when that is non-NULL.
+ * Exits the process if host memory or pool space cannot be obtained.
+ */
+GLuint brw_upload_cache( struct brw_cache *cache,
+			 const void *key,
+			 GLuint key_size,
+			 const void *data,
+			 GLuint data_size,
+			 const void *aux,
+			 void *aux_return )
+{
+   GLuint offset;
+   struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
+   GLuint hash = hash_key(key, key_size);
+   char *tmp = (char *) _mesa_malloc(key_size + cache->aux_size);
+
+   if (!item || !tmp) {
+      _mesa_printf("%s: out of memory\n", __FUNCTION__);
+      exit(1);
+   }
+
+   /* Alignment is passed as a shift count: 6 means 64-byte aligned. */
+   if (!brw_pool_alloc(cache->pool, data_size, 6, &offset)) {
+      /* Should not be possible: */
+      _mesa_printf("brw_pool_alloc failed\n");
+      exit(1);
+   }
+
+   memcpy(tmp, key, key_size);
+   /* char * arithmetic: adding to a void * is a GNU extension, not ISO C. */
+   if (cache->aux_size)
+      memcpy(tmp + key_size, aux, cache->aux_size);
+
+   item->key = tmp;
+   item->hash = hash;
+   item->key_size = key_size;
+   item->offset = offset;
+   item->data_size = data_size;
+
+   if (++cache->n_items > cache->size * 1.5)
+      rehash(cache);
+
+   hash %= cache->size;
+   item->next = cache->items[hash];
+   cache->items[hash] = item;
+
+   if (aux_return) {
+      assert(cache->aux_size);
+      *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+   }
+
+   if (INTEL_DEBUG & DEBUG_STATE)
+      _mesa_printf("upload %s: %d bytes to pool buffer %d offset %x\n",
+		   cache->name, data_size, cache->pool->buffer, offset);
+
+   /* Copy data to the buffer: */
+   bmBufferSubDataAUB(&cache->brw->intel, cache->pool->buffer,
+		      offset, data_size, data,
+		      cache->aub_type, cache->aub_sub_type);
+
+   cache->brw->state.dirty.cache |= 1<<cache->id;
+   cache->last_addr = offset;
+   return offset;
+}
+
+/* Cache `data` keyed by its own bytes: return the offset of an existing
+ * copy, or upload one.  This doesn't really work with aux data -- use
+ * brw_search_cache()/brw_upload_cache() for those caches instead.
+ */
+GLuint brw_cache_data_sz(struct brw_cache *cache,
+			 const void *data,
+			 GLuint data_size)
+{
+   GLuint offset;
+
+   if (brw_search_cache(cache, data, data_size, NULL, &offset))
+      return offset;
+
+   /* Miss: the struct serves as both key and payload. */
+   return brw_upload_cache(cache, data, data_size, data, data_size,
+			   NULL, NULL);
+}
+
+/* Fixed-size front end: cache `data` using the cache's own key_size. */
+GLuint brw_cache_data(struct brw_cache *cache, const void *data)
+{
+   return brw_cache_data_sz(cache, data, cache->key_size);
+}
+
+
+/* Set up brw->cache[id]: an empty 7-bucket hash table, the key/aux sizes,
+ * the AUB type tags, and the memory pool selected by aub_type.
+ */
+static void brw_init_cache( struct brw_context *brw, 
+			    const char *name,
+			    GLuint id,
+			    GLuint key_size,
+			    GLuint aux_size,
+			    GLuint aub_type,
+			    GLuint aub_sub_type )
+{
+   struct brw_cache *cache = &brw->cache[id];
+   cache->brw = brw;
+   cache->id = id;
+   cache->name = name;
+
+   /* The bucket array holds pointers, so size it by the element type of
+    * cache->items rather than by the item struct.
+    */
+   cache->size = 7;
+   cache->n_items = 0;
+   cache->items = (struct brw_cache_item **)
+      _mesa_calloc(cache->size * sizeof(*cache->items));
+
+   cache->key_size = key_size;
+   cache->aux_size = aux_size;
+   cache->aub_type = aub_type;
+   cache->aub_sub_type = aub_sub_type;
+   switch (aub_type) {
+   case DW_GENERAL_STATE: cache->pool = &brw->pool[BRW_GS_POOL]; break;
+   case DW_SURFACE_STATE: cache->pool = &brw->pool[BRW_SS_POOL]; break;
+   default: assert(0); break;
+   }
+}
+
+void brw_init_caches( struct brw_context *brw )
+{
+   /* Argument order: name, cache id, key size, aux size, AUB type, AUB sub-type. */
+   brw_init_cache(brw,
+		  "CC_VP",
+		  BRW_CC_VP,
+		  sizeof(struct brw_cc_viewport),
+		  0,
+		  DW_GENERAL_STATE,
+		  DWGS_COLOR_CALC_VIEWPORT_STATE);
+
+   brw_init_cache(brw,
+		  "CC_UNIT",
+		  BRW_CC_UNIT,
+		  sizeof(struct brw_cc_unit_state),
+		  0,
+		  DW_GENERAL_STATE,
+		  DWGS_COLOR_CALC_STATE);
+
+   brw_init_cache(brw,
+		  "WM_PROG",
+		  BRW_WM_PROG,
+		  sizeof(struct brw_wm_prog_key),
+		  sizeof(struct brw_wm_prog_data),
+		  DW_GENERAL_STATE,
+		  DWGS_KERNEL_INSTRUCTIONS);
+
+   brw_init_cache(brw,
+		  "SAMPLER_DEFAULT_COLOR",
+		  BRW_SAMPLER_DEFAULT_COLOR,
+		  sizeof(struct brw_sampler_default_color),
+		  0,
+		  DW_GENERAL_STATE,
+		  DWGS_SAMPLER_DEFAULT_COLOR);
+
+   brw_init_cache(brw,
+		  "SAMPLER",
+		  BRW_SAMPLER,
+		  0,		/* variable key/data size */
+		  0,
+		  DW_GENERAL_STATE,
+		  DWGS_SAMPLER_STATE);
+
+   brw_init_cache(brw,
+		  "WM_UNIT",
+		  BRW_WM_UNIT,
+		  sizeof(struct brw_wm_unit_state),
+		  0,
+		  DW_GENERAL_STATE,
+		  DWGS_WINDOWER_IZ_STATE);
+
+   brw_init_cache(brw,
+		  "SF_PROG",
+		  BRW_SF_PROG,
+		  sizeof(struct brw_sf_prog_key),
+		  sizeof(struct brw_sf_prog_data),
+		  DW_GENERAL_STATE,
+		  DWGS_KERNEL_INSTRUCTIONS);
+
+   brw_init_cache(brw,
+		  "SF_VP",
+		  BRW_SF_VP,
+		  sizeof(struct brw_sf_viewport),
+		  0,
+		  DW_GENERAL_STATE,
+		  DWGS_STRIPS_FANS_VIEWPORT_STATE);
+
+   brw_init_cache(brw,
+		  "SF_UNIT",
+		  BRW_SF_UNIT,
+		  sizeof(struct brw_sf_unit_state),
+		  0,
+		  DW_GENERAL_STATE,
+		  DWGS_STRIPS_FANS_STATE);
+
+   brw_init_cache(brw,
+		  "VS_UNIT",
+		  BRW_VS_UNIT,
+		  sizeof(struct brw_vs_unit_state),
+		  0,
+		  DW_GENERAL_STATE,
+		  DWGS_VERTEX_SHADER_STATE);
+
+   brw_init_cache(brw,
+		  "VS_PROG",
+		  BRW_VS_PROG,
+		  sizeof(struct brw_vs_prog_key),
+		  sizeof(struct brw_vs_prog_data),
+		  DW_GENERAL_STATE,
+		  DWGS_KERNEL_INSTRUCTIONS);
+
+   brw_init_cache(brw,
+		  "CLIP_UNIT",
+		  BRW_CLIP_UNIT,
+		  sizeof(struct brw_clip_unit_state),
+		  0,
+		  DW_GENERAL_STATE,
+		  DWGS_CLIPPER_STATE);
+
+   brw_init_cache(brw,
+		  "CLIP_PROG",
+		  BRW_CLIP_PROG,
+		  sizeof(struct brw_clip_prog_key),
+		  sizeof(struct brw_clip_prog_data),
+		  DW_GENERAL_STATE,
+		  DWGS_KERNEL_INSTRUCTIONS);
+
+   brw_init_cache(brw,
+		  "GS_UNIT",
+		  BRW_GS_UNIT,
+		  sizeof(struct brw_gs_unit_state),
+		  0,
+		  DW_GENERAL_STATE,
+		  DWGS_GEOMETRY_SHADER_STATE);
+
+   brw_init_cache(brw,
+		  "GS_PROG",
+		  BRW_GS_PROG,
+		  sizeof(struct brw_gs_prog_key),
+		  sizeof(struct brw_gs_prog_data),
+		  DW_GENERAL_STATE,
+		  DWGS_KERNEL_INSTRUCTIONS);
+
+   brw_init_cache(brw,
+		  "SS_SURFACE",
+		  BRW_SS_SURFACE,
+		  sizeof(struct brw_surface_state),
+		  0,
+		  DW_SURFACE_STATE,
+		  DWSS_SURFACE_STATE);
+
+   brw_init_cache(brw,
+		  "SS_SURF_BIND",
+		  BRW_SS_SURF_BIND,
+		  sizeof(struct brw_surface_binding_table),
+		  0,
+		  DW_SURFACE_STATE,
+		  DWSS_BINDING_TABLE_STATE);
+}
+
+
+/* When we lose hardware context, need to invalidate the surface cache
+ * as these structs must be explicitly re-uploaded.  They are subject
+ * to fixup by the memory manager as they contain absolute agp
+ * offsets, so we need to ensure there is a fresh version of the
+ * struct available to receive the fixup.
+ *
+ * XXX: Need to ensure that there aren't two versions of a surface or
+ * bufferobj with different backing data active in the same buffer at
+ * once?  Otherwise the cache could confuse them.  Maybe better not to
+ * cache at all?
+ * 
+ * --> Isn't this the same as saying need to ensure batch is flushed
+ *         before new data is uploaded to an existing buffer?  We
+ *         already try to make sure of that.
+ */
+static void clear_cache( struct brw_cache *cache )
+{
+   struct brw_cache_item *c, *next;
+   GLuint i;
+
+   /* Items and keys come from CALLOC_STRUCT/_mesa_malloc: free via _mesa_free. */
+   for (i = 0; i < cache->size; i++) {
+      for (c = cache->items[i]; c; c = next) {
+	 next = c->next;
+	 _mesa_free((void *)c->key);
+	 _mesa_free(c);
+      }
+      cache->items[i] = NULL;
+   }
+   cache->n_items = 0;		/* the bucket array itself is kept */
+}
+
+/* Empty every state cache, drop curbe.last_buf and mark all state dirty. */
+void brw_clear_all_caches( struct brw_context *brw )
+{
+   struct brw_cache *cache = &brw->cache[0];
+   struct brw_cache *const end = cache + BRW_MAX_CACHE;
+
+   if (INTEL_DEBUG & DEBUG_STATE)
+      _mesa_printf("%s\n", __FUNCTION__);
+   while (cache != end)
+      clear_cache(cache++);
+
+   if (brw->curbe.last_buf != NULL) {
+      _mesa_free(brw->curbe.last_buf);
+      brw->curbe.last_buf = NULL;
+   }
+   brw->state.dirty.mesa |= ~0;
+   brw->state.dirty.brw |= ~0;
+   brw->state.dirty.cache |= ~0;
+}
+
+/* Free all cached items, then the bucket arrays that clear_cache() keeps. */
+void brw_destroy_caches( struct brw_context *brw )
+{
+   GLuint i;
+   for (i = 0; i < BRW_MAX_CACHE; i++) {
+      clear_cache(&brw->cache[i]);
+      FREE(brw->cache[i].items);
+      brw->cache[i].items = NULL;
+      brw->cache[i].size = 0;	/* a later clear_cache() then touches nothing */
+   }
+}
diff --git a/i965/brw_state_pool.c b/i965/brw_state_pool.c
new file mode 100644
index 0000000..b9926f2
--- /dev/null
+++ b/i965/brw_state_pool.c
@@ -0,0 +1,154 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+       
+
+#include "brw_state.h"
+#include "imports.h"
+
+#include "intel_ioctl.h"
+#include "bufmgr.h"
+
+/* Carve `size` bytes (rounded up to a multiple of 4) out of `pool`,
+ * aligned to 1 << align bytes, and return the offset in *offset_return.
+ * Pool exhaustion is fatal, so the function only ever returns GL_TRUE.
+ */
+GLboolean brw_pool_alloc( struct brw_mem_pool *pool,
+			  GLuint size,
+			  GLuint align,
+			  GLuint *offset_return)
+{
+   const GLuint align_mask = (1<<align)-1;
+   const GLuint start = (pool->offset + align_mask) & ~align_mask;
+
+   size = (size + 3) & ~3;
+   if (start + size >= pool->size) {
+      _mesa_printf("%s failed\n", __FUNCTION__);
+      assert(0);
+      exit(1);		/* a fatal failure must not exit with status 0 */
+   }
+   *offset_return = start;
+   pool->offset = start + size;
+   return GL_TRUE;
+}
+
+/* Re-specify the pool's buffer at its full size with no data, rewind the
+ * allocation offset to 0 and clear every state cache.
+ */
+static void brw_invalidate_pool( struct intel_context *intel,
+				 struct brw_mem_pool *pool )
+{
+   if (INTEL_DEBUG & DEBUG_STATE)
+      _mesa_printf("\n\n\n %s \n\n\n", __FUNCTION__);
+
+   /* Same bmBufferData() arguments as brw_init_pool() uses at creation. */
+   bmBufferData(intel, pool->buffer, pool->size,
+		NULL, 0);
+
+   pool->offset = 0;
+
+   brw_clear_all_caches(pool->brw);
+}
+
+/* Invalidate callback registered in brw_init_pool(); `ptr` is the brw_mem_pool passed there. */
+static void brw_invalidate_pool_cb( struct intel_context *intel, void *ptr )
+{
+   struct brw_mem_pool *const pool = (struct brw_mem_pool *) ptr;
+   pool->offset = 0;
+   brw_clear_all_caches(pool->brw);
+}
+
+
+/* Create the buffer behind brw->pool[pool_id], register its invalidate callback and size it. */
+static void brw_init_pool( struct brw_context *brw,
+			   GLuint pool_id,
+			   GLuint size )
+{
+   struct brw_mem_pool *pool = &brw->pool[pool_id];
+   /* NOTE(review): pool->offset is not set here; presumably the context starts zeroed -- confirm. */
+   pool->size = size;   
+   pool->brw = brw;
+   /* NOTE(review): the meaning of the trailing 12 passed to bmGenBuffers is not visible here. */
+   bmGenBuffers(&brw->intel, "pool", 1, &pool->buffer, 12);
+
+   /* Also want to say not to wait on fences when data is presented
+    */
+   bmBufferSetInvalidateCB(&brw->intel, pool->buffer, 
+			   brw_invalidate_pool_cb, 
+			   pool,
+			   GL_TRUE);   
+   /* Declare the buffer at pool->size bytes; no initial contents are supplied (NULL). */
+   bmBufferData(&brw->intel,
+		pool->buffer,
+		pool->size,
+		NULL,
+		0); 
+
+}
+
+/* Delete the one buffer object backing brw->pool[pool_id]; no other field
+ * of the pool is modified.
+ */
+static void brw_destroy_pool( struct brw_context *brw, GLuint pool_id )
+{
+   bmDeleteBuffers(&brw->intel, 1, &brw->pool[pool_id].buffer);
+}
+
+
+/* Past 3/4 full: set aub_wrap if an AUB file is open, else raise BRW_NEW_CONTEXT. */
+void brw_pool_check_wrap( struct brw_context *brw,
+			  struct brw_mem_pool *pool )
+{
+   if (pool->offset <= (pool->size * 3) / 4)
+      return;
+   if (brw->intel.aub_file)
+      brw->intel.aub_wrap = 1;
+   else
+      brw->state.dirty.brw |= BRW_NEW_CONTEXT;
+}
+
+void brw_init_pools( struct brw_context *brw )
+{
+   brw_init_pool(brw, BRW_GS_POOL, 0x80000);	/* pool used by DW_GENERAL_STATE caches */
+   brw_init_pool(brw, BRW_SS_POOL, 0x80000);	/* pool used by DW_SURFACE_STATE caches */
+}
+
+void brw_destroy_pools( struct brw_context *brw )
+{
+   brw_destroy_pool(brw, BRW_GS_POOL);	/* deletes each pool's buffer object */
+   brw_destroy_pool(brw, BRW_SS_POOL);
+}
+
+
+void brw_invalidate_pools( struct brw_context *brw )
+{
+   brw_invalidate_pool(&brw->intel, &brw->pool[BRW_GS_POOL]);	/* each call also clears all state caches */
+   brw_invalidate_pool(&brw->intel, &brw->pool[BRW_SS_POOL]);
+}
diff --git a/i965/brw_state_upload.c b/i965/brw_state_upload.c
new file mode 100644
index 0000000..9bd2881
--- /dev/null
+++ b/i965/brw_state_upload.c
@@ -0,0 +1,264 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+       
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "bufmgr.h"
+#include "intel_batchbuffer.h"
+
+/* Template for brw->state.atoms[], copied per context by brw_init_state().
+ * It cannot be used directly because one atom, brw_constant_buffer, has a
+ * .dirty value which changes according to the parameters of the current
+ * fragment and vertex programs, and so cannot be a static value; its slot
+ * is the NULL entry, patched to &brw->curbe.tracked_state at init time.
+ */
+const struct brw_tracked_state *atoms[] =
+{
+   &brw_check_fallback,
+
+   &brw_tnl_vertprog,
+   &brw_active_vertprog,
+   &brw_wm_input_sizes,
+   &brw_vs_prog,
+   &brw_gs_prog, 
+   &brw_clip_prog, 
+   &brw_sf_prog,
+   &brw_wm_prog,
+
+   /* Once all the programs are done, we know how large urb entry
+    * sizes need to be and can decide if we need to change the urb
+    * layout.
+    */
+   &brw_curbe_offsets,
+   &brw_recalculate_urb_fence,
+
+
+   &brw_cc_vp,
+   &brw_cc_unit,
+
+   &brw_wm_surfaces,		/* must do before samplers */
+   &brw_wm_samplers,
+
+   &brw_wm_unit,
+   &brw_sf_vp,
+   &brw_sf_unit,
+   &brw_vs_unit,		/* always required, enabled or not */
+   &brw_clip_unit,
+   &brw_gs_unit,  
+
+   /* Command packets:
+    */
+   &brw_invarient_state,
+   &brw_state_base_address,
+   &brw_pipe_control,
+
+   &brw_binding_table_pointers,
+   &brw_blend_constant_color,
+
+   &brw_drawing_rect,
+   &brw_depthbuffer,
+
+   &brw_polygon_stipple,
+   &brw_polygon_stipple_offset,
+
+   &brw_line_stipple,
+   &brw_aa_line_parameters,
+   /* Ordering of the commands below is documented as fixed.  
+    */
+#if 0
+   &brw_pipelined_state_pointers,
+   &brw_urb_fence,
+   &brw_constant_buffer_state,
+#else
+   &brw_psp_urb_cbs,
+#endif
+
+
+   NULL,			/* brw_constant_buffer: placeholder, replaced in brw_init_state() */
+};
+
+/* Create pools and caches, then build the per-context atom list from the static `atoms` template. */
+void brw_init_state( struct brw_context *brw )
+{
+   GLuint slot;
+
+   brw_init_pools(brw);
+   brw_init_caches(brw);
+
+   /* NOTE(review): the _mesa_malloc() result is used without a NULL check. */
+   brw->state.nr_atoms = sizeof(atoms) / sizeof(atoms[0]);
+   brw->state.atoms = _mesa_malloc(sizeof(atoms));
+   _mesa_memcpy(brw->state.atoms, atoms, sizeof(atoms));
+
+   /* Every NULL slot of the copy is pointed at the dynamic state atom. */
+   for (slot = 0; slot < brw->state.nr_atoms; slot++) {
+      if (!brw->state.atoms[slot])
+	 brw->state.atoms[slot] = &brw->curbe.tracked_state;
+   }
+
+   /* Seed that per-context atom with a copy of brw_constant_buffer. */
+   _mesa_memcpy(&brw->curbe.tracked_state, &brw_constant_buffer, sizeof(brw_constant_buffer));
+}
+
+/* Free the atom list, then destroy the caches, the batch cache and the pools, in that order. */
+void brw_destroy_state( struct brw_context *brw )
+{
+   /* Release the atom list if one exists, and never keep a stale pointer. */
+   if (brw->state.atoms != NULL)
+      _mesa_free(brw->state.atoms);
+   brw->state.atoms = NULL;
+
+   brw_destroy_caches(brw);
+   brw_destroy_batch_cache(brw);
+   brw_destroy_pools(brw);
+}
+
+/***********************************************************************
+ */
+/* True when *a and *b share at least one set bit in any of the mesa, brw or cache groups. */
+static GLboolean check_state( const struct brw_state_flags *a,
+			      const struct brw_state_flags *b )
+{
+   if ((a->mesa & b->mesa) || (a->brw & b->brw))
+      return GL_TRUE;
+   return (a->cache & b->cache) ? GL_TRUE : GL_FALSE;
+}
+/* OR every flag of *b into *a, group by group; *b is left unchanged. */
+static void accumulate_state( struct brw_state_flags *a,
+			      const struct brw_state_flags *b )
+{
+   a->cache = a->cache | b->cache;
+   a->brw = a->brw | b->brw;
+   a->mesa = a->mesa | b->mesa;
+}
+
+/* Store in *result the per-group XOR of *a and *b, i.e. the flags on which the two differ. */
+static void xor_states( struct brw_state_flags *result,
+			const struct brw_state_flags *a,
+			const struct brw_state_flags *b )
+{
+   result->cache = b->cache ^ a->cache;
+   result->brw = b->brw ^ a->brw;
+   result->mesa = b->mesa ^ a->mesa;
+}
+
+
+/***********************************************************************
+ * Emit all state: fold pending Mesa/driver changes into
+ * brw->state.dirty, call ->update() on every atom whose dirty mask
+ * overlaps it (in list order), then clear the flags.  Returns early
+ * when no flag is set.
+ */
+void brw_validate_state( struct brw_context *brw )
+{
+   struct brw_state_flags *state = &brw->state.dirty;
+   GLuint i;
+
+   state->mesa |= brw->intel.NewGLState;
+   brw->intel.NewGLState = 0;
+
+   if (brw->wrap)
+      state->brw |= BRW_NEW_CONTEXT;
+
+   if (brw->emit_state_always) {
+      state->mesa |= ~0;
+      state->brw |= ~0;
+   }
+
+   /* texenv program needs to notify us somehow when this happens: 
+    * Some confusion about which state flag should represent this change.
+    */
+   if (brw->fragment_program != brw->attribs.FragmentProgram->_Current) {
+      brw->fragment_program = brw->attribs.FragmentProgram->_Current;
+      state->mesa |= _NEW_PROGRAM;
+      state->brw |= BRW_NEW_FRAGMENT_PROGRAM;
+   }
+
+   if (state->mesa == 0 &&
+       state->cache == 0 &&
+       state->brw == 0)
+      return;
+
+   if (state->brw & BRW_NEW_CONTEXT)
+      brw_clear_batch_cache_flush(brw);
+
+   /* Make an early reference to the state pools, as we don't cope
+    * well with them being evicted from here down.
+    */
+   (void)bmBufferOffset(&brw->intel, brw->pool[BRW_GS_POOL].buffer);
+   (void)bmBufferOffset(&brw->intel, brw->pool[BRW_SS_POOL].buffer);
+   (void)bmBufferOffset(&brw->intel, brw->intel.batch->buffer);
+
+   if (INTEL_DEBUG) {
+      /* Debug version which enforces various sanity checks on the
+       * state flags which are generated and checked to help ensure
+       * state atoms are ordered correctly in the list.
+       */
+      struct brw_state_flags examined, prev;
+      _mesa_memset(&examined, 0, sizeof(examined));
+      prev = *state;
+
+      for (i = 0; i < brw->state.nr_atoms; i++) {
+	 const struct brw_tracked_state *atom = brw->state.atoms[i];
+	 struct brw_state_flags generated;
+
+	 assert(atom->dirty.mesa ||
+		atom->dirty.brw ||
+		atom->dirty.cache);
+	 assert(atom->update);
+
+	 if (check_state(state, &atom->dirty))
+	    atom->update( brw );
+
+	 accumulate_state(&examined, &atom->dirty);
+
+	 /* generated = (prev ^ state)
+	  * if (examined & generated)
+	  *     fail;
+	  */
+	 xor_states(&generated, &prev, state);
+	 assert(!check_state(&examined, &generated));
+	 prev = *state;
+      }
+   }
+   else {
+      /* Bound by the per-context list's own length, matching the
+       * array actually indexed (and the debug path above). */
+      for (i = 0; i < brw->state.nr_atoms; i++) {
+	 if (check_state(state, &brw->state.atoms[i]->dirty))
+	    brw->state.atoms[i]->update( brw );
+      }
+   }
+
+   _mesa_memset(state, 0, sizeof(*state));
+}
diff --git a/i965/brw_structs.h b/i965/brw_structs.h
new file mode 100644
index 0000000..a799122
--- /dev/null
+++ b/i965/brw_structs.h
@@ -0,0 +1,1409 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+        
+
+#ifndef BRW_STRUCTS_H
+#define BRW_STRUCTS_H
+
+/* Command packets:  (struct header is the generic first dword:
+ * a 16-bit length field plus a 16-bit opcode field) */
+struct header 
+{
+   GLuint length:16; 
+   GLuint opcode:16; 
+};
+
+/* One header dword, accessible as bit-fields (.bits) or as a raw GLuint (.dword). */
+union header_union
+{
+   struct header bits;
+   GLuint dword;
+};
+
+struct brw_3d_control   /* command packet, four dwords */
+{   
+   struct 
+   {
+      GLuint length:8;
+      GLuint notify_enable:1;
+      GLuint pad:3;
+      GLuint wc_flush_enable:1; 
+      GLuint depth_stall_enable:1; 
+      GLuint operation:2; 
+      GLuint opcode:16; 
+   } header;   /* dword 0: field widths sum to 32 */
+   
+   struct
+   {
+      GLuint pad:2;
+      GLuint dest_addr_type:1; 
+      GLuint dest_addr:29; 
+   } dest;   /* dword 1 */
+   
+   GLuint dword2;   
+   GLuint dword3;   
+};
+
+
+struct brw_3d_primitive   /* command packet: bit-field header dword + five plain GLuint dwords */
+{
+   struct
+   {
+      GLuint length:8; 
+      GLuint pad:2;
+      GLuint topology:5; 
+      GLuint indexed:1; 
+      GLuint opcode:16; 
+   } header;   /* dword 0: field widths sum to 32 */
+
+   GLuint verts_per_instance;  
+   GLuint start_vert_location;  
+   GLuint instance_count;  
+   GLuint start_instance_location;  
+   GLuint base_vert_location;  
+};
+
+/* These seem to be passed around as function args, so it works out
+ * better to keep them as #defines.  Each value is a distinct single bit;
+ * all four fit the 4-bit `flags` field of struct brw_mi_flush below. */
+#define BRW_FLUSH_READ_CACHE           0x1
+#define BRW_FLUSH_STATE_CACHE          0x2
+#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4
+#define BRW_FLUSH_SNAPSHOT_COUNTERS    0x8
+
+struct brw_mi_flush   /* single-dword command: 4 flag bits, 12 pad bits, 16-bit opcode */
+{
+   GLuint flags:4;   /* presumably an OR of the BRW_*FLUSH* values defined above -- confirm */
+   GLuint pad:12;
+   GLuint opcode:16;
+};
+
+struct brw_vf_statistics   /* single-dword command: 1 enable bit, 15 pad bits, 16-bit opcode */
+{
+   GLuint statistics_enable:1;
+   GLuint pad:15;
+   GLuint opcode:16;
+};
+
+
+
+struct brw_binding_table_pointers   /* command packet: header dword + five GLuint slots */
+{
+   struct header header;
+   GLuint vs;    /* slots are named like the vs/gs/clip/sf/wm unit-state structs -- */
+   GLuint gs;    /* presumably one binding-table pointer per unit; confirm */
+   GLuint clp; 
+   GLuint sf; 
+   GLuint wm; 
+};
+
+
+struct brw_blend_constant_color   /* command packet: header dword + four GLfloat components */
+{
+   struct header header;
+   GLfloat blend_constant_color[4];  
+};
+
+
+struct brw_depthbuffer   /* command packet, six dwords; each bit-field dword is also readable as .dword */
+{
+   union header_union header;   /* dword 0 */
+   
+   union {
+      struct {
+	 GLuint pitch:18; 
+	 GLuint format:3; 
+	 GLuint pad:2;
+	 GLuint software_tiled_rendering_mode:2;
+	 GLuint depth_offset_disable:1; 
+	 GLuint tile_walk:1; 
+	 GLuint tiled_surface:1; 
+	 GLuint pad2:1;
+	 GLuint surface_type:3; 
+      } bits;
+      GLuint dword;
+   } dword1;
+   
+   GLuint dword2_base_addr; 
+ 
+   union {
+      struct {
+	 GLuint pad:1;
+	 GLuint mipmap_layout:1; 
+	 GLuint lod:4; 
+	 GLuint width:13; 
+	 GLuint height:13; 
+      } bits;
+      GLuint dword;
+   } dword3;
+
+   union {
+      struct {
+	 GLuint pad:10;
+	 GLuint min_array_element:11; 
+	 GLuint depth:11; 
+      } bits;
+      GLuint dword;
+   } dword4;
+
+   union {
+      struct {
+         GLuint xoffset:16;
+         GLuint yoffset:16;
+      } bits;
+      GLuint dword;
+   } dword5;   /* NEW in Integrated Graphics Device */
+};
+
+struct brw_drawrect   /* command packet: header dword + six 16-bit fields (three dwords) */
+{
+   struct header header;
+   GLuint xmin:16; 
+   GLuint ymin:16; 
+   GLuint xmax:16; 
+   GLuint ymax:16; 
+   GLuint xorg:16;  
+   GLuint yorg:16;  
+};
+
+
+
+
+struct brw_global_depth_offset_clamp   /* command packet: header dword + one GLfloat */
+{
+   struct header header;
+   GLfloat depth_offset_clamp;  
+};
+
+struct brw_indexbuffer   /* command packet: header dword (bit-fields or raw) + two GLuint dwords */
+{   
+   union {
+      struct
+      {
+	 GLuint length:8; 
+	 GLuint index_format:2; 
+	 GLuint cut_index_enable:1; 
+	 GLuint pad:5; 
+	 GLuint opcode:16; 
+      } bits;   /* field widths sum to 32 */
+      GLuint dword;
+
+   } header;
+
+   GLuint buffer_start; 
+   GLuint buffer_end; 
+};
+
+/* NEW in Integrated Graphics Device.  Command packet: header dword + two bit-field dwords. */
+struct brw_aa_line_parameters
+{
+   struct header header;
+
+   struct {
+      GLuint aa_coverage_scope:8;
+      GLuint pad0:8;
+      GLuint aa_coverage_bias:8;
+      GLuint pad1:8;
+   } bits0;   /* dword 1 */
+
+   struct {
+      GLuint aa_coverage_endcap_slope:8;
+      GLuint pad0:8;
+      GLuint aa_coverage_endcap_bias:8;
+      GLuint pad1:8;
+   } bits1;   /* dword 2 */
+};
+
+struct brw_line_stipple   /* command packet: header dword + two bit-field dwords */
+{   
+   struct header header;
+  
+   struct
+   {
+      GLuint pattern:16; 
+      GLuint pad:16;
+   } bits0;   /* dword 1 */
+   
+   struct
+   {
+      GLuint repeat_count:9; 
+      GLuint pad:7;
+      GLuint inverse_repeat_count:16; 
+   } bits1;   /* dword 2 */
+};
+
+
+struct brw_pipelined_state_pointers   /* command packet: header dword + one 27-bit offset dword per unit */
+{
+   struct header header;
+   
+   struct {
+      GLuint pad:5;
+      GLuint offset:27; 
+   } vs;
+   
+   struct
+   {
+      GLuint enable:1;   /* only the gs and clp dwords carry an enable bit */
+      GLuint pad:4;
+      GLuint offset:27; 
+   } gs;
+   
+   struct
+   {
+      GLuint enable:1;
+      GLuint pad:4;
+      GLuint offset:27; 
+   } clp;
+   
+   struct
+   {
+      GLuint pad:5;
+      GLuint offset:27; 
+   } sf;
+
+   struct
+   {
+      GLuint pad:5;
+      GLuint offset:27; 
+   } wm;
+   
+   struct
+   {
+      GLuint pad:5;
+      GLuint offset:27; /* KW: check me! */
+   } cc;
+};
+
+
+struct brw_polygon_stipple_offset   /* command packet: header dword + one bit-field dword */
+{
+   struct header header;
+
+   struct {
+      GLuint y_offset:5; 
+      GLuint pad:3;
+      GLuint x_offset:5; 
+      GLuint pad0:19;
+   } bits0;   /* dword 1: field widths sum to 32 */
+};
+
+
+
+struct brw_polygon_stipple   /* command packet: header dword + 32 GLuint stipple words */
+{
+   struct header header;
+   GLuint stipple[32];
+};
+
+
+
+struct brw_pipeline_select   /* single-dword command; note there is no length field in this header */
+{
+   struct
+   {
+      GLuint pipeline_select:1;   
+      GLuint pad:15;
+      GLuint opcode:16;   
+   } header;
+};
+
+
+struct brw_pipe_control   /* command packet, four dwords; same overall shape as struct brw_3d_control */
+{
+   struct
+   {
+      GLuint length:8;
+      GLuint notify_enable:1;
+      GLuint texture_cache_flush_enable:1;
+      GLuint indirect_state_pointers_disable:1;
+      GLuint instruction_state_cache_flush_enable:1;
+      GLuint write_cache_flush_enable:1;
+      GLuint depth_stall_enable:1;
+      GLuint post_sync_operation:2;
+
+      GLuint opcode:16;
+   } header;   /* dword 0: field widths sum to 32 */
+
+   struct
+   {
+      GLuint pad:2;
+      GLuint dest_addr_type:1;
+      GLuint dest_addr:29;
+   } bits1;   /* dword 1 */
+
+   GLuint data0;
+   GLuint data1;
+};
+
+
+struct brw_urb_fence   /* command packet, three dwords: realloc bits, then six 10-bit fence values */
+{
+   struct
+   {
+      GLuint length:8;   
+      GLuint vs_realloc:1;   
+      GLuint gs_realloc:1;   
+      GLuint clp_realloc:1;   
+      GLuint sf_realloc:1;   
+      GLuint vfe_realloc:1;   
+      GLuint cs_realloc:1;   
+      GLuint pad:2;
+      GLuint opcode:16;   
+   } header;   /* dword 0 */
+
+   struct
+   {
+      GLuint vs_fence:10;  
+      GLuint gs_fence:10;  
+      GLuint clp_fence:10;  
+      GLuint pad:2;
+   } bits0;   /* dword 1 */
+
+   struct
+   {
+      GLuint sf_fence:10;  
+      GLuint vf_fence:10;  
+      GLuint cs_fence:10;  
+      GLuint pad:2;
+   } bits1;   /* dword 2 */
+};
+
+struct brw_constant_buffer_state /* previously brw_command_streamer; header dword + one bit-field dword */
+{
+   struct header header;
+
+   struct
+   {
+      GLuint nr_urb_entries:3;   
+      GLuint pad:1;
+      GLuint urb_entry_size:5;   
+      GLuint pad0:23;
+   } bits0;   /* dword 1: field widths sum to 32 */
+};
+
+struct brw_constant_buffer   /* command packet, two dwords */
+{
+   struct
+   {
+      GLuint length:8;   
+      GLuint valid:1;   
+      GLuint pad:7;
+      GLuint opcode:16;   
+   } header;   /* dword 0 */
+
+   struct
+   {
+      GLuint buffer_length:6;   
+      GLuint buffer_address:26;  
+   } bits0;   /* dword 1: 6-bit length and 26-bit address share one dword */
+};
+
+struct brw_state_base_address   /* command packet: header dword + five dwords, each led by a modify_enable bit */
+{
+   struct header header;
+
+   struct
+   {
+      GLuint modify_enable:1;
+      GLuint pad:4;
+      GLuint general_state_address:27;  
+   } bits0;   /* dword 1 */
+
+   struct
+   {
+      GLuint modify_enable:1;
+      GLuint pad:4;
+      GLuint surface_state_address:27;  
+   } bits1;   /* dword 2 */
+
+   struct
+   {
+      GLuint modify_enable:1;
+      GLuint pad:4;
+      GLuint indirect_object_state_address:27;  
+   } bits2;   /* dword 3 */
+
+   struct
+   {
+      GLuint modify_enable:1;
+      GLuint pad:11;
+      GLuint general_state_upper_bound:20;  
+   } bits3;   /* dword 4 */
+
+   struct
+   {
+      GLuint modify_enable:1;
+      GLuint pad:11;
+      GLuint indirect_object_state_upper_bound:20;  
+   } bits4;   /* dword 5 */
+};
+
+struct brw_state_prefetch   /* command packet: header dword + one bit-field dword */
+{
+   struct header header;
+
+   struct
+   {
+      GLuint prefetch_count:3;   
+      GLuint pad:3;
+      GLuint prefetch_pointer:26;  
+   } bits0;   /* dword 1: field widths sum to 32 */
+};
+
+struct brw_system_instruction_pointer   /* command packet: header dword + one bit-field dword */
+{
+   struct header header;
+
+   struct
+   {
+      GLuint pad:4;
+      GLuint system_instruction_pointer:28;  
+   } bits0;   /* dword 1: 4 pad bits + 28-bit pointer field */
+};
+
+
+
+
+/* State structs for the various fixed function units:
+ */
+
+/* thread0..thread3: one dword each (bit widths sum to 32), embedded as leading members of the *_unit_state structs below. */
+struct thread0
+{
+   GLuint pad0:1;
+   GLuint grf_reg_count:3; 
+   GLuint pad1:2;
+   GLuint kernel_start_pointer:26; 
+};
+
+struct thread1   /* one dword; used by the sf/gs/vs/wm unit states (the clip unit declares its own thread1) */
+{
+   GLuint ext_halt_exception_enable:1; 
+   GLuint sw_exception_enable:1; 
+   GLuint mask_stack_exception_enable:1; 
+   GLuint timeout_exception_enable:1; 
+   GLuint illegal_op_exception_enable:1; 
+   GLuint pad0:3;
+   GLuint depth_coef_urb_read_offset:6;	/* WM only */
+   GLuint pad1:2;
+   GLuint floating_point_mode:1; 
+   GLuint thread_priority:1; 
+   GLuint binding_table_entry_count:8; 
+   GLuint pad3:5;
+   GLuint single_program_flow:1; 
+};
+
+struct thread2   /* one dword: scratch-space size field, padding, 22-bit base pointer field */
+{
+   GLuint per_thread_scratch_space:4; 
+   GLuint pad0:6;
+   GLuint scratch_space_base_pointer:22; 
+};
+
+   
+struct thread3   /* one dword: dispatch start register plus URB / constant-URB read offset and length fields */
+{
+   GLuint dispatch_grf_start_reg:4; 
+   GLuint urb_entry_read_offset:6; 
+   GLuint pad0:1;
+   GLuint urb_entry_read_length:6; 
+   GLuint pad1:1;
+   GLuint const_urb_entry_read_offset:6; 
+   GLuint pad2:1;
+   GLuint const_urb_entry_read_length:6; 
+   GLuint pad3:1;
+};
+
+
+
+struct brw_clip_unit_state   /* clip unit state: seven bit-field dwords followed by four GLfloats */
+{
+   struct thread0 thread0;
+   struct   /* NOTE(review): private variant -- bit offsets differ from struct thread1 (7 pad bits first); confirm intended */
+   {
+      GLuint pad0:7;
+      GLuint sw_exception_enable:1;
+      GLuint pad1:3;
+      GLuint mask_stack_exception_enable:1;
+      GLuint pad2:1;
+      GLuint illegal_op_exception_enable:1;
+      GLuint pad3:2;
+      GLuint floating_point_mode:1;
+      GLuint thread_priority:1;
+      GLuint binding_table_entry_count:8;
+      GLuint pad4:5;
+      GLuint single_program_flow:1;
+   } thread1;
+
+   struct thread2 thread2;
+   struct thread3 thread3;
+
+   struct
+   {
+      GLuint pad0:9;
+      GLuint gs_output_stats:1; /* not always */
+      GLuint stats_enable:1; 
+      GLuint nr_urb_entries:7; 
+      GLuint pad1:1;
+      GLuint urb_entry_allocation_size:5; 
+      GLuint pad2:1;
+      GLuint max_threads:5; 	/* may be less */
+      GLuint pad3:2;
+   } thread4;   
+      
+   struct
+   {
+      GLuint pad0:13;
+      GLuint clip_mode:3; 
+      GLuint userclip_enable_flags:8; 
+      GLuint userclip_must_clip:1; 
+      GLuint negative_w_clip_test:1;
+      GLuint guard_band_enable:1; 
+      GLuint viewport_z_clip_enable:1; 
+      GLuint viewport_xy_clip_enable:1; 
+      GLuint vertex_position_space:1; 
+      GLuint api_mode:1; 
+      GLuint pad2:1;
+   } clip5;
+   
+   struct
+   {
+      GLuint pad0:5;
+      GLuint clipper_viewport_state_ptr:27; 
+   } clip6;
+
+   
+   GLfloat viewport_xmin;  
+   GLfloat viewport_xmax;  
+   GLfloat viewport_ymin;  
+   GLfloat viewport_ymax;  
+};
+
+
+
+struct brw_cc_unit_state   /* eight dwords, cc0..cc7; no thread0..3 members, unlike the other unit states */
+{
+   struct
+   {
+      GLuint pad0:3;
+      GLuint bf_stencil_pass_depth_pass_op:3; 
+      GLuint bf_stencil_pass_depth_fail_op:3; 
+      GLuint bf_stencil_fail_op:3; 
+      GLuint bf_stencil_func:3; 
+      GLuint bf_stencil_enable:1; 
+      GLuint pad1:2;
+      GLuint stencil_write_enable:1; 
+      GLuint stencil_pass_depth_pass_op:3; 
+      GLuint stencil_pass_depth_fail_op:3; 
+      GLuint stencil_fail_op:3; 
+      GLuint stencil_func:3; 
+      GLuint stencil_enable:1; 
+   } cc0;
+
+   
+   struct
+   {
+      GLuint bf_stencil_ref:8; 
+      GLuint stencil_write_mask:8; 
+      GLuint stencil_test_mask:8; 
+      GLuint stencil_ref:8; 
+   } cc1;
+
+   
+   struct
+   {
+      GLuint logicop_enable:1; 
+      GLuint pad0:10;
+      GLuint depth_write_enable:1; 
+      GLuint depth_test_function:3; 
+      GLuint depth_test:1; 
+      GLuint bf_stencil_write_mask:8; 
+      GLuint bf_stencil_test_mask:8; 
+   } cc2;
+
+   
+   struct
+   {
+      GLuint pad0:8;
+      GLuint alpha_test_func:3; 
+      GLuint alpha_test:1; 
+      GLuint blend_enable:1; 
+      GLuint ia_blend_enable:1; 
+      GLuint pad1:1;
+      GLuint alpha_test_format:1;
+      GLuint pad2:16;
+   } cc3;
+   
+   struct
+   {
+      GLuint pad0:5; 
+      GLuint cc_viewport_state_offset:27; 
+   } cc4;
+   
+   struct
+   {
+      GLuint pad0:2;
+      GLuint ia_dest_blend_factor:5; 
+      GLuint ia_src_blend_factor:5; 
+      GLuint ia_blend_function:3; 
+      GLuint statistics_enable:1; 
+      GLuint logicop_func:4; 
+      GLuint pad1:11;
+      GLuint dither_enable:1; 
+   } cc5;
+
+   struct
+   {
+      GLuint clamp_post_alpha_blend:1; 
+      GLuint clamp_pre_alpha_blend:1; 
+      GLuint clamp_range:2; 
+      GLuint pad0:11;
+      GLuint y_dither_offset:2; 
+      GLuint x_dither_offset:2; 
+      GLuint dest_blend_factor:5; 
+      GLuint src_blend_factor:5; 
+      GLuint blend_function:3; 
+   } cc6;
+
+   struct {
+      union {   /* the alpha reference occupies one dword, stored either as a float or as four bytes */
+	 GLfloat f;  
+	 GLubyte ub[4];
+      } alpha_ref;
+   } cc7;
+};
+
+
+
+struct brw_sf_unit_state   /* sf unit state, eight dwords: shared thread0..3, then thread4, sf5, sf6, sf7 */
+{
+   struct thread0 thread0;
+   struct thread1 thread1;
+   struct thread2 thread2;
+   struct thread3 thread3;
+
+   struct
+   {
+      GLuint pad0:10;
+      GLuint stats_enable:1; 
+      GLuint nr_urb_entries:7; 
+      GLuint pad1:1;
+      GLuint urb_entry_allocation_size:5; 
+      GLuint pad2:1;
+      GLuint max_threads:6; 
+      GLuint pad3:1;
+   } thread4;   
+
+   struct
+   {
+      GLuint front_winding:1; 
+      GLuint viewport_transform:1; 
+      GLuint pad0:3;
+      GLuint sf_viewport_state_offset:27; 
+   } sf5;
+   
+   struct
+   {
+      GLuint pad0:9;
+      GLuint dest_org_vbias:4; 
+      GLuint dest_org_hbias:4; 
+      GLuint scissor:1; 
+      GLuint disable_2x2_trifilter:1; 
+      GLuint disable_zero_pix_trifilter:1; 
+      GLuint point_rast_rule:2; 
+      GLuint line_endcap_aa_region_width:2; 
+      GLuint line_width:4; 
+      GLuint fast_scissor_disable:1; 
+      GLuint cull_mode:2; 
+      GLuint aa_enable:1; 
+   } sf6;
+
+   struct
+   {
+      GLuint point_size:11; 
+      GLuint use_point_size_state:1; 
+      GLuint subpixel_precision:1; 
+      GLuint sprite_point:1; 
+      GLuint pad0:10;
+      GLuint aa_line_distance_mode:1;
+      GLuint trifan_pv:2; 
+      GLuint linestrip_pv:2; 
+      GLuint tristrip_pv:2; 
+      GLuint line_last_pixel_enable:1; 
+   } sf7;
+
+};
+
+
+struct brw_gs_unit_state   /* gs unit state, seven dwords: shared thread0..3, then thread4, gs5, gs6 */
+{
+   struct thread0 thread0;
+   struct thread1 thread1;
+   struct thread2 thread2;
+   struct thread3 thread3;
+
+   struct
+   {
+      GLuint pad0:10;
+      GLuint stats_enable:1; 
+      GLuint nr_urb_entries:7; 
+      GLuint pad1:1;
+      GLuint urb_entry_allocation_size:5; 
+      GLuint pad2:1;
+      GLuint max_threads:5;   /* 5 bits here; the sf and vs variants declare 6 */
+      GLuint pad3:2;
+   } thread4;   
+      
+   struct
+   {
+      GLuint sampler_count:3; 
+      GLuint pad0:2;
+      GLuint sampler_state_pointer:27; 
+   } gs5;
+
+   
+   struct
+   {
+      GLuint max_vp_index:4; 
+      GLuint pad0:12;
+      GLuint svbi_post_inc_value:10;
+      GLuint pad1:1;
+      GLuint svbi_post_inc_enable:1;
+      GLuint svbi_payload:1;
+      GLuint discard_adjaceny:1;
+      GLuint reorder_enable:1; 
+      GLuint pad2:1;
+   } gs6;
+};
+
+
+struct brw_vs_unit_state   /* vs unit state, seven dwords: shared thread0..3, then thread4, vs5, vs6 */
+{
+   struct thread0 thread0;
+   struct thread1 thread1;
+   struct thread2 thread2;
+   struct thread3 thread3;
+   
+   struct
+   {
+      GLuint pad0:10;
+      GLuint stats_enable:1; 
+      GLuint nr_urb_entries:7; 
+      GLuint pad1:1;
+      GLuint urb_entry_allocation_size:5; 
+      GLuint pad2:1;
+      GLuint max_threads:6; 
+      GLuint pad3:1;
+   } thread4;   
+
+   struct
+   {
+      GLuint sampler_count:3; 
+      GLuint pad0:2;
+      GLuint sampler_state_pointer:27; 
+   } vs5;
+
+   struct
+   {
+      GLuint vs_enable:1; 
+      GLuint vert_cache_disable:1; 
+      GLuint pad0:30;
+   } vs6;
+};
+
+
+struct brw_wm_unit_state   /* wm unit state: shared thread0..3, then wm4, wm5 and two GLfloats */
+{
+   struct thread0 thread0;
+   struct thread1 thread1;
+   struct thread2 thread2;
+   struct thread3 thread3;
+   
+   struct {
+      GLuint stats_enable:1; 
+      GLuint depth_buffer_clear:1;
+      GLuint sampler_count:3; 
+      GLuint sampler_state_pointer:27; 
+   } wm4;
+   
+   struct
+   {
+      GLuint enable_8_pix:1; 
+      GLuint enable_16_pix:1; 
+      GLuint enable_32_pix:1; 
+      GLuint enable_con_32_pix:1;
+      GLuint enable_con_64_pix:1;
+      GLuint pad0:5;
+      GLuint legacy_global_depth_bias:1; 
+      GLuint line_stipple:1; 
+      GLuint depth_offset:1; 
+      GLuint polygon_stipple:1; 
+      GLuint line_aa_region_width:2; 
+      GLuint line_endcap_aa_region_width:2; 
+      GLuint early_depth_test:1; 
+      GLuint thread_dispatch_enable:1; 
+      GLuint program_uses_depth:1; 
+      GLuint program_computes_depth:1; 
+      GLuint program_uses_killpixel:1; 
+      GLuint legacy_line_rast: 1; 
+      GLuint transposed_urb_read_enable:1; 
+      GLuint max_threads:7; 
+   } wm5;   /* field widths sum to 32 */
+   
+   GLfloat global_depth_offset_constant;  
+   GLfloat global_depth_offset_scale;   
+};
+
+struct brw_sampler_default_color {   /* four GLfloat components; component order is not stated here */
+   GLfloat color[4];
+};
+
+struct brw_sampler_state   /* four bit-field dwords, ss0..ss3 */
+{
+   
+   struct
+   {
+      GLuint shadow_function:3; 
+      GLuint lod_bias:11; 
+      GLuint min_filter:3; 
+      GLuint mag_filter:3; 
+      GLuint mip_filter:2; 
+      GLuint base_level:5; 
+      GLuint pad:1;
+      GLuint lod_preclamp:1; 
+      GLuint default_color_mode:1; 
+      GLuint pad0:1;
+      GLuint disable:1; 
+   } ss0;
+
+   struct
+   {
+      GLuint r_wrap_mode:3; 
+      GLuint t_wrap_mode:3; 
+      GLuint s_wrap_mode:3; 
+      GLuint pad:3;
+      GLuint max_lod:10; 
+      GLuint min_lod:10; 
+   } ss1;
+
+   
+   struct
+   {
+      GLuint pad:5;
+      GLuint default_color_pointer:27;   /* presumably refers to a struct brw_sampler_default_color -- confirm */
+   } ss2;
+   
+   struct
+   {
+      GLuint pad:19;
+      GLuint max_aniso:3; 
+      GLuint chroma_key_mode:1; 
+      GLuint chroma_key_index:2; 
+      GLuint chroma_key_enable:1; 
+      GLuint monochrome_filter_width:3; 
+      GLuint monochrome_filter_height:3; 
+   } ss3;
+};
+
+
+struct brw_clipper_viewport   /* four GLfloat bounds: x min/max, then y min/max */
+{
+   GLfloat xmin;  
+   GLfloat xmax;  
+   GLfloat ymin;  
+   GLfloat ymax;  
+};
+
+struct brw_cc_viewport   /* two GLfloat depth bounds */
+{
+   GLfloat min_depth;  
+   GLfloat max_depth;  
+};
+
+struct brw_sf_viewport   /* six GLfloat matrix elements followed by a scissor rectangle of four GLshorts */
+{
+   struct {
+      GLfloat m00;  
+      GLfloat m11;  
+      GLfloat m22;  
+      GLfloat m30;  
+      GLfloat m31;  
+      GLfloat m32;  
+   } viewport;   /* presumably viewport scale (m00/m11/m22) and translate (m30/m31/m32) -- confirm */
+
+   struct {
+      GLshort xmin;
+      GLshort ymin;
+      GLshort xmax;
+      GLshort ymax;
+   } scissor;
+};
+
+/* Documented in the subsystem/shared-functions/sampler chapter...
+ * Six dwords, ss0..ss5; every bit-field group fills exactly 32 bits. */
+struct brw_surface_state
+{
+   struct {
+      GLuint cube_pos_z:1; 
+      GLuint cube_neg_z:1; 
+      GLuint cube_pos_y:1; 
+      GLuint cube_neg_y:1; 
+      GLuint cube_pos_x:1; 
+      GLuint cube_neg_x:1; 
+      GLuint pad:4;
+      GLuint mipmap_layout_mode:1; 
+      GLuint vert_line_stride_ofs:1; 
+      GLuint vert_line_stride:1; 
+      GLuint color_blend:1; 
+      GLuint writedisable_blue:1; 
+      GLuint writedisable_green:1; 
+      GLuint writedisable_red:1; 
+      GLuint writedisable_alpha:1; 
+      GLuint surface_format:9; 
+      GLuint data_return_format:1; 
+      GLuint pad0:1;
+      GLuint surface_type:3; 
+   } ss0;
+   
+   struct {
+      GLuint base_addr;  
+   } ss1;
+   
+   struct {
+      GLuint pad:2;
+      GLuint mip_count:4; 
+      GLuint width:13; 
+      GLuint height:13; 
+   } ss2;
+
+   struct {
+      GLuint tile_walk:1; 
+      GLuint tiled_surface:1; 
+      GLuint pad:1; 
+      GLuint pitch:18; 
+      GLuint depth:11; 
+   } ss3;
+   
+   struct {
+      GLuint multisample_position_palette_index:3;
+      GLuint pad1:1;
+      GLuint num_multisamples:3;
+      GLuint pad0:1;
+      GLuint render_target_view_extent:9;
+      GLuint min_array_elt:11;
+      GLuint min_lod:4; 
+   } ss4;
+
+   struct {
+      GLuint pad1:16;
+      GLuint llc_mapping:1;
+      GLuint mlc_mapping:1;
+      GLuint gfdt:1;
+      GLuint gfdt_src:1;
+      GLuint y_offset:4;
+      GLuint pad0:1;
+      GLuint x_offset:7;
+   } ss5;   /* NEW in Integrated Graphics Device */
+
+};
+
+
+
+struct brw_vertex_buffer_state   /* four dwords as compiled (the #if 1 member is included) */
+{
+   struct {
+      GLuint pitch:11; 
+      GLuint pad:15;
+      GLuint access_type:1; 
+      GLuint vb_index:5; 
+   } vb0;   /* dword 0: field widths sum to 32 */
+   
+   GLuint start_addr; 
+   GLuint max_index;   
+#if 1
+   GLuint instance_data_step_rate; /* not included for sequential/random vertices? */
+#endif
+};
+
+#define BRW_VBP_MAX 17   /* capacity of brw_vb_array_state.vb[] */
+
+struct brw_vb_array_state {   /* header dword followed by up to BRW_VBP_MAX vertex buffer states */
+   struct header header;
+   struct brw_vertex_buffer_state vb[BRW_VBP_MAX];
+};
+
+
+struct brw_vertex_element_state   /* two bit-field dwords per vertex element */
+{
+   struct
+   {
+      GLuint src_offset:11; 
+      GLuint pad:5;
+      GLuint src_format:9; 
+      GLuint pad0:1;
+      GLuint valid:1; 
+      GLuint vertex_buffer_index:5;   /* 5 bits, the same width as brw_vertex_buffer_state.vb0.vb_index */
+   } ve0;
+   
+   struct
+   {
+      GLuint dst_offset:8; 
+      GLuint pad:8;
+      GLuint vfcomponent3:4;   /* components are declared 3 down to 0 */
+      GLuint vfcomponent2:4; 
+      GLuint vfcomponent1:4; 
+      GLuint vfcomponent0:4; 
+   } ve1;
+};
+
+#define BRW_VEP_MAX 18   /* capacity of brw_vertex_element_packet.ve[] */
+
+struct brw_vertex_element_packet {   /* header dword followed by up to BRW_VEP_MAX vertex elements */
+   struct header header;
+   struct brw_vertex_element_state ve[BRW_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */
+};
+
+
+struct brw_urb_immediate {   /* one dword of bit-fields (widths sum to 32) */
+   GLuint opcode:4;
+   GLuint offset:6;
+   GLuint swizzle_control:2; 
+   GLuint pad:1;
+   GLuint allocate:1;
+   GLuint used:1;
+   GLuint complete:1;
+   GLuint response_length:4;
+   GLuint msg_length:4;
+   GLuint msg_target:4;
+   GLuint pad1:3;
+   GLuint end_of_thread:1;
+};
+
+/* Instruction format for the execution units: header, bits1, bits2 and
+ * bits3.  Each bitfield struct declared inline here totals 32 bits. */
+ 
+struct brw_instruction
+{
+   struct 
+   {
+      GLuint opcode:7;
+      GLuint pad:1;
+      GLuint access_mode:1;
+      GLuint mask_control:1;
+      GLuint dependency_control:2;
+      GLuint compression_control:2;
+      GLuint thread_control:2;
+      GLuint predicate_control:4;
+      GLuint predicate_inverse:1;
+      GLuint execution_size:3;
+      GLuint destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */
+      GLuint pad0:2;
+      GLuint debug_control:1;
+      GLuint saturate:1;
+   } header;
+   /* bits1: destination operand plus register file/type of the sources. */
+   union {
+      struct
+      {
+	 GLuint dest_reg_file:2;
+	 GLuint dest_reg_type:3;
+	 GLuint src0_reg_file:2;
+	 GLuint src0_reg_type:3;
+	 GLuint src1_reg_file:2;
+	 GLuint src1_reg_type:3;
+	 GLuint pad:1;
+	 GLuint dest_subreg_nr:5;
+	 GLuint dest_reg_nr:8;
+	 GLuint dest_horiz_stride:2;
+	 GLuint dest_address_mode:1;
+      } da1;
+      /* NOTE(review): da/ia presumably mean direct/indirect addressing; the ia views hold a signed offset. */
+      struct
+      {
+	 GLuint dest_reg_file:2;
+	 GLuint dest_reg_type:3;
+	 GLuint src0_reg_file:2;
+	 GLuint src0_reg_type:3;
+	 GLuint pad:6;
+	 GLint dest_indirect_offset:10;	/* offset against the deref'd address reg */
+	 GLuint dest_subreg_nr:3; /* subnr for the address reg a0.x */
+	 GLuint dest_horiz_stride:2;
+	 GLuint dest_address_mode:1;
+      } ia1;
+      /* The *16 views replace the horizontal stride with a 4-bit writemask. */
+      struct
+      {
+	 GLuint dest_reg_file:2;
+	 GLuint dest_reg_type:3;
+	 GLuint src0_reg_file:2;
+	 GLuint src0_reg_type:3;
+	 GLuint src1_reg_file:2;
+	 GLuint src1_reg_type:3;
+	 GLuint pad0:1;
+	 GLuint dest_writemask:4;
+	 GLuint dest_subreg_nr:1;
+	 GLuint dest_reg_nr:8;
+	 GLuint pad1:2;
+	 GLuint dest_address_mode:1;
+      } da16;
+
+      struct
+      {
+	 GLuint dest_reg_file:2;
+	 GLuint dest_reg_type:3;
+	 GLuint src0_reg_file:2;
+	 GLuint src0_reg_type:3;
+	 GLuint pad0:6;
+	 GLuint dest_writemask:4;
+	 GLint dest_indirect_offset:6;
+	 GLuint dest_subreg_nr:3;
+	 GLuint pad1:2;
+	 GLuint dest_address_mode:1;
+      } ia16;
+   } bits1;
+
+   /* bits2: source 0 operand, plus flag_reg_nr. */
+   union {
+      struct
+      {
+	 GLuint src0_subreg_nr:5;
+	 GLuint src0_reg_nr:8;
+	 GLuint src0_abs:1;
+	 GLuint src0_negate:1;
+	 GLuint src0_address_mode:1;
+	 GLuint src0_horiz_stride:2;
+	 GLuint src0_width:3;
+	 GLuint src0_vert_stride:4;
+	 GLuint flag_reg_nr:1;
+	 GLuint pad:6;
+      } da1;
+
+      struct
+      {
+	 GLint src0_indirect_offset:10;
+	 GLuint src0_subreg_nr:3;
+	 GLuint src0_abs:1;
+	 GLuint src0_negate:1;
+	 GLuint src0_address_mode:1;
+	 GLuint src0_horiz_stride:2;
+	 GLuint src0_width:3;
+	 GLuint src0_vert_stride:4;
+	 GLuint flag_reg_nr:1;
+	 GLuint pad:6;	
+      } ia1;
+
+      struct
+      {
+	 GLuint src0_swz_x:2;
+	 GLuint src0_swz_y:2;
+	 GLuint src0_subreg_nr:1;
+	 GLuint src0_reg_nr:8;
+	 GLuint src0_abs:1;
+	 GLuint src0_negate:1;
+	 GLuint src0_address_mode:1;
+	 GLuint src0_swz_z:2;
+	 GLuint src0_swz_w:2;
+	 GLuint pad0:1;
+	 GLuint src0_vert_stride:4;
+	 GLuint flag_reg_nr:1;
+	 GLuint pad1:6;
+      } da16;
+
+      struct
+      {
+	 GLuint src0_swz_x:2;
+	 GLuint src0_swz_y:2;
+	 GLint src0_indirect_offset:6;
+	 GLuint src0_subreg_nr:3;
+	 GLuint src0_abs:1;
+	 GLuint src0_negate:1;
+	 GLuint src0_address_mode:1;
+	 GLuint src0_swz_z:2;
+	 GLuint src0_swz_w:2;
+	 GLuint pad0:1;
+	 GLuint src0_vert_stride:4;
+	 GLuint flag_reg_nr:1;
+	 GLuint pad1:6;
+      } ia16;
+
+   } bits2;
+   /* bits3: source 1 operand, or one of the alternative views further down. */
+   union
+   {
+      struct
+      {
+	 GLuint src1_subreg_nr:5;
+	 GLuint src1_reg_nr:8;
+	 GLuint src1_abs:1;
+	 GLuint src1_negate:1;
+	 GLuint pad:1;
+	 GLuint src1_horiz_stride:2;
+	 GLuint src1_width:3;
+	 GLuint src1_vert_stride:4;
+	 GLuint pad0:7;
+      } da1;
+
+      struct
+      {
+	 GLuint src1_swz_x:2;
+	 GLuint src1_swz_y:2;
+	 GLuint src1_subreg_nr:1;
+	 GLuint src1_reg_nr:8;
+	 GLuint src1_abs:1;
+	 GLuint src1_negate:1;
+	 GLuint pad0:1;
+	 GLuint src1_swz_z:2;
+	 GLuint src1_swz_w:2;
+	 GLuint pad1:1;
+	 GLuint src1_vert_stride:4;
+	 GLuint pad2:7;
+      } da16;
+
+      struct
+      {
+	 GLint  src1_indirect_offset:10;
+	 GLuint src1_subreg_nr:3;
+	 GLuint src1_abs:1;
+	 GLuint src1_negate:1;
+	 GLuint pad0:1;
+	 GLuint src1_horiz_stride:2;
+	 GLuint src1_width:3;
+	 GLuint src1_vert_stride:4;
+	 GLuint flag_reg_nr:1;
+	 GLuint pad1:6;	
+      } ia1;
+
+      struct
+      {
+	 GLuint src1_swz_x:2;
+	 GLuint src1_swz_y:2;
+	 GLint  src1_indirect_offset:6;
+	 GLuint src1_subreg_nr:3;
+	 GLuint src1_abs:1;
+	 GLuint src1_negate:1;
+	 GLuint pad0:1;
+	 GLuint src1_swz_z:2;
+	 GLuint src1_swz_w:2;
+	 GLuint pad1:1;
+	 GLuint src1_vert_stride:4;
+	 GLuint flag_reg_nr:1;
+	 GLuint pad2:6;
+      } ia16;
+
+      /* Non-operand views: if_else jump fields, message layouts, raw word. */
+      struct
+      {
+	 GLint  jump_count:16;	/* note: signed */
+	 GLuint  pop_count:4;
+	 GLuint  pad0:12;
+      } if_else;
+
+      struct {
+	 GLuint function:4;
+	 GLuint int_type:1;
+	 GLuint precision:1;
+	 GLuint saturate:1;
+	 GLuint data_type:1;
+	 GLuint pad0:8;
+	 GLuint response_length:4;
+	 GLuint msg_length:4;
+	 GLuint msg_target:4;
+	 GLuint pad1:3;
+	 GLuint end_of_thread:1;
+      } math;
+
+      struct {
+	 GLuint binding_table_index:8;
+	 GLuint sampler:4;
+	 GLuint return_format:2; 
+	 GLuint msg_type:2;   
+	 GLuint response_length:4;
+	 GLuint msg_length:4;
+	 GLuint msg_target:4;
+	 GLuint pad1:3;
+	 GLuint end_of_thread:1;
+      } sampler;
+
+      struct {
+         GLuint binding_table_index:8;
+         GLuint sampler:4;
+         GLuint msg_type:4;
+         GLuint response_length:4;
+         GLuint msg_length:4;
+         GLuint msg_target:4;
+         GLuint pad1:3;
+         GLuint end_of_thread:1;
+      } sampler_igd; 
+
+      struct brw_urb_immediate urb;
+
+      struct {
+	 GLuint binding_table_index:8;
+	 GLuint msg_control:4;  
+	 GLuint msg_type:2;  
+	 GLuint target_cache:2;    
+	 GLuint response_length:4;
+	 GLuint msg_length:4;
+	 GLuint msg_target:4;
+	 GLuint pad1:3;
+	 GLuint end_of_thread:1;
+      } dp_read;
+
+      struct {
+	 GLuint binding_table_index:8;
+	 GLuint msg_control:3;
+	 GLuint pixel_scoreboard_clear:1;
+	 GLuint msg_type:3;    
+	 GLuint send_commit_msg:1;
+	 GLuint response_length:4;
+	 GLuint msg_length:4;
+	 GLuint msg_target:4;
+	 GLuint pad1:3;
+	 GLuint end_of_thread:1;
+      } dp_write;
+
+      struct {
+	 GLuint pad:16;
+	 GLuint response_length:4;
+	 GLuint msg_length:4;
+	 GLuint msg_target:4;
+	 GLuint pad1:3;
+	 GLuint end_of_thread:1;
+      } generic;
+
+      GLint d;		/* the whole member as one signed value */
+      GLuint ud;	/* the whole member as one unsigned value */
+   } bits3;
+};
+
+
+#endif
diff --git a/i965/brw_tex.c b/i965/brw_tex.c
new file mode 100644
index 0000000..9d4b986
--- /dev/null
+++ b/i965/brw_tex.c
@@ -0,0 +1,214 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+        
+
+#include "glheader.h"
+#include "mtypes.h"
+#include "imports.h"
+#include "simple_list.h"
+#include "enums.h"
+#include "image.h"
+#include "teximage.h"
+#include "texstore.h"
+#include "texformat.h"
+#include "texmem.h"
+
+#include "intel_context.h"
+#include "intel_ioctl.h"
+#include "intel_regions.h"
+#include "brw_context.h"
+#include "brw_defines.h"
+
+
+/* ChooseTextureFormat hook: choose the Mesa texture format used to store
+ * a requested internalFormat; srcFormat/srcType refine the generic cases. */
+static const struct gl_texture_format *
+brwChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
+			 GLenum srcFormat, GLenum srcType )
+{
+   switch ( internalFormat ) {
+   case 4:
+   case GL_RGBA:
+   case GL_COMPRESSED_RGBA:
+      if (srcFormat == GL_BGRA && srcType == GL_UNSIGNED_SHORT_4_4_4_4_REV)
+	 return &_mesa_texformat_argb4444;
+      else if (srcFormat == GL_BGRA && srcType == GL_UNSIGNED_SHORT_1_5_5_5_REV)
+	 return &_mesa_texformat_argb1555;
+      else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+	       (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE) ||
+	       (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8)) 
+	 return &_mesa_texformat_rgba8888_rev;
+      else
+	 return &_mesa_texformat_argb8888;
+   /* The sized RGBA formats below ignore srcFormat/srcType. */
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+      return &_mesa_texformat_argb8888; 
+
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      /* Broadwater doesn't support RGB888 textures, so these must be
+       * stored as ARGB.
+       */
+      return &_mesa_texformat_argb8888;
+
+   case 3:
+   case GL_COMPRESSED_RGB:
+   case GL_RGB:
+      if (srcFormat == GL_RGB &&
+	  srcType == GL_UNSIGNED_SHORT_5_6_5)
+	 return &_mesa_texformat_rgb565;
+      else
+	 return &_mesa_texformat_argb8888;
+
+
+   case GL_RGB5:
+   case GL_RGB5_A1:
+      return &_mesa_texformat_argb1555;
+
+   case GL_R3_G3_B2:
+   case GL_RGBA2:
+   case GL_RGBA4:
+   case GL_RGB4:
+      return &_mesa_texformat_argb4444;
+
+   case GL_ALPHA:
+   case GL_ALPHA4:
+   case GL_ALPHA8:
+   case GL_ALPHA12:
+   case GL_ALPHA16:
+   case GL_COMPRESSED_ALPHA:
+      return &_mesa_texformat_a8;
+
+   case 1:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE4:
+   case GL_LUMINANCE8:
+   case GL_LUMINANCE12:
+   case GL_LUMINANCE16:
+   case GL_COMPRESSED_LUMINANCE:
+      return &_mesa_texformat_l8;
+
+   case 2:
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE4_ALPHA4:
+   case GL_LUMINANCE6_ALPHA2:
+   case GL_LUMINANCE8_ALPHA8:
+   case GL_LUMINANCE12_ALPHA4:
+   case GL_LUMINANCE12_ALPHA12:
+   case GL_LUMINANCE16_ALPHA16:
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
+      return &_mesa_texformat_al88;
+
+   case GL_INTENSITY:
+   case GL_INTENSITY4:
+   case GL_INTENSITY8:
+   case GL_INTENSITY12:
+   case GL_INTENSITY16:
+   case GL_COMPRESSED_INTENSITY:
+      return &_mesa_texformat_i8;
+
+   case GL_YCBCR_MESA:
+      if (srcType == GL_UNSIGNED_SHORT_8_8_MESA ||
+	  srcType == GL_UNSIGNED_BYTE)
+         return &_mesa_texformat_ycbcr;
+      else
+         return &_mesa_texformat_ycbcr_rev;
+
+   case GL_COMPRESSED_RGB_FXT1_3DFX:
+       return &_mesa_texformat_rgb_fxt1;
+   case GL_COMPRESSED_RGBA_FXT1_3DFX:
+       return &_mesa_texformat_rgba_fxt1;
+   /* NOTE(review): the RGBA, DXT3 and DXT5 requests below also get rgb_dxt1. */
+   case GL_RGB_S3TC:
+   case GL_RGB4_S3TC:
+   case GL_RGBA_S3TC:
+   case GL_RGBA4_S3TC:
+   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+     return &_mesa_texformat_rgb_dxt1; /* there is no rgba support? */
+   /* Every depth size, including 24 and 32, maps to _mesa_texformat_z16. */
+   case GL_DEPTH_COMPONENT:
+   case GL_DEPTH_COMPONENT16:
+   case GL_DEPTH_COMPONENT24:
+   case GL_DEPTH_COMPONENT32:
+      return &_mesa_texformat_z16;
+   /* Anything else is reported on stderr and answered with NULL. */
+   default:
+      fprintf(stderr, "unexpected texture format %s in %s\n", 
+	      _mesa_lookup_enum_by_nr(internalFormat),
+	      __FUNCTION__);
+      return NULL;
+   }
+
+   return NULL; /* never get here */
+}
+
+/* Hook the i965 texture-format chooser into the driver function table. */
+void brwInitTextureFuncs( struct dd_function_table *functions )
+{
+   functions->ChooseTextureFormat = &brwChooseTextureFormat;
+}
+/* Create a 2D texture object with a level-0 image sized from the front region. */
+void brw_FrameBufferTexInit( struct brw_context *brw )
+{
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+   struct intel_region *front = intel->front_region;
+   struct gl_texture_object *texobj;
+   struct gl_texture_image *teximg;
+
+   texobj = ctx->Driver.NewTextureObject( ctx, (GLuint) -1, GL_TEXTURE_2D );
+   intel->frame_buffer_texobj = texobj;
+
+   texobj->MagFilter = GL_NEAREST;
+   texobj->MinFilter = GL_NEAREST;
+
+   /* Width is the region pitch; cpp == 4 selects GL_RGBA, anything else GL_RGB. */
+   teximg = ctx->Driver.NewTextureImage( ctx );
+   _mesa_init_teximage_fields( ctx, GL_TEXTURE_2D, teximg,
+			       front->pitch, front->height, 1, 0,
+			       (front->cpp == 4) ? GL_RGBA : GL_RGB );
+
+   _mesa_set_tex_image( texobj, GL_TEXTURE_2D, 0, teximg );
+}
+/* Delete the texture object stored in intel.frame_buffer_texobj. */
+void brw_FrameBufferTexDestroy( struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   ctx->Driver.DeleteTexture( ctx, brw->intel.frame_buffer_texobj );
+}
diff --git a/i965/brw_tex_layout.c b/i965/brw_tex_layout.c
new file mode 100644
index 0000000..d4888a4
--- /dev/null
+++ b/i965/brw_tex_layout.c
@@ -0,0 +1,116 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+        
+
+/* Code to layout images in a mipmap tree for i965.
+ */
+
+#include "intel_mipmap_tree.h"
+#include "intel_tex_layout.h"
+#include "macros.h"
+/* Compute image offsets, mt->pitch and mt->total_height for a miptree.
+ * Cube and 3D targets are packed here; others use i945_miptree_layout_2d(). */
+GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_tree *mt )
+{
+   /* XXX: these vary depending on image format: 
+    */
+/*    GLint align_w = 4; */
+
+   switch (mt->target) {
+   case GL_TEXTURE_CUBE_MAP: 
+   case GL_TEXTURE_3D: {
+      GLuint width  = mt->width0;
+      GLuint height = mt->height0;
+      GLuint depth = mt->depth0;
+      GLuint pack_x_pitch, pack_x_nr;
+      GLuint pack_y_pitch;
+      GLuint level;
+      /* width0 * cpp rounded up to a multiple of 4, then divided back by cpp. */
+      mt->pitch = ((mt->width0 * mt->cpp + 3) & ~3) / mt->cpp;
+      mt->total_height = 0;
+      /* First level: one image per row, rows at least 2 apart. */
+      pack_y_pitch = MAX2(mt->height0, 2);
+      pack_x_pitch = mt->pitch;
+      pack_x_nr = 1;
+
+      for ( level = mt->first_level ; level <= mt->last_level ; level++ ) {
+	 GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6; /* else cube map */
+	 GLint x = 0;
+	 GLint y = 0;
+	 GLint q, j;
+	 /* The level is registered with origin arguments (0, mt->total_height). */
+	 intel_miptree_set_level_info(mt, level, nr_images,
+				      0, mt->total_height,
+				      width, height, depth);
+	 /* Place up to pack_x_nr images per row, rows pack_y_pitch apart. */
+	 for (q = 0; q < nr_images;) {
+	    for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) {
+	       intel_miptree_set_image_offset(mt, level, q, x, y);
+	       x += pack_x_pitch;
+	    }
+
+	    x = 0;
+	    y += pack_y_pitch;	    
+	 }
+
+	 /* y is now the height consumed by this level's rows. */
+	 mt->total_height += y;
+	 /* Next level: half the x pitch (down to 4), twice the images per row. */
+	 if (pack_x_pitch > 4) {
+	    pack_x_pitch >>= 1;
+	    pack_x_nr <<= 1;
+	    assert(pack_x_pitch * pack_x_nr <= mt->pitch);
+	 }
+	 /* Row spacing also halves per level, but never drops below 2. */
+	 if (pack_y_pitch > 2) {
+	    pack_y_pitch >>= 1;
+	 }
+
+	 width  = minify(width);
+	 height = minify(height);
+	 depth  = minify(depth);
+      }
+      break;
+   }
+
+   default:
+      i945_miptree_layout_2d(intel, mt);
+      break;
+   }
+   DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, 
+		mt->pitch, 
+		mt->total_height,
+		mt->cpp,
+		mt->pitch * mt->total_height * mt->cpp );
+		
+   return GL_TRUE;
+}
+
diff --git a/i965/brw_urb.c b/i965/brw_urb.c
new file mode 100644
index 0000000..4ca6e99
--- /dev/null
+++ b/i965/brw_urb.c
@@ -0,0 +1,215 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+        
+
+
+#include "intel_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_hal.h"
+
+#define VS 0		/* VS..CS are the row indices of limits[] below */
+#define GS 1
+#define CLP 2
+#define SF 3
+#define CS 4
+
+/* XXX: Are the min_entry_size numbers useful?
+ * XXX: Verify min_nr_entries, esp for VS.
+ * XXX: Verify SF min_entry_size.
+ */
+static const struct {
+   GLuint min_nr_entries;	/* fallback when the preferred counts do not fit */
+   GLuint preferred_nr_entries;	/* tried first by recalculate_urb_fence() */
+   GLuint min_entry_size;	/* lower clamp for the vs/sf/cs entry sizes */
+   GLuint max_entry_size;	/* not referenced anywhere in brw_urb.c */
+} limits[CS+1] = {
+   { 8, 32, 1, 5 },			/* vs */
+   { 4, 8,  1, 5 },			/* gs */
+   { 6, 8,  1, 5 },			/* clp */
+   { 1, 8,  1, 12 },		        /* sf */
+   { 1, 4,  1, 32 }			/* cs */
+};
+
+/* Assign each URB section its start offset; true when the total fits. */
+static GLboolean check_urb_layout( struct brw_context *brw )
+{
+   brw->urb.vs_start   = 0;
+   brw->urb.gs_start   = brw->urb.vsize * brw->urb.nr_vs_entries;
+   brw->urb.clip_start = brw->urb.vsize * brw->urb.nr_gs_entries + brw->urb.gs_start;
+   brw->urb.sf_start   = brw->urb.vsize * brw->urb.nr_clip_entries + brw->urb.clip_start;
+   brw->urb.cs_start   = brw->urb.sfsize * brw->urb.nr_sf_entries + brw->urb.sf_start;
+   return (brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize
+	   <= URB_SIZES(brw)) ? GL_TRUE : GL_FALSE;
+}
+
+/* Most minimal update: re-partition the URB when the required entry sizes
+ * change and flag BRW_NEW_URB_FENCE so the fence packet is re-emitted.
+ */
+static void recalculate_urb_fence( struct brw_context *brw )
+{
+   GLuint csize = brw->curbe.total_size;
+   GLuint vsize = brw->vs.prog_data->urb_entry_size;
+   GLuint sfsize = brw->sf.prog_data->urb_entry_size;
+   /* Optional HAL override: looked up once, result cached in the statics. */
+   static GLboolean (*hal_recalculate_urb_fence) (struct brw_context *brw);
+   static GLboolean hal_tried;
+
+   if (!hal_tried)
+   {
+      hal_recalculate_urb_fence = brw_hal_find_symbol ("intel_hal_recalculate_urb_fence");
+      hal_tried = 1;
+   }
+   if (hal_recalculate_urb_fence)
+   {
+      if ((*hal_recalculate_urb_fence) (brw))
+	 return;
+   }
+   /* Clamp the requested entry sizes to the per-unit minimums. */
+   if (csize < limits[CS].min_entry_size)
+      csize = limits[CS].min_entry_size;
+
+   if (vsize < limits[VS].min_entry_size)
+      vsize = limits[VS].min_entry_size;
+
+   if (sfsize < limits[SF].min_entry_size)
+      sfsize = limits[SF].min_entry_size;
+   /* Re-layout if any entry grew, or if a constrained layout could shrink. */
+   if (brw->urb.vsize < vsize ||
+       brw->urb.sfsize < sfsize ||
+       brw->urb.csize < csize ||
+       (brw->urb.constrained && (brw->urb.vsize > vsize ||
+				 brw->urb.sfsize > sfsize ||
+				 brw->urb.csize > csize))) {
+      /* When constrained, smaller requirements also land here so the
+       * preferred entry counts get another try. */
+      brw->urb.csize = csize;
+      brw->urb.sfsize = sfsize;
+      brw->urb.vsize = vsize;
+
+      brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;	
+      brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries;	
+      brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries;
+      brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;	
+      brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries;	
+      /* Preferred counts do not fit: fall back to the minimum counts. */
+      if (!check_urb_layout(brw)) {
+	 brw->urb.nr_vs_entries = limits[VS].min_nr_entries;	
+	 brw->urb.nr_gs_entries = limits[GS].min_nr_entries;	
+	 brw->urb.nr_clip_entries = limits[CLP].min_nr_entries;
+	 brw->urb.nr_sf_entries = limits[SF].min_nr_entries;	
+	 brw->urb.nr_cs_entries = limits[CS].min_nr_entries;	
+
+	 brw->urb.constrained = 1;
+	 
+	 if (!check_urb_layout(brw)) {
+	    /* This is impossible, given the maximal sizes of urb
+	     * entries and the values for minimum nr of entries
+	     * provided above.
+	     */
+	    _mesa_printf("couldn't calculate URB layout!\n");
+	    exit(1);
+	 }
+	 
+	 if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS))
+	    _mesa_printf("URB CONSTRAINED\n");
+      }
+      else 
+	 brw->urb.constrained = 0;
+
+      if (INTEL_DEBUG & DEBUG_URB)
+	 _mesa_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
+		      brw->urb.vs_start,
+		      brw->urb.gs_start,
+		      brw->urb.clip_start,
+		      brw->urb.sf_start,
+		      brw->urb.cs_start, 
+		      URB_SIZES(brw));
+      /* The section offsets changed, so the fence packet must go out again. */
+      brw->state.dirty.brw |= BRW_NEW_URB_FENCE;
+   }
+}
+
+/* State atom wrapping recalculate_urb_fence(); NOTE(review): the dirty bits presumably cover the curbe size and VS/SF prog_data it reads. */
+const struct brw_tracked_state brw_recalculate_urb_fence = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CURBE_OFFSETS,
+      .cache = (CACHE_NEW_VS_PROG |
+		CACHE_NEW_SF_PROG)
+   },
+   .update = recalculate_urb_fence
+};
+
+
+
+
+/* Emit a URB_FENCE packet built from the section offsets in brw->urb. */
+void brw_upload_urb_fence(struct brw_context *brw)
+{
+   struct brw_urb_fence fence;
+
+   memset(&fence, 0, sizeof fence);
+
+   /* Each fence is the start offset of the following section.  The
+    * ordering below is correct, not the layout in the instruction.
+    * There are 256/384 urb reg pairs in total.
+    */
+   fence.bits0.vs_fence  = brw->urb.gs_start;
+   fence.bits0.gs_fence  = brw->urb.clip_start;
+   fence.bits0.clp_fence = brw->urb.sf_start;
+   fence.bits1.sf_fence  = brw->urb.cs_start;
+   fence.bits1.cs_fence  = URB_SIZES(brw);
+
+   /* Header: command opcode, length, and every *_realloc flag set. */
+   fence.header.opcode = CMD_URB_FENCE;
+   fence.header.length = sizeof fence / 4 - 2;
+   fence.header.cs_realloc = 1;
+   fence.header.vfe_realloc = 1;
+   fence.header.sf_realloc = 1;
+   fence.header.clp_realloc = 1;
+   fence.header.gs_realloc = 1;
+   fence.header.vs_realloc = 1;
+   BRW_BATCH_STRUCT(brw, &fence);
+}
+
+
+#if 0 /* compiled out; NOTE(review): brw_upload_urb_fence() is non-static, presumably called directly elsewhere */
+const struct brw_tracked_state brw_urb_fence = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_URB_FENCE | BRW_NEW_PSP,
+      .cache = 0
+   },
+   .update = brw_upload_urb_fence
+};
+#endif
diff --git a/i965/brw_util.c b/i965/brw_util.c
new file mode 100644
index 0000000..b6deee2
--- /dev/null
+++ b/i965/brw_util.c
@@ -0,0 +1,184 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+         
+
+#include "mtypes.h"
+#include "shader/prog_parameter.h"
+#include "brw_util.h"
+#include "brw_defines.h"
+/* Population count: number of 1 bits in val. */
+GLuint brw_count_bits( GLuint val )
+{
+   GLuint nr_set = 0;
+
+   for ( ; val != 0; val >>= 1)
+      nr_set += val & 1;
+   return nr_set;
+}
+/* Map state[0] (and state[1] for STATE_INTERNAL) to Mesa _NEW_* flag bits.
+ * Tokens not listed hit assert(0) and, with asserts disabled, return 0. */
+static GLuint brw_parameter_state_flags(const gl_state_index state[])
+{
+   switch (state[0]) {
+   case STATE_MATERIAL:
+   case STATE_LIGHT:
+   case STATE_LIGHTMODEL_AMBIENT:
+   case STATE_LIGHTMODEL_SCENECOLOR:
+   case STATE_LIGHTPROD:
+      return _NEW_LIGHT;
+
+   case STATE_TEXGEN:
+   case STATE_TEXENV_COLOR:
+      return _NEW_TEXTURE;
+
+   case STATE_FOG_COLOR:
+   case STATE_FOG_PARAMS:
+      return _NEW_FOG;
+
+   case STATE_CLIPPLANE:
+      return _NEW_TRANSFORM;
+
+   case STATE_POINT_SIZE:
+   case STATE_POINT_ATTENUATION:
+      return _NEW_POINT;
+
+   case STATE_MODELVIEW_MATRIX:
+      return _NEW_MODELVIEW;
+   case STATE_PROJECTION_MATRIX:
+      return _NEW_PROJECTION;
+   case STATE_MVP_MATRIX:
+      return _NEW_MODELVIEW | _NEW_PROJECTION;
+   case STATE_TEXTURE_MATRIX:
+      return _NEW_TEXTURE_MATRIX;
+   case STATE_PROGRAM_MATRIX:
+      return _NEW_TRACK_MATRIX;
+
+   case STATE_DEPTH_RANGE:
+      return _NEW_VIEWPORT;
+
+   case STATE_FRAGMENT_PROGRAM:
+   case STATE_VERTEX_PROGRAM:
+      return _NEW_PROGRAM;
+
+   case STATE_INTERNAL:
+      switch (state[1]) {	/* internal tokens are told apart by state[1] */
+      case STATE_NORMAL_SCALE:
+	 return _NEW_MODELVIEW;
+      case STATE_TEXRECT_SCALE:
+	 return _NEW_TEXTURE;
+      default:
+	 assert(0);
+	 return 0;
+      }
+
+   default:
+      assert(0);
+      return 0;
+   }
+}
+/* OR together the _NEW_* flags of every PROGRAM_STATE_VAR parameter in
+ * paramList.  A NULL list yields 0. */
+GLuint
+brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList)
+{
+   GLuint flags = 0;
+   GLuint n;
+
+   if (!paramList)
+      return flags;
+
+   for (n = 0; n < paramList->NumParameters; n++) {
+      if (paramList->Parameters[n].Type != PROGRAM_STATE_VAR)
+	 continue;
+
+      flags |= brw_parameter_state_flags(paramList->Parameters[n].StateIndexes);
+   }
+   return flags;
+}
+
+/* Map a GL blend equation enum to its BRW_BLENDFUNCTION_* encoding. */
+GLuint brw_translate_blend_equation( GLenum mode )
+{
+   switch (mode) {
+   case GL_FUNC_ADD:              return BRW_BLENDFUNCTION_ADD;
+   case GL_FUNC_SUBTRACT:         return BRW_BLENDFUNCTION_SUBTRACT;
+   case GL_FUNC_REVERSE_SUBTRACT: return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
+   case GL_MIN:                   return BRW_BLENDFUNCTION_MIN;
+   case GL_MAX:                   return BRW_BLENDFUNCTION_MAX;
+   default:
+      break;
+   }
+
+   /* Unrecognised equation: assert, and fall back to ADD when asserts
+    * are compiled out.
+    */
+   assert(0);
+   return BRW_BLENDFUNCTION_ADD;
+}
+/* Map a GL blend factor enum to its BRW_BLENDFACTOR_* encoding.
+ * Unrecognised factors assert; without asserts they yield ZERO.
+ */
+GLuint brw_translate_blend_factor( GLenum factor )
+{
+   switch(factor) {
+   /* Fixed factors. */
+   case GL_ZERO:                     return BRW_BLENDFACTOR_ZERO;
+   case GL_ONE:                      return BRW_BLENDFACTOR_ONE;
+
+   /* Source colour and alpha. */
+   case GL_SRC_COLOR:                return BRW_BLENDFACTOR_SRC_COLOR;
+   case GL_ONE_MINUS_SRC_COLOR:      return BRW_BLENDFACTOR_INV_SRC_COLOR;
+   case GL_SRC_ALPHA:                return BRW_BLENDFACTOR_SRC_ALPHA;
+   case GL_ONE_MINUS_SRC_ALPHA:      return BRW_BLENDFACTOR_INV_SRC_ALPHA;
+   case GL_SRC_ALPHA_SATURATE:       return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
+
+   /* Destination colour and alpha. */
+   case GL_DST_COLOR:                return BRW_BLENDFACTOR_DST_COLOR;
+   case GL_ONE_MINUS_DST_COLOR:      return BRW_BLENDFACTOR_INV_DST_COLOR;
+   case GL_DST_ALPHA:                return BRW_BLENDFACTOR_DST_ALPHA;
+   case GL_ONE_MINUS_DST_ALPHA:      return BRW_BLENDFACTOR_INV_DST_ALPHA;
+
+   /* Blend constant colour and alpha. */
+   case GL_CONSTANT_COLOR:           return BRW_BLENDFACTOR_CONST_COLOR;
+   case GL_ONE_MINUS_CONSTANT_COLOR: return BRW_BLENDFACTOR_INV_CONST_COLOR;
+   case GL_CONSTANT_ALPHA:           return BRW_BLENDFACTOR_CONST_ALPHA;
+   case GL_ONE_MINUS_CONSTANT_ALPHA: return BRW_BLENDFACTOR_INV_CONST_ALPHA;
+
+   default:
+      break;
+   }
+
+   /* Not one of the factors listed above.
+    */
+   assert(0);
+   return BRW_BLENDFACTOR_ZERO;
+}
diff --git a/i965/brw_util.h b/i965/brw_util.h
new file mode 100644
index 0000000..bd6cc0a
--- /dev/null
+++ b/i965/brw_util.h
@@ -0,0 +1,45 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+          
+
+#ifndef BRW_UTIL_H
+#define BRW_UTIL_H
+
+#include "mtypes.h"
+/* Helpers implemented in brw_util.c. */
+extern GLuint brw_count_bits( GLuint val );	/* number of set bits in val */
+extern GLuint brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList); /* OR of _NEW_* flags; 0 for NULL */
+extern GLuint brw_translate_blend_factor( GLenum factor );	/* GL enum -> BRW_BLENDFACTOR_* */
+extern GLuint brw_translate_blend_equation( GLenum mode );	/* GL enum -> BRW_BLENDFUNCTION_* */
+
+
+
+#endif
diff --git a/i965/brw_vs.c b/i965/brw_vs.c
new file mode 100644
index 0000000..50826d9
--- /dev/null
+++ b/i965/brw_vs.c
@@ -0,0 +1,132 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+           
+
+#include "brw_context.h"
+#include "brw_vs.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "shader/prog_print.h"
+
+
+/* Compile vp for the given key and hand the result to the VS program cache. */
+static void do_vs_prog( struct brw_context *brw, 
+			struct brw_vertex_program *vp,
+			struct brw_vs_prog_key *key )
+{
+   struct brw_vs_compile compile;
+   const GLuint *kernel;
+   GLuint kernel_size;
+
+   /* Start from an all-zero compile state carrying a copy of the key. */
+   memset(&compile, 0, sizeof(compile));
+   memcpy(&compile.key, key, sizeof(*key));
+
+   brw_init_compile(brw, &compile.func);
+   compile.vp = vp;
+
+   /* Seed the input/output masks from the program itself... */
+   compile.prog_data.inputs_read = vp->program.Base.InputsRead;
+   compile.prog_data.outputs_written = vp->program.Base.OutputsWritten;
+
+   /* ...and add the edge flag on both sides when the key asks for it. */
+   if (compile.key.copy_edgeflag) {
+      compile.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG;
+      compile.prog_data.outputs_written |= 1<<VERT_RESULT_EDGE;
+   }
+
+   /* Debug aid, compiled in but never executed. */
+   if (0) {
+      _mesa_print_program(&compile.vp->program.Base);
+   }
+
+   /* Emit the GEN4 code, then fetch the finished program and its size
+    * from the compile state.
+    */
+   brw_vs_emit(&compile);
+   kernel = brw_get_program(&compile.func, &kernel_size);
+
+   /* Pass key, program and prog_data to the cache; keep the offset. */
+   brw->vs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_VS_PROG],
+					      &compile.key,
+					      sizeof(compile.key),
+					      kernel,
+					      kernel_size,
+					      &compile.prog_data,
+					      &brw->vs.prog_data);
+}
+
+/* Build the VS program key from current state; compile on a cache miss. */
+static void brw_upload_vs_prog( struct brw_context *brw )
+{
+   struct brw_vertex_program *vp = 
+      (struct brw_vertex_program *)brw->vertex_program;
+   struct brw_vs_prog_key key;
+
+   assert (vp && !vp->program.IsNVProgram);
+
+   /* Zero the whole key first: it is handed to the cache together with
+    * sizeof(key), so presumably every byte of it takes part in the lookup.
+    */
+   memset(&key, 0, sizeof(key));
+
+   /* Just upload the program verbatim for now.  Always send it all
+    * the inputs it asks for, whether they are varying or not.
+    */
+   key.program_string_id = vp->id;
+   key.nr_userclip = brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled);
+   key.copy_edgeflag = !(brw->attribs.Polygon->FrontMode == GL_FILL &&
+			 brw->attribs.Polygon->BackMode == GL_FILL);
+
+   /* BRW_NEW_METAOPS
+    */
+   if (brw->metaops.active)
+      key.know_w_is_one = 1;
+
+   /* Only compile when the cache search does not succeed for this key. */
+   if (!brw_search_cache(&brw->cache[BRW_VS_PROG],
+			 &key, sizeof(key),
+			 &brw->vs.prog_data,
+			 &brw->vs.prog_gs_offset))
+      do_vs_prog(brw, vp, &key);
+}
+
+
+/* State atom for the VS program: the dirty bits match what brw_upload_vs_prog()
+ * reads (clip planes, polygon modes, the vertex program, metaops.active). */
+const struct brw_tracked_state brw_vs_prog = {
+   .dirty = {
+      .mesa  = _NEW_TRANSFORM | _NEW_POLYGON,
+      .brw   = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_METAOPS,
+      .cache = 0
+   },
+   .update = brw_upload_vs_prog
+};
diff --git a/i965/brw_vs.h b/i965/brw_vs.h
new file mode 100644
index 0000000..fdb5785
--- /dev/null
+++ b/i965/brw_vs.h
@@ -0,0 +1,81 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+            
+
+#ifndef BRW_VS_H
+#define BRW_VS_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "program.h"
+
+
+struct brw_vs_prog_key {
+   GLuint program_string_id;	/* id of the vertex program being compiled */
+   GLuint nr_userclip:4;	/* number of enabled user clip planes */
+   GLuint copy_edgeflag:1;	/* copy the edgeflag input through to the output */
+   GLuint know_w_is_one:1;	/* set while metaops are active; skips the 1/w step */
+   GLuint pad:26;		/* rounds the bitfields up to 32 bits */
+};
+
+
+struct brw_vs_compile {
+   struct brw_compile func;		/* passed to the brw_* emit helpers */
+   struct brw_vs_prog_key key;
+   struct brw_vs_prog_data prog_data;
+
+   struct brw_vertex_program *vp;
+
+   GLuint nr_inputs;			/* number of input regs allocated */
+
+   GLuint first_output;		/* value of the GRF counter when output allocation starts */
+   GLuint nr_outputs;			/* number of outputs written */
+
+   GLuint first_tmp;			/* first GRF past the fixed allocations */
+   GLuint last_tmp;			/* next GRF get_tmp() will hand out */
+
+   struct brw_reg r0;
+   struct brw_reg r1;
+   struct brw_reg regs[PROGRAM_ADDRESS+1][128];	/* indexed [file][index] */
+   struct brw_reg tmp;
+
+   struct brw_reg userplane[6];		/* user clip planes, read from the curbe */
+
+};
+
+void brw_vs_emit( struct brw_vs_compile *c );
+
+
+void brw_ProgramCacheDestroy( GLcontext *ctx );
+void brw_ProgramCacheInit( GLcontext *ctx );
+
+#endif
diff --git a/i965/brw_vs_constval.c b/i965/brw_vs_constval.c
new file mode 100644
index 0000000..caef042
--- /dev/null
+++ b/i965/brw_vs_constval.c
@@ -0,0 +1,223 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+            
+
+#include "macros.h"
+#include "brw_context.h"
+#include "brw_vs.h"
+
+/* Component is active if it may diverge from [0,0,0,1].  Undef values
+ * are promoted to [0,0,0,1] for the purposes of this analysis.
+ */
+struct tracker {
+   GLboolean twoside;			/* two-sided lighting is enabled */
+   GLubyte active[PROGRAM_OUTPUT+1][128]; /* [file][index]: mask of active xyzw channels */
+   GLuint size_masks[4];		/* [n-1]: bit i set when output i has size >= n */
+};
+
+
+static void set_active_component( struct tracker *t,
+				  GLuint file,
+				  GLuint index,
+				  GLubyte active )
+{
+   /* Only temporaries, inputs and outputs are tracked; writes to any
+    * other register file are ignored.
+    */
+   const GLboolean tracked = (file == PROGRAM_TEMPORARY ||
+			      file == PROGRAM_INPUT ||
+			      file == PROGRAM_OUTPUT);
+
+   if (tracked)
+      t->active[file][index] |= active;
+}
+
+static void set_active( struct tracker *t,
+			struct prog_dst_register dst, GLuint active )
+{
+   /* Channels outside the destination write mask stay untouched. */
+   set_active_component( t, dst.File, dst.Index, dst.WriteMask & active );
+}
+
+
+static GLubyte get_active_component( struct tracker *t,
+				     GLuint file,
+				     GLuint index,
+				     GLuint component,
+				     GLubyte swz )
+{
+   const GLubyte bit = 1 << component;
+
+   /* Constant swizzles: a component is inactive only where the
+    * constant matches the default vector [0,0,0,1].
+    */
+   if (swz == SWIZZLE_ZERO)
+      return (component < 3) ? 0 : bit;
+   if (swz == SWIZZLE_ONE)
+      return (component == 3) ? 0 : bit;
+
+   /* Register files that are not tracked are reported as active. */
+   if (file != PROGRAM_TEMPORARY && file != PROGRAM_INPUT && file != PROGRAM_OUTPUT)
+      return bit;
+
+   return t->active[file][index] & bit;
+}
+
+
+static GLubyte get_active( struct tracker *t,
+			   struct prog_src_register src )
+{
+   GLubyte mask;
+   GLuint chan;
+
+   if (src.RelAddr)
+      return 0xf;
+   /* NOTE! seeded with the negate mask, so negated channels count as active. */
+   mask = src.NegateBase;
+   for (chan = 0; chan < 4; chan++)
+      mask |= get_active_component(t, src.File, src.Index, chan,
+				   GET_SWZ(src.Swizzle, chan));
+   return mask;
+}
+
+static GLubyte get_output_size( struct tracker *t, GLuint idx )
+{
+   const GLubyte active = t->active[PROGRAM_OUTPUT][idx];
+   GLint chan;
+   /* Size is one more than the highest active channel; 0 if none. */
+   for (chan = 3; chan >= 0; chan--)
+      if (active & (1 << chan))
+	 return chan + 1;
+   return 0;
+}
+
+/* Note the potential copying that occurs in the setup program:
+ */
+static void calc_sizes( struct tracker *t )
+{
+   GLubyte *out = t->active[PROGRAM_OUTPUT];
+   GLuint attr, n;
+
+   /* Two-sided lighting: the back-face colours may be copied over
+    * the front ones later on, so fold their activity in.
+    */
+   if (t->twoside) {
+      out[VERT_RESULT_COL0] |= out[VERT_RESULT_BFC0];
+      out[VERT_RESULT_COL1] |= out[VERT_RESULT_BFC1];
+   }
+
+   /* An output of size N sets its bit in size_masks[0..N-1].
+    */
+   for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) {
+      const GLuint size = get_output_size(t, attr);
+
+      for (n = size; n > 0; n--)
+	 t->size_masks[n-1] |= 1<<attr;
+   }
+}
+
+static GLubyte szflag[4+1] = {
+   0,		/* size 0: nothing active */
+   0x1,		/* size 1: x */
+   0x3,		/* size 2: xy */
+   0x7,		/* size 3: xyz */
+   0xf		/* size 4: xyzw */
+};
+
+/* Pull a size out of the packed array:
+ */
+static GLuint get_input_size(struct brw_context *brw,
+			     GLuint attr)
+{
+   /* Two bits per attribute, sixteen attributes per dword; the
+    * stored field is the size minus one.
+    */
+   return ((brw->vb.info.sizes[attr/16] >> (2 * (attr%16))) & 0x3) + 1;
+}
+
+/* Calculate sizes of vertex program outputs.  Size is the largest
+ * component index which might vary from [0,0,0,1]
+ */
+static void calc_wm_input_sizes( struct brw_context *brw )
+{
+   /* BRW_NEW_VERTEX_PROGRAM */
+   struct brw_vertex_program *vp =
+      (struct brw_vertex_program *)brw->vertex_program;
+   struct tracker t;
+   GLuint attr, pc;
+
+   memset(&t, 0, sizeof(t));
+
+   /* _NEW_LIGHT */
+   t.twoside = brw->attribs.Light->Model.TwoSide ? 1 : 0;
+
+   /* BRW_NEW_INPUT_DIMENSIONS: seed every input the program reads
+    * with the channels implied by the size of that input.
+    */
+   for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
+      if (!(vp->program.Base.InputsRead & (1<<attr)))
+	 continue;
+      set_active_component(&t, PROGRAM_INPUT, attr,
+			   szflag[get_input_size(brw, attr)]);
+   }
+
+   /* Walk the program.  ARL is skipped, MOV forwards the activity of
+    * its source, any other opcode marks all written channels active.
+    */
+   for (pc = 0; pc < vp->program.Base.NumInstructions; pc++) {
+      struct prog_instruction *inst = &vp->program.Base.Instructions[pc];
+      GLuint active = 0xf;
+
+      if (inst->Opcode == OPCODE_ARL)
+	 continue;
+      if (inst->Opcode == OPCODE_MOV)
+	 active = get_active(&t, inst->SrcReg[0]);
+      set_active(&t, inst->DstReg, active);
+   }
+
+   calc_sizes(&t);
+
+   /* Raise the dirty flag only when the masks really changed. */
+   if (memcmp(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks)) != 0) {
+      memcpy(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks));
+      brw->state.dirty.brw |= BRW_NEW_WM_INPUT_DIMENSIONS;
+   }
+}
+
+const struct brw_tracked_state brw_wm_input_sizes = {
+   .dirty = {
+      .mesa  = _NEW_LIGHT,	/* calc_wm_input_sizes reads Light->Model.TwoSide */
+      .brw   = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS,
+      .cache = 0
+   },
+   .update = calc_wm_input_sizes
+};
+
diff --git a/i965/brw_vs_emit.c b/i965/brw_vs_emit.c
new file mode 100644
index 0000000..c38e998
--- /dev/null
+++ b/i965/brw_vs_emit.c
@@ -0,0 +1,1041 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+            
+
+#include "program.h"
+#include "macros.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "brw_context.h"
+#include "brw_vs.h"
+
+
+
+/* Do things as simply as possible.  Allocate and populate all regs
+ * ahead of time.
+ */
+static void brw_vs_alloc_regs( struct brw_vs_compile *c )
+{
+   GLuint i, reg = 0, mrf;
+   GLuint nr_params;
+
+   /* r0 -- reserved as usual
+    */
+   c->r0 = brw_vec8_grf(reg, 0); reg++;
+
+   /* User clip planes from curbe: two vec4 planes per GRF, starting
+    * three registers in (presumably past the six fixed planes). */
+   if (c->key.nr_userclip) {
+      for (i = 0; i < c->key.nr_userclip; i++) {
+	 c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1);
+      }     
+
+      /* Deal with curbe alignment:
+       */
+      reg += ((6+c->key.nr_userclip+3)/4)*2;
+   }
+
+   /* Vertex program parameters from curbe, two vec4 parameters per GRF:
+    */
+   nr_params = c->vp->program.Base.Parameters->NumParameters;
+   for (i = 0; i < nr_params; i++) {
+      c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
+   }     
+   reg += (nr_params+1)/2;
+   /* Everything allocated so far, apart from r0, is read from the curbe: */
+   c->prog_data.curb_read_length = reg - 1;
+
+
+
+   /* Allocate input regs, one GRF per attribute the program reads:
+    */
+   c->nr_inputs = 0;
+   for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+      if (c->prog_data.inputs_read & (1<<i)) {
+	 c->nr_inputs++;
+	 c->regs[PROGRAM_INPUT][i] = brw_vec8_grf(reg, 0);
+	 reg++;
+      }
+   }     
+
+
+   /* Allocate outputs.  Position and point size live in GRFs because
+    * emit_vertex_write() reads them back; every other output is
+    * written straight into a message reg, m4 onwards. */
+   c->nr_outputs = 0;
+   c->first_output = reg;
+   mrf = 4;
+   for (i = 0; i < VERT_RESULT_MAX; i++) {
+      if (c->prog_data.outputs_written & (1<<i)) {
+	 c->nr_outputs++;
+	 if (i == VERT_RESULT_HPOS) {
+	    c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+	    reg++;
+	 }
+	 else if (i == VERT_RESULT_PSIZ) {
+	    c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+	    reg++;
+	    mrf++;		/* just a placeholder?  XXX fix later stages & remove this */
+	 }
+	 else {
+	    c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf);
+	    mrf++;
+	 }
+      }
+   }     
+
+   /* Allocate program temporaries:
+    */
+   for (i = 0; i < c->vp->program.Base.NumTemporaries; i++) {
+      c->regs[PROGRAM_TEMPORARY][i] = brw_vec8_grf(reg, 0);
+      reg++;
+   }
+
+   /* Address reg(s).  Don't try to use the internal address reg until
+    * deref time.
+    */
+   for (i = 0; i < c->vp->program.Base.NumAddressRegs; i++) {
+      c->regs[PROGRAM_ADDRESS][i] =  brw_reg(BRW_GENERAL_REGISTER_FILE,
+					     reg,
+					     0,
+					     BRW_REGISTER_TYPE_D,
+					     BRW_VERTICAL_STRIDE_8,
+					     BRW_WIDTH_8,
+					     BRW_HORIZONTAL_STRIDE_1,
+					     BRW_SWIZZLE_XXXX,
+					     WRITEMASK_X);
+      reg++;
+   }
+ 
+   
+   /* Some opcodes need an internal temporary; get_tmp() allocates from here:
+    */
+   c->first_tmp = reg;
+   c->last_tmp = reg;		/* for allocation purposes */
+
+   /* Each input reg holds data from two vertices.  The
+    * urb_read_length is the number of registers read from *each*
+    * vertex urb, so is half the amount:
+    */
+   c->prog_data.urb_read_length = (c->nr_inputs+1)/2;
+
+   c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4;
+   c->prog_data.total_grf = reg;
+}
+
+
+static struct brw_reg get_tmp( struct brw_vs_compile *c )
+{
+   /* Hand out the next free GRF, raising total_grf whenever a new
+    * high-water mark is reached. */
+   const GLuint nr = c->last_tmp++;
+   if (c->last_tmp > c->prog_data.total_grf)
+      c->prog_data.total_grf = c->last_tmp;
+   return brw_vec8_grf(nr, 0);
+}
+
+static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
+{  /* Temporaries form a stack: only the most recent one is really freed. */
+   if (tmp.nr != c->last_tmp-1) return;
+   c->last_tmp--;
+}
+			       
+static void release_tmps( struct brw_vs_compile *c )
+{
+   c->last_tmp = c->first_tmp;	/* drop every temporary handed out by get_tmp() */
+}
+
+
+static void unalias1( struct brw_vs_compile *c,
+		      struct brw_reg dst,
+		      struct brw_reg arg0,
+		      void (*func)( struct brw_vs_compile *,
+				    struct brw_reg,
+				    struct brw_reg ))
+{
+   struct brw_reg tmp;
+   if (dst.file != arg0.file || dst.nr != arg0.nr) {
+      func(c, dst, arg0);	/* no overlap: emit straight into dst */
+      return;
+   }
+   /* Overlap: build the result in a temporary, then copy it to dst. */
+   tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+   func(c, tmp, arg0);
+   brw_MOV(&c->func, dst, tmp);
+}
+
+static void unalias2( struct brw_vs_compile *c,
+		      struct brw_reg dst,
+		      struct brw_reg arg0,
+		      struct brw_reg arg1,
+		      void (*func)( struct brw_vs_compile *,
+				    struct brw_reg,
+				    struct brw_reg,
+				    struct brw_reg ))
+{
+   /* Aliasing *either* source is enough to need a temporary: func may
+    * write part of dst before it has finished reading both arguments. */
+   if ((dst.file == arg0.file && dst.nr == arg0.nr) ||
+       (dst.file == arg1.file && dst.nr == arg1.nr)) {
+      struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+      func(c, tmp, arg0, arg1);
+      brw_MOV(&c->func, dst, tmp);
+   }
+   else
+      func(c, dst, arg0, arg1);
+}
+
+
+
+
+static void emit_slt( struct brw_compile *p, 
+		      struct brw_reg dst,
+		      struct brw_reg arg0,
+		      struct brw_reg arg1 )
+{
+   /* Set-on-less-than.  Could be done with an if/else/endif, but this
+    * method uses half the instructions.  The comparison references
+    * the arguments before dst is written, and tests the opposite
+    * condition (GE) so that the predicated move below can replace
+    * the default 1.0 with 0.0.
+    */
+   brw_push_insn_state(p);
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0, arg1);
+
+   /* Write all values to 1:
+    */
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+   brw_MOV(p, dst, brw_imm_f(1.0));
+
+   /* Where the test succeeded, overwrite with zero:
+    */
+   brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+   brw_MOV(p, dst, brw_imm_f(0.0));
+   brw_pop_insn_state(p);
+}
+
+
+static void emit_sge( struct brw_compile *p, 
+		      struct brw_reg dst,
+		      struct brw_reg arg0,
+		      struct brw_reg arg1 )
+{
+   brw_push_insn_state(p);
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0, arg1);
+   /* Set-on-greater-or-equal, same compare-first scheme as emit_slt(). */
+   /* Write all values to zero:
+    */
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+   brw_MOV(p, dst, brw_imm_f(0));
+
+   /* Where the test succeeded, overwrite with 1:
+    */
+   brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+   brw_MOV(p, dst, brw_imm_f(1.0));
+   brw_pop_insn_state(p);
+}
+
+
+static void emit_max( struct brw_compile *p, 
+		      struct brw_reg dst,
+		      struct brw_reg arg0,
+		      struct brw_reg arg1 )
+{
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);	/* test arg0 < arg1 */
+   brw_SEL(p, dst, arg1, arg0);	/* presumably arg1 where the test passed, else arg0 */
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);	/* back to unpredicated */
+}
+
+static void emit_min( struct brw_compile *p, 
+		      struct brw_reg dst,
+		      struct brw_reg arg0,
+		      struct brw_reg arg1 )
+{
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);	/* test arg0 < arg1 */
+   brw_SEL(p, dst, arg0, arg1);	/* presumably arg0 where the test passed, else arg1 */
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);	/* back to unpredicated */
+}
+
+
+static void emit_math1( struct brw_vs_compile *c,
+			GLuint function,
+			struct brw_reg dst,
+			struct brw_reg arg0,
+			GLuint precision)
+{
+   /* There are various odd behaviours with SEND on the simulator.  In
+    * addition there are documented issues with the fact that the GEN4
+    * processor doesn't do dependency control properly on SEND
+    * results.  So, on balance, this kludge to get around failures
+    * with writemasked math results looks like it might be necessary
+    * whether that turns out to be a simulator bug or not:
+    */
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = dst;
+   GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
+			 dst.file != BRW_GENERAL_REGISTER_FILE);
+   /* Only a full-writemask GRF destination receives the result directly. */
+   if (need_tmp) 
+      tmp = get_tmp(c);
+
+   brw_math(p, 
+	    tmp,
+	    function,
+	    BRW_MATH_SATURATE_NONE,
+	    2,			/* presumably the message register number -- confirm against brw_math() */
+	    arg0,
+	    BRW_MATH_DATA_SCALAR,
+	    precision);
+
+   if (need_tmp) {
+      brw_MOV(p, dst, tmp);	/* dst's own writemask applies on this copy */
+      release_tmp(c, tmp);
+   }
+}
+
+static void emit_math2( struct brw_vs_compile *c, 
+			GLuint function,
+			struct brw_reg dst,
+			struct brw_reg arg0,
+			struct brw_reg arg1,
+			GLuint precision)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = dst;
+   GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
+			 dst.file != BRW_GENERAL_REGISTER_FILE);
+   /* Same temporary rule as emit_math1(): see the comment there. */
+   if (need_tmp) 
+      tmp = get_tmp(c);
+   /* The second operand is loaded into message reg 3 by hand: */
+   brw_MOV(p, brw_message_reg(3), arg1);
+   
+   brw_math(p, 
+	    tmp,
+	    function,
+	    BRW_MATH_SATURATE_NONE,
+	    2,			/* presumably the message register number -- confirm against brw_math() */
+ 	    arg0,
+	    BRW_MATH_DATA_SCALAR,
+	    precision);
+
+   if (need_tmp) {
+      brw_MOV(p, dst, tmp);
+      release_tmp(c, tmp);
+   }
+}
+		     
+
+
+static void emit_exp_noalias( struct brw_vs_compile *c,
+			      struct brw_reg dst,
+			      struct brw_reg arg0 )
+{
+   struct brw_compile *p = &c->func;
+   /* EXP: each result channel is emitted only if dst's writemask asks for it. */
+
+   if (dst.dw1.bits.writemask & WRITEMASK_X) {
+      struct brw_reg tmp = get_tmp(c);
+      struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D);
+
+      /* tmp_d = floor(arg0.x) */
+      brw_RNDD(p, tmp_d, brw_swizzle1(arg0, 0));
+
+      /* result[0] = 2.0 ^ tmp_d, built directly as float bits: */
+
+      /* Adjust exponent for floating point: 
+       * exp += 127 
+       */
+      brw_ADD(p, brw_writemask(tmp_d, WRITEMASK_X), tmp_d, brw_imm_d(127));
+
+      /* Install exponent and sign.  
+       * Excess drops off the edge: 
+       */
+      brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), WRITEMASK_X), 
+	      tmp_d, brw_imm_d(23));
+
+      release_tmp(c, tmp);
+   }
+
+   if (dst.dw1.bits.writemask & WRITEMASK_Y) {
+      /* result[1] = arg0.x - floor(arg0.x) */
+      brw_FRC(p, brw_writemask(dst, WRITEMASK_Y), brw_swizzle1(arg0, 0));
+   }
+   
+   if (dst.dw1.bits.writemask & WRITEMASK_Z) {
+      /* As with the LOG instruction, we might be better off just
+       * doing a taylor expansion here, seeing as we have to do all
+       * the prep work.
+       *
+       * If mathbox partial precision is too low, consider also:
+       * result[2] = result[0] * EXP(result[1])
+       */
+      emit_math1(c, 
+		 BRW_MATH_FUNCTION_EXP, 
+		 brw_writemask(dst, WRITEMASK_Z),
+		 brw_swizzle1(arg0, 0), 
+		 BRW_MATH_PRECISION_PARTIAL);
+   }  
+
+   if (dst.dw1.bits.writemask & WRITEMASK_W) {
+      /* result[3] = 1.0; */
+      brw_MOV(p, brw_writemask(dst, WRITEMASK_W), brw_imm_f(1));
+   }
+}
+
+
+static void emit_log_noalias( struct brw_vs_compile *c,
+			      struct brw_reg dst,
+			      struct brw_reg arg0 )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = dst;
+   struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
+   struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD);
+   GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
+			 dst.file != BRW_GENERAL_REGISTER_FILE);
+   /* Intermediate channels are read back below, so work in a GRF: */
+   if (need_tmp) {
+      tmp = get_tmp(c);
+      tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
+   }
+   
+   /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
+    * according to spec:
+    *
+    * These almost look like they could be joined up, but not really
+    * practical:
+    *
+    * result[0].f = ((x.i & ((1<<31)-1)) >> 23) - 127
+    * result[1].i = (x.i & ((1<<23)-1)) + (127<<23)
+    */
+   if (dst.dw1.bits.writemask & WRITEMASK_XZ) {
+      brw_AND(p, 
+	      brw_writemask(tmp_ud, WRITEMASK_X),
+	      brw_swizzle1(arg0_ud, 0),
+	      brw_imm_ud((1U<<31)-1));
+
+      brw_SHR(p, 
+	      brw_writemask(tmp_ud, WRITEMASK_X), 
+	      tmp_ud,
+	      brw_imm_ud(23));
+
+      brw_ADD(p, 
+	      brw_writemask(tmp, WRITEMASK_X), 
+	      retype(tmp_ud, BRW_REGISTER_TYPE_D),	/* does it matter? */
+	      brw_imm_d(-127));
+   }
+
+   if (dst.dw1.bits.writemask & WRITEMASK_YZ) {
+      brw_AND(p, 
+	      brw_writemask(tmp_ud, WRITEMASK_Y),
+	      brw_swizzle1(arg0_ud, 0),
+	      brw_imm_ud((1<<23)-1));
+
+      brw_OR(p, 
+	     brw_writemask(tmp_ud, WRITEMASK_Y), 
+	     tmp_ud,
+	     brw_imm_ud(127<<23));
+   }
+   
+   if (dst.dw1.bits.writemask & WRITEMASK_Z) {
+      /* result[2] = result[0] + LOG2(result[1]); */
+
+      /* Why bother?  The above is just a hint how to do this with a
+       * taylor series.  Maybe we *should* use a taylor series as by
+       * the time all the above has been done it's almost certainly
+       * quicker than calling the mathbox, even with low precision.
+       * 
+       * Options are:
+       *    - result[0] + mathbox.LOG2(result[1])
+       *    - mathbox.LOG2(arg0.x)
+       *    - result[0] + inline_taylor_approx(result[1])
+       */
+      emit_math1(c, 
+		 BRW_MATH_FUNCTION_LOG, 
+		 brw_writemask(tmp, WRITEMASK_Z), 
+		 brw_swizzle1(tmp, 1), 
+		 BRW_MATH_PRECISION_FULL);
+      
+      brw_ADD(p, 
+	      brw_writemask(tmp, WRITEMASK_Z), 
+	      brw_swizzle1(tmp, 2), 
+	      brw_swizzle1(tmp, 0));
+   }  
+
+   if (dst.dw1.bits.writemask & WRITEMASK_W) {
+      /* result[3] = 1.0; */
+      brw_MOV(p, brw_writemask(tmp, WRITEMASK_W), brw_imm_f(1));
+   }
+
+   if (need_tmp) {
+      brw_MOV(p, dst, tmp);
+      release_tmp(c, tmp);
+   }
+}
+
+
+      
+
+/* Need to unalias - consider swizzles:   r0 = DST r0.xxxx r1
+ */
+static void emit_dst_noalias( struct brw_vs_compile *c, 
+			      struct brw_reg dst,
+			      struct brw_reg arg0,
+			      struct brw_reg arg1)
+{
+   struct brw_compile *p = &c->func;
+
+   /* DST: (1.0, arg0.y*arg1.y, arg0.z, arg1.w), one masked write per
+    * channel.  There must be a better way to do this: */
+   if (dst.dw1.bits.writemask & WRITEMASK_X)
+      brw_MOV(p, brw_writemask(dst, WRITEMASK_X), brw_imm_f(1.0));
+   if (dst.dw1.bits.writemask & WRITEMASK_Y)
+      brw_MUL(p, brw_writemask(dst, WRITEMASK_Y), arg0, arg1);
+   if (dst.dw1.bits.writemask & WRITEMASK_Z)
+      brw_MOV(p, brw_writemask(dst, WRITEMASK_Z), arg0);
+   if (dst.dw1.bits.writemask & WRITEMASK_W)
+      brw_MOV(p, brw_writemask(dst, WRITEMASK_W), arg1);
+}
+
+static void emit_xpd( struct brw_compile *p,
+		      struct brw_reg dst,
+		      struct brw_reg t,
+		      struct brw_reg u)
+{  /* Cross product: t.yzx * u.zxy, then the MAC folds in -(t.zxy * u.yzx); the MUL result is presumably carried in the accumulator. */
+   brw_MUL(p, brw_null_reg(), brw_swizzle(t, 1,2,0,3),  brw_swizzle(u,2,0,1,3));
+   brw_MAC(p, dst,     negate(brw_swizzle(t, 2,0,1,3)), brw_swizzle(u,1,2,0,3));
+}
+
+
+
+static void emit_lit_noalias( struct brw_vs_compile *c, 
+			      struct brw_reg dst,
+			      struct brw_reg arg0 )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *if_insn;
+   struct brw_reg tmp = dst;
+   GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
+   /* tmp.z is read back as the POW base below, so it must be a GRF: */
+   if (need_tmp) 
+      tmp = get_tmp(c);
+   /* Defaults for arg0.x <= 0: result is (1, 0, 0, 1). */
+   brw_MOV(p, brw_writemask(dst, WRITEMASK_YZ), brw_imm_f(0)); 
+   brw_MOV(p, brw_writemask(dst, WRITEMASK_XW), brw_imm_f(1)); 
+
+   /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order
+    * to get all channels active inside the IF.  In the clipping code
+    * we run with NoMask, so it's not an option and we can use
+    * BRW_EXECUTE_1 for all comparisons.
+    */
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0));
+   if_insn = brw_IF(p, BRW_EXECUTE_8);
+   {
+      brw_MOV(p, brw_writemask(dst, WRITEMASK_Y), brw_swizzle1(arg0,0));
+      /* tmp.z = arg0.y, presumably only where arg0.y > 0 (predicated MOV): */
+      brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0));
+      brw_MOV(p, brw_writemask(tmp, WRITEMASK_Z),  brw_swizzle1(arg0,1));
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+      /* dst.z = pow(tmp.z, arg0.w) */
+      emit_math2(c, 
+		 BRW_MATH_FUNCTION_POW, 
+		 brw_writemask(dst, WRITEMASK_Z),
+		 brw_swizzle1(tmp, 2),
+		 brw_swizzle1(arg0, 3),
+		 BRW_MATH_PRECISION_PARTIAL);      
+   }
+
+   brw_ENDIF(p, if_insn);
+}
+
+
+
+
+
+/* TODO: relative addressing!
+ */
+static struct brw_reg get_reg( struct brw_vs_compile *c,
+			       GLuint file,
+			       GLuint index )
+{
+   if (file == PROGRAM_UNDEFINED)		/* undef values */
+      return brw_null_reg();
+
+   if (file == PROGRAM_ADDRESS) {
+      assert(index == 0);
+      return c->regs[file][index];
+   }
+
+   if (file == PROGRAM_TEMPORARY ||
+       file == PROGRAM_INPUT ||
+       file == PROGRAM_OUTPUT ||
+       file == PROGRAM_STATE_VAR) {
+      /* A register number of zero presumably means "never allocated". */
+      assert(c->regs[file][index].nr != 0);
+      return c->regs[file][index];
+   }
+
+   /* PROGRAM_LOCAL_PARAM, PROGRAM_ENV_PARAM, PROGRAM_WRITE_ONLY and
+    * any other file are not expected here.
+    */
+   assert(0);
+   return brw_null_reg();
+}
+
+
+
+static struct brw_reg deref( struct brw_vs_compile *c,
+			     struct brw_reg arg,
+			     GLint offset)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = vec4(get_tmp(c));
+   struct brw_reg vp_address = retype(vec1(get_reg(c, PROGRAM_ADDRESS, 0)), BRW_REGISTER_TYPE_UW);
+   GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;	/* 32 per GRF, 16 per vec4 */
+   struct brw_reg indirect = brw_vec4_indirect(0,0);
+   /* Result lands in a fresh temporary, which is not released here. */
+   {
+      brw_push_insn_state(p);
+      brw_set_access_mode(p, BRW_ALIGN_1);
+
+      /* This is pretty clunky - load the address register twice and
+       * fetch each 4-dword value in turn.  There must be a way to do
+       * this in a single pass, but I couldn't get it to work.
+       */
+      brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset));
+      brw_MOV(p, tmp, indirect);
+
+      brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset));
+      brw_MOV(p, suboffset(tmp, 4), indirect);
+
+      brw_pop_insn_state(p);
+   }
+   
+   return vec8(tmp);
+}
+
+
+static void emit_arl( struct brw_vs_compile *c,
+		      struct brw_reg dst,
+		      struct brw_reg arg0 )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = dst;
+   GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
+   
+   if (need_tmp) 
+      tmp = get_tmp(c);
+   /* dst = floor(arg0) * 16; deref() scales its constant offset by 16 as well. */
+   brw_RNDD(p, tmp, arg0);
+   brw_MUL(p, dst, tmp, brw_imm_d(16));
+
+   if (need_tmp)
+      release_tmp(c, tmp);
+}
+
+
+/* Will return mangled results for SWZ op.  The emit_swz() function
+ * ignores this result and recalculates taking extended swizzles into
+ * account.
+ */
+static struct brw_reg get_arg( struct brw_vs_compile *c,
+			       struct prog_src_register src )
+{
+   struct brw_reg reg;
+
+   if (src.File == PROGRAM_UNDEFINED)
+      return brw_null_reg();
+
+   /* Relative addressing is always resolved against the first
+    * PROGRAM_STATE_VAR register. */
+   reg = src.RelAddr
+      ? deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index)
+      : get_reg(c, src.File, src.Index);
+
+   /* Convert 3-bit swizzle to 2-bit.
+    */
+   reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src.Swizzle, 0),
+				       GET_SWZ(src.Swizzle, 1),
+				       GET_SWZ(src.Swizzle, 2),
+				       GET_SWZ(src.Swizzle, 3));
+
+   /* Note this is ok for non-swizzle instructions:
+    */
+   reg.negate = !!src.NegateBase;
+   return reg;
+}
+
+
+static struct brw_reg get_dst( struct brw_vs_compile *c,
+			       struct prog_dst_register dst )
+{
+   struct brw_reg reg = get_reg(c, dst.File, dst.Index);
+   /* Replace the stored writemask with the instruction's own. */
+   reg.dw1.bits.writemask = dst.WriteMask;
+
+   return reg;
+}
+
+
+
+
+static void emit_swz( struct brw_vs_compile *c, 
+		      struct brw_reg dst,
+		      struct prog_src_register src )
+{
+   struct brw_compile *p = &c->func;
+   GLuint zeros_mask = 0;
+   GLuint ones_mask = 0;
+   GLuint src_mask = 0;
+   GLubyte src_swz[4] = { 0, 0, 0, 0 };	/* all four are passed to brw_swizzle() */
+   GLboolean need_tmp = (src.NegateBase &&
+			 dst.file != BRW_GENERAL_REGISTER_FILE);
+   struct brw_reg tmp = dst;
+   GLuint i;
+   /* The negate step reads the result back, so it needs a GRF: */
+   if (need_tmp)
+      tmp = get_tmp(c);
+   /* Sort each written channel into: source component, zero or one. */
+   for (i = 0; i < 4; i++) {
+      if (dst.dw1.bits.writemask & (1<<i)) {
+	 GLubyte s = GET_SWZ(src.Swizzle, i);
+	 switch (s) {
+	 case SWIZZLE_X:
+	 case SWIZZLE_Y:
+	 case SWIZZLE_Z:
+	 case SWIZZLE_W:
+	    src_mask |= 1<<i;
+	    src_swz[i] = s;
+	    break;
+	 case SWIZZLE_ZERO:
+	    zeros_mask |= 1<<i;
+	    break;
+	 case SWIZZLE_ONE:
+	    ones_mask |= 1<<i;
+	    break;
+	 }
+      }
+   }
+   
+   /* Do src first, in case dst aliases src:
+    */
+   if (src_mask) {
+      struct brw_reg arg0;
+
+      if (src.RelAddr) 
+	 arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index);
+      else
+	 arg0 = get_reg(c, src.File, src.Index);
+      /* Entries for channels outside src_mask are zero and masked off. */
+      arg0 = brw_swizzle(arg0, 
+			 src_swz[0], src_swz[1], 
+			 src_swz[2], src_swz[3]);
+
+      brw_MOV(p, brw_writemask(tmp, src_mask), arg0);
+   } 
+   
+   if (zeros_mask) 
+      brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0));
+
+   if (ones_mask) 
+      brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1));
+   /* NegateBase doubles as the mask of channels to negate: */
+   if (src.NegateBase)
+      brw_MOV(p, brw_writemask(tmp, src.NegateBase), negate(tmp));
+   
+   if (need_tmp) {
+      brw_MOV(p, dst, tmp);
+      release_tmp(c, tmp);
+   }
+}
+
+
+
+/* Post-vertex-program processing.  Send the results to the URB.
+ */
+static void emit_vertex_write( struct brw_vs_compile *c)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg m0 = brw_message_reg(0);
+   struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS];
+   struct brw_reg ndc;
+
+   if (c->key.copy_edgeflag) {
+      brw_MOV(p, 
+	      get_reg(c, PROGRAM_OUTPUT, VERT_RESULT_EDGE),
+	      get_reg(c, PROGRAM_INPUT, VERT_ATTRIB_EDGEFLAG));
+   }
+
+
+   /* Build ndc coords (x/w, y/w, z/w, 1/w), unless w is known to be
+    * one, in which case the position is used unchanged. */
+   if (!c->key.know_w_is_one) {
+      ndc = get_tmp(c);
+      emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
+      brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc);
+   }
+   else {
+      ndc = pos;
+   }
+
+   /* This includes the workaround for -ve rhw, so is no longer an
+    * optional step:
+    */
+   if ((c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) ||
+       c->key.nr_userclip ||
+       !c->key.know_w_is_one)
+   {
+      struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+      GLuint i;
+
+      brw_MOV(p, header1, brw_imm_ud(0));
+
+      brw_set_access_mode(p, BRW_ALIGN_16);	
+
+      if (c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) {
+	 struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ];
+	 brw_MUL(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
+	 brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8));
+      }
+
+      /* Bit i of header1.w flags user plane i; the OR is presumably predicated on the DP4 test. */
+      for (i = 0; i < c->key.nr_userclip; i++) {
+	 brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+	 brw_DP4(p, brw_null_reg(), pos, c->userplane[i]);
+	 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<i));
+	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+      }
+
+
+      /* i965 clipping workaround: 
+       * 1) Test for -ve rhw
+       * 2) If set, 
+       *      set ndc = (0,0,0,0)
+       *      set ucp[6] = 1
+       *
+       * Later, clipping will detect ucp[6] and ensure the primitive is
+       * clipped against all fixed planes.
+       */
+      if (!BRW_IS_IGD(p->brw) && !c->key.know_w_is_one) {
+	 brw_CMP(p,
+		 vec8(brw_null_reg()),
+		 BRW_CONDITIONAL_L,
+		 brw_swizzle1(ndc, 3),
+		 brw_imm_f(0));
+   
+	 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
+	 brw_MOV(p, ndc, brw_imm_f(0));
+	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+      }
+
+      brw_set_access_mode(p, BRW_ALIGN_1);	/* why? */
+      brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1);
+      brw_set_access_mode(p, BRW_ALIGN_16);
+
+      release_tmp(c, header1);
+   }
+   else {
+      brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
+   }
+
+
+   /* Emit the (interleaved) headers for the two vertices - an 8-reg
+    * of zeros followed by two sets of NDC coordinates:
+    */
+   brw_set_access_mode(p, BRW_ALIGN_1);
+   brw_MOV(p, offset(m0, 2), ndc);
+   brw_MOV(p, offset(m0, 3), pos);
+   
+
+   brw_urb_WRITE(p, 
+		 brw_null_reg(), /* dest */
+		 0,		/* starting mrf reg nr */
+		 c->r0,		/* src */
+		 0,		/* allocate */
+		 1,		/* used */
+		 c->nr_outputs + 3, /* msg len */
+		 0,		/* response len */
+		 1, 		/* eot */
+		 1, 		/* writes complete */
+		 0, 		/* urb destination offset */
+		 BRW_URB_SWIZZLE_INTERLEAVE);
+
+}
+
+
+/* Emit the vertex program instructions here: each Mesa instruction of
+ * c->vp is translated through the brw_* / emit_* helpers, and the
+ * vertex write (emit_vertex_write) is appended after the last one.
+ */
+void brw_vs_emit( struct brw_vs_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   GLuint nr_insns = c->vp->program.Base.NumInstructions;
+   GLuint insn;
+
+
+   if (INTEL_DEBUG & DEBUG_VS) {
+      _mesa_printf("\n\n\nvs-emit:\n");
+      _mesa_print_program(&c->vp->program.Base); 
+      _mesa_printf("\n");
+   }
+
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_set_access_mode(p, BRW_ALIGN_16);
+   
+   /* Static register allocation
+    */
+   brw_vs_alloc_regs(c);
+
+   for (insn = 0; insn < nr_insns; insn++) {
+
+      struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
+      struct brw_reg args[3], dst;
+      GLuint i;
+      
+      /* Get argument regs.  SWZ is special and does this itself.
+       */
+      if (inst->Opcode != OPCODE_SWZ)
+	 for (i = 0; i < 3; i++) 
+	    args[i] = get_arg(c, inst->SrcReg[i]);
+
+      /* Get dest regs.  Note that it is possible for a reg to be both
+       * dst and arg, given the static allocation of registers.  So
+       * care needs to be taken emitting multi-operation instructions.
+       */
+      dst = get_dst(c, inst->DstReg);
+
+      
+      switch (inst->Opcode) {
+      case OPCODE_ABS:
+	 brw_MOV(p, dst, brw_abs(args[0]));
+	 break;
+      case OPCODE_ADD:
+	 brw_ADD(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_DP3:
+	 brw_DP3(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_DP4:
+	 brw_DP4(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_DPH:
+	 brw_DPH(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_DST:
+	 unalias2(c, dst, args[0], args[1], emit_dst_noalias); 
+	 break;
+      case OPCODE_EXP:
+	 unalias1(c, dst, args[0], emit_exp_noalias);
+	 break;
+      case OPCODE_EX2:
+	 emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
+	 break;
+      case OPCODE_ARL:
+	 emit_arl(c, dst, args[0]);
+	 break;
+      case OPCODE_FLR:
+	 brw_RNDD(p, dst, args[0]);
+	 break;
+      case OPCODE_FRC:
+	 brw_FRC(p, dst, args[0]);
+	 break;
+      case OPCODE_LOG:
+	 unalias1(c, dst, args[0], emit_log_noalias);
+	 break;
+      case OPCODE_LG2:
+	 emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL);
+	 break;
+      case OPCODE_LIT:
+	 unalias1(c, dst, args[0], emit_lit_noalias);
+	 break;
+      case OPCODE_MAD:		/* done via the accumulator register */
+	 brw_MOV(p, brw_acc_reg(), args[2]);
+	 brw_MAC(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_MAX:
+	 emit_max(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_MIN:
+	 emit_min(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_MOV:
+	 brw_MOV(p, dst, args[0]);
+	 break;
+      case OPCODE_MUL:
+	 brw_MUL(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_POW:
+	 emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL); 
+	 break;
+      case OPCODE_RCP:
+	 emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
+	 break;
+      case OPCODE_RSQ:
+	 emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
+	 break;
+      case OPCODE_SGE:
+	 emit_sge(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_SLT:
+	 emit_slt(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_SUB:
+	 brw_ADD(p, dst, args[0], negate(args[1]));
+	 break;
+      case OPCODE_SWZ:
+	 /* The args[0] value can't be used here as it won't have
+	  * correctly encoded the full swizzle:
+	  */
+	 emit_swz(c, dst, inst->SrcReg[0] );
+	 break;
+      case OPCODE_XPD:
+	 emit_xpd(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_END:	
+      case OPCODE_PRINT:
+	 break;			/* nothing to emit */
+      default:
+	 break;			/* NOTE(review): unhandled opcodes are dropped silently */
+      }
+
+      release_tmps(c);
+   }
+
+   emit_vertex_write(c);
+
+}
+
+
+
+
+
diff --git a/i965/brw_vs_state.c b/i965/brw_vs_state.c
new file mode 100644
index 0000000..c225bf8
--- /dev/null
+++ b/i965/brw_vs_state.c
@@ -0,0 +1,102 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+            
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "macros.h"
+
+/* Build the VS unit state from the current program, URB and CURBE
+ * layout and store it via brw_cache_data().  The struct is zeroed
+ * first, so flags are only written when they need to be non-zero.
+ */
+static void upload_vs_unit( struct brw_context *brw )
+{
+   struct brw_vs_unit_state vs;
+
+   memset(&vs, 0, sizeof(vs));
+
+   /* Settings that never change:
+    */
+   vs.vs6.vs_enable = 1;	/* vertex program always enabled */
+   vs.vs5.sampler_count = 0;	/* no samplers for ARB_vp programs */
+   vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+   vs.thread3.dispatch_grf_start_reg = 1;
+   vs.thread3.urb_entry_read_offset = 0;
+
+   /* CACHE_NEW_VS_PROG */
+   vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6;
+   vs.thread0.grf_reg_count = ((brw->vs.prog_data->total_grf-1) & ~15) / 16;
+   vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length;
+   vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length;
+
+   /* BRW_NEW_URB_FENCE  */
+   vs.thread4.nr_urb_entries = brw->urb.nr_vs_entries; 
+   vs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
+
+   /* The thread count is derived from the URB entry count and capped
+    * at 15; DEBUG_SINGLE_THREAD overrides it with 0.
+    * NOTE(review): if nr_vs_entries is unsigned, values below 6 wrap
+    * before MAX2 sees them - confirm the type.
+    */
+   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+      vs.thread4.max_threads = 0; 
+   else
+      vs.thread4.max_threads = MIN2(
+	 MAX2(0, (brw->urb.nr_vs_entries - 6) / 2 - 1), 
+	 15);
+
+   if (INTEL_DEBUG & DEBUG_STATS)
+      vs.thread4.stats_enable = 1; 
+
+   /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM
+    *
+    * With user clip planes enabled the planes are read in as well,
+    * so the read starts at clip_start rather than vs_start:
+    */
+   vs.thread3.const_urb_entry_read_offset = 2 *
+      (brw->attribs.Transform->ClipPlanesEnabled ? brw->curbe.clip_start
+						 : brw->curbe.vs_start);
+
+   brw->vs.state_gs_offset = brw_cache_data( &brw->cache[BRW_VS_UNIT], &vs );
+}
+
+/* Tracked-state entry for the VS unit: lists the state flags whose
+ * values upload_vs_unit() reads, and names it as the update hook.  */
+const struct brw_tracked_state brw_vs_unit = {
+   .update = upload_vs_unit,
+   .dirty = {
+      .cache = CACHE_NEW_VS_PROG,
+      .brw   = BRW_NEW_CURBE_OFFSETS | BRW_NEW_URB_FENCE,
+      .mesa  = _NEW_TRANSFORM
+   }
+};
diff --git a/i965/brw_vs_tnl.c b/i965/brw_vs_tnl.c
new file mode 100644
index 0000000..14483b3
--- /dev/null
+++ b/i965/brw_vs_tnl.c
@@ -0,0 +1,1691 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.3
+ *
+ * Copyright (C) 2005  Tungsten Graphics   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * TUNGSTEN GRAPHICS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file brw_vs_tnl.c  (originally t_vp_build.c)
+ * Create a vertex program to execute the current fixed function T&L pipeline.
+ * \author Keith Whitwell
+ */
+
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "brw_vs.h"
+#include "brw_state.h"
+
+/* Everything make_state_key() records about the fixed-function state. */
+struct state_key {
+   unsigned light_global_enabled:1;	/* Light->Enabled */
+   unsigned light_local_viewer:1;
+   unsigned light_twoside:1;
+   unsigned light_color_material:1;
+   unsigned light_color_material_mask:12;	/* Light->ColorMaterialBitmask */
+   unsigned light_material_mask:12;	/* materials supplied via varying generic attribs */
+   unsigned normalize:1;
+   unsigned rescale_normals:1;
+   unsigned fog_source_is_depth:1;	/* FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT */
+   unsigned tnl_do_vertex_fog:1;	/* currently always set */
+   unsigned separate_specular:1;
+   unsigned fog_option:2;		/* FOG_* code */
+   unsigned point_attenuated:1;
+   unsigned texture_enabled_global:1;	/* any texgen, texmat or enabled unit */
+   unsigned fragprog_inputs_read:12;	/* fp InputsRead, plus FRAG_BIT_FOGC when fogging */
+
+   struct {				/* [i] is light i for light_*, texture unit i otherwise */
+      unsigned light_enabled:1;
+      unsigned light_eyepos3_is_zero:1;
+      unsigned light_spotcutoff_is_180:1;
+      unsigned light_attenuated:1;      
+      unsigned texunit_really_enabled:1;
+      unsigned texmat_enabled:1;
+      unsigned texgen_enabled:4;	/* set to 1 when any coordinate is generated */
+      unsigned texgen_mode0:4;		/* TXG_* code for S */
+      unsigned texgen_mode1:4;		/* TXG_* code for T */
+      unsigned texgen_mode2:4;		/* TXG_* code for R */
+      unsigned texgen_mode3:4;		/* TXG_* code for Q */
+   } unit[8];
+};
+
+
+/* Fog mode codes kept in state_key.fog_option, and their GL translation. */
+#define FOG_NONE   0
+#define FOG_LINEAR 1
+#define FOG_EXP    2
+#define FOG_EXP2   3
+
+static GLuint translate_fog_mode( GLenum mode )
+{
+   if (mode == GL_LINEAR)
+      return FOG_LINEAR;
+   if (mode == GL_EXP)
+      return FOG_EXP;
+   /* Any mode other than EXP2 left at this point means "no fog". */
+   return (mode == GL_EXP2) ? FOG_EXP2 : FOG_NONE;
+}
+
+#define TXG_NONE           0
+#define TXG_OBJ_LINEAR     1
+#define TXG_EYE_LINEAR     2
+#define TXG_SPHERE_MAP     3
+#define TXG_REFLECTION_MAP 4
+#define TXG_NORMAL_MAP     5
+
+/* Translate one coordinate's texgen mode to a TXG_* code.  Disabled
+ * coordinates and unrecognised modes both yield TXG_NONE.
+ */
+static GLuint translate_texgen( GLboolean enabled, GLenum mode )
+{
+   if (enabled) {
+      if (mode == GL_OBJECT_LINEAR)     return TXG_OBJ_LINEAR;
+      if (mode == GL_EYE_LINEAR)        return TXG_EYE_LINEAR;
+      if (mode == GL_SPHERE_MAP)        return TXG_SPHERE_MAP;
+      if (mode == GL_REFLECTION_MAP_NV) return TXG_REFLECTION_MAP;
+      if (mode == GL_NORMAL_MAP_NV)     return TXG_NORMAL_MAP;
+   }
+   return TXG_NONE;
+}
+
+/* Capture in *key every piece of GL state that the generated
+ * fixed-function vertex program depends on.  The key is zeroed first,
+ * so each flag below is only ever raised, never cleared.  The
+ * comments name the dirty flag each group of fields belongs to.
+ */
+static void make_state_key( GLcontext *ctx, struct state_key *key )
+{
+   struct brw_context *brw = brw_context(ctx);
+   const struct gl_fragment_program *fp = brw->fragment_program;
+   GLuint i;
+
+   /* This now relies on texenvprogram.c being active:
+    */
+   assert(fp);
+
+   memset(key, 0, sizeof(*key));
+
+   /* BRW_NEW_FRAGMENT_PROGRAM
+    *
+    * A fragment program with a fog option also consumes the fog
+    * coordinate, so its input bit is forced on:
+    */
+   key->fragprog_inputs_read = fp->Base.InputsRead;
+   key->fog_option = translate_fog_mode(fp->FogOption);
+   if (key->fog_option)
+      key->fragprog_inputs_read |= FRAG_BIT_FOGC;
+
+   /* _NEW_LIGHT */
+   if (brw->attribs.Light->Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR)
+      key->separate_specular = 1;
+
+   if (brw->attribs.Light->Enabled) {
+      key->light_global_enabled = 1;
+      key->light_local_viewer = brw->attribs.Light->Model.LocalViewer ? 1 : 0;
+      key->light_twoside = brw->attribs.Light->Model.TwoSide ? 1 : 0;
+
+      if (brw->attribs.Light->ColorMaterialEnabled) {
+	 key->light_color_material = 1;
+	 key->light_color_material_mask = brw->attribs.Light->ColorMaterialBitmask;
+      }
+
+      /* BRW_NEW_INPUT_VARYING
+       *
+       * For these programs, material values are stuffed into the
+       * generic slots:
+       */
+      for (i = 0 ; i < MAT_ATTRIB_MAX ; i++) {
+	 if (brw->vb.info.varying & (1<<(VERT_ATTRIB_GENERIC0 + i))) 
+	    key->light_material_mask |= 1<<i;
+      }
+
+      for (i = 0; i < MAX_LIGHTS; i++) {
+	 const struct gl_light *light = &brw->attribs.Light->Light[i];
+
+	 if (!light->Enabled)
+	    continue;
+
+	 /* Special cases recorded per enabled light:
+	  */
+	 key->unit[i].light_enabled = 1;
+	 key->unit[i].light_eyepos3_is_zero = (light->EyePosition[3] == 0.0);
+	 key->unit[i].light_spotcutoff_is_180 = (light->SpotCutoff == 180.0);
+	 key->unit[i].light_attenuated = (light->ConstantAttenuation != 1.0 ||
+					  light->LinearAttenuation != 0.0 ||
+					  light->QuadraticAttenuation != 0.0);
+      }
+   }
+
+   /* _NEW_TRANSFORM */
+   key->normalize = brw->attribs.Transform->Normalize ? 1 : 0;
+   key->rescale_normals = brw->attribs.Transform->RescaleNormals ? 1 : 0;
+
+   /* _NEW_FOG */
+   key->fog_source_is_depth =
+      (brw->attribs.Fog->FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT);
+
+   /* _NEW_HINT, ???
+    * (vertex fog is unconditionally requested for now) */
+   key->tnl_do_vertex_fog = 1;
+
+   /* _NEW_POINT */
+   key->point_attenuated = brw->attribs.Point->_Attenuated ? 1 : 0;
+
+   /* _NEW_TEXTURE */
+   key->texture_enabled_global = (brw->attribs.Texture->_TexGenEnabled ||
+				  brw->attribs.Texture->_TexMatEnabled ||
+				  brw->attribs.Texture->_EnabledUnits);
+
+   /* Per-unit texture state.  Note that unit[] is shared with the
+    * per-light flags above; here it is indexed by texture unit:
+    */
+   for (i = 0; i < MAX_TEXTURE_UNITS; i++) {
+      const struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[i];
+
+      key->unit[i].texunit_really_enabled = texUnit->_ReallyEnabled ? 1 : 0;
+      key->unit[i].texmat_enabled =
+	 (brw->attribs.Texture->_TexMatEnabled & ENABLE_TEXMAT(i)) ? 1 : 0;
+
+      if (!texUnit->TexGenEnabled)
+	 continue;
+
+      /* One mode per coordinate; bits 0..3 of TexGenEnabled gate the
+       * S, T, R and Q modes respectively:
+       */
+      key->unit[i].texgen_enabled = 1;
+
+      key->unit[i].texgen_mode0 = 
+	 translate_texgen( texUnit->TexGenEnabled & (1<<0),
+			   texUnit->GenModeS );
+      key->unit[i].texgen_mode1 = 
+	 translate_texgen( texUnit->TexGenEnabled & (1<<1),
+			   texUnit->GenModeT );
+      key->unit[i].texgen_mode2 = 
+	 translate_texgen( texUnit->TexGenEnabled & (1<<2),
+			   texUnit->GenModeR );
+      key->unit[i].texgen_mode3 = 
+	 translate_texgen( texUnit->TexGenEnabled & (1<<3),
+			   texUnit->GenModeQ );
+   }
+}
+
+
+   
+/* Very useful debugging tool - when non-zero, debug_insn() prints an
+ * annotated listing of the generated program with line/function
+ * references for each instruction back into this file:
+ */
+#define DISASSEM 0
+
+/* Should be tunable by the driver - do matrix multiplications with
+ * DP4's (non-zero) or with MUL/MAD's on the transposed matrix (zero)?
+ * SSE works better with the latter, drivers may differ.
+ */
+#define PREFER_DP4 1
+
+
+/* Use uregs to represent registers internally, translate to Mesa's
+ * expected formats on emit.  
+ *
+ * NOTE: These are passed by value extensively in this file rather
+ * than as usual by pointer reference.  If this disturbs you, try
+ * remembering they are just 32bits in size.
+ *
+ * GCC is smart enough to deal with these dword-sized structures in
+ * much the same way as if I had defined them as dwords and was using
+ * macros to access and set the fields.  This is much nicer and easier
+ * to evolve.
+ */
+struct ureg {
+   GLuint file:4;    /* PROGRAM_* register file */
+   GLint idx:8;      /* relative addressing may be negative */
+   GLuint negate:1;  /* copied to NegateBase by emit_arg() */
+   GLuint swz:12;    /* packed swizzle, see MAKE_SWIZZLE4 / GET_SWZ */
+   GLuint pad:7;     /* brings the bitfields up to 32 bits; kept zero */
+};
+
+/* Working state while one fixed-function vertex program is being built. */
+struct tnl_program {
+   const struct state_key *state;	/* key the program is generated for */
+   struct gl_vertex_program *program;	/* program being filled in */
+   
+   GLuint nr_instructions;	/* allocated size of the instruction array */
+   GLuint temp_in_use;		/* bitmask of allocated temporaries */
+   GLuint temp_reserved;	/* subset that release_temp(s)() must keep */
+   
+   struct ureg eye_position;	/* cached by get_eye_position() etc. on first use */
+   struct ureg eye_position_normalized;
+   struct ureg eye_normal;
+   struct ureg identity;
+
+   GLuint materials;		/* material_attrib() bits that vary per vertex */
+   GLuint color_materials;	/* ... of which these are read from COLOR0 */
+};
+
+/* Placeholder for "no register": file PROGRAM_UNDEFINED, idx all ones. */
+static const struct ureg undef = {
+   .file   = PROGRAM_UNDEFINED,
+   .idx    = ~0,
+   .negate = 0,
+   .swz    = 0,
+   .pad    = 0
+};
+
+/* Local shorthand for the swizzle component selectors:
+ */
+#define X    SWIZZLE_X
+#define Y    SWIZZLE_Y
+#define Z    SWIZZLE_Z
+#define W    SWIZZLE_W
+
+
+/* Construct a ureg for (file, idx) with no negation and the identity
+ * swizzle:
+ */
+static struct ureg make_ureg(GLuint file, GLint idx)
+{
+   struct ureg result;
+   result.negate = result.pad = 0;
+   result.swz = SWIZZLE_NOOP;
+   result.idx = idx;
+   result.file = file;
+   return result;
+}
+
+/* Return reg with its negate flag toggled.
+ */
+static struct ureg ureg_negate( struct ureg reg )
+{
+   reg.negate = !reg.negate;
+   return reg;
+} 
+
+/* Apply a further swizzle on top of reg's existing one: each new
+ * channel reads whatever the selected old channel was reading. */
+static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w )
+{
+   const GLuint prev = reg.swz;
+   reg.swz = MAKE_SWIZZLE4(GET_SWZ(prev, x), GET_SWZ(prev, y),
+			   GET_SWZ(prev, z), GET_SWZ(prev, w));
+   return reg;
+}
+
+/* Broadcast a single (composed) component to all four channels. */
+static struct ureg swizzle1( struct ureg reg, int x )
+{
+   return swizzle(reg, x, x, x, x);
+}
+
+/* Allocate the lowest-numbered free temporary, or undef if none is left. */
+static struct ureg get_temp( struct tnl_program *p )
+{
+   int bit = ffs( ~p->temp_in_use );
+   if (!bit) {
+      fprintf(stderr, "%s: out of temporaries\n", __FILE__);
+      assert(0);
+      return undef;	/* NDEBUG builds: don't shift by -1 below */
+   }
+   if (bit > p->program->Base.NumTemporaries)	/* high-water mark */
+      p->program->Base.NumTemporaries = bit;
+   p->temp_in_use |= 1u<<(bit-1);	/* unsigned: bit 31 must not hit the sign */
+   return make_ureg(PROGRAM_TEMPORARY, bit-1);
+}
+
+static struct ureg reserve_temp( struct tnl_program *p )
+{
+   struct ureg temp = get_temp( p );
+   p->temp_reserved |= (temp.file == PROGRAM_TEMPORARY) ? 1u<<temp.idx : 0;
+   return temp;		/* stays allocated across release_temp(s)() */
+}
+
+/* Free a temporary; reserved ones (and non-temporaries) are left alone. */
+static void release_temp( struct tnl_program *p, struct ureg reg )
+{
+   if (reg.file != PROGRAM_TEMPORARY)
+      return;
+   p->temp_in_use = (p->temp_in_use & ~(1u<<reg.idx)) | p->temp_reserved;
+}
+
+static void release_temps( struct tnl_program *p )
+{
+   p->temp_in_use = p->temp_reserved;	/* drop everything not reserved */
+}
+
+/* Mark vertex attribute `input` as read and return a register for it. */
+static struct ureg register_input( struct tnl_program *p, GLuint input )
+{
+   assert(input < 32);
+   p->program->Base.InputsRead |= (1u<<input);	/* 1u: bit 31 is legal */
+   return make_ureg(PROGRAM_INPUT, input);
+}
+
+/* Mark result slot `output` as written and return a register for it. */
+static struct ureg register_output( struct tnl_program *p, GLuint output )
+{
+   assert(output < 32);
+   p->program->Base.OutputsWritten |= (1u<<output);
+   return make_ureg(PROGRAM_OUTPUT, output);
+}
+
+/* Add the vec4 constant (s0,s1,s2,s3) to the program's parameter list and
+ * return a register for it, carrying the swizzle the list hands back.
+ */
+static struct ureg register_const4f( struct tnl_program *p, 
+			      GLfloat s0, GLfloat s1, GLfloat s2, GLfloat s3)
+{
+   GLfloat values[4];
+   GLuint swz = SWIZZLE_NOOP;	/* default if the callee leaves it unset */
+   struct ureg reg;
+   values[0] = s0;
+   values[1] = s1;
+   values[2] = s2;
+   values[3] = s3;
+   reg = make_ureg(PROGRAM_STATE_VAR, _mesa_add_unnamed_constant(
+		      p->program->Base.Parameters, values, 4, &swz));
+   reg.swz = swz;	/* previously discarded ("XXX what about swizzle?") */
+   return reg;
+}
+
+#define register_const1f(p, s0)         register_const4f(p, s0, 0, 0, 1)
+#define register_scalar_const(p, s0)    register_const4f(p, s0, s0, s0, s0)
+#define register_const2f(p, s0, s1)     register_const4f(p, s0, s1, 0, 1)
+#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1)
+
+/* True when reg is the undef placeholder (file PROGRAM_UNDEFINED). */
+static GLboolean is_undef( struct ureg reg )
+{
+   return (reg.file != PROGRAM_UNDEFINED) ? GL_FALSE : GL_TRUE;
+}
+
+/* Register on first use, and afterwards reuse, the constant (0,0,0,1). */
+static struct ureg get_identity_param( struct tnl_program *p )
+{
+   return is_undef(p->identity) ? (p->identity = register_const4f(p, 0,0,0,1))
+				: p->identity;
+}
+
+/* Add a reference to the GL state named by the tokens s0..s4 to the
+ * program's parameter list and return the register that refers to it.
+ */
+static struct ureg register_param5( struct tnl_program *p, 
+                                    GLint s0, GLint s1, GLint s2,
+                                    GLint s3, GLint s4)
+{
+   /* Zeroed so that no slot beyond the five set here is indeterminate. */
+   gl_state_index tokens[STATE_LENGTH] = { 0 };
+   tokens[0] = s0;
+   tokens[1] = s1;
+   tokens[2] = s2;
+   tokens[3] = s3;
+   tokens[4] = s4;
+   return make_ureg(PROGRAM_STATE_VAR,
+		    _mesa_add_state_reference( p->program->Base.Parameters, tokens ));
+}
+
+/* Shorthands for shorter state names: the unused trailing tokens are 0. */
+#define register_param1(p,s0)          register_param5(p,s0,0,0,0,0)
+#define register_param2(p,s0,s1)       register_param5(p,s0,s1,0,0,0)
+#define register_param3(p,s0,s1,s2)    register_param5(p,s0,s1,s2,0,0)
+#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0)
+
+/* Register rows s2..s3 of a state matrix as separate parameters,
+ * filling matrix[0 .. s3-s2].  One reference is made per row, although
+ * the support is there to pull the whole matrix out in one go.
+ */
+static void register_matrix_param5( struct tnl_program *p,
+				    GLint s0, /* matrix name */
+				    GLint s1, /* texture matrix number */
+				    GLint s2, /* first row */
+				    GLint s3, /* last row */
+				    GLint s4, /* modifier */
+				    struct ureg *matrix )
+{
+   GLint i;
+
+   for (i = 0; i <= s3 - s2; i++)	/* row s2+i; was i, ignoring the first row */
+      matrix[i] = register_param5( p, s0, s1, s2 + i, s2 + i, s4 );
+}
+
+/* Translate a ureg into a Mesa source register (no abs, no rel-addr). */
+static void emit_arg( struct prog_src_register *src, struct ureg reg )
+{
+   /* Modifiers that a ureg cannot express are cleared: */
+   src->Abs = 0;
+   src->NegateAbs = 0;
+   src->RelAddr = 0;
+   src->NegateBase = reg.negate;
+   src->Swizzle = reg.swz;
+   src->Index = reg.idx;
+   src->File = reg.file;
+}
+
+/* Fill in a Mesa destination register from a ureg and a writemask. */
+static void emit_dst( struct prog_dst_register *dst,
+		      struct ureg reg, GLuint mask )
+{
+   if (!mask)
+      mask = WRITEMASK_XYZW;	/* zero is shorthand for xyzw */
+
+   dst->WriteMask = mask;
+   dst->Index = reg.idx;
+   dst->File = reg.file;
+   dst->CondMask = dst->CondSwizzle = dst->CondSrc = dst->pad = 0;
+}
+
+/* When DISASSEM is set, print inst prefixed by the emitting source line,
+ * with a "fn:" heading whenever the emitting function changes. */
+static void debug_insn( struct prog_instruction *inst, const char *fn,
+			GLuint line )
+{
+   static const char *prev_fn;
+   if (!DISASSEM)
+      return;
+   if (prev_fn != fn) {
+      prev_fn = fn;
+      _mesa_printf("%s:\n", fn);
+   }
+   _mesa_printf("%d:\t", line);
+   _mesa_print_instruction(inst);
+}
+
+/* Append one instruction to the program being built, doubling the
+ * instruction array first when it is full.  fn/line name the emitting
+ * call site for debug_insn().
+ */
+static void emit_op3fn(struct tnl_program *p, GLuint op,
+		       struct ureg dest, GLuint mask,
+		       struct ureg src0, struct ureg src1, struct ureg src2,
+		       const char *fn, GLuint line)
+{
+   const GLuint nr = p->program->Base.NumInstructions++;
+   struct prog_instruction *inst;
+
+   if (nr >= p->nr_instructions) {
+      /* Both sizes are fixed before the call: updating nr_instructions
+       * inside the argument list while another argument reads it is
+       * unsequenced, so the "old size" argument was not well defined. */
+      const GLuint old_nr = p->nr_instructions;
+      p->nr_instructions = old_nr * 2;
+      p->program->Base.Instructions = 
+	 _mesa_realloc(p->program->Base.Instructions,
+		       sizeof(struct prog_instruction) * old_nr,
+		       sizeof(struct prog_instruction) * p->nr_instructions);
+   }
+
+   inst = &p->program->Base.Instructions[nr];
+   memset(inst, 0, sizeof(*inst));
+   inst->Opcode = op; 
+   inst->StringPos = 0;
+   inst->Data = 0;
+
+   emit_arg( &inst->SrcReg[0], src0 );
+   emit_arg( &inst->SrcReg[1], src1 );
+   emit_arg( &inst->SrcReg[2], src2 );   
+   emit_dst( &inst->DstReg, dest, mask );
+   debug_insn(inst, fn, line);
+}
+
+   
+/* Wrappers that add the call site and pad missing sources with undef. */
+#define emit_op3(p, op, dst, mask, src0, src1, src2) \
+   emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__)
+
+#define emit_op2(p, op, dst, mask, src0, src1) \
+    emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__)
+
+#define emit_op1(p, op, dst, mask, src0) \
+    emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__)
+
+/* Return a temporary holding reg's value that the caller may overwrite:
+ * reg itself if it is an unreserved temporary, otherwise a fresh copy.
+ */
+static struct ureg make_temp( struct tnl_program *p, struct ureg reg )
+{
+   struct ureg temp;
+   if (reg.file == PROGRAM_TEMPORARY && !(p->temp_reserved & (1u<<reg.idx)))
+      return reg;
+   temp = get_temp(p);
+   emit_op1(p, OPCODE_MOV, temp, 0, reg);
+   return temp;
+}
+
+
+/* Channel c of dest = DP4(src, mat[c]) for c in x,y,z,w.  No tracking
+ * is done of input/output/register size, active elements or matrix
+ * type, any of which could be used to reduce these operations.
+ */
+static void emit_matrix_transform_vec4( struct tnl_program *p,
+					struct ureg dest,
+					const struct ureg *mat, struct ureg src)
+{
+   static const GLuint chan[4] = { WRITEMASK_X, WRITEMASK_Y,
+				   WRITEMASK_Z, WRITEMASK_W };
+   GLuint row;
+   for (row = 0; row < 4; row++)
+      emit_op2(p, OPCODE_DP4, dest, chan[row], src, mat[row]);
+}
+
+/* Computes dest = src.x*mat[0] + src.y*mat[1] + src.z*mat[2] + src.w*mat[3],
+ * which needs neither writemasks nor a dot product.  That makes it much
+ * easier to implement on targets lacking native writemasks or (like
+ * SSE) a clean/obvious dotproduct.
+ */
+static void emit_transpose_matrix_transform_vec4( struct tnl_program *p,
+						  struct ureg dest,
+						  const struct ureg *mat,
+						  struct ureg src)
+{
+   /* Accumulate in a scratch temporary unless dest already is one. */
+   const GLboolean scratch = (dest.file != PROGRAM_TEMPORARY);
+   const struct ureg acc = scratch ? get_temp(p) : dest;
+
+   emit_op2(p, OPCODE_MUL, acc, 0, swizzle1(src,X), mat[0]);
+   emit_op3(p, OPCODE_MAD, acc, 0, swizzle1(src,Y), mat[1], acc);
+   emit_op3(p, OPCODE_MAD, acc, 0, swizzle1(src,Z), mat[2], acc);
+
+   /* The last term is written straight to dest: */
+   emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], acc);
+
+   if (scratch)
+      release_temp(p, acc);
+}
+
+/* Channel c of dest = DP3(src, mat[c]) for c in x,y,z; dest.w is untouched. */
+static void emit_matrix_transform_vec3( struct tnl_program *p, struct ureg dest,
+					const struct ureg *mat, struct ureg src)
+{
+   static const GLuint chan[3] = { WRITEMASK_X, WRITEMASK_Y, WRITEMASK_Z };
+   GLuint row;
+   for (row = 0; row < 3; row++)
+      emit_op2(p, OPCODE_DP3, dest, chan[row], src, mat[row]);
+}
+
+/* dest.xyz = src.xyz scaled by RSQ(DP3(src,src)); dest.w holds that scale. */
+static void emit_normalize_vec3( struct tnl_program *p,
+				 struct ureg dest, struct ureg src )
+{
+   const struct ureg inv_len = swizzle1(dest, W);
+   emit_op2(p, OPCODE_DP3, dest, WRITEMASK_W, src, src);
+   emit_op1(p, OPCODE_RSQ, dest, WRITEMASK_W, inv_len);
+   emit_op2(p, OPCODE_MUL, dest, WRITEMASK_XYZ, src, inv_len);
+}
+
+/* Copy vertex attribute `input` unchanged to result slot `output`. */
+static void emit_passthrough( struct tnl_program *p, GLuint input, GLuint output )
+{
+   const struct ureg dst = register_output(p, output);
+   const struct ureg src = register_input(p, input);
+   emit_op1(p, OPCODE_MOV, dst, 0, src);
+}
+
+/* Return the eye-space position, emitting the modelview transform of the
+ * POS attribute into a reserved temporary the first time it is needed.
+ */
+static struct ureg get_eye_position( struct tnl_program *p )
+{
+   struct ureg modelview[4];
+   struct ureg pos;
+
+   if (!is_undef(p->eye_position))
+      return p->eye_position;
+
+   pos = register_input( p, VERT_ATTRIB_POS ); 
+   p->eye_position = reserve_temp(p);
+
+   /* Plain matrix for the DP4 form, its transpose for the MUL/MAD form: */
+   register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
+			   PREFER_DP4 ? 0 : STATE_MATRIX_TRANSPOSE, modelview );
+   if (PREFER_DP4)
+      emit_matrix_transform_vec4(p, p->eye_position, modelview, pos);
+   else
+      emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos);
+
+   return p->eye_position;
+}
+
+/* Disabled draft: needs a tnl_program.eye_z member, which does not exist. */
+#if 0
+static struct ureg get_eye_z( struct tnl_program *p )
+{
+   if (!is_undef(p->eye_position)) {
+      return swizzle1(p->eye_position, Z);
+   }
+   else if (!is_undef(p->eye_z)) {
+      struct ureg pos = register_input( p, BRW_ATTRIB_POS ); 
+      struct ureg modelview2;
+
+      p->eye_z = reserve_temp(p);
+
+      register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 2, 1, 
+			      STATE_MATRIX, &modelview2 );
+
+      emit_matrix_transform_vec4(p, p->eye_position, modelview, pos);
+      emit_op2(p, OPCODE_DP4, p->eye_z, WRITEMASK_Z, pos, modelview2);
+   }
+   
+   return swizzle1(p->eye_z, Z)
+}
+#endif
+
+/* Return the normalized eye position, computed and cached on first use. */
+static struct ureg get_eye_position_normalized( struct tnl_program *p )
+{
+   struct ureg eye;
+
+   if (!is_undef(p->eye_position_normalized))
+      return p->eye_position_normalized;
+   eye = get_eye_position(p);
+   p->eye_position_normalized = reserve_temp(p);
+   emit_normalize_vec3(p, p->eye_position_normalized, eye);
+   return p->eye_position_normalized;
+}
+
+/* Return the eye-space normal: the NORMAL attribute transformed by the
+ * inverse-transpose modelview rows, then normalized or rescaled as the
+ * state key requests.  Computed once into a reserved temporary.
+ */
+static struct ureg get_eye_normal( struct tnl_program *p )
+{
+   struct ureg normal, mvinv[3];
+
+   if (!is_undef(p->eye_normal))
+      return p->eye_normal;
+
+   normal = register_input(p, VERT_ATTRIB_NORMAL );
+   register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2,
+			   STATE_MATRIX_INVTRANS, mvinv );
+   p->eye_normal = reserve_temp(p);
+
+   /* Transform to eye space: */
+   emit_matrix_transform_vec3( p, p->eye_normal, mvinv, normal );
+
+   /* Normalize/Rescale (normalize wins when both are requested): */
+   if (p->state->normalize) {
+      emit_normalize_vec3( p, p->eye_normal, p->eye_normal );
+   }
+   else if (p->state->rescale_normals) {
+      struct ureg rescale = register_param2(p, STATE_INTERNAL,
+					    STATE_NORMAL_SCALE);
+      emit_op2( p, OPCODE_MUL, p->eye_normal, 0, p->eye_normal, 
+		swizzle1(rescale, X));
+   }
+
+   return p->eye_normal;
+}
+
+/* Emit the HPOS result: the POS attribute transformed by the MVP
+ * matrix, using the DP4 or the MUL/MAD form of the transform according
+ * to PREFER_DP4.
+ */
+static void build_hpos( struct tnl_program *p )
+{
+   const struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 
+   const struct ureg hpos = register_output( p, VERT_RESULT_HPOS );
+   struct ureg mvp[4];
+
+   /* Plain matrix for the DP4 form, its transpose for the MUL/MAD form: */
+   register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
+			   PREFER_DP4 ? 0 : STATE_MATRIX_TRANSPOSE, mvp );
+
+   if (PREFER_DP4)
+      emit_matrix_transform_vec4( p, hpos, mvp, pos );
+   else
+      emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos );
+}
+
+/* Material slot index: two consecutive slots (one per side) per property. */
+static GLuint material_attrib( GLuint side, GLuint property )
+{
+   return side + 2 * (property - STATE_AMBIENT);
+}
+
+/* Get a bitmask of which material values vary on a per-vertex basis:
+ * those tracked by color material, plus those the state key reports
+ * as arriving in per-vertex material attributes.
+ */
+static void set_material_flags( struct tnl_program *p )
+{
+   const struct state_key *key = p->state;
+   GLuint tracked = 0;
+   if (key->light_color_material)
+      tracked = key->light_color_material_mask;
+
+   p->color_materials = tracked;
+   p->materials = tracked | key->light_material_mask;
+}
+
+/* Where one material property comes from: COLOR0 when color material
+ * tracks it, a per-vertex material attribute, or constant GL state. */
+static struct ureg get_material( struct tnl_program *p, GLuint side, 
+				 GLuint property )
+{
+   const GLuint attrib = material_attrib(side, property);
+
+   if (p->color_materials & (1<<attrib))
+      return register_input(p, VERT_ATTRIB_COLOR0);
+   if (p->materials & (1<<attrib)) 
+      return register_input( p, attrib + _TNL_ATTRIB_MAT_FRONT_AMBIENT );
+   return register_param3( p, STATE_MATERIAL, side, property );
+}
+
+#define SCENE_COLOR_BITS(side) ((MAT_BIT_FRONT_EMISSION | \
+				 MAT_BIT_FRONT_AMBIENT | \
+				 MAT_BIT_FRONT_DIFFUSE) << (side))
+
+/* Scene colour for one side of the material.  When none of emission,
+ * ambient or diffuse varies per vertex the precalculated state value
+ * is returned; otherwise code is emitted to calculate it dynamically
+ * (material calls between begin/end pairs).  It would probably be
+ * better to always emit the calculation and let a smart compiler lift
+ * it out when it is constant, so that the programs created here would
+ * be independent of the vertex_buffer details.
+ */
+static struct ureg get_scenecolor( struct tnl_program *p, GLuint side )
+{
+   struct ureg lm_ambient, emission, ambient, diffuse, colour;
+
+   if (!(p->materials & SCENE_COLOR_BITS(side)))
+      return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side );
+
+   lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT);
+   emission = get_material(p, side, STATE_EMISSION);
+   ambient = get_material(p, side, STATE_AMBIENT);
+   diffuse = get_material(p, side, STATE_DIFFUSE);
+   /* Start from diffuse (keeps its w), then overwrite xyz: */
+   colour = make_temp(p, diffuse);
+   emit_op3(p, OPCODE_MAD, colour,  WRITEMASK_XYZ, lm_ambient, 
+	    ambient, emission);
+   return colour;
+}
+
+
+static struct ureg get_lightprod( struct tnl_program *p, GLuint light, 
+				  GLuint side, GLuint property )
+{
+   GLuint attrib = material_attrib(side, property);
+   if (p->materials & (1<<attrib)) { /* this material value varies per vertex */
+      struct ureg light_value = 
+	 register_param3(p, STATE_LIGHT, light, property);
+      struct ureg material_value = get_material(p, side, property);
+      struct ureg tmp = get_temp(p);
+      emit_op2(p, OPCODE_MUL, tmp,  0, light_value, material_value); /* tmp = light * material */
+      return tmp;
+   }
+   else /* otherwise read the STATE_LIGHTPROD parameter for this light/side/property */
+      return register_param4(p, STATE_LIGHTPROD, light, side, property);
+}
+
+static struct ureg calculate_light_attenuation( struct tnl_program *p,
+						GLuint i, 
+						struct ureg VPpli,
+						struct ureg dist )
+{
+   struct ureg attenuation = register_param3(p, STATE_LIGHT, i,
+					     STATE_ATTENUATION);
+   struct ureg att = get_temp(p);
+   /* att is only written when at least one of the two tests below holds. */
+   /* Calculate spot attenuation:
+    * att = (spot_dir_norm.w < -VPpli.spot_dir_norm) * pow(-VPpli.spot_dir_norm, attenuation.w) */
+   if (!p->state->unit[i].light_spotcutoff_is_180) {
+      struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL,
+						  STATE_SPOT_DIR_NORMALIZED, i);
+      struct ureg spot = get_temp(p);
+      struct ureg slt = get_temp(p);
+
+      emit_op2(p, OPCODE_DP3, spot, 0, ureg_negate(VPpli), spot_dir_norm);
+      emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
+      emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
+      emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
+
+      release_temp(p, spot);
+      release_temp(p, slt);
+   }
+
+   /* Calculate distance attenuation:
+    * the component notes below assume dist holds 1/d on entry; dist is overwritten. */
+   if (p->state->unit[i].light_attenuated) {
+
+      /* 1/d,d,d,1/d */
+      emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist); 
+      /* 1,d,d*d,1/d */
+      emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y)); 
+      /* 1/dist-atten */
+      emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist); 
+
+      if (!p->state->unit[i].light_spotcutoff_is_180) {
+	 /* dist-atten */
+	 emit_op1(p, OPCODE_RCP, dist, 0, dist); 
+	 /* spot-atten * dist-atten */
+	 emit_op2(p, OPCODE_MUL, att, 0, dist, att);	
+      } else {
+	 /* dist-atten */
+	 emit_op1(p, OPCODE_RCP, att, 0, dist); 
+      }
+   }
+
+   return att;
+}
+						
+
+
+
+
+/* Need to add some additional parameters to allow lighting in object
+ * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye
+ * space lighting.
+ */
+static void build_lighting( struct tnl_program *p )
+{
+   const GLboolean twoside = p->state->light_twoside;
+   const GLboolean separate = p->state->separate_specular;
+   GLuint nr_lights = 0, count = 0;
+   struct ureg normal = get_eye_normal(p);
+   struct ureg lit = get_temp(p);
+   struct ureg dots = get_temp(p);
+   struct ureg _col0 = undef, _col1 = undef;
+   struct ureg _bfc0 = undef, _bfc1 = undef;
+   GLuint i;
+
+   for (i = 0; i < MAX_LIGHTS; i++) 
+      if (p->state->unit[i].light_enabled)
+	 nr_lights++;
+   
+   set_material_flags(p);
+
+   {
+      struct ureg shininess = get_material(p, 0, STATE_SHININESS);
+      emit_op1(p, OPCODE_MOV, dots,  WRITEMASK_W, swizzle1(shininess,X)); /* dots.w = front shininess */
+      release_temp(p, shininess);
+
+      _col0 = make_temp(p, get_scenecolor(p, 0));
+      if (separate)
+	 _col1 = make_temp(p, get_identity_param(p));
+      else
+	 _col1 = _col0;
+
+   }
+
+   if (twoside) {
+      struct ureg shininess = get_material(p, 1, STATE_SHININESS);
+      emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z, 
+	       ureg_negate(swizzle1(shininess,X))); /* dots.z = -(back shininess) */
+      release_temp(p, shininess);
+
+      _bfc0 = make_temp(p, get_scenecolor(p, 1));
+      if (separate)
+	 _bfc1 = make_temp(p, get_identity_param(p));
+      else
+	 _bfc1 = _bfc0;
+   }
+
+
+   /* If no lights, still need to emit the scenecolor.
+    */
+   /* KW: changed to do this always - v1.17 "Fix lighting alpha result"? 
+    */
+   if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
+   {
+      struct ureg res0 = register_output( p, VERT_RESULT_COL0 );
+      emit_op1(p, OPCODE_MOV, res0, 0, _col0);
+
+      if (twoside) {
+	 struct ureg res0 = register_output( p, VERT_RESULT_BFC0 );
+	 emit_op1(p, OPCODE_MOV, res0, 0, _bfc0);
+      }
+   }
+
+   if (separate && (p->state->fragprog_inputs_read & FRAG_BIT_COL1)) {
+
+      struct ureg res1 = register_output( p, VERT_RESULT_COL1 );
+      emit_op1(p, OPCODE_MOV, res1, 0, _col1);
+      
+      if (twoside) {
+	 struct ureg res1 = register_output( p, VERT_RESULT_BFC1 );
+	 emit_op1(p, OPCODE_MOV, res1, 0, _bfc1);
+      }
+   }
+      
+   if (nr_lights == 0) {
+      release_temps(p);
+      return;
+   }
+
+
+   for (i = 0; i < MAX_LIGHTS; i++) {
+      if (p->state->unit[i].light_enabled) {
+	 struct ureg half = undef;
+	 struct ureg att = undef, VPpli = undef;
+	  
+	 count++; /* reaches nr_lights on the last enabled light */
+
+	 if (p->state->unit[i].light_eyepos3_is_zero) {
+	    /* Can use precomputed constants in this case.
+	     * Attenuation never applies to infinite lights.
+	     */
+	    VPpli = register_param3(p, STATE_LIGHT, i, 
+				    STATE_POSITION_NORMALIZED); 
+            if (p->state->light_local_viewer) {
+                struct ureg eye_hat = get_eye_position_normalized(p);
+                half = get_temp(p);
+                emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
+                emit_normalize_vec3(p, half, half);
+            } else {
+                half = register_param3(p, STATE_LIGHT, i, STATE_HALF_VECTOR);
+            }
+	 } 
+	 else {
+	    struct ureg Ppli = register_param3(p, STATE_LIGHT, i, 
+					       STATE_POSITION); 
+	    struct ureg V = get_eye_position(p);
+	    struct ureg dist = get_temp(p);
+       struct ureg tmpPpli = get_temp(p);
+
+	    VPpli = get_temp(p); 
+	    half = get_temp(p);
+
+       /* In homogeneous object coordinates
+        */
+       emit_op1(p, OPCODE_RCP, dist, 0, swizzle1(Ppli, W));
+       emit_op2(p, OPCODE_MUL, tmpPpli, 0, Ppli, dist);
+
+	    /* Calculate VPpli vector
+	     */
+	    emit_op2(p, OPCODE_SUB, VPpli, 0, tmpPpli, V); 
+
+	    /* Normalize VPpli.  The dist value also used in
+	     * attenuation below.
+	     */
+	    emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli);
+	    emit_op1(p, OPCODE_RSQ, dist, 0, dist);
+	    emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist);
+
+
+	    /* Calculate  attenuation:
+	     */ 
+	    if (!p->state->unit[i].light_spotcutoff_is_180 ||
+		p->state->unit[i].light_attenuated) {
+	       att = calculate_light_attenuation(p, i, VPpli, dist);
+	    }
+	 
+      
+	    /* Calculate viewer direction, or use infinite viewer:
+	     */
+	    if (p->state->light_local_viewer) {
+	       struct ureg eye_hat = get_eye_position_normalized(p);
+	       emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
+	    }
+	    else {
+	       struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z); 
+	       emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir);
+	    }
+
+	    emit_normalize_vec3(p, half, half);
+
+	    release_temp(p, dist);
+       release_temp(p, tmpPpli);
+	 }
+
+	 /* Calculate dot products:
+	  */
+	 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
+	 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
+
+	
+	 /* Front face lighting:
+	  */
+	 {
+	    struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT);
+	    struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE);
+	    struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR);
+	    struct ureg res0, res1;
+	    GLuint mask0, mask1;
+
+	    emit_op1(p, OPCODE_LIT, lit, 0, dots);
+   
+	    if (!is_undef(att)) 
+	       emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
+
+
+	    mask0 = 0;
+	    mask1 = 0;
+	    res0 = _col0;
+	    res1 = _col1;
+	    /* Last light: retarget the final MADs at the result registers. */
+	    if (count == nr_lights) {
+	       if (separate) {
+		  mask0 = WRITEMASK_XYZ;
+		  mask1 = WRITEMASK_XYZ;
+
+		  if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
+		     res0 = register_output( p, VERT_RESULT_COL0 );
+
+		  if (p->state->fragprog_inputs_read & FRAG_BIT_COL1)
+		     res1 = register_output( p, VERT_RESULT_COL1 );
+	       }
+	       else {
+		  mask1 = WRITEMASK_XYZ;
+
+		  if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
+		     res1 = register_output( p, VERT_RESULT_COL0 );
+	       }
+	    } 
+
+	    emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0);
+	    emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0);
+	    emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1);
+      
+	    release_temp(p, ambient);
+	    release_temp(p, diffuse);
+	    release_temp(p, specular);
+	 }
+
+	 /* Back face lighting:
+	  */
+	 if (twoside) {
+	    struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT);
+	    struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE);
+	    struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR);
+	    struct ureg res0, res1;
+	    GLuint mask0, mask1;
+	       
+	    emit_op1(p, OPCODE_LIT, lit, 0, ureg_negate(swizzle(dots,X,Y,W,Z)));
+
+	    if (!is_undef(att)) 
+	       emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
+
+	    mask0 = 0;
+	    mask1 = 0;
+	    res0 = _bfc0;
+	    res1 = _bfc1;
+
+	    if (count == nr_lights) {
+	       if (separate) {
+		  mask0 = WRITEMASK_XYZ;
+		  mask1 = WRITEMASK_XYZ;
+		  if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
+		     res0 = register_output( p, VERT_RESULT_BFC0 );
+
+		  if (p->state->fragprog_inputs_read & FRAG_BIT_COL1)
+		     res1 = register_output( p, VERT_RESULT_BFC1 );
+	       }
+	       else {
+		  mask1 = WRITEMASK_XYZ;
+
+		  if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
+		     res1 = register_output( p, VERT_RESULT_BFC0 );
+	       }
+	    }
+
+	    emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0);
+	    emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0);
+	    emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1);
+
+	    release_temp(p, ambient);
+	    release_temp(p, diffuse);
+	    release_temp(p, specular);
+	 }
+
+	 release_temp(p, half);
+	 release_temp(p, VPpli);
+	 release_temp(p, att);
+      }
+   }
+
+   release_temps( p );
+}
+
+
+static void build_fog( struct tnl_program *p )
+{
+   struct ureg fog = register_output(p, VERT_RESULT_FOGC);
+   struct ureg input;
+   /* Fog input: eye-space Z when fog_source_is_depth, else the fog attribute's X. */
+   if (p->state->fog_source_is_depth) {
+      input = swizzle1(get_eye_position(p), Z);
+   }
+   else {
+      input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X);
+   }
+
+   if (p->state->fog_option &&
+       p->state->tnl_do_vertex_fog) {
+      struct ureg params = register_param2(p, STATE_INTERNAL,
+					   STATE_FOG_PARAMS_OPTIMIZED);
+      struct ureg tmp = get_temp(p);
+      struct ureg id = get_identity_param(p);
+      /* Start from the identity parameter; each case below replaces only X. */
+      emit_op1(p, OPCODE_MOV, fog, 0, id);
+
+      switch (p->state->fog_option) {
+      case FOG_LINEAR: { /* fog.x = min(max(|input| * params.x + params.y, id.x), id.w) */
+	 emit_op1(p, OPCODE_ABS, tmp, 0, input);
+	 emit_op3(p, OPCODE_MAD, tmp, 0, tmp, swizzle1(params,X), swizzle1(params,Y));
+	 emit_op2(p, OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */
+	 emit_op2(p, OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W));
+	 break;
+      }
+      case FOG_EXP: /* fog.x = EX2(-(|input| * params.z)) */
+	 emit_op1(p, OPCODE_ABS, tmp, 0, input); 
+	 emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,Z));
+	 emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, ureg_negate(tmp));
+	 break;
+      case FOG_EXP2: /* fog.x = EX2(-((input * params.w)^2)); squaring makes ABS unnecessary */
+	 emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,W));
+	 emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp); 
+	 emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, ureg_negate(tmp));
+	 break;
+      }
+      
+      release_temp(p, tmp);
+   }
+   else {
+      /* results = incoming fog coords (compute fog per-fragment later) 
+       *
+       * KW:  Is it really necessary to do anything in this case?
+       */
+      emit_op1(p, OPCODE_MOV, fog, 0, input);
+   }
+}
+ 
+static void build_reflect_texgen( struct tnl_program *p,
+				  struct ureg dest,
+				  GLuint writemask )
+{
+   struct ureg normal = get_eye_normal(p);
+   struct ureg eye_hat = get_eye_position_normalized(p);
+   struct ureg tmp = get_temp(p);
+   /* Writes u - 2(n.u)n to dest under writemask; n = normal, u = eye_hat. */
+   /* n.u */
+   emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 
+   /* 2n.u */
+   emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 
+   /* (-2n.u)n + u */
+   emit_op3(p, OPCODE_MAD, dest, writemask, ureg_negate(tmp), normal, eye_hat);
+
+   release_temp(p, tmp);
+}
+
+static void build_sphere_texgen( struct tnl_program *p,
+				 struct ureg dest,
+				 GLuint writemask )
+{
+   struct ureg normal = get_eye_normal(p);
+   struct ureg eye_hat = get_eye_position_normalized(p);
+   struct ureg tmp = get_temp(p);
+   struct ureg half = register_scalar_const(p, .5);
+   struct ureg r = get_temp(p);
+   struct ureg inv_m = get_temp(p);
+   struct ureg id = get_identity_param(p);
+
+   /* Could share the above calculations, but it would be
+    * a fairly odd state for someone to set (both sphere and
+    * reflection active for different texture coordinate
+    * components).  Of course - if two texture units enable
+    * reflect and/or sphere, things start to tilt in favour
+    * of separating this out:
+    */
+
+   /* n.u */
+   emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 
+   /* 2n.u */
+   emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 
+   /* (-2n.u)n + u */
+   emit_op3(p, OPCODE_MAD, r, 0, ureg_negate(tmp), normal, eye_hat); 
+   /* r + 0,0,1 */
+   emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z)); 
+   /* rx^2 + ry^2 + (rz+1)^2 */
+   emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp); 
+   /* 2/m */
+   emit_op1(p, OPCODE_RSQ, tmp, 0, tmp); 
+   /* 1/m */
+   emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half); 
+   /* r/m + 1/2 */
+   emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half); 
+	       
+   release_temp(p, tmp);
+   release_temp(p, r);
+   release_temp(p, inv_m);
+}
+
+
+static void build_texture_transform( struct tnl_program *p )
+{
+   GLuint i, j;
+
+   for (i = 0; i < MAX_TEXTURE_UNITS; i++) {
+
+      if (!(p->state->fragprog_inputs_read & (FRAG_BIT_TEX0<<i)))
+	 continue;
+      /* Texgen and/or a texture matrix need code; otherwise pass through. */
+      if (p->state->unit[i].texgen_enabled || 
+	  p->state->unit[i].texmat_enabled) {
+	 
+	 GLuint texmat_enabled = p->state->unit[i].texmat_enabled;
+	 struct ureg out = register_output(p, VERT_RESULT_TEX0 + i);
+	 struct ureg out_texgen = undef;
+
+	 if (p->state->unit[i].texgen_enabled) {
+	    GLuint copy_mask = 0;
+	    GLuint sphere_mask = 0;
+	    GLuint reflect_mask = 0;
+	    GLuint normal_mask = 0;
+	    GLuint modes[4];
+	 
+	    if (texmat_enabled) /* texgen result feeds the matrix, so stage it in a temp */
+	       out_texgen = get_temp(p);
+	    else
+	       out_texgen = out;
+
+	    modes[0] = p->state->unit[i].texgen_mode0;
+	    modes[1] = p->state->unit[i].texgen_mode1;
+	    modes[2] = p->state->unit[i].texgen_mode2;
+	    modes[3] = p->state->unit[i].texgen_mode3;
+
+	    for (j = 0; j < 4; j++) { /* one mode per coordinate component */
+	       switch (modes[j]) {
+	       case TXG_OBJ_LINEAR: {
+		  struct ureg obj = register_input(p, VERT_ATTRIB_POS);
+		  struct ureg plane = 
+		     register_param3(p, STATE_TEXGEN, i,
+				     STATE_TEXGEN_OBJECT_S + j);
+
+		  emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 
+			   obj, plane );
+		  break;
+	       }
+	       case TXG_EYE_LINEAR: {
+		  struct ureg eye = get_eye_position(p);
+		  struct ureg plane = 
+		     register_param3(p, STATE_TEXGEN, i, 
+				     STATE_TEXGEN_EYE_S + j);
+
+		  emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 
+			   eye, plane );
+		  break;
+	       }
+	       case TXG_SPHERE_MAP: 
+		  sphere_mask |= WRITEMASK_X << j;
+		  break;
+	       case TXG_REFLECTION_MAP:
+		  reflect_mask |= WRITEMASK_X << j;
+		  break;
+	       case TXG_NORMAL_MAP: 
+		  normal_mask |= WRITEMASK_X << j;
+		  break;
+	       case TXG_NONE: /* component is copied from the input coordinate below */
+		  copy_mask |= WRITEMASK_X << j;
+	       }
+
+	    }
+
+	    /* Components sharing a mode are emitted together under one writemask. */
+	    if (sphere_mask) {
+	       build_sphere_texgen(p, out_texgen, sphere_mask);
+	    }
+
+	    if (reflect_mask) {
+	       build_reflect_texgen(p, out_texgen, reflect_mask);
+	    }
+
+	    if (normal_mask) {
+	       struct ureg normal = get_eye_normal(p);
+	       emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal );
+	    }
+
+	    if (copy_mask) {
+	       struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i);
+	       emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in );
+	    }
+	 }
+
+	 if (texmat_enabled) {
+	    struct ureg texmat[4];
+	    struct ureg in = (!is_undef(out_texgen) ? 
+			      out_texgen : 
+			      register_input(p, VERT_ATTRIB_TEX0+i));
+	    if (PREFER_DP4) {
+	       register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
+				       0, texmat );
+	       emit_matrix_transform_vec4( p, out, texmat, in );
+	    }
+	    else {
+	       register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
+				       STATE_MATRIX_TRANSPOSE, texmat );
+	       emit_transpose_matrix_transform_vec4( p, out, texmat, in );
+	    }
+	 }
+
+	 release_temps(p);
+      } 
+      else {
+	 emit_passthrough(p, VERT_ATTRIB_TEX0+i, VERT_RESULT_TEX0+i);
+      }
+   }
+}
+
+
+/* Emit attenuated point size.  Seems like it could be tighter:
+ */
+static void build_pointsize( struct tnl_program *p )
+{
+   struct ureg eye = get_eye_position(p);
+   struct ureg state_size = register_param1(p, STATE_POINT_SIZE);
+   struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION);
+   struct ureg out = register_output(p, VERT_RESULT_PSIZ);
+   struct ureg ut = get_temp(p);
+
+   /* 1, |Z|, Z * Z, 1 */      
+   emit_op1(p, OPCODE_MOV, ut, WRITEMASK_XW, swizzle1(get_identity_param(p), W));
+   emit_op1(p, OPCODE_ABS, ut, WRITEMASK_YZ, swizzle1(eye, Z));
+   emit_op2(p, OPCODE_MUL, ut, WRITEMASK_Z, ut, ut);
+
+
+   /* factor = p1 +  p2 * dist + p3 * dist * dist, with dist = |Z| */
+   emit_op2(p, OPCODE_DP3, ut, WRITEMASK_X, ut, state_attenuation);
+
+   /* 1 / sqrt(factor) */
+   emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut ); 
+
+   /* ut = pointSize / sqrt(factor) */
+   emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size); 
+
+   /* Clamp to min/max - state_size.[yz]
+    */
+   emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y)); 
+   emit_op2(p, OPCODE_MIN, out, 0, swizzle1(ut, X), swizzle1(state_size, Z)); 
+   
+   release_temp(p, ut);
+}
+
+static void build_tnl_program( struct tnl_program *p )
+{  
+   /* Emit the program, starting with modelviewproject:
+    */
+   build_hpos(p);
+
+   /* Lighting calculations (only when the fragment program reads a colour):
+    */
+   if (p->state->fragprog_inputs_read & (FRAG_BIT_COL0|FRAG_BIT_COL1)) {
+      if (p->state->light_global_enabled)
+	 build_lighting(p);
+      else { /* lighting disabled: copy the vertex colours straight through */
+	 if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
+	    emit_passthrough(p, VERT_ATTRIB_COLOR0, VERT_RESULT_COL0);
+
+	 if (p->state->fragprog_inputs_read & FRAG_BIT_COL1)
+	    emit_passthrough(p, VERT_ATTRIB_COLOR1, VERT_RESULT_COL1);
+      }
+   }
+
+   if ((p->state->fragprog_inputs_read & FRAG_BIT_FOGC) ||
+       p->state->fog_option != FOG_NONE)
+      build_fog(p);
+
+   if (p->state->fragprog_inputs_read & FRAG_BITS_TEX_ANY)
+      build_texture_transform(p);
+
+   if (p->state->point_attenuated)
+      build_pointsize(p);
+
+   /* Finish up:
+    */
+   emit_op1(p, OPCODE_END, undef, 0, undef);
+
+   /* Disassemble (at present this only prints a newline):
+    */
+   if (DISASSEM) {
+      _mesa_printf ("\n");
+   }
+}
+
+
+static void build_new_tnl_program( const struct state_key *key,
+				   struct gl_vertex_program *program,
+				   GLuint max_temps)
+{
+   struct tnl_program p;
+
+   /* Start from a zeroed builder whose cached registers are all undef. */
+   _mesa_memset(&p, 0, sizeof(p));
+   p.state = key;
+   p.program = program;
+   p.eye_position = p.eye_position_normalized = undef;
+   p.eye_normal = p.identity = undef;
+   p.temp_in_use = 0;
+   p.nr_instructions = 16;
+
+   /* Set every temp_reserved bit at or above max_temps; when max_temps
+    * covers the whole width of int, nothing is reserved (and no shift). */
+   p.temp_reserved = (max_temps >= sizeof(int) * 8) ? 0 : ~((1<<max_temps)-1);
+
+   /* Give the program a fresh instruction array and parameter list. */
+   program->Base.Instructions = 
+      _mesa_malloc(sizeof(struct prog_instruction) * p.nr_instructions);
+   program->Base.String = 0;
+   program->Base.NumAddressRegs = 0;
+   program->Base.NumAttributes = 0;
+   program->Base.NumParameters = 0;
+   program->Base.NumTemporaries = 0;
+   program->Base.NumInstructions = 0;
+   program->Base.Parameters = _mesa_new_parameter_list();
+   program->Base.InputsRead = 0;
+   program->Base.OutputsWritten = 0;
+
+   build_tnl_program( &p );
+}
+
+static void *search_cache( struct brw_tnl_cache *cache,
+			   GLuint hash,
+			   const void *key,
+			   GLuint keysize)
+{
+   /* Walk the chain of the bucket selected by hash; NULL when no match. */
+   struct brw_tnl_cache_item *item = cache->items[hash % cache->size];
+   while (item) {
+      if (item->hash == hash && memcmp(item->key, key, keysize) == 0)
+	 return item->data;
+      item = item->next;
+   }
+   return NULL;
+}
+
+static void rehash( struct brw_tnl_cache *cache )
+{
+   struct brw_tnl_cache_item **items;
+   struct brw_tnl_cache_item *c, *next;
+   GLuint size, i;
+
+   /* Triple the bucket count; on allocation failure keep the current table. */
+   size = cache->size * 3;
+   items = (struct brw_tnl_cache_item**) _mesa_calloc(size * sizeof(*items));
+   if (!items)
+      return;
+   for (i = 0; i < cache->size; i++)   /* relink every item into its new bucket */
+      for (c = cache->items[i]; c; c = next) {
+	 next = c->next;
+	 c->next = items[c->hash % size];
+	 items[c->hash % size] = c;
+      }
+   FREE(cache->items);
+   cache->items = items;
+   cache->size = size;
+}
+
+static void cache_item( struct brw_tnl_cache *cache,
+			GLuint hash,
+			const struct state_key *key,
+			void *data )
+{
+   struct brw_tnl_cache_item *entry = MALLOC(sizeof(*entry));
+   struct brw_tnl_cache_item **bucket;
+   /* The entry keeps a private copy of the key; data is stored as given. */
+   entry->hash = hash;
+   entry->key = malloc(sizeof(*key));
+   memcpy(entry->key, key, sizeof(*key));
+   entry->data = data;
+   /* Grow first, so the bucket is chosen against the final table size. */
+   if (++cache->n_items > cache->size * 1.5)
+      rehash(cache);
+   bucket = &cache->items[hash % cache->size];
+   entry->next = *bucket;
+   *bucket = entry;
+}
+
+
+static GLuint hash_key( struct state_key *key )
+{
+   const GLuint *words = (const GLuint *)key;
+   const GLuint nr_words = sizeof(*key)/sizeof(GLuint);
+   GLuint sum = 0, w;
+
+   /* Plain additive hash over the key's whole GLuint words - chosen for
+    * speed rather than distribution. */
+   for (w = 0; w < nr_words; w++)
+      sum += words[w];
+   return sum;
+}
+
+static void update_tnl_program( struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct state_key key;
+   GLuint hash;
+   struct gl_vertex_program *old = brw->tnl_program;
+
+   /* _NEW_PROGRAM: nothing to do while a user vertex program is enabled */
+   if (brw->attribs.VertexProgram->_Enabled) 
+      return;
+      
+   /* Grab all the relevant state and put it in a single structure:
+    */
+   make_state_key(ctx, &key);
+   hash = hash_key(&key);
+
+   /* Look for an already-prepared program for this state:
+    */
+   brw->tnl_program = (struct gl_vertex_program *)
+      search_cache( &brw->tnl_program_cache, hash, &key, sizeof(key) );
+   
+   /* OK, we'll have to build a new one:
+    */
+   if (!brw->tnl_program) {
+      brw->tnl_program = (struct gl_vertex_program *)
+	 ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0); 
+
+      build_new_tnl_program( &key, brw->tnl_program, 
+/* 			     ctx->Const.MaxVertexProgramTemps  */
+			     32 /* hard-coded value passed as max_temps */
+	 );
+
+      if (ctx->Driver.ProgramStringNotify)
+	 ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB, 
+					  &brw->tnl_program->Base );
+
+      cache_item( &brw->tnl_program_cache, 
+		  hash, &key, brw->tnl_program ); /* the cache now holds the program pointer */
+   }
+
+   if (old != brw->tnl_program) /* flag only an actual change of program */
+      brw->state.dirty.brw |= BRW_NEW_TNL_PROGRAM;
+}
+
+/* Note: See brw_draw.c - the vertex program must not rely on
+ * brw->primitive or brw->reduced_prim.
+ */
+const struct brw_tracked_state brw_tnl_vertprog = {
+   .dirty = { /* NOTE(review): presumably the flags whose change re-runs .update - confirm */
+      .mesa = (_NEW_PROGRAM | 
+	       _NEW_LIGHT | 
+	       _NEW_TRANSFORM | 
+	       _NEW_FOG | 
+	       _NEW_HINT | 
+	       _NEW_POINT | 
+	       _NEW_TEXTURE),
+      .brw = (BRW_NEW_FRAGMENT_PROGRAM | 
+	      BRW_NEW_INPUT_VARYING),
+      .cache = 0
+   },
+   .update = update_tnl_program /* looks up or builds brw->tnl_program */
+};
+
+
+
+
+static void update_active_vertprog( struct brw_context *brw )
+{
+   const struct gl_vertex_program *before = brw->vertex_program;
+
+   /* Choose the program to run: the generated fixed-function program
+    * (BRW_NEW_TNL_PROGRAM) unless a user vertex program is enabled
+    * (NEW_PROGRAM).
+    */
+   if (!brw->attribs.VertexProgram->_Enabled)
+      brw->vertex_program = brw->tnl_program;
+   else
+      brw->vertex_program = brw->attribs.VertexProgram->Current;
+
+   if (before != brw->vertex_program) 
+      brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+}
+
+
+
+const struct brw_tracked_state brw_active_vertprog = {
+   .dirty = { /* matches the NEW_PROGRAM / BRW_NEW_TNL_PROGRAM notes in update_active_vertprog */
+      .mesa = _NEW_PROGRAM,
+      .brw = BRW_NEW_TNL_PROGRAM,
+      .cache = 0
+   },
+   .update = update_active_vertprog /* selects brw->vertex_program */
+};
+
+
+void brw_ProgramCacheInit( GLcontext *ctx )
+{
+   struct brw_context *brw = brw_context(ctx);
+   /* The bucket array holds item pointers, so size it by the pointer type. */
+   brw->tnl_program_cache.size = 17;
+   brw->tnl_program_cache.n_items = 0;
+   brw->tnl_program_cache.items = (struct brw_tnl_cache_item **)
+      _mesa_calloc(brw->tnl_program_cache.size * 
+		   sizeof(*brw->tnl_program_cache.items));
+}
+
+void brw_ProgramCacheDestroy( GLcontext *ctx )
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_tnl_cache_item *c, *next;
+   GLuint i;
+   /* Release every chained item together with its key copy and program. */
+   for (i = 0; i < brw->tnl_program_cache.size; i++)
+      for (c = brw->tnl_program_cache.items[i]; c; c = next) {
+	 next = c->next;
+	 FREE(c->key);
+	 ctx->Driver.DeleteProgram(ctx, &((struct gl_vertex_program *) c->data)->Base); /* pairs with Driver.NewProgram; a bare FREE leaked Instructions/Parameters */
+	 FREE(c);
+      }
+
+   FREE(brw->tnl_program_cache.items);
+}
diff --git a/i965/brw_vtbl.c b/i965/brw_vtbl.c
new file mode 100644
index 0000000..786f30e
--- /dev/null
+++ b/i965/brw_vtbl.c
@@ -0,0 +1,185 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+            
+
+
+
+#include "glheader.h"
+#include "mtypes.h"
+#include "imports.h"
+#include "macros.h"
+#include "colormac.h"
+
+#include "intel_batchbuffer.h" 
+#include "intel_regions.h" 
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_state.h"
+
+#include "brw_draw.h"
+#include "brw_state.h"
+#include "brw_aub.h"
+#include "brw_fallback.h"
+#include "brw_vs.h"
+
+
+
+/* called from intelDestroyContext()
+ * Runs the brw-level destroy routines for this context, in order. */
+static void brw_destroy_context( struct intel_context *intel )
+{
+   GLcontext *ctx = &intel->ctx;
+   struct brw_context *brw = brw_context(ctx);
+
+   brw_aub_destroy(brw);
+   brw_destroy_metaops(brw);
+   brw_destroy_state(brw);
+   brw_draw_destroy( brw );
+
+   /* Program cache and framebuffer-texture data go last. */
+   brw_ProgramCacheDestroy( ctx );
+   brw_FrameBufferTexDestroy( brw );
+}
+
+/* called from intelDrawBuffer()
+ * Swaps the draw/depth regions held in brw->state for the given ones. */
+static void brw_set_draw_region( struct intel_context *intel, 
+				  struct intel_region *draw_region,
+				  struct intel_region *depth_region)
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   /* NOTE(review): old regions are released before the new ones are referenced - confirm safe when a region is unchanged. */
+   intel_region_release(intel, &brw->state.draw_region);
+   intel_region_release(intel, &brw->state.depth_region);
+   intel_region_reference(&brw->state.draw_region, draw_region);
+   intel_region_reference(&brw->state.depth_region, depth_region);
+}
+
+
+/* called from intelFlushBatchLocked
+ */
+static void brw_lost_hardware( struct intel_context *intel )
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+
+   /* Note that we effectively lose the context after this.
+    * 
+    * Setting this flag provokes a state buffer wrap and also flushes
+    * the hardware caches.
+    */
+   brw->state.dirty.brw |= BRW_NEW_CONTEXT;
+
+   /* Which means there shouldn't be any commands already queued:
+    */
+   assert(intel->batch->ptr == intel->batch->map + intel->batch->offset);
+   /* Then set every bit in the mesa, brw and cache dirty words: */
+   brw->state.dirty.mesa |= ~0;
+   brw->state.dirty.brw |= ~0;
+   brw->state.dirty.cache |= ~0;
+}
+
+static void brw_note_fence( struct intel_context *intel, 
+			    GLuint fence )
+{  /* the fence value itself is unused; only BRW_NEW_FENCE is raised */
+   brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE;
+}
+ 
+static void brw_note_unlock( struct intel_context *intel )
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+
+   /* Run the wrap check on both pools, then raise BRW_NEW_LOCK. */
+   brw_pool_check_wrap(brw, &brw->pool[BRW_GS_POOL]);
+   brw_pool_check_wrap(brw, &brw->pool[BRW_SS_POOL]);
+   brw->state.dirty.brw |= BRW_NEW_LOCK;
+}
+
+
+void brw_do_flush( struct brw_context *brw, 
+		   GLuint flags )
+{
+   struct brw_mi_flush cmd;
+   memset(&cmd, 0, sizeof(cmd));   /* clear every field before filling in */
+   cmd.flags = flags;
+   cmd.opcode = CMD_MI_FLUSH;
+   BRW_BATCH_STRUCT(brw, &cmd);
+}
+
+
+static void brw_emit_flush( struct intel_context *intel,
+			GLuint unused )
+{
+   const GLuint caches = BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE;
+   brw_do_flush(brw_context(&intel->ctx), caches);  /* second argument is ignored */
+}
+
+
+/* called from intelWaitForIdle() and intelFlush()
+ *
+ * For now, just flush everything.  Could be smarter later.
+ */
+static GLuint brw_flush_cmd( void )
+{
+   struct brw_mi_flush flush;
+   memset(&flush, 0, sizeof(flush));  /* as brw_do_flush: no bit left indeterminate */
+   flush.opcode = CMD_MI_FLUSH;
+   flush.flags = BRW_FLUSH_READ_CACHE | BRW_FLUSH_STATE_CACHE;
+   return *(GLuint *)&flush;          /* the packet is returned as one raw dword */
+}
+
+
+
+
+static void brw_invalidate_state( struct intel_context *intel, GLuint new_state )
+{
+   /* nothing: both arguments are ignored by this hook */
+}
+
+
+void brwInitVtbl( struct brw_context *brw )
+{
+   brw->intel.vtbl.destroy = brw_destroy_context;   /* hooks implemented in this file */
+   brw->intel.vtbl.set_draw_region = brw_set_draw_region;
+   brw->intel.vtbl.lost_hardware = brw_lost_hardware;
+   brw->intel.vtbl.note_fence = brw_note_fence; 
+   brw->intel.vtbl.note_unlock = brw_note_unlock; 
+   brw->intel.vtbl.invalidate_state = brw_invalidate_state; 
+   brw->intel.vtbl.flush_cmd = brw_flush_cmd;
+   brw->intel.vtbl.emit_flush = brw_emit_flush;
+
+   brw->intel.vtbl.check_vertex_size = 0;           /* hooks left unset */
+   brw->intel.vtbl.emit_state = 0; 
+   brw->intel.vtbl.reduced_primitive_state = 0;	
+   brw->intel.vtbl.render_start = 0;
+   brw->intel.vtbl.update_texture_state = 0; 
+}
+
diff --git a/i965/brw_wm.c b/i965/brw_wm.c
new file mode 100644
index 0000000..f80ba17
--- /dev/null
+++ b/i965/brw_wm.c
@@ -0,0 +1,355 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+             
+
+#include "brw_context.h"
+#include "brw_util.h"
+#include "brw_wm.h"
+#include "brw_state.h"
+#include "brw_hal.h"
+
+
+GLuint brw_wm_nr_args( GLuint opcode )
+{
+   switch (opcode) {
+   /* Opcodes reported as taking one source argument. */
+   case WM_CINTERP:
+   case WM_PIXELXY:
+   case WM_WPOSXY:
+   case OPCODE_ABS:
+   case OPCODE_COS:
+   case OPCODE_EX2:
+   case OPCODE_FLR:
+   case OPCODE_FRC:
+   case OPCODE_KIL:
+   case OPCODE_LG2:
+   case OPCODE_LIT:
+   case OPCODE_MOV:
+   case OPCODE_RCP:
+   case OPCODE_RSQ:
+   case OPCODE_SCS:
+   case OPCODE_SIN:
+   case OPCODE_SWZ:
+   case OPCODE_TEX:
+   case OPCODE_TXB:
+   case OPCODE_TXP:
+      return 1;
+   /* Opcodes reported as taking two source arguments. */
+   case WM_DELTAXY:
+   case WM_LINTERP:
+   case WM_PIXELW:
+   case OPCODE_ADD:
+   case OPCODE_DP3:
+   case OPCODE_DP4:
+   case OPCODE_DPH:
+   case OPCODE_DST:
+   case OPCODE_MAX:
+   case OPCODE_MIN:
+   case OPCODE_MUL:
+   case OPCODE_POW:
+   case OPCODE_SGE:
+   case OPCODE_SLT:
+   case OPCODE_SUB:
+   case OPCODE_XPD:
+      return 2;
+   /* Opcodes reported as taking three source arguments. */
+   case WM_FB_WRITE:
+   case WM_PINTERP:
+   case OPCODE_CMP:
+   case OPCODE_LRP:
+   case OPCODE_MAD:
+      return 3;
+   /* Every opcode not listed above reports zero arguments. */
+   default:
+      return 0;
+   }
+}
+
+
+GLuint brw_wm_is_scalar_result( GLuint opcode )
+{
+   switch (opcode) {
+   case OPCODE_DP3:
+   case OPCODE_DP4:
+   case OPCODE_DPH:
+   case OPCODE_DST:
+   case OPCODE_COS:
+   case OPCODE_EX2:
+   case OPCODE_LG2:
+   case OPCODE_POW:
+   case OPCODE_RCP:
+   case OPCODE_RSQ:
+   case OPCODE_SIN:
+      return 1;
+   /* Any opcode not listed above yields 0. */
+   default:
+      return 0;
+   }
+}
+
+
+static void brw_wm_pass_hal (struct brw_wm_compile *c)
+{
+   /* Function-static cache: the symbol lookup is attempted only once. */
+   static GLboolean lookup_done;
+   static void (*hal_hook) (struct brw_wm_compile *c);
+
+   if (!lookup_done) {
+      hal_hook = brw_hal_find_symbol ("intel_hal_wm_pass");
+      lookup_done = 1;
+   }
+   if (hal_hook != NULL)
+      hal_hook (c);
+}
+
+static void do_wm_prog( struct brw_context *brw,
+			struct brw_fragment_program *fp, 
+			struct brw_wm_prog_key *key)
+{
+   struct brw_wm_compile *c;
+   const GLuint *program;
+   GLuint program_size;
+
+   /* Compile fp for the given key and upload the result to the WM
+    * program cache.  The scratch compile struct is allocated once, kept
+    * in brw->wm.compile_data and re-zeroed on every later call.
+    */
+   c = brw->wm.compile_data;
+   if (c == NULL) {
+     brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data));
+     c = brw->wm.compile_data;
+     if (c == NULL) {
+	/* Out of memory and no way to report it: bail out rather than
+	 * dereference NULL.  No program is compiled or uploaded. */
+	return;
+     }
+   } else {
+     memset(c, 0, sizeof(*brw->wm.compile_data));
+   }
+   memcpy(&c->key, key, sizeof(*key));
+   c->fp = fp;
+   c->env_param = brw->intel.ctx.FragmentProgram.Parameters;
+
+   /* Augment fragment program.  Add instructions for pre- and
+    * post-fragment-program tasks such as interpolation and fogging.
+    */
+   brw_wm_pass_fp(c);
+
+   /* Translate to intermediate representation.  Build register usage
+    * chains.
+    */
+   brw_wm_pass0(c);
+
+   /* Dead code removal. */
+   brw_wm_pass1(c);
+
+   /* Optional HAL optimization hook. */
+   brw_wm_pass_hal (c);
+
+   /* Register allocation is limited to half of BRW_WM_MAX_GRF. */
+   c->grf_limit = BRW_WM_MAX_GRF/2;
+
+   /* This is where we start emitting gen4 code: */
+   brw_init_compile(brw, &c->func);    
+
+   brw_wm_pass2(c);
+
+   c->prog_data.total_grf = c->max_wm_grf;
+   if (c->last_scratch) {
+      c->prog_data.total_scratch = c->last_scratch + 0x40;
+   } else {
+      c->prog_data.total_scratch = 0;
+   }
+
+   /* Emit GEN4 code. */
+   brw_wm_emit(c);
+
+   /* Fetch the assembled program and its size. */
+   program = brw_get_program(&c->func, &program_size);
+
+   /* Upload program and prog_data to the WM program cache under c->key. */
+   brw->wm.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_WM_PROG],
+					      &c->key,
+					      sizeof(c->key),
+					      program,
+					      program_size,
+					      &c->prog_data,
+					      &brw->wm.prog_data );
+}
+
+
+
+static void brw_wm_populate_key( struct brw_context *brw,
+				 struct brw_wm_prog_key *key )
+{
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   struct brw_fragment_program *fp = 
+      (struct brw_fragment_program *)brw->fragment_program;
+   GLuint lookup = 0;	/* accumulates IZ_*_BIT flags */
+   GLuint line_aa;	/* one of AA_NEVER / AA_SOMETIMES / AA_ALWAYS */
+   GLuint i;
+   /* Build the WM program key from current state, starting from all-zero. */
+   memset(key, 0, sizeof(*key));
+
+   /* Build the index for table lookup
+    */
+   /* _NEW_COLOR */
+   if (fp->program.UsesKill ||
+       brw->attribs.Color->AlphaEnabled)
+      lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+
+   if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPR))
+      lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+
+   /* _NEW_DEPTH */
+   if (brw->attribs.Depth->Test)
+      lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
+
+   if (brw->attribs.Depth->Test &&  
+       brw->attribs.Depth->Mask) /* ?? */
+      lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
+
+   /* _NEW_STENCIL */
+   if (brw->attribs.Stencil->Enabled) {
+      lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
+
+      if (brw->attribs.Stencil->WriteMask[0] ||
+	  (brw->attribs.Stencil->TestTwoSide && brw->attribs.Stencil->WriteMask[1]))
+	 lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
+   }
+
+   /* XXX: when should this be disabled?  As written it is always set.
+    */
+   if (1)
+      lookup |= IZ_EARLY_DEPTH_TEST_BIT;
+
+   /* Default: no line antialiasing unless a case below applies. */
+   line_aa = AA_NEVER;
+
+   /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
+   if (brw->attribs.Line->SmoothFlag) {
+      if (brw->intel.reduced_primitive == GL_LINES) {
+	 line_aa = AA_ALWAYS;
+      }
+      else if (brw->intel.reduced_primitive == GL_TRIANGLES) {
+	 if (brw->attribs.Polygon->FrontMode == GL_LINE) {
+	    line_aa = AA_SOMETIMES;
+
+	    if (brw->attribs.Polygon->BackMode == GL_LINE ||
+		(brw->attribs.Polygon->CullFlag &&
+		 brw->attribs.Polygon->CullFaceMode == GL_BACK))
+	       line_aa = AA_ALWAYS;
+	 }
+	 else if (brw->attribs.Polygon->BackMode == GL_LINE) {
+	    line_aa = AA_SOMETIMES;
+
+	    if ((brw->attribs.Polygon->CullFlag &&
+		 brw->attribs.Polygon->CullFaceMode == GL_FRONT))
+	       line_aa = AA_ALWAYS;
+	 }
+      }
+   }
+   /* Hand line_aa and the IZ flags to brw_wm_lookup_iz() together with the key. */
+   brw_wm_lookup_iz(line_aa,
+		    lookup,
+		    key);
+
+
+   /* BRW_NEW_WM_INPUT_DIMENSIONS */
+   key->projtex_mask = brw->wm.input_size_masks[4-1]; /* i.e. element 3 */
+
+   /* _NEW_LIGHT */
+   key->flat_shade = (brw->attribs.Light->ShadeModel == GL_FLAT);
+
+   /* _NEW_TEXTURE: one bit per enabled unit in the shadow / YUV masks */
+   for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+      const struct gl_texture_unit *unit = &brw->attribs.Texture->Unit[i];
+      const struct gl_texture_object *t = unit->_Current;
+
+      if (unit->_ReallyEnabled) {
+
+	 if (t->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB &&
+	     t->Image[0][t->BaseLevel]->_BaseFormat == GL_DEPTH_COMPONENT) {
+	    key->shadowtex_mask |= 1<<i;
+	 }
+
+	 if (t->Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA)
+	    key->yuvtex_mask |= 1<<i;
+      }
+   }
+	  
+
+   /* Extra info: ties the key to this particular fragment program.
+    */
+   key->program_string_id = fp->id;
+
+}
+
+
+static void brw_upload_wm_prog( struct brw_context *brw )
+{
+   struct brw_fragment_program *frag_prog =
+      (struct brw_fragment_program *)brw->fragment_program;
+   struct brw_wm_prog_key wm_key;
+
+   brw_wm_populate_key(brw, &wm_key);
+
+   /* Probe the cache with the fresh key first.  On a hit nothing more is
+    * done here (presumably the search call has filled in prog_data and
+    * prog_gs_offset); only a miss triggers a compile.
+    */
+   if (!brw_search_cache(&brw->cache[BRW_WM_PROG],
+			 &wm_key, sizeof(wm_key),
+			 &brw->wm.prog_data,
+			 &brw->wm.prog_gs_offset))
+      do_wm_prog(brw, frag_prog, &wm_key);
+}
+
+
+/* Tracked-state atom for the WM program: lists the dirty bits named in
+ * brw_wm_populate_key() and uses brw_upload_wm_prog() as its update hook. */
+const struct brw_tracked_state brw_wm_prog = {
+   .update = brw_upload_wm_prog,
+   .dirty = {
+      .brw   = (BRW_NEW_REDUCED_PRIMITIVE |
+		BRW_NEW_WM_INPUT_DIMENSIONS |
+		BRW_NEW_FRAGMENT_PROGRAM),
+      .mesa  = (_NEW_TEXTURE |
+		_NEW_LIGHT |
+		_NEW_LINE |
+		_NEW_POLYGON |
+		_NEW_STENCIL |
+		_NEW_DEPTH |
+		_NEW_COLOR),
+      .cache = 0
+   }
+};
+
diff --git a/i965/brw_wm.h b/i965/brw_wm.h
new file mode 100644
index 0000000..f5fddfd
--- /dev/null
+++ b/i965/brw_wm.h
@@ -0,0 +1,262 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+              
+
+#ifndef BRW_WM_H
+#define BRW_WM_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "prog_instruction.h"
+
+/* A big lookup table is used to figure out which and how many
+ * additional regs will be inserted before the main payload in the WM
+ * program execution.  These mainly relate to depth and stencil
+ * processing and the early-depth-test optimization.
+ */
+#define IZ_PS_KILL_ALPHATEST_BIT    (1 << 0)
+#define IZ_PS_COMPUTES_DEPTH_BIT    (1 << 1)
+#define IZ_DEPTH_WRITE_ENABLE_BIT   (1 << 2)
+#define IZ_DEPTH_TEST_ENABLE_BIT    (1 << 3)
+#define IZ_STENCIL_WRITE_ENABLE_BIT (1 << 4)
+#define IZ_STENCIL_TEST_ENABLE_BIT  (1 << 5)
+#define IZ_EARLY_DEPTH_TEST_BIT     (1 << 6)
+#define IZ_BIT_MAX                  (1 << 7)	/* one past the largest flag combination */
+/* Line antialiasing classification passed to brw_wm_lookup_iz(). */
+#define AA_NEVER     0
+#define AA_SOMETIMES 1
+#define AA_ALWAYS    2
+
+struct brw_wm_prog_key {
+   GLuint source_depth_reg:3;
+   GLuint aa_dest_stencil_reg:3;
+   GLuint dest_depth_reg:3;
+   GLuint nr_depth_regs:3;
+   GLuint projtex_mask:8;	/* set from brw->wm.input_size_masks[3] */
+   GLuint shadowtex_mask:8;	/* bit i: unit i does depth compare on a depth texture */
+   GLuint computes_depth:1;	/* could be derived from program string */
+   GLuint source_depth_to_render_target:1;
+   GLuint flat_shade:1;	/* ShadeModel == GL_FLAT */
+   GLuint runtime_check_aads_emit:1;
+   /* The bit-fields above add up to 32 bits; the next two add up to 32 more. */
+   GLuint yuvtex_mask:8;	/* bit i: unit i samples a GL_YCBCR_MESA image */
+   GLuint pad1:24;
+
+   GLuint program_string_id:32;	/* fp->id of the source fragment program */
+};
+
+
+/* A bit of a glossary:
+ *
+ * brw_wm_value: A computed value or program input.  Values are
+ * constant, they are created once and are never modified.  When a
+ * fragment program register is written or overwritten, new values are
+ * created fresh, preserving the rule that values are constant.
+ *
+ * brw_wm_ref: A reference to a value.  Wherever a value is used by an
+ * instruction or as a program output, that is tracked with an
+ * instance of this struct.  All references to a value occur after it
+ * is created.  After the last reference, a value is dead and can be
+ * discarded.
+ *
+ * brw_wm_grf: Represents a physical hardware register.  May be either
+ * empty or hold a value.  Register allocation is the process of
+ * assigning values to grf registers.  This occurs in pass2 and the
+ * brw_wm_grf struct is not used before that.
+ *
+ * Fragment program registers: These are time-varying constructs that
+ * are hard to reason about and which we translate away in pass0.  A
+ * single fragment program register element (eg. temp[0].x) will be
+ * translated to one or more brw_wm_value structs, one for each time
+ * that temp[0].x is written to during the program. 
+ */
+
+
+
+/* Used in pass2 to track register allocation: one entry per hardware
+ * register, which is either empty or holds a value. */
+struct brw_wm_grf {
+   struct brw_wm_value *value;	/* value currently held, if any */
+   GLuint nextuse;	/* NOTE(review): presumably the instruction index of the next use -- confirm in pass2 */
+};
+
+struct brw_wm_value {
+   struct brw_reg hw_reg;	/* emitted to this reg, may not always be there */
+   struct brw_wm_ref *lastuse;	/* a reference to this value; refs link back via prevuse */
+   struct brw_wm_grf *resident; 	/* NOTE(review): presumably the grf holding it during pass2 -- confirm */
+   GLuint contributes_to_output:1;
+   GLuint spill_slot:16;	/* if non-zero, spill immediately after calculation */
+};
+
+struct brw_wm_ref {
+   struct brw_reg hw_reg;	/* nr filled in in pass2, everything else, pass0 */
+   struct brw_wm_value *value;	/* the value being referenced */
+   struct brw_wm_ref *prevuse;	/* another reference in the chain for the same value */
+   GLuint unspill_reg:7;	/* unspill to reg */
+   GLuint emitted:1;
+   GLuint insn:24;	/* NOTE(review): presumably index of the referencing instruction -- confirm */
+};
+
+struct brw_wm_constref {
+   const struct brw_wm_ref *ref;	/* reference paired with the constant below */
+   GLfloat constval;	/* the constant's float value */
+};
+
+
+struct brw_wm_instruction {
+   struct brw_wm_value *dst[4];	/* one destination value per channel; NULL if none */
+   struct brw_wm_ref *src[3][4];	/* [argument][channel]; NULL if unused */
+   GLuint opcode:8;	/* Mesa OPCODE_* or one of the WM_* opcodes */
+   GLuint saturate:1;
+   GLuint writemask:4;
+   GLuint tex_unit:4;   /* texture unit for TEX, TXD, TXP instructions (NOTE(review): TXD may mean TXB) */
+   GLuint tex_idx:3;    /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */
+};
+
+
+#define PROGRAM_INTERNAL_PARAM 	/* expands to nothing */
+
+#define BRW_WM_MAX_INSN  (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3)
+#define BRW_WM_MAX_GRF   128		/* hardware limit */
+#define BRW_WM_MAX_VREG  (BRW_WM_MAX_INSN * 4)	/* four per instruction; sizes vreg[] */
+#define BRW_WM_MAX_REF   (BRW_WM_MAX_INSN * 12)	/* twelve per instruction; sizes refs[] */
+#define BRW_WM_MAX_PARAM 256	/* sizes creg[] */
+#define BRW_WM_MAX_CONST 256	/* sizes constref[] */
+#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS
+
+
+
+/* New opcodes to track internal operations required for WM unit.
+ * These are added early so that the registers used can be tracked,
+ * freed and reused like those of other instructions.
+ * Numbering starts at MAX_OPCODE; argument counts are per brw_wm_nr_args(). */
+#define WM_PIXELXY        (MAX_OPCODE)		/* 1 argument */
+#define WM_DELTAXY        (MAX_OPCODE + 1)	/* 2 arguments */
+#define WM_PIXELW         (MAX_OPCODE + 2)	/* 2 arguments */
+#define WM_LINTERP        (MAX_OPCODE + 3)	/* 2 arguments */
+#define WM_PINTERP        (MAX_OPCODE + 4)	/* 3 arguments */
+#define WM_CINTERP        (MAX_OPCODE + 5)	/* 1 argument */
+#define WM_WPOSXY         (MAX_OPCODE + 6)	/* 1 argument */
+#define WM_FB_WRITE       (MAX_OPCODE + 7)	/* 3 arguments */
+#define MAX_WM_OPCODE     (MAX_OPCODE + 8)	/* one past the last WM_* opcode */
+
+#define PROGRAM_PAYLOAD   (PROGRAM_FILE_MAX)	/* last index of pass0_fp_reg's first dimension */
+#define PAYLOAD_DEPTH     (FRAG_ATTRIB_MAX)
+
+struct brw_wm_compile {
+   struct brw_compile func;	/* passed to brw_init_compile() and brw_get_program() */
+   struct brw_wm_prog_key key;	/* copy of the key being compiled */
+   struct brw_wm_prog_data prog_data;	/* total_grf / total_scratch etc., uploaded with the program */
+
+   struct brw_fragment_program *fp;	/* program being compiled */
+
+   GLfloat (*env_param)[4];	/* set to ctx.FragmentProgram.Parameters */
+
+   enum {
+      START,
+      PASS2_DONE
+   } state;	/* debug printers show hw registers once this is PASS2_DONE */
+
+   /* Initial pass - rewrite the fp instructions into a new fp
+    * instruction list, simplifying and adding instructions for
+    * interpolation and framebuffer writes.
+    */
+   struct prog_instruction prog_instructions[BRW_WM_MAX_INSN];
+   GLuint nr_fp_insns;
+   GLuint fp_temp;
+   GLuint fp_interp_emitted;
+
+   struct prog_src_register pixel_xy;
+   struct prog_src_register delta_xy;
+   struct prog_src_register pixel_w;
+
+   /* Value pools; the debug printer names entries rN (vreg) and cN (creg). */
+   struct brw_wm_value vreg[BRW_WM_MAX_VREG];
+   GLuint nr_vreg;
+
+   struct brw_wm_value creg[BRW_WM_MAX_PARAM];
+   GLuint nr_creg;
+
+   struct {
+      struct brw_wm_value depth[4]; /* includes r0/r1 */
+      struct brw_wm_value input_interp[FRAG_ATTRIB_MAX];
+   } payload;
+
+   /* Indexed [register file][index 0..255][channel 0..3]. */
+   const struct brw_wm_ref *pass0_fp_reg[PROGRAM_PAYLOAD+1][256][4];
+
+   struct brw_wm_ref undef_ref;
+   struct brw_wm_value undef_value;	/* printed as "undef" by the debug printer */
+
+   struct brw_wm_ref refs[BRW_WM_MAX_REF];
+   GLuint nr_refs;
+
+   struct brw_wm_instruction instruction[BRW_WM_MAX_INSN];
+   GLuint nr_insns;	/* number of valid entries in instruction[] */
+
+   struct brw_wm_constref constref[BRW_WM_MAX_CONST];
+   GLuint nr_constrefs;
+
+   struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2];
+
+   GLuint grf_limit;	/* set to BRW_WM_MAX_GRF/2 before pass2 */
+   GLuint max_wm_grf;	/* copied into prog_data.total_grf after pass2 */
+   GLuint last_scratch;	/* if non-zero, total_scratch = last_scratch + 0x40 */
+};
+
+
+GLuint brw_wm_nr_args( GLuint opcode );
+GLuint brw_wm_is_scalar_result( GLuint opcode );
+/* Compile stages, declared in the order do_wm_prog() calls them. */
+void brw_wm_pass_fp( struct brw_wm_compile *c );
+void brw_wm_pass0( struct brw_wm_compile *c );
+void brw_wm_pass1( struct brw_wm_compile *c );
+void brw_wm_pass2( struct brw_wm_compile *c );
+void brw_wm_emit( struct brw_wm_compile *c );
+/* Debug printers. */
+void brw_wm_print_value( struct brw_wm_compile *c,
+			 struct brw_wm_value *value );
+
+void brw_wm_print_ref( struct brw_wm_compile *c,
+		       struct brw_wm_ref *ref );
+
+void brw_wm_print_insn( struct brw_wm_compile *c,
+			struct brw_wm_instruction *inst );
+
+void brw_wm_print_program( struct brw_wm_compile *c,
+			   const char *stage );
+/* Takes an AA_* value, the IZ_* flag word and the key (NOTE(review): body not in view). */
+void brw_wm_lookup_iz( GLuint line_aa,
+		       GLuint lookup,
+		       struct brw_wm_prog_key *key );
+
+#endif
diff --git a/i965/brw_wm_debug.c b/i965/brw_wm_debug.c
new file mode 100644
index 0000000..f31d097
--- /dev/null
+++ b/i965/brw_wm_debug.c
@@ -0,0 +1,171 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+               
+
+#include "brw_context.h"
+#include "brw_wm.h"
+
+
+/* Debug helper: print 'value' as its hw register once pass2 has run, else
+ * as undef, rN (vreg), cN (creg), iN (payload.input_interp), dN
+ * (payload.depth) or "?".  Indices are cast to int to match "%d". */
+void brw_wm_print_value( struct brw_wm_compile *c, struct brw_wm_value *value )
+{
+   assert(value);
+   if (c->state >= PASS2_DONE) 
+      brw_print_reg(value->hw_reg);
+   else if( value == &c->undef_value )
+      _mesa_printf("undef");
+   else if (value - c->vreg >= 0 && value - c->vreg < BRW_WM_MAX_VREG)
+      _mesa_printf("r%d", (int)(value - c->vreg));
+   else if (value - c->creg >= 0 && value - c->creg < BRW_WM_MAX_PARAM)
+      _mesa_printf("c%d", (int)(value - c->creg));
+   else if (value - c->payload.input_interp >= 0 &&
+	    value - c->payload.input_interp < FRAG_ATTRIB_MAX)
+      _mesa_printf("i%d", (int)(value - c->payload.input_interp));
+   else if (value - c->payload.depth >= 0 &&   /* bound by the array's own size */
+	    value - c->payload.depth < (int)(sizeof(c->payload.depth) / sizeof(c->payload.depth[0])))
+      _mesa_printf("d%d", (int)(value - c->payload.depth));
+   else 
+      _mesa_printf("?");
+}
+
+void brw_wm_print_ref( struct brw_wm_compile *c, struct brw_wm_ref *ref )
+{
+   struct brw_reg reg = ref->hw_reg;
+
+   if (ref->unspill_reg)
+      _mesa_printf("UNSPILL(%x)/", ref->value->spill_slot);
+
+   /* From PASS2_DONE on, print the hardware register and stop. */
+   if (c->state >= PASS2_DONE) {
+      brw_print_reg(reg);
+      return;
+   }
+   /* Before that: modifiers, the symbolic value, then any sub-offset. */
+   _mesa_printf("%s", reg.negate ? "-" : "");
+   _mesa_printf("%s", reg.abs ? "abs/" : "");
+   brw_wm_print_value(c, ref->value);
+   if ((reg.nr&1) || reg.subnr)
+      _mesa_printf("->%d.%d", (reg.nr&1), reg.subnr);
+}
+
+void brw_wm_print_insn( struct brw_wm_compile *c,
+			struct brw_wm_instruction *inst )
+{
+   const GLuint nr_args = brw_wm_nr_args(inst->opcode);
+   GLuint chan, arg;
+
+   /* One line per instruction.  Destinations first: '#' is an empty slot. */
+   _mesa_printf("[");
+   for (chan = 0; chan < 4; chan++) {
+      struct brw_wm_value *dst = inst->dst[chan];
+      if (!dst)
+	 _mesa_printf("#");
+      else {
+	 brw_wm_print_value(c, dst);
+	 if (dst->spill_slot)
+	    _mesa_printf("/SPILL(%x)",dst->spill_slot);
+      }
+      if (chan < 3)
+	 _mesa_printf(",");
+   }
+   _mesa_printf("]");
+   /* The writemask is only shown when it is partial. */
+   if (inst->writemask != WRITEMASK_XYZW)
+      _mesa_printf(".%s%s%s%s", 
+		   GET_BIT(inst->writemask, 0) ? "x" : "",
+		   GET_BIT(inst->writemask, 1) ? "y" : "",
+		   GET_BIT(inst->writemask, 2) ? "z" : "",
+		   GET_BIT(inst->writemask, 3) ? "w" : "");
+
+   /* WM-internal opcodes get fixed names; the rest come from Mesa. */
+   switch (inst->opcode) {
+   case WM_CINTERP:
+      _mesa_printf(" = CINTERP");
+      break;
+   case WM_DELTAXY:
+      _mesa_printf(" = DELTAXY");
+      break;
+   case WM_FB_WRITE:
+      _mesa_printf(" = FB_WRITE");
+      break;
+   case WM_LINTERP:
+      _mesa_printf(" = LINTERP");
+      break;
+   case WM_PINTERP:
+      _mesa_printf(" = PINTERP");
+      break;
+   case WM_PIXELW:
+      _mesa_printf(" = PIXELW");
+      break;
+   case WM_PIXELXY:
+      _mesa_printf(" = PIXELXY");
+      break;
+   case WM_WPOSXY:
+      _mesa_printf(" = WPOSXY");
+      break;
+   default:
+      _mesa_printf(" = %s", _mesa_opcode_string(inst->opcode));
+      break;
+   }
+
+   if (inst->saturate)
+      _mesa_printf("_SAT");
+   /* Sources: one bracketed group per argument, '%' for an empty slot. */
+   for (arg = 0; arg < nr_args; arg++) {
+      _mesa_printf(" [");
+      for (chan = 0; chan < 4; chan++) {
+	 struct brw_wm_ref *ref = inst->src[arg][chan];
+	 if (ref)
+	    brw_wm_print_ref(c, ref);
+	 else
+	    _mesa_printf("%%");
+	 if (chan < 3)
+	    _mesa_printf(",");
+	 else
+	    _mesa_printf("]");
+      }
+   }
+   _mesa_printf("\n");
+}
+
+void brw_wm_print_program( struct brw_wm_compile *c, const char *stage )
+{
+   /* Dump all c->nr_insns instructions under a "<stage>:" banner. */
+   struct brw_wm_instruction *cur = c->instruction;
+   struct brw_wm_instruction *const end = cur + c->nr_insns;
+   _mesa_printf("\n\n\n%s:\n", stage);
+   while (cur != end)
+      brw_wm_print_insn(c, cur++);
+   _mesa_printf("\n\n\n");
+}
+
diff --git a/i965/brw_wm_emit.c b/i965/brw_wm_emit.c
new file mode 100644
index 0000000..80bd576
--- /dev/null
+++ b/i965/brw_wm_emit.c
@@ -0,0 +1,1249 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+               
+
+#include "macros.h"
+#include "brw_context.h"
+#include "brw_wm.h"
+
+#define SATURATE (1<<5)	/* extra flag carried in 'mask', above the four channel bits (1<<0..3) */
+
+/* Returns a copy of 'reg' advanced to the next register number when its
+ * vstride is non-zero; otherwise the copy is unchanged.  (Original note:
+ * unsure how correct this is w.r.t. horizontal vs. vertical strides.) */
+static __inline struct brw_reg sechalf( struct brw_reg reg )
+{
+   struct brw_reg second = reg;
+   second.nr += (reg.vstride != 0);
+   return second;
+}
+
+/* Payload R0:
+ *
+ * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
+ *         corresponding to each of the 16 execution channels.
+ * R0.1..8 -- ?
+ * R1.0 -- triangle vertex 0.X
+ * R1.1 -- triangle vertex 0.Y
+ * R1.2 -- tile 0 x,y coords (2 packed uwords)
+ * R1.3 -- tile 1 x,y coords (2 packed uwords)
+ * R1.4 -- tile 2 x,y coords (2 packed uwords)
+ * R1.5 -- tile 3 x,y coords (2 packed uwords)
+ * R1.6 -- ?
+ * R1.7 -- ?
+ * R1.8 -- ?
+ */
+
+
+static void emit_pixel_xy(struct brw_compile *p,
+			  const struct brw_reg *dst,
+			  GLuint mask,
+			  const struct brw_reg *arg0)	/* not referenced in the body */
+{
+   struct brw_reg r1 = brw_vec1_grf(1, 0);
+   struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
+   /* Both ADDs below are emitted with compression switched off. */
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+   /* Calculate pixel centers by adding 1 or 0 to each of the
+    * micro-tile coordinates passed in r1.
+    */
+   if (mask & WRITEMASK_X) {
+      brw_ADD(p,
+	      vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
+	      stride(suboffset(r1_uw, 4), 2, 4, 0),
+	      brw_imm_v(0x10101010));
+   }
+
+   if (mask & WRITEMASK_Y) {
+      brw_ADD(p,
+	      vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
+	      stride(suboffset(r1_uw,5), 2, 4, 0),
+	      brw_imm_v(0x11001100));
+   }
+   /* Switch back to compressed mode for whatever is emitted next. */
+   brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+}
+
+
+
+static void emit_delta_xy(struct brw_compile *p,
+			  const struct brw_reg *dst,
+			  GLuint mask,
+			  const struct brw_reg *arg0,
+			  const struct brw_reg *arg1)	/* not referenced in the body */
+{
+   struct brw_reg r1 = brw_vec1_grf(1, 0);
+
+   /* Calc delta X,Y by subtracting origin in r1 from the pixel
+    * centers (arg0[0] / arg0[1], read as UW).
+    */
+   if (mask & WRITEMASK_X) {
+      brw_ADD(p,
+	      dst[0],
+	      retype(arg0[0], BRW_REGISTER_TYPE_UW),
+	      negate(r1));	/* dst[0] = arg0[0] + (-r1) */
+   }
+
+   if (mask & WRITEMASK_Y) {
+      brw_ADD(p,
+	      dst[1],
+	      retype(arg0[1], BRW_REGISTER_TYPE_UW),
+	      negate(suboffset(r1,1)));	/* same, with r1 offset by one element */
+
+   }
+}
+
+static void emit_wpos_xy(struct brw_compile *p,
+			   const struct brw_reg *dst,
+			   GLuint mask,
+			   const struct brw_reg *arg0)
+{
+   /* Copies arg0[0] (read as UW) into dst[0] and the negation of
+    * arg0[1] into dst[1]; nothing is subtracted from either here.
+    */
+   if (mask & WRITEMASK_X) {
+      brw_MOV(p,
+	      dst[0],
+	      retype(arg0[0], BRW_REGISTER_TYPE_UW));
+   }
+
+   if (mask & WRITEMASK_Y) {
+      /* TODO -- window_height - Y */
+      brw_MOV(p,
+	      dst[1],
+	      negate(retype(arg0[1], BRW_REGISTER_TYPE_UW)));
+
+   }
+}
+
+
+static void emit_pixel_w( struct brw_compile *p,
+			  const struct brw_reg *dst,
+			  GLuint mask,
+			  const struct brw_reg *arg0,
+			  const struct brw_reg *deltas)
+{
+   /* Don't need this if all you are doing is interpolating color, for
+    * instance.  Only the W channel is ever written (dst[3]).
+    */
+   if (mask & WRITEMASK_W) {      
+      struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
+
+      /* Calc 1/w - just linterp wpos[3] optimized by putting the
+       * result straight into a message reg.
+       */
+      brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
+      brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
+
+      /* Calc w: INV math op, result to dst[3] (the 2 presumably names message reg 2 -- confirm) */
+      brw_math_16( p, dst[3],
+		   BRW_MATH_FUNCTION_INV,
+		   BRW_MATH_SATURATE_NONE,
+		   2, brw_null_reg(),
+		   BRW_MATH_PRECISION_FULL);
+   }
+}
+
+
+
+static void emit_linterp( struct brw_compile *p, 
+			 const struct brw_reg *dst,
+			 GLuint mask,
+			 const struct brw_reg *arg0,
+			 const struct brw_reg *deltas )
+{
+   struct brw_reg interp[4];
+   GLuint nr = arg0[0].nr;
+   GLuint i;
+   /* One source per channel: registers nr and nr+1, sub-offsets 0 and 4. */
+   interp[0] = brw_vec1_grf(nr, 0);
+   interp[1] = brw_vec1_grf(nr, 4);
+   interp[2] = brw_vec1_grf(nr+1, 0);
+   interp[3] = brw_vec1_grf(nr+1, 4);
+   /* For each enabled channel emit a LINE/MAC pair; the MAC writes dst[i]. */
+   for(i = 0; i < 4; i++ ) {
+      if (mask & (1<<i)) {
+	 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
+	 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+      }
+   }
+}
+
+
+static void emit_pinterp( struct brw_compile *p, 
+			  const struct brw_reg *dst,
+			  GLuint mask,
+			  const struct brw_reg *arg0,
+			  const struct brw_reg *deltas,
+			  const struct brw_reg *w)
+{
+   struct brw_reg interp[4];
+   GLuint nr = arg0[0].nr;
+   GLuint i;
+   /* One source per channel: registers nr and nr+1, sub-offsets 0 and 4. */
+   interp[0] = brw_vec1_grf(nr, 0);
+   interp[1] = brw_vec1_grf(nr, 4);
+   interp[2] = brw_vec1_grf(nr+1, 0);
+   interp[3] = brw_vec1_grf(nr+1, 4);
+   /* Same LINE/MAC pair as emit_linterp, then dst[i] is multiplied by w[3]. */
+   for(i = 0; i < 4; i++ ) {
+      if (mask & (1<<i)) {
+	 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
+	 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+	 brw_MUL(p, dst[i], dst[i], w[3]);
+      }
+   }
+}
+
+static void emit_cinterp( struct brw_compile *p, 
+			 const struct brw_reg *dst,
+			 GLuint mask,
+			 const struct brw_reg *arg0 )
+{
+   struct brw_reg interp[4];
+   GLuint nr = arg0[0].nr;
+   GLuint i;
+   /* One source per channel: registers nr and nr+1, sub-offsets 0 and 4. */
+   interp[0] = brw_vec1_grf(nr, 0);
+   interp[1] = brw_vec1_grf(nr, 4);
+   interp[2] = brw_vec1_grf(nr+1, 0);
+   interp[3] = brw_vec1_grf(nr+1, 4);
+   /* Each enabled channel is a plain MOV from the source offset by 3. */
+   for(i = 0; i < 4; i++ ) {
+      if (mask & (1<<i)) {
+	 brw_MOV(p, dst[i], suboffset(interp[i],3));	/* TODO: optimize away like other moves */
+      }
+   }
+}
+
+
+
+
+
+static void emit_alu1( struct brw_compile *p, 
+		       struct brw_instruction *(*func)(struct brw_compile *, 
+						       struct brw_reg, 
+						       struct brw_reg),
+		       const struct brw_reg *dst,
+		       GLuint mask,
+		       const struct brw_reg *arg0 )
+{
+   GLuint i;
+   /* Apply the one-source emitter 'func' to every channel enabled in mask. */
+   if (mask & SATURATE)
+      brw_set_saturate(p, 1);
+
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {
+	 func(p, dst[i], arg0[i]);
+      }
+   }
+   /* Switch saturation back off if it was enabled above. */
+   if (mask & SATURATE)
+      brw_set_saturate(p, 0);
+}
+
+static void emit_alu2( struct brw_compile *p, 
+		       struct brw_instruction *(*func)(struct brw_compile *, 
+						       struct brw_reg, 
+						       struct brw_reg, 
+						       struct brw_reg),
+		       const struct brw_reg *dst,
+		       GLuint mask,
+		       const struct brw_reg *arg0,
+		       const struct brw_reg *arg1 )
+{
+   GLuint i;
+   /* Apply the two-source emitter 'func' to every channel enabled in mask. */
+   if (mask & SATURATE)
+      brw_set_saturate(p, 1);
+
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {
+	 func(p, dst[i], arg0[i], arg1[i]);
+      }
+   }
+   /* Switch saturation back off if it was enabled above. */
+   if (mask & SATURATE)
+      brw_set_saturate(p, 0);
+}
+
+
+static void emit_mad( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1,
+		      const struct brw_reg *arg2 )
+{
+   GLuint i;
+   /* Per enabled channel: dst = arg0 * arg1, then dst = dst + arg2. */
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {
+	 brw_MUL(p, dst[i], arg0[i], arg1[i]);
+	 /* Saturation, if requested, is switched on for the final ADD only. */
+	 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+	 brw_ADD(p, dst[i], dst[i], arg2[i]);
+	 brw_set_saturate(p, 0);
+      }
+   }
+}
+
+
+static void emit_lrp( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1,
+		      const struct brw_reg *arg2 )
+{
+   GLuint i;
+
+   /* Uses dst as a temporary: it first receives (1.0 - arg0).
+    */
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {	
+	 /* Can I use the LINE instruction for this? 
+	  */
+	 brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
+	 brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
+	 /* NOTE(review): presumably the MAC adds arg0*arg1 to the product left by the null-dest MUL -- confirm */
+	 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+	 brw_MAC(p, dst[i], arg0[i], arg1[i]);
+	 brw_set_saturate(p, 0);
+      }
+   }
+}
+
+
+static void emit_slt( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1 )
+{
+   GLuint i;
+   /* Per enabled channel: write 0, compare arg0 < arg1, write 1.0.  SATURATE is not consulted. */
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {	
+	 brw_MOV(p, dst[i], brw_imm_f(0));
+	 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
+	 brw_MOV(p, dst[i], brw_imm_f(1.0));	/* NOTE(review): presumably predicated on the CMP -- confirm */
+	 brw_set_predicate_control_flag_value(p, 0xff);
+      }
+   }
+}
+
+/* Isn't this just the same as the above with the args swapped?
+ */
+static void emit_sge( struct brw_compile *p,
+                      const struct brw_reg *dst, GLuint mask,
+                      const struct brw_reg *arg0, const struct brw_reg *arg1 )
+{
+   GLuint chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      if (!(mask & (1<<chan)))
+         continue;
+
+      /* Default to 0.0; the MOV of 1.0 follows the arg0 >= arg1 compare. */
+      brw_MOV(p, dst[chan], brw_imm_f(0));
+      brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[chan], arg1[chan]);
+      brw_MOV(p, dst[chan], brw_imm_f(1.0));
+      brw_set_predicate_control_flag_value(p, 0xff);
+   }
+}
+
+
+
+/* Select per enabled channel: dst is first loaded with arg2, then the
+ * MOV of arg1 follows an arg0 < 0 compare.  Both MOVs honour SATURATE.
+ */
+static void emit_cmp( struct brw_compile *p,
+                      const struct brw_reg *dst, GLuint mask,
+                      const struct brw_reg *arg0, const struct brw_reg *arg1,
+                      const struct brw_reg *arg2 )
+{
+   const GLuint saturate = (mask & SATURATE) ? 1 : 0;
+   GLuint chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      if (!(mask & (1<<chan)))
+         continue;
+      brw_set_saturate(p, saturate);
+      brw_MOV(p, dst[chan], arg2[chan]);
+      brw_set_saturate(p, 0);
+      brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[chan], brw_imm_f(0));
+      brw_set_saturate(p, saturate);
+      brw_MOV(p, dst[chan], arg1[chan]);
+      brw_set_saturate(p, 0);
+      brw_set_predicate_control_flag_value(p, 0xff);
+   }
+}
+
+/* Maximum per enabled channel: dst starts as arg0, then the MOV of
+ * arg1 follows an arg0 < arg1 compare.  Both MOVs honour SATURATE.
+ */
+static void emit_max( struct brw_compile *p,
+                      const struct brw_reg *dst, GLuint mask,
+                      const struct brw_reg *arg0, const struct brw_reg *arg1 )
+{
+   const GLuint saturate = (mask & SATURATE) ? 1 : 0;
+   GLuint chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      if (!(mask & (1<<chan)))
+         continue;
+      brw_set_saturate(p, saturate);
+      brw_MOV(p, dst[chan], arg0[chan]);
+      brw_set_saturate(p, 0);
+      brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[chan], arg1[chan]);
+      brw_set_saturate(p, saturate);
+      brw_MOV(p, dst[chan], arg1[chan]);
+      brw_set_saturate(p, 0);
+      brw_set_predicate_control_flag_value(p, 0xff);
+   }
+}
+
+/* Minimum per enabled channel: dst starts as arg1, then the MOV of
+ * arg0 follows an arg0 < arg1 compare.  Both MOVs honour SATURATE.
+ */
+static void emit_min( struct brw_compile *p,
+                      const struct brw_reg *dst, GLuint mask,
+                      const struct brw_reg *arg0, const struct brw_reg *arg1 )
+{
+   const GLuint saturate = (mask & SATURATE) ? 1 : 0;
+   GLuint chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      if (!(mask & (1<<chan)))
+         continue;
+      brw_set_saturate(p, saturate);
+      brw_MOV(p, dst[chan], arg1[chan]);
+      brw_set_saturate(p, 0);
+      brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[chan], arg1[chan]);
+      brw_set_saturate(p, saturate);
+      brw_MOV(p, dst[chan], arg0[chan]);
+      brw_set_saturate(p, 0);
+      brw_set_predicate_control_flag_value(p, 0xff);
+   }
+}
+
+
+/* Three-component dot product of arg0 and arg1, written to dst[0]; only
+ * the final MAC is saturated. */
+static void emit_dp3( struct brw_compile *p,
+                      const struct brw_reg *dst, GLuint mask,
+                      const struct brw_reg *arg0, const struct brw_reg *arg1 )
+{
+   const GLuint saturate = (mask & SATURATE) ? 1 : 0;
+   assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
+
+   brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
+   brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
+   brw_set_saturate(p, saturate);
+   brw_MAC(p, dst[0], arg0[2], arg1[2]);
+   brw_set_saturate(p, 0);
+}
+
+
+/* Four-component dot product of arg0 and arg1, written to dst[0]; only
+ * the final MAC is saturated. */
+static void emit_dp4( struct brw_compile *p,
+                      const struct brw_reg *dst, GLuint mask,
+                      const struct brw_reg *arg0, const struct brw_reg *arg1 )
+{
+   const GLuint saturate = (mask & SATURATE) ? 1 : 0;
+   assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
+
+   brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
+   brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
+   brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
+   brw_set_saturate(p, saturate);
+   brw_MAC(p, dst[0], arg0[3], arg1[3]);
+   brw_set_saturate(p, 0);
+}
+
+
+/* Homogeneous dot product: the xyz products are accumulated into
+ * dst[0], then arg1[3] is added by a final (optionally saturated) ADD. */
+static void emit_dph( struct brw_compile *p,
+                      const struct brw_reg *dst, GLuint mask,
+                      const struct brw_reg *arg0, const struct brw_reg *arg1 )
+{
+   const GLuint saturate = (mask & SATURATE) ? 1 : 0;
+   assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
+
+   brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
+   brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
+   brw_MAC(p, dst[0], arg0[2], arg1[2]);
+   brw_set_saturate(p, saturate);
+   brw_ADD(p, dst[0], dst[0], arg1[3]);
+   brw_set_saturate(p, 0);
+}
+
+
+/* Cross product of arg0 and arg1 for the x, y and z channels enabled
+ * in mask: a negated MUL into the null register followed by a MAC
+ * into dst.  The w channel must not be enabled in mask.
+ */
+static void emit_xpd( struct brw_compile *p,
+                      const struct brw_reg *dst, GLuint mask,
+                      const struct brw_reg *arg0, const struct brw_reg *arg1 )
+{
+   GLuint i;
+
+   assert(!(mask & WRITEMASK_W));
+
+   for (i = 0 ; i < 3; i++) {
+      if (mask & (1<<i)) {
+         GLuint i2 = (i+2)%3;
+         GLuint i1 = (i+1)%3;
+         brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
+         brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+         brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
+         brw_set_saturate(p, 0);
+      }
+   }
+}
+
+
+/* Single-operand math function (a BRW_MATH_FUNCTION_* value): loads
+ * arg0[0] into message register 2 and issues brw_math_16 into dst[0].
+ */
+static void emit_math1( struct brw_compile *p, GLuint function,
+                        const struct brw_reg *dst, GLuint mask,
+                        const struct brw_reg *arg0 )
+{
+   const GLuint saturate = ((mask & SATURATE) ?
+                            BRW_MATH_SATURATE_SATURATE :
+                            BRW_MATH_SATURATE_NONE);
+
+   assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
+          function == BRW_MATH_FUNCTION_SINCOS);
+
+   brw_MOV(p, brw_message_reg(2), arg0[0]);
+
+   /* Send two messages to perform all 16 operations:
+    */
+   brw_math_16(p, dst[0], function, saturate,
+               2, brw_null_reg(), BRW_MATH_PRECISION_FULL);
+}
+
+
+/* Two-operand math function (used for POW) on arg0[0] and arg1[0],
+ * result in dst[0].
+ *
+ * The operands are staged in message registers 2..5 and the work is
+ * issued as two uncompressed brw_math messages, one per half.
+ */
+static void emit_math2( struct brw_compile *p, GLuint function,
+                        const struct brw_reg *dst, GLuint mask,
+                        const struct brw_reg *arg0, const struct brw_reg *arg1)
+{
+   const GLuint saturate = ((mask & SATURATE) ?
+                            BRW_MATH_SATURATE_SATURATE :
+                            BRW_MATH_SATURATE_NONE);
+
+   assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
+
+   brw_push_insn_state(p);
+
+   /* First operand: m2 for the first half, m4 for the second. */
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_MOV(p, brw_message_reg(2), arg0[0]);
+   brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+   brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
+
+   /* Second operand: m3 for the first half, m5 for the second. */
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_MOV(p, brw_message_reg(3), arg1[0]);
+   brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+   brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
+
+   /* Send two messages to perform all 16 operations:
+    */
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_math(p, dst[0], function, saturate,
+            2,                  /* operands staged from m2 */
+            brw_null_reg(),
+            BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL);
+
+   brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+   brw_math(p, offset(dst[0],1), function, saturate,
+            4,                  /* operands staged from m4 */
+            brw_null_reg(),
+            BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL);
+
+   brw_pop_insn_state(p);
+}
+		     
+
+
+/* Emit a SIMD16 texture sample.  The number of values sent depends on
+ * the texture target; shadow lookups always send four, the fourth
+ * taken from arg[2].  Values the target does not use are sent as 0.
+ */
+static void emit_tex( struct brw_wm_compile *c,
+                      const struct brw_wm_instruction *inst,
+                      struct brw_reg *dst, GLuint dst_flags,
+                      struct brw_reg *arg )
+{
+   /* Which arg[] entry feeds each message slot: */
+   static const GLuint swz[4] = {0,1,2,2};
+   struct brw_compile *p = &c->func;
+   GLboolean shadow = (c->key.shadowtex_mask & (1<<inst->tex_unit)) ? 1 : 0;
+   GLuint msgLength = 1;
+   GLuint responseLength = 8;		/* always */
+   GLuint msg_type;
+   GLuint i, nr, emit;
+
+   /* How many input regs are there?
+    */
+   switch (inst->tex_idx) {
+   case TEXTURE_1D_INDEX:
+      emit = WRITEMASK_X;
+      nr = 1;
+      break;
+   case TEXTURE_2D_INDEX:
+   case TEXTURE_RECT_INDEX:
+      emit = WRITEMASK_XY;
+      nr = 2;
+      break;
+   default:
+      emit = WRITEMASK_XYZ;
+      nr = 3;
+      break;
+   }
+
+   if (shadow) {
+      nr = 4;
+      emit |= WRITEMASK_W;
+      msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
+   }
+   else {
+      msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
+   }
+
+   /* msgLength advances by two for every value sent. */
+   for (i = 0; i < nr; i++, msgLength += 2) {
+      struct brw_reg src = (emit & (1<<i)) ? arg[swz[i]] : brw_imm_f(0);
+      brw_MOV(p, brw_message_reg(msgLength+1), src);
+   }
+
+   brw_SAMPLE(p,
+              retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
+              1,
+              retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
+              inst->tex_unit + 1, /* surface */
+              inst->tex_unit,     /* sampler */
+              inst->writemask,
+              msg_type,
+              responseLength,
+              msgLength,
+              0);
+}
+
+
+/* Emit a SIMD16 SAMPLE_BIAS message: three values (zero-filled as the
+ * target requires) go to m2, m4 and m6, and arg[3] goes to m8.
+ */
+static void emit_txb( struct brw_wm_compile *c,
+                      const struct brw_wm_instruction *inst,
+                      struct brw_reg *dst, GLuint dst_flags,
+                      struct brw_reg *arg )
+{
+   struct brw_compile *p = &c->func;
+   const GLuint msgLength = 9;
+   struct brw_reg coord1, coord2;
+
+   /* Shadow ignored for txb.
+    */
+   switch (inst->tex_idx) {
+   case TEXTURE_1D_INDEX:
+      coord1 = brw_imm_f(0);
+      coord2 = brw_imm_f(0);
+      break;
+   case TEXTURE_2D_INDEX:
+   case TEXTURE_RECT_INDEX:
+      coord1 = arg[1];
+      coord2 = brw_imm_f(0);
+      break;
+   default:
+      coord1 = arg[1];
+      coord2 = arg[2];
+      break;
+   }
+
+   brw_MOV(p, brw_message_reg(2), arg[0]);
+   brw_MOV(p, brw_message_reg(4), coord1);
+   brw_MOV(p, brw_message_reg(6), coord2);
+   brw_MOV(p, brw_message_reg(8), arg[3]);
+
+   brw_SAMPLE(p,
+              retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
+              1,
+              retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
+              inst->tex_unit + 1, /* surface */
+              inst->tex_unit,     /* sampler */
+              inst->writemask,
+              BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
+              8,                /* responseLength */
+              msgLength,
+              0);
+}
+
+
+/* LIT: optionally writes dst[1] (a MOV of arg0[0]) and dst[2] (POW of
+ * arg0[1] and arg0[3]), then emits MOVs of 0 to both after an
+ * arg0[0] <= 0 compare.  X and W must not be enabled in mask.
+ */
+static void emit_lit( struct brw_compile *p, const struct brw_reg *dst,
+                      GLuint mask, const struct brw_reg *arg0 )
+{
+   const GLuint write_y = mask & WRITEMASK_Y;
+   const GLuint write_z = mask & WRITEMASK_Z;
+
+   assert((mask & WRITEMASK_XW) == 0);
+
+   if (write_y) {
+      brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+      brw_MOV(p, dst[1], arg0[0]);
+      brw_set_saturate(p, 0);
+   }
+
+   if (write_z)
+      emit_math2(p, BRW_MATH_FUNCTION_POW, &dst[2],
+                 WRITEMASK_X | (mask & SATURATE), &arg0[1], &arg0[3]);
+
+   /* Ordinarily you'd use an iff statement to skip or shortcircuit
+    * some of the POW calculations above, but 16-wide iff statements
+    * seem to lock c1 hardware, so this is a nasty workaround:
+    */
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
+
+   if (write_y)
+      brw_MOV(p, dst[1], brw_imm_f(0));
+
+   if (write_z)
+      brw_MOV(p, dst[2], brw_imm_f(0));
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+}
+
+
+/* Kill pixel - set execution mask to zero for those pixels which
+ * fail.
+ */
+static void emit_kil( struct brw_wm_compile *c,
+                      struct brw_reg *arg0)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+   GLuint chan;
+
+   /* Each compare is followed by an AND of the flag register into
+    * r0.0:uw.  XXX - usually won't need 4 compares!
+    */
+   for (chan = 0; chan < 4; chan++) {
+      brw_push_insn_state(p);
+      brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[chan], brw_imm_f(0));
+      brw_set_predicate_control_flag_value(p, 0xff);
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_AND(p, r0uw, brw_flag_reg(), r0uw);
+      brw_pop_insn_state(p);
+   }
+}
+
+/* Send the framebuffer write: r1 is copied into message register
+ * base_reg + 1, then brw_fb_WRITE is issued for nr registers at base_reg.
+ */
+static void fire_fb_write( struct brw_wm_compile *c,
+                           GLuint base_reg, GLuint nr )
+{
+   struct brw_compile *p = &c->func;
+
+   /* Pass through control information:
+    *
+    *  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask }
+    */
+   brw_push_insn_state(p);
+   brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_MOV(p, brw_message_reg(base_reg + 1), brw_vec8_grf(1, 0));
+   brw_pop_insn_state(p);
+
+   /* Send framebuffer write message:
+    *
+    *  send (16) null.0<1>:uw m0  r0.0<8;8,1>:uw  0x85a04000:ud  { Align1 EOT }
+    */
+   brw_fb_WRITE(p,
+                retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
+                base_reg,
+                retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+                0,              /* render surface always 0 */
+                nr,
+                0, 1);
+}
+
+/* Copy the arg1 register selected by key.aa_dest_stencil_reg
+ * (component = value / 2, offset = value % 2) into message register reg. */
+static void emit_aa( struct brw_wm_compile *c,
+                     struct brw_reg *arg1, GLuint reg )
+{
+   struct brw_compile *p = &c->func;
+   const GLuint idx = c->key.aa_dest_stencil_reg;
+   struct brw_reg aa = offset(arg1[idx / 2], idx % 2);
+
+   brw_push_insn_state(p);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
+   brw_MOV(p, brw_message_reg(reg), aa);
+   brw_pop_insn_state(p);
+}
+
+
+/* Post-fragment-program processing.  Copy the colour (arg0) and any
+ * depth/AA values (arg1, arg2) into message registers, then send the
+ * results to the framebuffer. */
+static void emit_fb_write( struct brw_wm_compile *c,
+			   struct brw_reg *arg0,
+			   struct brw_reg *arg1,
+			   struct brw_reg *arg2)
+{
+   struct brw_compile *p = &c->func;
+   GLuint nr = 2;
+   GLuint channel;
+
+   /* Reserve a space for AA - may not be needed:
+    */
+   if (c->key.aa_dest_stencil_reg)
+      nr += 1;
+
+   /* I don't really understand how this achieves the color interleave
+    * (ie RGBARGBA) in the result:  [Do the saturation here]
+    */
+   {
+      brw_push_insn_state(p);
+      
+      for (channel = 0; channel < 4; channel++) {
+	 /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
+	 /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
+
+	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+	 brw_MOV(p,
+		 brw_message_reg(nr + channel),
+		 arg0[channel]);
+       
+	 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+	 brw_MOV(p,
+		 brw_message_reg(nr + channel + 4),
+		 sechalf(arg0[channel]));
+      }
+
+      /* skip over the regs populated above:
+       */
+      nr += 8;
+   
+      brw_pop_insn_state(p);
+   }
+
+   if (c->key.source_depth_to_render_target)
+   {
+      if (c->key.computes_depth) 
+	 brw_MOV(p, brw_message_reg(nr), arg2[2]);
+      else 
+	 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
+
+      nr += 2;
+   }
+
+   if (c->key.dest_depth_reg)
+   {
+      GLuint comp = c->key.dest_depth_reg / 2;
+      GLuint off = c->key.dest_depth_reg % 2;
+
+      if (off != 0) {
+	 brw_push_insn_state(p);
+	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+	 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
+	 /* 2nd half? */
+	 brw_MOV(p, brw_message_reg(nr+1), offset(arg1[comp],1));
+	 brw_pop_insn_state(p);
+      }
+      else {
+	 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
+      }
+      nr += 2;
+   }
+
+
+   if (!c->key.runtime_check_aads_emit) {
+      if (c->key.aa_dest_stencil_reg)
+	 emit_aa(c, arg1, 2);
+
+      fire_fb_write(c, 0, nr);
+   }
+   else {
+      struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
+      struct brw_reg ip = brw_ip_reg();
+      struct brw_instruction *jmp;
+      /* A runtime test of bit 26 in r1.6 chooses between the two writes: */
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+      brw_AND(p, 
+	      v1_null_ud, 
+	      get_element_ud(brw_vec8_grf(1,0), 6), 
+	      brw_imm_ud(1<<26)); 
+
+      jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
+      {
+	 emit_aa(c, arg1, 2);
+	 fire_fb_write(c, 0, nr);
+	 /* note - thread killed in subroutine */
+      }
+      brw_land_fwd_jump(p, jmp);
+
+      /* ELSE: Shuffle up one register to fill in the hole left for AA:
+       */
+      fire_fb_write(c, 1, nr-1);
+   }
+}
+
+
+
+
+/* Spill a register.
+ *
+ * Copies reg into message register 2 and issues brw_dp_WRITE_16 for the
+ * given spill slot.
+ */
+static void emit_spill( struct brw_wm_compile *c,
+                        struct brw_reg reg, GLuint slot )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg header;
+
+   /*
+     mov (16) m2.0<1>:ud   r2.0<8;8,1>:ud   { Align1 Compr }
+   */
+   brw_MOV(p, brw_message_reg(2), reg);
+
+   /*
+     mov (1) r0.2<1>:d    0x00000080:d     { Align1 NoMask }
+     send (16) null.0<1>:uw m1               r0.0<8;8,1>:uw   0x053003ff:ud    { Align1 }
+   */
+   header = retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW);
+   brw_dp_WRITE_16(p, header, 1, slot);
+}
+
+/* Reload reg from a spill slot with brw_dp_READ_16.  Slot 0 stands for
+ * the undefined value and is loaded as 0.0 instead of being read back.
+ */
+static void emit_unspill( struct brw_wm_compile *c,
+                          struct brw_reg reg, GLuint slot )
+{
+   struct brw_compile *p = &c->func;
+
+   if (slot != 0) {
+      /*
+        mov (1) r0.2<1>:d    0x000000c0:d     { Align1 NoMask }
+        send (16) r110.0<1>:uw m1               r0.0<8;8,1>:uw   0x041243ff:ud    { Align1 }
+      */
+      brw_dp_READ_16(p,
+                     retype(vec16(reg), BRW_REGISTER_TYPE_UW),
+                     1,
+                     slot);
+   }
+   else {
+      /* Slot 0 is the undef value. */
+      brw_MOV(p, reg, brw_imm_f(0));
+   }
+}
+
+
+
+/**
+ * Retrieve up to 4 GEN4 register pairs for the given wm reg.  Missing
+ * refs yield the null register; refs with an unspill_reg are reloaded
+ * from their value's spill slot first.
+ */
+static void get_argument_regs( struct brw_wm_compile *c,
+                               struct brw_wm_ref *arg[],
+                               struct brw_reg *regs )
+{
+   GLuint chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      struct brw_wm_ref *ref = arg[chan];
+      if (!ref) {
+         regs[chan] = brw_null_reg();
+         continue;
+      }
+
+      if (ref->unspill_reg)
+         emit_unspill(c, brw_vec8_grf(ref->unspill_reg, 0),
+                      ref->value->spill_slot);
+      regs[chan] = ref->hw_reg;
+   }
+}
+
+/* Emit a spill for every value in the array that has a non-zero slot. */
+static void spill_values( struct brw_wm_compile *c,
+                          struct brw_wm_value *values, GLuint nr )
+{
+   struct brw_wm_value *v;
+
+   for (v = values; v != values + nr; v++)
+      if (v->spill_slot)
+         emit_spill(c, v->hw_reg, v->spill_slot);
+}
+
+
+
+/* Emit the fragment program instructions here: spill flagged payload/creg
+ * values, then emit each instruction and spill its flagged destinations. */
+void brw_wm_emit( struct brw_wm_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   GLuint insn;
+
+   brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+
+   /* Check if any of the payload regs need to be spilled:
+    */
+   spill_values(c, c->payload.depth, 4);
+   spill_values(c, c->creg, c->nr_creg);
+   spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
+   
+
+   for (insn = 0; insn < c->nr_insns; insn++) {
+
+      struct brw_wm_instruction *inst = &c->instruction[insn];
+      struct brw_reg args[3][4], dst[4];
+      GLuint i, dst_flags;
+      
+      /* Get argument regs:
+       */
+      for (i = 0; i < 3; i++) 
+	 get_argument_regs(c, inst->src[i], args[i]);
+
+      /* Get dest regs:
+       */
+      for (i = 0; i < 4; i++)
+	 if (inst->dst[i])
+	    dst[i] = inst->dst[i]->hw_reg;
+	 else
+	    dst[i] = brw_null_reg();
+      
+      /* Flags
+       */
+      dst_flags = inst->writemask;
+      if (inst->saturate) 
+	 dst_flags |= SATURATE;
+
+      switch (inst->opcode) {
+	 /* Generated instructions for calculating triangle interpolants:
+	  */
+      case WM_PIXELXY:
+	 emit_pixel_xy(p, dst, dst_flags, args[0]);
+	 break;
+
+      case WM_DELTAXY:
+	 emit_delta_xy(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case WM_WPOSXY:
+	 emit_wpos_xy(p, dst, dst_flags, args[0]);
+	 break;
+
+      case WM_PIXELW:
+	 emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case WM_LINTERP:
+	 emit_linterp(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case WM_PINTERP:
+	 emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
+	 break;
+
+      case WM_CINTERP:
+	 emit_cinterp(p, dst, dst_flags, args[0]);
+	 break;
+
+      case WM_FB_WRITE:
+	 emit_fb_write(c, args[0], args[1], args[2]);
+	 break;
+
+	 /* Straightforward arithmetic:
+	  */
+      case OPCODE_ADD:
+	 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_FRC:
+	 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_FLR:
+	 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_DP3:	/*  */
+	 emit_dp3(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_DP4:
+	 emit_dp4(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_DPH:
+	 emit_dph(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_LRP:	/*  */
+	 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
+	 break;
+
+      case OPCODE_MAD:	
+	 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
+	 break;
+
+      case OPCODE_MOV:
+      case OPCODE_SWZ:
+	 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_MUL:
+	 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_XPD:
+	 emit_xpd(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+	 /* Higher math functions:
+	  */
+      case OPCODE_RCP:
+	 emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_RSQ:
+	 emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_SIN:
+	 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_COS:
+	 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_EX2:
+	 emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_LG2:
+	 emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_SCS:
+	 /* There is an scs math function, but it would need some
+	  * fixup for 16-element execution.
+	  */
+	 if (dst_flags & WRITEMASK_X)
+	    emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
+	 if (dst_flags & WRITEMASK_Y)
+	    emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
+	 break;
+
+      case OPCODE_POW:
+	 emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
+	 break;
+
+	 /* Comparisons:
+	  */
+      case OPCODE_CMP:
+	 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
+	 break;
+
+      case OPCODE_MAX:
+	 emit_max(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_MIN:
+	 emit_min(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_SLT:
+	 emit_slt(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_SGE:
+	 emit_sge(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_LIT:
+	 emit_lit(p, dst, dst_flags, args[0]);
+	 break;
+
+	 /* Texturing operations:
+	  */
+      case OPCODE_TEX:
+	 emit_tex(c, inst, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_TXB:
+	 emit_txb(c, inst, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_KIL:
+	 emit_kil(c, args[0]);
+	 break;
+
+      default:
+	 assert(0);
+      }
+      
+      for (i = 0; i < 4; i++)
+	if (inst->dst[i] && inst->dst[i]->spill_slot) 
+	   emit_spill(c, 
+		      inst->dst[i]->hw_reg, 
+		      inst->dst[i]->spill_slot);
+   }
+}
+
+
+
+
+
diff --git a/i965/brw_wm_fp.c b/i965/brw_wm_fp.c
new file mode 100644
index 0000000..dc57fd2
--- /dev/null
+++ b/i965/brw_wm_fp.c
@@ -0,0 +1,981 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+               
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+#include "brw_context.h"
+#include "brw_wm.h"
+#include "brw_util.h"
+
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "shader/prog_statevars.h"
+
+
+#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS /* index of the first temp get_temp() hands out */
+/* Shorthands for the four component indices: */
+#define X    0
+#define Y    1
+#define Z    2
+#define W    3
+
+
+static const char *wm_opcode_strings[] = {   /* NOTE(review): order presumably mirrors the WM_* opcode values - confirm */
+   "PIXELXY",
+   "DELTAXY",
+   "PIXELW",
+   "LINTERP",
+   "PINTERP",
+   "CINTERP",
+   "WPOSXY",
+   "FB_WRITE"
+};
+
+#if 0 /* compiled out */
+static const char *wm_file_strings[] = {   
+   "PAYLOAD"
+};
+#endif
+
+
+/***********************************************************************
+ * Source regs
+ */
+
+/* Build a source register for (file, idx) with SWIZZLE_NOOP and the
+ * relative-address, negate and abs fields all cleared. */
+static struct prog_src_register src_reg(GLuint file, GLuint idx)
+{
+   struct prog_src_register src;
+
+   src.File = file;
+   src.Index = idx;
+   src.Swizzle = SWIZZLE_NOOP;
+   src.RelAddr = src.NegateBase = src.Abs = src.NegateAbs = 0;
+   return src;
+}
+
+static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
+{
+   return src_reg(dst.File, dst.Index); /* only file and index carry over; the rest are src_reg() defaults */
+}
+
+static struct prog_src_register src_undef( void )
+{
+   return src_reg(PROGRAM_UNDEFINED, 0); /* placeholder recognised by src_is_undef() */
+}
+
+static GLboolean src_is_undef(struct prog_src_register src)
+{
+   return src.File == PROGRAM_UNDEFINED; /* only the file is tested; the index is ignored */
+}
+
+static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
+{
+   reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w); /* replaces, rather than composes with, the existing swizzle */
+   return reg; /* reg was passed by value, so the caller's copy is untouched */
+}
+
+static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
+{
+   return src_swizzle(reg, x, x, x, x); /* same component selected for all four channels */
+}
+
+
+/***********************************************************************
+ * Dest regs
+ */
+
+/* Build a destination register for (file, idx) with WRITEMASK_XYZW;
+ * the condition fields and padding are zeroed. */
+static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
+{
+   struct prog_dst_register dst;
+
+   dst.File = file;
+   dst.Index = idx;
+   dst.WriteMask = WRITEMASK_XYZW;
+   dst.CondMask = dst.CondSwizzle = dst.pad = dst.CondSrc = 0;
+   return dst;
+}
+
+static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
+{
+   reg.WriteMask &= mask; /* narrows the existing mask; never enables new channels */
+   return reg; /* reg was passed by value, so the caller's copy is untouched */
+}
+
+static struct prog_dst_register dst_undef( void )
+{
+   return dst_reg(PROGRAM_UNDEFINED, 0); /* destination counterpart of src_undef() */
+}
+
+
+
+/* Allocate the lowest free internal temp; bit N of c->fp_temp is temp N. */
+static struct prog_dst_register get_temp( struct brw_wm_compile *c )
+{
+   int bit = ffs( ~c->fp_temp );
+
+   if (!bit) {
+      _mesa_printf("%s: out of temporaries\n", __FILE__);
+      exit(1);
+   }
+   c->fp_temp |= 1u<<(bit-1);   /* unsigned so that bit 32 cannot overflow int */
+   return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
+}
+
+
+/* Free a temp from get_temp(): clear only bit (Index - FIRST_INTERNAL_TEMP). */
+static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
+{
+   c->fp_temp &= ~(1u << (temp.Index - FIRST_INTERNAL_TEMP));
+}
+
+/***********************************************************************
+ * Instructions 
+ */
+
+static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
+{
+   return &c->prog_instructions[c->nr_fp_insns++]; /* NOTE(review): next slot is claimed with no bounds check here - confirm capacity */
+}
+
+static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
+                                        const struct prog_instruction *inst0)
+{
+   struct prog_instruction *slot = get_fp_inst(c);   /* next free slot */
+   *slot = *inst0;                                   /* whole-struct copy */
+   return slot;
+}
+
+/* Append a new instruction, zero-filled apart from the opcode,
+ * destination, saturate mode, texture unit/target and three sources
+ * given here.  Returns the stored instruction. */
+static struct prog_instruction * emit_op(struct brw_wm_compile *c,
+                                       GLuint op,
+                                       struct prog_dst_register dest,
+                                       GLuint saturate,
+                                       GLuint tex_src_unit, GLuint tex_src_target,
+                                       struct prog_src_register src0,
+                                       struct prog_src_register src1,
+                                       struct prog_src_register src2 )
+{
+   struct prog_instruction *insn = get_fp_inst(c);
+
+   memset(insn, 0, sizeof(*insn));
+   insn->Opcode = op;
+   insn->DstReg = dest;
+   insn->SrcReg[0] = src0;
+   insn->SrcReg[1] = src1;
+   insn->SrcReg[2] = src2;
+   insn->SaturateMode = saturate;
+   insn->TexSrcUnit = tex_src_unit;
+   insn->TexSrcTarget = tex_src_target;
+   return insn;
+}
+   
+
+
+
+/***********************************************************************
+ * Special instructions for interpolation and other tasks
+ */
+
+/* Return the register holding the WM_PIXELXY result.  The instruction
+ * is emitted on first use and the result cached in c->pixel_xy.
+ */
+static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
+{
+   struct prog_dst_register pixel_xy;
+   struct prog_src_register payload_r0_depth;
+
+   if (!src_is_undef(c->pixel_xy))
+      return c->pixel_xy;
+
+   pixel_xy = get_temp(c);
+   payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
+
+   /* Emit the out calculations, and hold onto the results.
+    *
+    * pixel_xy.xy = PIXELXY payload[0];
+    */
+   emit_op(c, WM_PIXELXY, dst_mask(pixel_xy, WRITEMASK_XY),
+           0, 0, 0,
+           payload_r0_depth, src_undef(), src_undef());
+
+   c->pixel_xy = src_reg_from_dst(pixel_xy);
+   return c->pixel_xy;
+}
+
+/* Return the cached WM_DELTAXY result, emitting the instruction (and,
+ * through get_pixel_xy(), its input) on first use.
+ */
+static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
+{
+   struct prog_dst_register delta_xy;
+   struct prog_src_register pixel_xy, payload_r0_depth;
+
+   if (!src_is_undef(c->delta_xy))
+      return c->delta_xy;
+
+   delta_xy = get_temp(c);
+   pixel_xy = get_pixel_xy(c);
+   payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
+   /* deltas.xy = DELTAXY pixel_xy, payload[0] */
+   emit_op(c, WM_DELTAXY, dst_mask(delta_xy, WRITEMASK_XY),
+           0, 0, 0,
+           pixel_xy, payload_r0_depth, src_undef());
+
+   c->delta_xy = src_reg_from_dst(delta_xy);
+   return c->delta_xy;
+}
+
+/* Return the cached WM_PIXELW result, emitting the instruction (and,
+ * through get_delta_xy(), its inputs) on first use.
+ */
+static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
+{
+   struct prog_dst_register pixel_w;
+   struct prog_src_register deltas, interp_wpos;
+
+   if (!src_is_undef(c->pixel_w))
+      return c->pixel_w;
+
+   pixel_w = get_temp(c);
+   deltas = get_delta_xy(c);
+   interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
+
+   /* pixel_w.w = PIXELW payload.interp_wpos, deltas
+    */
+   emit_op(c, WM_PIXELW, dst_mask(pixel_w, WRITEMASK_W),
+           0, 0, 0,
+           interp_wpos, deltas, src_undef());
+
+   c->pixel_w = src_reg_from_dst(pixel_w);
+   return c->pixel_w;
+}
+
+/* Emit the instruction(s) that compute fragment input idx from its
+ * payload interpolation coefficients, then record idx in
+ * c->fp_interp_emitted.
+ */
+static void emit_interp( struct brw_wm_compile *c,
+                         GLuint idx )
+{
+   struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
+   struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
+   struct prog_src_register deltas = get_delta_xy(c);
+
+   /* Need to use PINTERP on attributes which have been
+    * multiplied by 1/W in the SF program, and LINTERP on those
+    * which have not:
+    */
+   if (idx == FRAG_ATTRIB_WPOS) {
+      /* Have to treat wpos.xy specially:
+       */
+      emit_op(c,
+              WM_WPOSXY,
+              dst_mask(dst, WRITEMASK_XY),
+              0, 0, 0,
+              get_pixel_xy(c),
+              src_undef(),
+              src_undef());
+
+      /* The remaining channels are linearly interpolated:
+       *
+       * PROGRAM_INPUT.attr.zw = INTERP payload.interp[attr].x, deltas.xyw
+       */
+      emit_op(c,
+              WM_LINTERP,
+              dst_mask(dst, WRITEMASK_ZW),
+              0, 0, 0,
+              interp,
+              deltas,
+              src_undef());
+   }
+   else if (idx == FRAG_ATTRIB_COL0 || idx == FRAG_ATTRIB_COL1) {
+      /* Flat shading takes the constant-interpolation opcode: */
+      if (c->key.flat_shade) {
+         emit_op(c,
+                 WM_CINTERP,
+                 dst,
+                 0, 0, 0,
+                 interp,
+                 src_undef(),
+                 src_undef());
+      }
+      else {
+         emit_op(c,
+                 WM_LINTERP,
+                 dst,
+                 0, 0, 0,
+                 interp,
+                 deltas,
+                 src_undef());
+      }
+   }
+   else {
+      /* Everything else goes through PINTERP, which also takes the
+       * pixel W value:
+       */
+      emit_op(c,
+              WM_PINTERP,
+              dst,
+              0, 0, 0,
+              interp,
+              deltas,
+              get_pixel_w(c));
+   }
+
+   c->fp_interp_emitted |= 1<<idx;
+}
+
+
+/***********************************************************************
+ * Hacks to extend the program parameter and constant lists.
+ */
+
+/* Add the fog parameters to the parameter list of the original
+ * program, rather than creating a new list.  Doesn't really do any
+ * harm and it's not as if the parameter handling isn't a big hack
+ * anyway.
+ */
+static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c, 
+                                                     GLint s0,
+                                                     GLint s1,
+                                                     GLint s2,
+                                                     GLint s3,
+                                                     GLint s4)
+{
+   struct gl_program_parameter_list *params = c->fp->program.Base.Parameters;
+   gl_state_index key[STATE_LENGTH];
+   GLuint slot = 0;
+
+   key[0] = s0;
+   key[1] = s1;
+   key[2] = s2;
+   key[3] = s3;
+   key[4] = s4;
+
+   /* Reuse an existing state reference with the same tokens, if any: */
+   while (slot < params->NumParameters) {
+      if (params->Parameters[slot].Type == PROGRAM_STATE_VAR &&
+	  !memcmp(params->Parameters[slot].StateIndexes, key, sizeof(key)))
+	 return src_reg(PROGRAM_STATE_VAR, slot);
+      slot++;
+   }
+
+   /* Not found: append it and refresh the cached state dependency flags. */
+   slot = _mesa_add_state_reference( params, key );
+   c->fp->param_state = params->StateFlags;
+   return src_reg(PROGRAM_STATE_VAR, slot);
+}
+
+
+static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, 
+						     GLfloat s0,
+						     GLfloat s1,
+						     GLfloat s2,
+						     GLfloat s3)
+{
+   struct gl_program_parameter_list *params = c->fp->program.Base.Parameters;
+   GLfloat vec[4];
+   GLuint slot;
+   GLuint swizzle;
+
+   vec[0] = s0;
+   vec[1] = s1;
+   vec[2] = s2;
+   vec[3] = s3;
+
+   /* Look for an identical constant first; without this search every
+    * recompilation would append another copy to the parameter list.
+    *
+    * XXX: hits are returned as PROGRAM_STATE_VAR, mimicking the mesa
+    * bug which puts all constants and parameters into that category.
+    */
+   for (slot = 0; slot < params->NumParameters; slot++) {
+      if (params->Parameters[slot].Type != PROGRAM_CONSTANT)
+	 continue;
+      if (memcmp(params->ParameterValues[slot], vec, sizeof(vec)) == 0)
+	 return src_reg(PROGRAM_STATE_VAR, slot);
+   }
+
+   /* XXX what about swizzle? */
+   slot = _mesa_add_unnamed_constant( params, vec, 4, &swizzle );
+   return src_reg(PROGRAM_STATE_VAR, slot);
+}
+
+
+
+/***********************************************************************
+ * Expand various instructions here to simpler forms.  
+ */
+static void precalc_dst( struct brw_wm_compile *c,
+			       const struct prog_instruction *inst )
+{
+   struct prog_src_register src0 = inst->SrcReg[0];
+   struct prog_src_register src1 = inst->SrcReg[1];
+   struct prog_dst_register dst = inst->DstReg;
+   /* Expand DST into simpler ops, one per group of written components: */
+   if (dst.WriteMask & WRITEMASK_Y) {      
+      /* dst.y = mul src0.y, src1.y
+       */
+      emit_op(c,
+	      OPCODE_MUL,
+	      dst_mask(dst, WRITEMASK_Y),
+	      inst->SaturateMode, 0, 0,
+	      src0,
+	      src1,
+	      src_undef());
+   }
+
+
+   if (dst.WriteMask & WRITEMASK_XZ) {
+      GLuint z = GET_SWZ(src0.Swizzle, Z);
+
+      /* dst.xz = swz src0.1zzz  (x gets the constant 1, z gets src0.z)
+       */
+      emit_op(c,
+	      OPCODE_SWZ,
+	      dst_mask(dst, WRITEMASK_XZ),
+	      inst->SaturateMode, 0, 0,
+	      src_swizzle(src0, SWIZZLE_ONE, z, z, z),
+	      src_undef(),
+	      src_undef());
+   }
+   if (dst.WriteMask & WRITEMASK_W) {
+      /* dst.w = mov src1.w
+       */
+      emit_op(c,
+	      OPCODE_MOV,
+	      dst_mask(dst, WRITEMASK_W),
+	      inst->SaturateMode, 0, 0,
+	      src1,
+	      src_undef(),
+	      src_undef());
+   }
+}
+
+
+static void precalc_lit( struct brw_wm_compile *c,
+			 const struct prog_instruction *inst )
+{
+   struct prog_src_register src0 = inst->SrcReg[0];
+   struct prog_dst_register dst = inst->DstReg;
+   /* Split LIT: x and w are the constant 1, only y/z need a real LIT op. */
+   if (dst.WriteMask & WRITEMASK_XW) {
+      /* dst.xw = swz src0.1111
+       */
+      emit_op(c,
+	      OPCODE_SWZ,
+	      dst_mask(dst, WRITEMASK_XW),
+	      0, 0, 0,
+	      src_swizzle1(src0, SWIZZLE_ONE),
+	      src_undef(),
+	      src_undef());
+   }
+
+
+   if (dst.WriteMask & WRITEMASK_YZ) {
+      emit_op(c,
+	      OPCODE_LIT,
+	      dst_mask(dst, WRITEMASK_YZ),
+	      inst->SaturateMode, 0, 0,
+	      src0,
+	      src_undef(),
+	      src_undef());
+   }
+}
+
+static void precalc_tex( struct brw_wm_compile *c,
+			 const struct prog_instruction *inst )
+{
+   struct prog_src_register coord;
+   struct prog_dst_register tmpcoord;
+   /* Emit TEX for inst, first fixing up cube / rectangle coordinates: */
+   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
+       struct prog_instruction *out;
+       struct prog_dst_register tmp0 = get_temp(c);
+       struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
+       struct prog_dst_register tmp1 = get_temp(c);
+       struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
+       struct prog_src_register src0 = inst->SrcReg[0];
+       /* coord = src0 * rcp(max(|src0.x|, |src0.y|, |src0.z|)) */
+       tmpcoord = get_temp(c);
+       coord = src_reg_from_dst(tmpcoord);
+
+       out = emit_op(c, OPCODE_MOV,
+                     tmpcoord,
+                     0, 0, 0,
+                     src0,
+                     src_undef(),
+                     src_undef());
+       out->SrcReg[0].NegateBase = 0;
+       out->SrcReg[0].Abs = 1;
+
+       emit_op(c, OPCODE_MAX,
+               tmp0,
+               0, 0, 0,
+               src_swizzle1(coord, X),
+               src_swizzle1(coord, Y),
+               src_undef());
+
+       emit_op(c, OPCODE_MAX,
+               tmp1,
+               0, 0, 0,
+               tmp0src,
+               src_swizzle1(coord, Z),
+               src_undef());
+
+       emit_op(c, OPCODE_RCP,
+               tmp0,
+               0, 0, 0,
+               tmp1src,
+               src_undef(),
+               src_undef());
+
+       emit_op(c, OPCODE_MUL,
+               tmpcoord,
+               0, 0, 0,
+               src0,
+               tmp0src,
+               src_undef());
+
+       release_temp(c, tmp0);
+       release_temp(c, tmp1);
+   } else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
+      struct prog_src_register scale = 
+	 search_or_add_param5( c, 
+			       STATE_INTERNAL, 
+			       STATE_TEXRECT_SCALE,
+			       inst->TexSrcUnit,
+			       0,0 );
+
+      tmpcoord = get_temp(c);
+
+      /* coord.xy   = MUL inst->SrcReg[0], { 1/width, 1/height }
+       */
+      emit_op(c,
+	      OPCODE_MUL,
+	      tmpcoord,
+	      0, 0, 0,
+	      inst->SrcReg[0],
+	      scale,
+	      src_undef());
+
+      coord = src_reg_from_dst(tmpcoord);
+   }
+   else {
+      coord = inst->SrcReg[0];
+   }
+
+   /* Need to emit YUV texture conversions by hand.  Probably need to
+    * do this here - the alternative is in brw_wm_emit.c, but the
+    * conversion requires allocating a temporary variable which we
+    * don't have the facility to do that late in the compilation.
+    */
+   if (!(c->key.yuvtex_mask & (1<<inst->TexSrcUnit))) {
+      emit_op(c, 
+	      OPCODE_TEX,
+	      inst->DstReg,
+	      inst->SaturateMode,
+	      inst->TexSrcUnit,
+	      inst->TexSrcTarget,
+	      coord,
+	      src_undef(),
+	      src_undef());
+   }
+   else {
+      /* 
+	 CONST C0 = { -.5, -.0625,  -.5, 1.164 }
+	 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
+	 UYV     = TEX ...
+	 UYV.xyz = ADD UYV,     C0
+	 UYV.y   = MUL UYV.y,   C0.w
+	 RGB.xyz = MAD UYV.xxz, C1,   UYV.y
+	 RGB.y   = MAD UYV.z,   C1.w, RGB.y
+      */
+      struct prog_dst_register dst = inst->DstReg;
+      struct prog_dst_register tmp = get_temp(c);
+      struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
+      struct prog_src_register C0 = search_or_add_const4f( c,  -.5, -.0625, -.5, 1.164 );
+      struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
+     
+      /* tmp     = TEX ...  (sampled with the fixed-up coord, as above)
+       */
+      emit_op(c, 
+	      OPCODE_TEX,
+	      tmp,
+	      inst->SaturateMode,
+	      inst->TexSrcUnit,
+	      inst->TexSrcTarget,
+	      coord,
+	      src_undef(),
+	      src_undef());
+
+      /* tmp.xyz =  ADD TMP, C0
+       */
+      emit_op(c,
+	      OPCODE_ADD,
+	      dst_mask(tmp, WRITEMASK_XYZ),
+	      0, 0, 0,
+	      tmpsrc,
+	      C0,
+	      src_undef());
+
+      /* YUV.y   = MUL YUV.y, C0.w
+       */
+      emit_op(c,
+	      OPCODE_MUL,
+	      dst_mask(tmp, WRITEMASK_Y),
+	      0, 0, 0,
+	      tmpsrc,
+	      src_swizzle1(C0, W),
+	      src_undef());
+
+      /* RGB.xyz = MAD YUV.xxz, C1, YUV.y
+       */
+      emit_op(c,
+	      OPCODE_MAD,
+	      dst_mask(dst, WRITEMASK_XYZ),
+	      0, 0, 0,
+	      src_swizzle(tmpsrc, X,X,Z,Z),
+	      C1,
+	      src_swizzle1(tmpsrc, Y));
+
+      /*  RGB.y   = MAD YUV.z, C1.w, RGB.y
+       */
+      emit_op(c,
+	      OPCODE_MAD,
+	      dst_mask(dst, WRITEMASK_Y),
+	      0, 0, 0,
+	      src_swizzle1(tmpsrc, Z),
+	      src_swizzle1(C1, W),
+	      src_swizzle1(src_reg_from_dst(dst), Y));
+
+      release_temp(c, tmp);
+   }
+   /* Both the cube and the rect path above allocated tmpcoord: */
+   if (inst->TexSrcTarget == TEXTURE_RECT_INDEX ||
+       inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
+      release_temp(c, tmpcoord);
+}
+
+
+static GLboolean projtex( struct brw_wm_compile *c,
+			  const struct prog_instruction *inst )
+{
+   const struct prog_src_register coord = inst->SrcReg[0];
+
+   /* Decide whether TXP really needs the divide by w.  Only the
+    * simplest cases are detected; we could (later) also detect code
+    * like RCP {1.0}, MUL x, {1.0}, and so on.  More complex cases
+    * typically only arise from user-provided fragment programs.
+    */
+
+   /* Never project cube maps: ut2004 gun rendering !?! */
+   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
+      return 0;
+
+   /* An unswizzled-w input not flagged in projtex_mask needs no divide: */
+   if (coord.File != PROGRAM_INPUT || GET_SWZ(coord.Swizzle, W) != W)
+      return 1;
+
+   return (c->key.projtex_mask & (1<<coord.Index)) != 0;
+}
+
+
+static void precalc_txp( struct brw_wm_compile *c,
+			       const struct prog_instruction *inst )
+{
+   struct prog_src_register src0 = inst->SrcReg[0];
+   /* Lower TXP to an optional divide-by-w followed by the TEX expansion: */
+   if (projtex(c, inst)) {
+      struct prog_dst_register tmp = get_temp(c);
+      struct prog_instruction tmp_inst;
+
+      /* tmp0.w = RCP inst.arg[0][3]
+       */
+      emit_op(c,
+	      OPCODE_RCP,
+	      dst_mask(tmp, WRITEMASK_W),
+	      0, 0, 0,
+	      src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
+	      src_undef(),
+	      src_undef());
+
+      /* tmp0.xyz =  MUL inst.arg[0], tmp0.wwww
+       */
+      emit_op(c,
+	      OPCODE_MUL,
+	      dst_mask(tmp, WRITEMASK_XYZ),
+	      0, 0, 0,
+	      src0,
+	      src_swizzle1(src_reg_from_dst(tmp), W),
+	      src_undef());
+
+      /* dst = precalc(TEX tmp0): reuse inst with its coordinate replaced
+       */
+      tmp_inst = *inst;
+      tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
+      precalc_tex(c, &tmp_inst);
+
+      release_temp(c, tmp);
+   }
+   else
+   {
+      /* dst = precalc(TEX src0)
+       */
+      precalc_tex(c, inst);
+   }
+}
+
+
+
+
+
+/***********************************************************************
+ * Add instructions to perform fog blending
+ */
+
+static void fog_blend( struct brw_wm_compile *c,
+			     struct prog_src_register fog_factor )
+{
+   struct prog_dst_register outcolor = dst_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
+   struct prog_src_register fogcolor = search_or_add_param5( c, STATE_FOG_COLOR, 0,0,0,0 );
+   /* Only xyz of the output color is rewritten; w is left untouched. */
+   /* color.xyz = LRP fog_factor.xxxx, output_color, fog_color */
+   
+   emit_op(c, 
+	   OPCODE_LRP,
+	   dst_mask(outcolor, WRITEMASK_XYZ),
+	   0, 0, 0,
+	   fog_factor,
+	   src_reg_from_dst(outcolor),
+	   fogcolor);
+}
+
+
+
+/* Simplest fog mode: the interpolated fog coordinate is used directly
+ * as the fog blend factor.
+ */
+static void fog_interpolated( struct brw_wm_compile *c )
+{
+   const GLuint fogc_bit = 1<<FRAG_ATTRIB_FOGC;
+   struct prog_src_register factor = src_reg(PROGRAM_INPUT, FRAG_ATTRIB_FOGC);
+
+   if ((c->fp_interp_emitted & fogc_bit) == 0)
+      emit_interp(c, FRAG_ATTRIB_FOGC);   /* make sure FOGC gets interpolated */
+   fog_blend( c, src_swizzle1(factor, GET_SWZ(factor.Swizzle,X)));
+}
+
+static void emit_fog( struct brw_wm_compile *c )
+{
+   /* Nothing to do unless the fragment program asked for fog. */
+   if (c->fp->program.FogOption) {
+      /* Use the interpolated fog coordinate; it is the only mode
+       * implemented so far.
+       *
+       * TODO: per-pixel fog
+       */
+      fog_interpolated( c );
+   }
+}
+
+static void emit_fb_write( struct brw_wm_compile *c )
+{
+   struct prog_src_register outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
+   struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
+   struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR);
+   /* The write has no destination register: undefined dst, empty mask. */
+   emit_op(c,
+	   WM_FB_WRITE,
+	   dst_mask(dst_undef(),0),
+	   0, 0, 0,
+	   outcolor,
+	   payload_r0_depth,
+	   outdepth);
+}
+
+
+
+
+/***********************************************************************
+ * Emit INTERP instructions ahead of first use of each attrib.
+ */
+
+static void validate_src_regs( struct brw_wm_compile *c,
+			       const struct prog_instruction *inst )
+{
+   const GLuint count = brw_wm_nr_args( inst->Opcode );
+   GLuint arg;
+   /* Interpolate every INPUT attribute this instruction reads, once each: */
+   for (arg = 0; arg < count; arg++) {
+      GLuint attr;
+      if (inst->SrcReg[arg].File != PROGRAM_INPUT)
+	 continue;
+      attr = inst->SrcReg[arg].Index;
+      if ((c->fp_interp_emitted & (1<<attr)) == 0)
+	 emit_interp(c, attr);
+   }
+}
+	 
+
+
+static void print_insns( const struct prog_instruction *insn,
+			 GLuint nr )
+{
+   const struct prog_instruction *end = insn + nr;
+   GLuint line = 0;
+
+   /* Number each line; opcodes in [MAX_OPCODE, MAX_WM_OPCODE) get their
+    * name from wm_opcode_strings[], anything above prints as UNKNOWN: */
+   for (; insn != end; insn++, line++) {
+      _mesa_printf("%3d: ", line);
+      if (insn->Opcode >= MAX_WM_OPCODE)
+	 _mesa_printf("UNKNOWN\n");
+      else if (insn->Opcode >= MAX_OPCODE)
+	 _mesa_print_alu_instruction(insn,
+				     wm_opcode_strings[insn->Opcode - MAX_OPCODE],
+				     3);
+      else
+	 _mesa_print_instruction(insn);
+   }
+}
+
+void brw_wm_pass_fp( struct brw_wm_compile *c )
+{
+   struct brw_fragment_program *fp = c->fp;
+   GLuint insn;
+   /* Lower fp's instructions to simpler ops, then add fog and the fb write. */
+   if (INTEL_DEBUG & DEBUG_WM) {
+      _mesa_printf("\n\n\npre-fp:\n");
+      _mesa_print_program(&fp->program.Base); 
+      _mesa_printf("\n");
+   }
+
+   c->pixel_xy = src_undef();
+   c->delta_xy = src_undef();
+   c->pixel_w = src_undef();
+   c->nr_fp_insns = 0;
+
+   /* Emit preamble instructions:
+    */
+
+
+   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
+      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
+      struct prog_instruction *out;
+
+      /* Check for INPUT values, emit INTERP instructions where
+       * necessary:
+       */
+      validate_src_regs(c, inst);
+
+      /* Rewrite opcodes that have a simpler equivalent form: */
+      switch (inst->Opcode) {
+      case OPCODE_SWZ: 
+	 out = emit_insn(c, inst);
+	 out->Opcode = OPCODE_MOV;
+	 break;
+	 
+      case OPCODE_ABS:
+	 out = emit_insn(c, inst);
+	 out->Opcode = OPCODE_MOV;
+	 out->SrcReg[0].NegateBase = 0;
+	 out->SrcReg[0].Abs = 1;
+	 break;
+
+      case OPCODE_SUB: 
+	 out = emit_insn(c, inst);
+	 out->Opcode = OPCODE_ADD;
+	 out->SrcReg[1].NegateBase ^= 0xf;	/* a - b == a + (-b) */
+	 break;
+
+      case OPCODE_SCS: 
+	 out = emit_insn(c, inst);
+	 /* This should probably be done in the parser. 
+	  */
+	 out->DstReg.WriteMask &= WRITEMASK_XY;
+	 break;
+	 
+      case OPCODE_DST:
+	 precalc_dst(c, inst);
+	 break;
+
+      case OPCODE_LIT:
+	 precalc_lit(c, inst);
+	 break;
+     
+      case OPCODE_TXP:
+	 precalc_txp(c, inst);
+	 break;
+
+      case OPCODE_XPD: 
+	 out = emit_insn(c, inst);
+	 /* This should probably be done in the parser. 
+	  */
+	 out->DstReg.WriteMask &= WRITEMASK_XYZ;
+	 break;
+
+      case OPCODE_KIL: 
+	 out = emit_insn(c, inst);
+	 /* This should probably be done in the parser. 
+	  */
+	 out->DstReg.WriteMask = 0;
+	 break;
+
+      case OPCODE_END:
+      case OPCODE_PRINT:
+	 break;
+	 
+      default:
+	 emit_insn(c, inst);
+	 break;
+      }
+   }
+   /* Program epilogue: fog blend (if any), then the framebuffer write: */
+   emit_fog(c);
+   emit_fb_write(c);
+
+
+   if (INTEL_DEBUG & DEBUG_WM) {
+      _mesa_printf("\n\n\npass_fp:\n");
+      print_insns( c->prog_instructions, c->nr_fp_insns );
+      _mesa_printf("\n");
+   }
+}
+
diff --git a/i965/brw_wm_iz.c b/i965/brw_wm_iz.c
new file mode 100644
index 0000000..ec2b976
--- /dev/null
+++ b/i965/brw_wm_iz.c
@@ -0,0 +1,216 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+                
+
+#include "mtypes.h"
+#include "brw_wm.h"
+
+
+#undef P			/* promoted depth */
+#undef C			/* computed */
+#undef N			/* non-promoted? */
+/* Values for the 'mode' field of the wm_iz_table[] rows: */
+#define P 0
+#define C 1
+#define N 2
+
+const struct {
+   GLuint mode:2;
+   GLuint sd_present:1;
+   GLuint sd_to_rt:1;
+   GLuint dd_present:1;
+   GLuint ds_present:1;
+} wm_iz_table[IZ_BIT_MAX] =
+{
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 0, 1, 0, 0 }, 
+ { C, 0, 1, 0, 0 }, 
+ { C, 1, 1, 0, 0 }, 
+ { C, 1, 1, 0, 0 }, 
+ { C, 0, 1, 0, 0 }, 
+ { C, 0, 1, 0, 0 }, 
+ { C, 1, 1, 1, 0 }, 
+ { C, 1, 1, 1, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { C, 1, 1, 1, 0 }, 
+ { C, 1, 1, 1, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 0, 1, 0, 0 }, 
+ { C, 0, 1, 0, 0 }, 
+ { C, 1, 1, 0, 0 }, 
+ { C, 1, 1, 0, 0 }, 
+ { C, 0, 1, 0, 0 }, 
+ { C, 0, 1, 0, 0 }, 
+ { C, 1, 1, 1, 0 }, 
+ { C, 1, 1, 1, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { C, 1, 1, 1, 0 }, 
+ { C, 1, 1, 1, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { C, 0, 0, 0, 1 }, 
+ { C, 0, 0, 0, 1 }, 
+ { C, 0, 1, 0, 1 }, 
+ { C, 0, 1, 0, 1 }, 
+ { C, 1, 1, 0, 1 }, 
+ { C, 1, 1, 0, 1 }, 
+ { C, 0, 1, 0, 1 }, 
+ { C, 0, 1, 0, 1 }, 
+ { C, 1, 1, 1, 1 }, 
+ { C, 1, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { C, 1, 1, 1, 1 }, 
+ { C, 1, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { C, 0, 0, 0, 1 }, 
+ { C, 0, 0, 0, 1 }, 
+ { C, 0, 1, 0, 1 }, 
+ { C, 0, 1, 0, 1 }, 
+ { C, 1, 1, 0, 1 }, 
+ { C, 1, 1, 0, 1 }, 
+ { C, 0, 1, 0, 1 }, 
+ { C, 0, 1, 0, 1 }, 
+ { C, 1, 1, 1, 1 }, 
+ { C, 1, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { C, 1, 1, 1, 1 }, 
+ { C, 1, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { N, 1, 1, 0, 0 }, 
+ { N, 0, 1, 0, 0 }, 
+ { N, 0, 1, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { N, 1, 1, 0, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { N, 1, 1, 0, 0 }, 
+ { N, 0, 1, 0, 0 }, 
+ { N, 0, 1, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { N, 1, 1, 0, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { N, 1, 1, 0, 1 }, 
+ { N, 0, 1, 0, 1 }, 
+ { N, 0, 1, 0, 1 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 0, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { P, 0, 0, 0, 0 }, 
+ { N, 1, 1, 0, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 0, 0, 0, 1 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 0, 1, 0, 1 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 1, 1, 0, 1 }, 
+ { C, 0, 1, 0, 1 }, 
+ { C, 0, 1, 0, 1 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 1, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 1, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 } 
+};
+
+void brw_wm_lookup_iz( GLuint line_aa,
+		       GLuint lookup,
+		       struct brw_wm_prog_key *key )
+{
+   GLuint reg = 2;
+   /* 'reg' is the next register number to hand out; numbering starts at 2. */
+   assert (lookup < IZ_BIT_MAX);
+      
+   if (lookup & IZ_PS_COMPUTES_DEPTH_BIT)
+      key->computes_depth = 1;
+
+   if (wm_iz_table[lookup].sd_present) {
+      key->source_depth_reg = reg;
+      reg += 2;
+   }
+
+   if (wm_iz_table[lookup].sd_to_rt)
+      key->source_depth_to_render_target = 1;
+   /* One register is reserved if the table asks for it or line AA may run: */
+   if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) {
+      key->aa_dest_stencil_reg = reg;
+      key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present &&
+				      line_aa == AA_SOMETIMES);
+      reg++;
+   }
+
+   if (wm_iz_table[lookup].dd_present) {
+      key->dest_depth_reg = reg;
+      reg+=2;
+   }
+   /* Register count halved, rounding up: */
+   key->nr_depth_regs = (reg+1)/2;
+}
+
diff --git a/i965/brw_wm_pass0.c b/i965/brw_wm_pass0.c
new file mode 100644
index 0000000..00f6f6b
--- /dev/null
+++ b/i965/brw_wm_pass0.c
@@ -0,0 +1,462 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+                 
+
+#include "brw_context.h"
+#include "brw_wm.h"
+#include "shader/prog_parameter.h"
+
+
+
+/***********************************************************************
+ */
+
+static struct brw_wm_ref *get_ref( struct brw_wm_compile *c )
+{  /* Hand out the next unused entry of c->refs[]. */
+   assert(c->nr_refs < BRW_WM_MAX_REF);
+   return &c->refs[c->nr_refs++];
+}
+
+static struct brw_wm_value *get_value( struct brw_wm_compile *c)
+{  /* Hand out the next unused entry of c->vreg[]. */
+   assert(c->nr_vreg < BRW_WM_MAX_VREG);  /* bound the counter we index with */
+   return &c->vreg[c->nr_vreg++];
+}
+
+static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c )
+{  /* Hand out the next unused entry of c->instruction[]. */
+   assert(c->nr_insns < BRW_WM_MAX_INSN);
+   return &c->instruction[c->nr_insns++];
+}
+
+/***********************************************************************
+ */
+
+static void pass0_init_undef( struct brw_wm_compile *c)
+{  /* Point c->undef_ref at c->undef_value, with no use-chain history. */
+   struct brw_wm_ref *ref = &c->undef_ref;
+   ref->value = &c->undef_value;
+   ref->hw_reg = brw_vec8_grf(0, 0);
+   ref->insn = 0;
+   ref->prevuse = NULL;
+}
+
+static void pass0_set_fpreg_value( struct brw_wm_compile *c,
+				   GLuint file,
+				   GLuint idx,
+				   GLuint component,
+				   struct brw_wm_value *value )
+{  /* Bind fp register (file, idx, component) to a freshly allocated ref of 'value'. */
+   struct brw_wm_ref *ref = get_ref(c);
+   ref->value = value;
+   ref->hw_reg = brw_vec8_grf(0, 0);
+   ref->insn = 0;
+   ref->prevuse = NULL;
+   c->pass0_fp_reg[file][idx][component] = ref;
+}
+
+static void pass0_set_fpreg_ref( struct brw_wm_compile *c,
+				 GLuint file,
+				 GLuint idx,
+				 GLuint component,
+				 const struct brw_wm_ref *src_ref )
+{  /* Bind fp register (file, idx, component) to an already existing ref. */
+   c->pass0_fp_reg[file][idx][component] = src_ref;
+}
+
+static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, 
+					       const GLfloat *param_ptr )
+{  /* Allocate the next param slot for param_ptr; NULL (+ error flag) if full. */
+   GLuint i = c->prog_data.nr_params;
+   
+   if (i >= BRW_WM_MAX_PARAM) {
+      _mesa_printf("%s: out of params\n", __FUNCTION__);
+      c->prog_data.error = 1;
+      return NULL;
+   }
+   else {
+      struct brw_wm_ref *ref = get_ref(c);
+      /* Count the slot only now, so nr_params never exceeds the limit: */
+      c->prog_data.param[c->prog_data.nr_params++] = param_ptr;
+      c->nr_creg = (i+16)/16;
+
+      /* Push the offsets into hw_reg.  These will be added to the
+       * real register numbers once one is allocated in pass2.
+       */
+      ref->hw_reg = brw_vec1_grf((i&8)?1:0, i%8);
+      ref->value = &c->creg[i/16];
+      ref->insn = 0;
+      ref->prevuse = NULL;
+      
+      return ref;
+   }
+}
+
+
+static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c,
+					       const GLfloat *constval )
+{
+   GLuint slot;
+
+   /* Reuse the ref of a previously added constant with the same value,
+    * so each distinct constant only occupies one parameter slot:
+    */
+   for (slot = 0; slot < c->nr_constrefs; slot++) {
+      if (c->constref[slot].constval == *constval) 
+	 return c->constref[slot].ref;
+   }
+
+   /* Otherwise a new entry is needed; fail if the table is full:
+    */
+   if (c->nr_constrefs >= BRW_WM_MAX_CONST) {
+      _mesa_printf("%s: out of constrefs\n", __FUNCTION__);
+      c->prog_data.error = 1;
+      return NULL;
+   }
+
+   /* A constant is a special type of parameter.  After the failed
+    * search, slot == nr_constrefs, i.e. the entry being claimed:
+    */
+   c->nr_constrefs++;
+   c->constref[slot].constval = *constval;
+   c->constref[slot].ref = get_param_ref(c, constval);
+
+   return c->constref[slot].ref;
+}
+
+
+/* Look up the ref currently bound to an fp register component, creating
+ * param/const refs on first use; falls back to undef_ref, never NULL. */
+static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
+					       GLuint file,
+					       GLuint idx,
+					       GLuint component )
+{
+   const struct brw_wm_ref *ref = c->pass0_fp_reg[file][idx][component];
+   /* Nothing bound yet: parameter-like files get a ref created on demand. */
+   if (!ref) {
+      switch (file) {
+      case PROGRAM_INPUT:
+      case PROGRAM_PAYLOAD:
+      case PROGRAM_TEMPORARY:
+      case PROGRAM_OUTPUT:
+	 break;
+
+      case PROGRAM_LOCAL_PARAM:
+	 ref = get_param_ref(c, &c->fp->program.Base.LocalParams[idx][component]);
+	 break;
+
+      case PROGRAM_ENV_PARAM:
+	 ref = get_param_ref(c, &c->env_param[idx][component]);
+	 break;
+
+      case PROGRAM_STATE_VAR:
+      case PROGRAM_NAMED_PARAM: {
+	 struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters;
+	 
+	 /* There's something really hokey about parameters parsed in
+	  * arb programs - they all end up in here, whether they be
+	  * state values, parameters or constants.  This duplicates the
+	  * structure above & also seems to subvert the limits set for
+	  * each type of constant/param.
+	  */ 
+	 switch (plist->Parameters[idx].Type) {
+	 case PROGRAM_NAMED_PARAM:
+	 case PROGRAM_CONSTANT:
+	    /* These are invariant:
+	     */
+	    ref = get_const_ref(c, &plist->ParameterValues[idx][component]);
+	    break;
+	    
+	 case PROGRAM_STATE_VAR:
+	    /* These may change from run to run:
+	     */
+	    ref = get_param_ref(c, &plist->ParameterValues[idx][component] );
+	    break;
+
+	 default:
+	    assert(0);
+	    break;
+	 }
+	 break;
+      }
+
+      default:
+	 assert(0);
+	 break;
+      }
+      /* Remember the result (possibly still NULL) for later lookups: */
+      c->pass0_fp_reg[file][idx][component] = ref;
+   }
+
+   if (!ref)
+      ref = &c->undef_ref;
+
+   return ref;
+}
+
+
+
+
+/***********************************************************************
+ * Straight translation to internal instruction format
+ */
+
+static void pass0_set_dst( struct brw_wm_compile *c,
+			   struct brw_wm_instruction *out,		     
+			   const struct prog_instruction *inst,		     
+			   GLuint writemask )
+{
+   GLuint chan;
+
+   /* Each written channel gets a new value, which is also recorded as
+    * the current contents of that destination register component: */
+   for (chan = 0; chan < 4; chan++) {
+      if ((writemask & (1<<chan)) == 0)
+	 continue;
+      out->dst[chan] = get_value(c);
+      pass0_set_fpreg_value(c, inst->DstReg.File, inst->DstReg.Index,
+			    chan, out->dst[chan]);
+   }
+   out->writemask = writemask;
+}
+
+
+static void pass0_set_dst_scalar( struct brw_wm_compile *c,
+				  struct brw_wm_instruction *out,		     
+				  const struct prog_instruction *inst,		     
+				  GLuint writemask )
+{  /* Destination setup for opcodes whose result is one replicated scalar. */
+   if (writemask) {
+      const struct prog_dst_register *dst = &inst->DstReg;
+      GLuint i;
+
+      /* Compute only the first (X) value; it is stored as out->dst[0]:
+       */
+      out->writemask = WRITEMASK_X;
+      out->dst[0] = get_value(c);
+
+      /* Update our tracking register file for all the components in
+       * writemask, binding each of them to that single value:
+       */
+      for (i = 0; i < 4; i++) {
+	 if (writemask & (1<<i)) {
+	    pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[0]);
+	 }
+      }
+   }
+   else
+      out->writemask = 0;
+}
+
+
+
+static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c,
+						    struct prog_src_register src,
+						    GLuint i )
+{
+   GLuint component = GET_SWZ(src.Swizzle,i);
+   const struct brw_wm_ref *src_ref;
+   static const GLfloat const_zero = 0.0;
+   static const GLfloat const_one = 1.0;
+
+   /* Resolve channel i of src (after swizzling) to a ref; never NULL. */
+   if (component == SWIZZLE_ZERO) 
+      src_ref = get_const_ref(c, &const_zero);
+   else if (component == SWIZZLE_ONE) 
+      src_ref = get_const_ref(c, &const_one);
+   else 
+      src_ref = pass0_get_reg(c, src.File, src.Index, component);
+   /* get_const_ref() returns NULL when out of space; callers dereference. */
+   return src_ref ? src_ref : &c->undef_ref;
+}
+
+
+static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c,
+				       struct prog_src_register src,
+				       GLuint i,
+				       struct brw_wm_instruction *insn)
+{
+   const struct brw_wm_ref *ref = get_fp_src_reg_ref(c, src, i);
+   struct brw_wm_ref *newref = get_ref(c);
+   /* Each use gets its own copy of the ref, so modifiers can differ per use: */
+   newref->value = ref->value;
+   newref->hw_reg = ref->hw_reg;
+   /* With an instruction, link this use at the head of the value's use chain: */
+   if (insn) { 
+      newref->insn = insn - c->instruction;
+      newref->prevuse = newref->value->lastuse;
+      newref->value->lastuse = newref;
+   }
+   /* Source modifiers: per-channel negate toggles; abs clears any negate. */
+   if (src.NegateBase & (1<<i)) 
+      newref->hw_reg.negate ^= 1;
+	    
+   if (src.Abs) {
+      newref->hw_reg.negate = 0;
+      newref->hw_reg.abs = 1;
+   }
+
+   return newref;
+}
+
+
+
+static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c,
+						  const struct prog_instruction *inst )
+{
+   struct brw_wm_instruction *out = get_instruction(c);
+   GLuint writemask = inst->DstReg.WriteMask;
+   GLuint nr_args = brw_wm_nr_args(inst->Opcode);
+   GLuint i, j;
+   /* Fill a newly allocated brw_wm_instruction from inst and return it. */
+   /* Copy some data out of the instruction
+    */
+   out->opcode = inst->Opcode;
+   out->saturate = (inst->SaturateMode != SATURATE_OFF);
+   out->tex_unit = inst->TexSrcUnit;
+   out->tex_idx = inst->TexSrcTarget;
+
+   /* Args: one ref per source channel, linked to this instruction
+    */
+   for (i = 0; i < nr_args; i++) {
+      for (j = 0; j < 4; j++) {
+	 out->src[i][j] = get_new_ref(c, inst->SrcReg[i], j, out);
+      }
+   }
+
+   /* Dst: set up only after the sources have been resolved
+    */
+   if (brw_wm_is_scalar_result(out->opcode)) 
+      pass0_set_dst_scalar(c, out, inst, writemask);
+   else 
+      pass0_set_dst(c, out, inst, writemask);
+
+   return out;
+}
+
+
+
+/***********************************************************************
+ * Optimize moves and swizzles away:
+ */ 
+static void pass0_precalc_mov( struct brw_wm_compile *c,
+			       const struct prog_instruction *inst )
+{
+   const struct prog_dst_register *dst = &inst->DstReg;
+   GLuint writemask = inst->DstReg.WriteMask;
+   struct brw_wm_ref *refs[4] = { NULL, NULL, NULL, NULL };
+   GLuint i;
+   /* Get the effect of a MOV by manipulating our register table.  Fetch all
+    * source refs before rebinding, so in-place swizzles such as
+    * "MOV t, t.yxzw" read the old bindings: */
+   for (i = 0; i < 4; i++)
+      if (writemask & (1<<i)) refs[i] = get_new_ref(c, inst->SrcReg[0], i, NULL);
+   for (i = 0; i < 4; i++)
+      if (writemask & (1<<i))
+	 pass0_set_fpreg_ref( c, dst->File, dst->Index, i, refs[i]);
+}
+
+
+/* Initialize payload "registers": bind the PAYLOAD_DEPTH components and
+ * component 0 of each per-attribute payload entry to c->payload values. */
+static void pass0_init_payload( struct brw_wm_compile *c )
+{
+   GLuint i;
+
+   for (i = 0; i < 4; i++) {
+      GLuint j = i >= c->key.nr_depth_regs ? 0 : i;   /* out of range -> depth[0] */
+      pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, 
+			     &c->payload.depth[j] );
+   }
+
+#if 0
+   /* This seems to be an alternative to the INTERP_WPOS stuff I do
+    * elsewhere:
+    */
+   if (c->key.source_depth_reg)
+      pass0_set_fpreg_value(c, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, 2,
+			    &c->payload.depth[c->key.source_depth_reg/2]);
+#endif
+   
+   for (i = 0; i < FRAG_ATTRIB_MAX; i++)
+      pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, i, 0, 
+			     &c->payload.input_interp[i] );      
+}
+
+/***********************************************************************
+ * PASS 0
+ *
+ * Work forwards to give each calculated value a unique number.  Where
+ * an instruction produces duplicate values (eg DP3), all are given
+ * the same number.
+ *
+ * Translate away swizzling and eliminate non-saturating moves.
+ */
+void brw_wm_pass0( struct brw_wm_compile *c )
+{
+   GLuint insn;
+
+   c->nr_vreg = 0;
+   c->nr_insns = 0;
+   /* Seed the register table before walking the instructions: */
+   pass0_init_undef(c);
+   pass0_init_payload(c);
+
+   for (insn = 0; insn < c->nr_fp_insns; insn++) {
+      const struct prog_instruction *inst = &c->prog_instructions[insn];
+
+
+      /* Optimize away moves, otherwise emit translated instruction:
+       */      
+      switch (inst->Opcode) {
+      case OPCODE_MOV: 
+      case OPCODE_SWZ: 
+	 if (!inst->SaturateMode) {
+	    pass0_precalc_mov(c, inst);
+	 }
+	 else {
+	    translate_insn(c, inst);	/* saturating moves stay real instructions */
+	 }
+	 break;
+	 
+
+      default:
+	 translate_insn(c, inst);
+	 break;
+      }
+   }
+ 
+   if (INTEL_DEBUG & DEBUG_WM) {
+      brw_wm_print_program(c, "pass0");
+   }
+}
+
diff --git a/i965/brw_wm_pass1.c b/i965/brw_wm_pass1.c
new file mode 100644
index 0000000..d668def
--- /dev/null
+++ b/i965/brw_wm_pass1.c
@@ -0,0 +1,275 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+                  
+
+#include "brw_context.h"
+#include "brw_wm.h"
+
+
+static GLuint get_tracked_mask(struct brw_wm_compile *c,
+                               struct brw_wm_instruction *inst)
+{
+   GLuint chan;
+
+   /* Drop each written channel whose value does not reach an output. */
+   for (chan = 0; chan < 4; chan++) {
+      const GLuint bit = 1 << chan;
+      if (!(inst->writemask & bit) || inst->dst[chan]->contributes_to_output)
+         continue;
+      inst->writemask &= ~bit;
+      inst->dst[chan] = 0;
+   }
+   return inst->writemask;
+}
+
+/* Remove a reference from a value's usage chain.
+ */
+static void unlink_ref(struct brw_wm_ref *ref)
+{
+   /* Follow the chain of links, starting at the value's lastuse, to
+    * the one that points at ref and make it skip over ref.  As in a
+    * plain list walk, ref is assumed to be on the chain.
+    */
+   struct brw_wm_ref **link = &ref->value->lastuse;
+
+   while (*link != ref)
+      link = &(*link)->prevuse;
+   *link = ref->prevuse;
+}
+
+static void track_arg(struct brw_wm_compile *c,
+                      struct brw_wm_instruction *inst,
+                      GLuint arg,
+                      GLuint readmask)
+{
+   GLuint chan;
+
+   /* Read channels mark their value live; unread ones lose the ref. */
+   for (chan = 0; chan < 4; chan++) {
+      struct brw_wm_ref *src = inst->src[arg][chan];
+      if (!src)
+         continue;
+      if (!(readmask & (1 << chan))) {
+         unlink_ref(src);
+         inst->src[arg][chan] = NULL;
+      } else
+         src->value->contributes_to_output = 1;
+   }
+}
+
+static GLuint get_texcoord_mask( GLuint tex_idx )
+{
+   /* Coordinate components associated with each texture target index. */
+   if (tex_idx == TEXTURE_1D_INDEX)
+      return WRITEMASK_X;
+   if (tex_idx == TEXTURE_2D_INDEX || tex_idx == TEXTURE_RECT_INDEX)
+      return WRITEMASK_XY;
+   if (tex_idx == TEXTURE_3D_INDEX || tex_idx == TEXTURE_CUBE_INDEX)
+      return WRITEMASK_XYZ;
+   return 0;
+}
+
+/* Step two: Basically this is dead code elimination.  
+ *
+ * Iterate backwards over instructions, noting which values
+ * contribute to the final result.  Adjust writemasks to only
+ * calculate these values.
+ */
+void brw_wm_pass1( struct brw_wm_compile *c )
+{
+   GLint insn;	/* signed, so the countdown can stop once it drops below zero */
+
+   for (insn = c->nr_insns-1; insn >= 0; insn--) {
+      struct brw_wm_instruction *inst = &c->instruction[insn];
+      GLuint writemask;
+      GLuint read0, read1, read2;	/* channels read from args 0, 1 and 2 */
+      /* KIL is handled ahead of the liveness test below, so it is always kept. */
+      if (inst->opcode == OPCODE_KIL) {
+	 track_arg(c, inst, 0, WRITEMASK_XYZW); /* All args contribute to final */
+	 continue;
+      }
+      /* The framebuffer write is likewise handled before the liveness test. */
+      if (inst->opcode == WM_FB_WRITE) {
+	 track_arg(c, inst, 0, WRITEMASK_XYZW); 
+	 track_arg(c, inst, 1, WRITEMASK_XYZW); 
+	 if (c->key.source_depth_to_render_target &&
+	     c->key.computes_depth)
+	    track_arg(c, inst, 2, WRITEMASK_Z); /* arg 2: Z only, and only in this case */
+	 else
+	    track_arg(c, inst, 2, 0); /* otherwise every ref of arg 2 is dropped */
+	 continue;
+      }
+
+      /* Lookup all the registers which were written by this
+       * instruction and get a mask of those that contribute to the output:
+       */
+      writemask = get_tracked_mask(c, inst);
+      if (!writemask) {
+	 GLuint arg;	/* nothing written is live: release every source ref */
+	 for (arg = 0; arg < 3; arg++)
+	    track_arg(c, inst, arg, 0);
+	 continue;
+      }
+
+      read0 = 0;
+      read1 = 0;
+      read2 = 0;
+
+      /* Mark all inputs which contribute to the marked outputs:
+       */
+      switch (inst->opcode) {
+      case OPCODE_ABS:
+      case OPCODE_FLR:
+      case OPCODE_FRC:
+      case OPCODE_MOV:
+	 read0 = writemask;	/* one source, read exactly where written */
+	 break;
+
+      case OPCODE_SUB:
+      case OPCODE_SLT:
+      case OPCODE_SGE:
+      case OPCODE_ADD:
+      case OPCODE_MAX:
+      case OPCODE_MIN:
+      case OPCODE_MUL:
+	 read0 = writemask;	/* two sources, read exactly where written */
+	 read1 = writemask;
+	 break;
+
+      case OPCODE_MAD:
+      case OPCODE_CMP:
+      case OPCODE_LRP:
+	 read0 = writemask;	/* three sources, read exactly where written */
+	 read1 = writemask;
+	 read2 = writemask;
+	 break;
+
+      case OPCODE_XPD: 
+	 if (writemask & WRITEMASK_X) read0 |= WRITEMASK_YZ;	/* each result channel ... */
+	 if (writemask & WRITEMASK_Y) read0 |= WRITEMASK_XZ;	/* ... reads the other two */
+	 if (writemask & WRITEMASK_Z) read0 |= WRITEMASK_XY;
+	 read1 = read0;	/* both sources need the same channels */
+	 break;
+
+      case OPCODE_COS:
+      case OPCODE_EX2:
+      case OPCODE_LG2:
+      case OPCODE_RCP:
+      case OPCODE_RSQ:
+      case OPCODE_SIN:
+      case OPCODE_SCS:
+      case WM_CINTERP:
+      case WM_PIXELXY:
+	 read0 = WRITEMASK_X;	/* only X of arg 0, whatever the writemask */
+	 break;
+
+      case OPCODE_POW:
+	 read0 = WRITEMASK_X;
+	 read1 = WRITEMASK_X;
+	 break;
+
+      case OPCODE_TEX:
+	 read0 = get_texcoord_mask(inst->tex_idx);	/* mask chosen by texture target */
+
+	 if (c->key.shadowtex_mask & (1<<inst->tex_unit))
+	    read0 |= WRITEMASK_Z;	/* plus Z when the unit is in shadowtex_mask */
+	 break;
+
+      case OPCODE_TXB:
+	 /* Shadow ignored for txb.
+	  */
+	 read0 = get_texcoord_mask(inst->tex_idx) | WRITEMASK_W;	/* W always read; presumably the bias - TODO confirm */
+	 break;
+
+      case WM_WPOSXY:
+	 read0 = writemask & WRITEMASK_XY;
+	 break;
+
+      case WM_DELTAXY:
+	 read0 = writemask & WRITEMASK_XY;
+	 read1 = WRITEMASK_X;
+	 break;
+
+      case WM_PIXELW:
+	 read0 = WRITEMASK_X;
+	 read1 = WRITEMASK_XY;
+	 break;
+
+      case WM_LINTERP:
+	 read0 = WRITEMASK_X;
+	 read1 = WRITEMASK_XY;
+	 break;
+
+      case WM_PINTERP:
+	 read0 = WRITEMASK_X; /* interpolant */
+	 read1 = WRITEMASK_XY; /* deltas */
+	 read2 = WRITEMASK_W; /* pixel w */
+	 break;
+
+      case OPCODE_DP3:
+	 read0 = WRITEMASK_XYZ;	/* fixed channel sets, independent of the writemask */
+	 read1 = WRITEMASK_XYZ;
+	 break;
+
+      case OPCODE_DPH:
+	 read0 = WRITEMASK_XYZ;	/* arg 0 without W, arg 1 with W */
+	 read1 = WRITEMASK_XYZW;
+	 break;
+
+      case OPCODE_DP4:
+	 read0 = WRITEMASK_XYZW;
+	 read1 = WRITEMASK_XYZW;
+	 break;
+
+      case OPCODE_LIT: 
+	 read0 = WRITEMASK_XYW;	/* Z of the source is never read */
+	 break;
+
+      case OPCODE_SWZ:
+      case OPCODE_DST:
+      case OPCODE_TXP:
+      default:
+	 assert(0);	/* SWZ, DST, TXP and any unlisted opcode are not handled here */
+	 break;
+      }
+      /* Apply the masks: read channels are marked live, the rest are unlinked. */
+      track_arg(c, inst, 0, read0);
+      track_arg(c, inst, 1, read1);
+      track_arg(c, inst, 2, read2);
+   }
+
+   if (INTEL_DEBUG & DEBUG_WM) {
+      brw_wm_print_program(c, "pass1");
+   }
+}
+
+
+
diff --git a/i965/brw_wm_pass2.c b/i965/brw_wm_pass2.c
new file mode 100644
index 0000000..a1edbd6
--- /dev/null
+++ b/i965/brw_wm_pass2.c
@@ -0,0 +1,336 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+                   
+
+#include "brw_context.h"
+#include "brw_wm.h"
+
+
+/* Set these to 1 to force spilling, so the spill paths can be tested
+ * with known-good examples rather than new tests: TEST_PAYLOAD_SPILLS
+ * spills preallocated values, TEST_DST_SPILLS spills results.
+ */
+#define TEST_PAYLOAD_SPILLS 0
+#define TEST_DST_SPILLS 0
+
+static void spill_value(struct brw_wm_compile *c,
+			struct brw_wm_value *value);
+
+static void prealloc_reg(struct brw_wm_compile *c,
+                         struct brw_wm_value *value,
+                         GLuint reg)
+{
+   struct brw_wm_grf *grf = &c->pass2_grf[reg];
+
+   /* A value with no uses is not given a register. */
+   if (!value->lastuse)
+      return;
+
+   /* nextuse starts at zero; update_register_usage() corrects it. */
+   grf->value = value;
+   grf->nextuse = 0;
+   value->resident = grf;
+   value->hw_reg = brw_vec8_grf(reg*2, 0);
+   if (TEST_PAYLOAD_SPILLS)
+      spill_value(c, value);
+}
+
+
+/* Initialize all the register values.  Do the initial setup
+ * calculations for interpolants.
+ */
+static void init_registers( struct brw_wm_compile *c )
+{
+   const GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted;
+   GLuint nr_interp_regs = 0;
+   GLuint next_reg = 0;
+   GLuint n;
+
+   /* Every register below grf_limit starts out free. */
+   for (n = 0; n < c->grf_limit; n++)
+      c->pass2_grf[n].nextuse = BRW_WM_MAX_INSN;
+
+   /* Fixed layout: depth payload, then constants, then interpolants. */
+   for (n = 0; n < c->key.nr_depth_regs; n++, next_reg++)
+      prealloc_reg(c, &c->payload.depth[n], next_reg);
+   for (n = 0; n < c->nr_creg; n++, next_reg++)
+      prealloc_reg(c, &c->creg[n], next_reg);
+   for (n = 0; n < FRAG_ATTRIB_MAX; n++) {
+      if (!(inputs & (1<<n)))
+         continue;
+      prealloc_reg(c, &c->payload.input_interp[n], next_reg++);
+      nr_interp_regs++;
+   }
+   assert(nr_interp_regs >= 1);
+
+   /* Counts are doubled, matching prealloc_reg()'s reg*2 numbering. */
+   c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
+   c->prog_data.curb_read_length = c->nr_creg * 2;
+   c->prog_data.urb_read_length = nr_interp_regs * 2;
+   c->max_wm_grf = next_reg * 2;
+}
+
+
+/* Update the nextuse value for each register in our file.
+ */
+static void update_register_usage(struct brw_wm_compile *c,
+                                  GLuint thisinsn)
+{
+   GLuint r;
+
+   for (r = 1; r < c->grf_limit; r++) {
+      struct brw_wm_grf *grf = &c->pass2_grf[r];
+      struct brw_wm_ref *use;
+
+      /* A nextuse at or past this instruction is still current. */
+      if (grf->nextuse >= thisinsn)
+         continue;
+
+      use = grf->value->lastuse;
+
+      if (use->insn < thisinsn) {
+         /* The final use is behind us: release the register. */
+         grf->value->resident = 0;
+         grf->value = 0;
+         grf->nextuse = BRW_WM_MAX_INSN;
+         continue;
+      }
+
+      /* Step back from the last use to the earliest use that is
+       * not yet behind us.
+       */
+      while (use->prevuse && use->prevuse->insn >= thisinsn)
+         use = use->prevuse;
+      grf->nextuse = use->insn;
+   }
+}
+
+
+static void spill_value(struct brw_wm_compile *c,
+                        struct brw_wm_value *value)
+{
+   struct brw_wm_grf *home = value->resident;
+
+   /* Slots are handed out from 0x40 upwards; slot 0 is reserved to
+    * mean "undef" in brw_wm_emit.c.  An existing slot is reused.
+    */
+   if (value->spill_slot == 0) {
+      c->last_scratch += 0x40;
+      value->spill_slot = c->last_scratch;
+   }
+
+   /* brw_wm_emit.c performs the spill right after the value is
+    * calculated, so the register is simply released here. */
+   home->value = NULL;
+   home->nextuse = BRW_WM_MAX_INSN;
+   value->resident = NULL;
+}
+
+
+
+/* Search for contiguous region with the most distant nearest
+ * member.  Free regs count as very distant.
+ *
+ * TODO: implement spill-to-reg so that we can rearrange discontigous
+ * free regs and then spill the oldest non-free regs in sequence.
+ * This would mean inserting instructions in this pass.
+ */
+static GLuint search_contiguous_regs(struct brw_wm_compile *c,
+                                     GLuint nr,
+                                     GLuint thisinsn)
+{
+   struct brw_wm_grf *grf = c->pass2_grf;
+   GLuint furthest = 0;
+   GLuint reg = 0;
+   GLuint i, j;
+
+   /* Start search at 1: r0 is special and can't be used or spilled.
+    * Only windows lying wholly below grf_limit are examined, so that
+    * grf[i+j] never reads beyond the registers init_registers() set up.
+    */
+   for (i = 1; i + nr <= c->grf_limit && furthest < BRW_WM_MAX_INSN; i++) {
+      GLuint group_nextuse = BRW_WM_MAX_INSN;
+      /* A window is only as distant as its soonest-needed member. */
+      for (j = 0; j < nr; j++) {
+         if (grf[i+j].nextuse < group_nextuse)
+            group_nextuse = grf[i+j].nextuse;
+      }
+      if (group_nextuse > furthest) {
+         furthest = group_nextuse;
+         reg = i;
+      }
+   }
+
+   assert(furthest != thisinsn);
+
+   /* Any non-empty regs in the chosen window will need to be spilled: */
+   for (j = 0; j < nr; j++)
+      if (grf[reg+j].value)
+         spill_value(c, grf[reg+j].value);
+
+   return reg;
+}
+
+
+static void alloc_contiguous_dest(struct brw_wm_compile *c, 
+                                  struct brw_wm_value *dst[],
+                                  GLuint nr,
+                                  GLuint thisinsn)
+{
+   const GLuint base = search_contiguous_regs(c, nr, thisinsn);
+   const GLuint top = (base + nr) * 2;
+   GLuint n;
+
+   for (n = 0; n < nr; n++) {
+      struct brw_wm_grf *grf = &c->pass2_grf[base + n];
+
+      if (dst[n]) {
+         assert(!dst[n]->resident);
+         assert(grf->nextuse != thisinsn);
+         grf->value = dst[n];
+         grf->nextuse = thisinsn;
+         dst[n]->resident = grf;
+      }
+      else {
+         /* Need a dummy value in the TEX case; it stays out of the
+          * tracking scheme. */
+         dst[n] = &c->vreg[c->nr_vreg++];
+      }
+      dst[n]->hw_reg = brw_vec8_grf((base + n)*2, 0);
+   }
+
+   /* Raise max_wm_grf if this allocation reaches beyond it. */
+   if (top > c->max_wm_grf)
+      c->max_wm_grf = top;
+}
+
+
+static void load_args(struct brw_wm_compile *c, 
+                      struct brw_wm_instruction *inst)
+{
+   const GLuint thisinsn = inst - c->instruction;
+   GLuint arg, chan;
+
+   for (arg = 0; arg < 3; arg++) {
+      for (chan = 0; chan < 4; chan++) {
+         struct brw_wm_ref *ref = inst->src[arg][chan];
+         struct brw_wm_value *val;
+
+         if (!ref)
+            continue;
+         val = ref->value;
+
+         if (!val->resident) {
+            /* The value has to come back from scratch space.  The
+             * fill code itself is emitted by brw_wm_emit.c; here a
+             * register is allocated and the ref is flagged through
+             * unspill_reg as requiring a fill.
+             */
+            const GLuint reg = search_contiguous_regs(c, 1, thisinsn);
+            struct brw_wm_grf *grf = &c->pass2_grf[reg];
+
+            grf->value = val;
+            grf->nextuse = thisinsn;
+            val->resident = grf;
+            ref->unspill_reg = reg*2;
+         }
+
+         /* Point hw_reg at the value's current location. */
+         assert(val == val->resident->value);
+         ref->hw_reg.nr += (val->resident - c->pass2_grf) * 2;
+      }
+   }
+}
+
+
+
+/* Step 3: Work forwards once again.  Perform register allocations,
+ * taking into account instructions like TEX which require contiguous
+ * result registers.  Where necessary spill registers to scratch space
+ * and reload later.
+ */
+void brw_wm_pass2( struct brw_wm_compile *c )
+{
+   GLuint pc;
+   GLuint chan;
+
+   init_registers(c);
+
+   for (pc = 0; pc < c->nr_insns; pc++) {
+      struct brw_wm_instruction *inst = &c->instruction[pc];
+
+      /* Refresh the registers' nextuse values for this instruction,
+       * then make sure every argument is resident (this may require
+       * unspilling).
+       */
+      update_register_usage(c, pc);
+      load_args(c, inst);
+
+      /* Allocate registers to hold results.  The texture opcodes get
+       * four contiguous registers in a single request; everything
+       * else gets one register per written channel.
+       */
+      if (inst->opcode == OPCODE_TEX ||
+          inst->opcode == OPCODE_TXB ||
+          inst->opcode == OPCODE_TXP) {
+         alloc_contiguous_dest(c, inst->dst, 4, pc);
+      }
+      else {
+         for (chan = 0; chan < 4; chan++) {
+            if (!(inst->writemask & (1<<chan)))
+               continue;
+
+            assert(inst->dst[chan]);
+            alloc_contiguous_dest(c, &inst->dst[chan], 1, pc);
+         }
+      }
+
+      /* Debug aid: immediately spill every result. */
+      if (TEST_DST_SPILLS && inst->opcode != WM_PIXELXY) {
+         for (chan = 0; chan < 4; chan++) {
+            if (inst->dst[chan])
+               spill_value(c, inst->dst[chan]);
+         }
+      }
+   }
+
+   /* The program is printed both before and after the state change. */
+   if (INTEL_DEBUG & DEBUG_WM)
+      brw_wm_print_program(c, "pass2");
+
+   c->state = PASS2_DONE;
+
+   if (INTEL_DEBUG & DEBUG_WM)
+      brw_wm_print_program(c, "pass2/done");
+}
+
+
+
diff --git a/i965/brw_wm_sampler_state.c b/i965/brw_wm_sampler_state.c
new file mode 100644
index 0000000..794c7d9
--- /dev/null
+++ b/i965/brw_wm_sampler_state.c
@@ -0,0 +1,253 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+                   
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+#include "macros.h"
+
+
+
+/* Samplers aren't strictly wm state from the hardware's perspective,
+ * but that is the only situation in which we use them in this driver.
+ */
+
+
+
+/* The brw (and related graphics cores) do not support GL_CLAMP.  The
+ * Intel drivers for "other operating systems" implement GL_CLAMP as
+ * GL_CLAMP_TO_EDGE; here it is mapped to the border clamp mode instead.
+ */
+static GLuint translate_wrap_mode( GLenum wrap )
+{
+   /* GL_CLAMP shares the border mode with GL_CLAMP_TO_BORDER; unknown
+    * enums wrap like GL_REPEAT. */
+   switch (wrap) {
+   case GL_CLAMP_TO_EDGE:
+      return BRW_TEXCOORDMODE_CLAMP;        /* conform likes it this way */
+   case GL_CLAMP:                           /* conform likes it this way */
+   case GL_CLAMP_TO_BORDER:
+      return BRW_TEXCOORDMODE_CLAMP_BORDER;
+   case GL_MIRRORED_REPEAT:
+      return BRW_TEXCOORDMODE_MIRROR;
+   case GL_REPEAT:
+   default:
+      return BRW_TEXCOORDMODE_WRAP;
+   }
+}
+
+
+static GLuint U_FIXED(GLfloat value, GLuint frac_bits)
+{
+   const GLfloat scaled = value * (1<<frac_bits);  /* value * 2^frac_bits */
+   return scaled < 0 ? 0 : scaled;                 /* negatives clamp to 0 */
+}
+
+static GLint S_FIXED(GLfloat value, GLuint frac_bits)
+{
+   return (GLint)(value * (1 << frac_bits)); /* scale by 2^frac_bits, then convert */
+}
+
+
+static GLuint upload_default_color( struct brw_context *brw,
+                                    const GLfloat *color )
+{
+   struct brw_sampler_default_color border;
+
+   /* Copy the four color components and hand them to the state cache. */
+   COPY_4V(border.color, color);
+   return brw_cache_data(&brw->cache[BRW_SAMPLER_DEFAULT_COLOR], &border);
+}
+
+
+/* Fill *sampler from texObj's filter, wrap, shadow-compare and LOD
+ * state plus texUnit's LOD bias; sdc_gs_offset locates the default color. */
+static void brw_update_sampler_state( struct gl_texture_unit *texUnit,
+                                      struct gl_texture_object *texObj,
+                                      GLuint sdc_gs_offset,
+                                      struct brw_sampler_state *sampler)
+{
+   _mesa_memset(sampler, 0, sizeof(*sampler));
+   /* Minification: unrecognised enums keep the zeroed filter fields. */
+   switch (texObj->MinFilter) {
+   case GL_NEAREST:
+      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
+      break;
+   case GL_LINEAR:
+      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
+      break;
+   case GL_NEAREST_MIPMAP_NEAREST:
+      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
+      break;
+   case GL_LINEAR_MIPMAP_NEAREST:
+      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
+      break;
+   case GL_NEAREST_MIPMAP_LINEAR:
+      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
+      break;
+   case GL_LINEAR_MIPMAP_LINEAR:
+      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
+      break;
+   default:
+      break;
+   }
+
+   /* Set Anisotropy: when requested it replaces both map filters.
+    */
+   if ( texObj->MaxAnisotropy > 1.0 ) {
+      sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC;
+      sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC;
+      /* Clamp from above: the value must not exceed BRW_ANISORATIO_16. */
+      if (texObj->MaxAnisotropy > 2.0) {
+         sampler->ss3.max_aniso = MIN2((texObj->MaxAnisotropy - 2) / 2,
+                                       BRW_ANISORATIO_16);
+      }
+   }
+   else {
+      switch (texObj->MagFilter) {
+      case GL_NEAREST:
+         sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
+         break;
+      case GL_LINEAR:
+         sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
+         break;
+      default:
+         break;
+      }
+   }
+
+   sampler->ss1.r_wrap_mode = translate_wrap_mode(texObj->WrapR);
+   sampler->ss1.s_wrap_mode = translate_wrap_mode(texObj->WrapS);
+   sampler->ss1.t_wrap_mode = translate_wrap_mode(texObj->WrapT);
+
+   /* Fulsim complains if I don't do this.  Hardware doesn't mind:
+    */
+#if 0
+   if (texObj->Target == GL_TEXTURE_CUBE_MAP_ARB) {
+      sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+      sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+      sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+   }
+#endif
+
+   /* Set shadow function: 
+    */
+   if (texObj->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) {
+      /* Shadowing is "enabled" by emitting a particular sampler
+       * message (sample_c).  So need to recompile WM program when
+       * shadow comparison is enabled on each/any texture unit.
+       */
+      sampler->ss0.shadow_function = intel_translate_shadow_compare_func(texObj->CompareFunc);
+   }
+
+   /* Set LOD bias: unit and object biases summed, clamped to [-16, 15].
+    */
+   sampler->ss0.lod_bias = S_FIXED(CLAMP(texUnit->LodBias + texObj->LodBias, -16, 15), 6);
+
+   sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
+   sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
+
+   /* Set BaseMipLevel, MaxLOD, MinLOD: 
+    *
+    * XXX: I don't think that using firstLevel, lastLevel works,
+    * because we always setup the surface state as if firstLevel ==
+    * level zero.  Probably have to subtract firstLevel from each of
+    * these:
+    */
+   sampler->ss0.base_level = U_FIXED(0, 1);
+
+   sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(texObj->MaxLod, 0), 13), 6);
+   sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(texObj->MinLod, 0), 13), 6);
+
+   sampler->ss2.default_color_pointer = sdc_gs_offset >> 5;
+}
+
+
+
+/* All samplers must be uploaded in a single contiguous array, which
+ * complicates various things.  However, this is still too confusing -
+ * FIXME: simplify all the different new texture state flags.
+ */
+static void upload_wm_samplers( struct brw_context *brw )
+{
+   GLuint sampler_count = 0;
+   GLuint unit;
+
+   /* _NEW_TEXTURE */
+   for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
+      struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit];
+      struct gl_texture_object *texObj;
+      GLuint sdc_gs_offset;
+
+      if (!texUnit->_ReallyEnabled)
+         continue;
+      texObj = texUnit->_Current;
+      sdc_gs_offset = upload_default_color(brw, texObj->BorderColor);
+      brw_update_sampler_state(texUnit, texObj, sdc_gs_offset,
+                               &brw->wm.sampler[unit]);
+
+      /* Count up to the highest enabled unit, gaps included. */
+      sampler_count = unit + 1;
+   }
+
+   if (brw->wm.sampler_count != sampler_count) {
+      brw->wm.sampler_count = sampler_count;
+      brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
+   }
+
+   /* The offset stays zero when there is nothing to upload. */
+   brw->wm.sampler_gs_offset = 0;
+   if (brw->wm.sampler_count)
+      brw->wm.sampler_gs_offset =
+         brw_cache_data_sz(&brw->cache[BRW_SAMPLER],
+                           brw->wm.sampler,
+                           sizeof(struct brw_sampler_state) * brw->wm.sampler_count);
+}
+
+
+const struct brw_tracked_state brw_wm_samplers = {	/* dirty flags paired with upload_wm_samplers() */
+   .dirty = {
+      .mesa = _NEW_TEXTURE,	/* the one flag upload_wm_samplers() tags its inputs with */
+      .brw = 0,
+      .cache = 0
+   },
+   .update = upload_wm_samplers
+};
+
+
diff --git a/i965/brw_wm_state.c b/i965/brw_wm_state.c
new file mode 100644
index 0000000..5b4f2ab
--- /dev/null
+++ b/i965/brw_wm_state.c
@@ -0,0 +1,194 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+                   
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "bufmgr.h"
+
+/***********************************************************************
+ * WM unit - fragment programs and rasterization
+ */
+
+static void invalidate_scratch_cb( struct intel_context *intel,
+				   void *unused )
+{
+   /* Intentionally empty: upload_wm_unit() registers this with bmBufferSetInvalidateCB(); no action is taken. */
+}
+
+
+static void upload_wm_unit(struct brw_context *brw )
+{
+   struct intel_context *intel = &brw->intel;
+   struct brw_wm_unit_state wm;
+   GLuint max_threads;
+   /* Build the WM unit state in 'wm' and store it with brw_cache_data(). */
+   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+      max_threads = 0;
+   else
+      max_threads = 31;
+
+   /* Start from all-zero state. */
+   memset(&wm, 0, sizeof(wm));
+
+   /* CACHE_NEW_WM_PROG */
+   wm.thread0.grf_reg_count = ((brw->wm.prog_data->total_grf-1) & ~15) / 16;
+   wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6;
+   wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
+   wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length;
+   wm.thread3.const_urb_entry_read_length = brw->wm.prog_data->curb_read_length;
+
+   wm.wm5.max_threads = max_threads;
+
+   if (brw->wm.prog_data->total_scratch) {
+      GLuint per_thread = ((brw->wm.prog_data->total_scratch + 1023) / 1024) * 1024;
+      GLuint total = per_thread * (max_threads + 1);
+      /* Both sizes are in bytes; per_thread is rounded up to a whole KB. */
+      /* Scratch space -- just have to make sure there is sufficient
+       * allocated for the active program and current number of threads.
+       */
+
+      if (!brw->wm.scratch_buffer) {
+         bmGenBuffers(intel, "wm scratch", 1, &brw->wm.scratch_buffer, 12);
+         bmBufferSetInvalidateCB(intel,
+                                 brw->wm.scratch_buffer,
+                                 invalidate_scratch_cb,
+                                 NULL,
+                                 GL_FALSE);
+      }
+
+      if (total > brw->wm.scratch_buffer_size) {
+         brw->wm.scratch_buffer_size = total;
+         bmBufferData(intel,
+                      brw->wm.scratch_buffer,
+                      brw->wm.scratch_buffer_size,
+                      NULL,
+                      0);
+      }
+
+      assert(per_thread <= 12 * 1024);
+      wm.thread2.per_thread_scratch_space = (per_thread / 1024) - 1; /* KB count, minus one */
+
+      /* XXX: could make this dynamic as this is so rarely active:
+       */
+      /* BRW_NEW_LOCK */
+      wm.thread2.scratch_space_base_pointer =
+         bmBufferOffset(intel, brw->wm.scratch_buffer) >> 10;
+   }
+
+   /* CACHE_NEW_SURFACE */
+   wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces;
+
+   /* BRW_NEW_CURBE_OFFSETS */
+   wm.thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2;
+
+   wm.thread3.urb_entry_read_offset = 0;
+   wm.thread1.depth_coef_urb_read_offset = 1;
+   wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+
+   /* CACHE_NEW_SAMPLER */
+   wm.wm4.sampler_count = (brw->wm.sampler_count + 1) / 4;
+   wm.wm4.sampler_state_pointer = brw->wm.sampler_gs_offset >> 5;
+
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   {
+      const struct gl_fragment_program *fp = brw->fragment_program;
+
+      if (fp->Base.InputsRead & (1<<FRAG_ATTRIB_WPOS))
+         wm.wm5.program_uses_depth = 1; /* as far as we can tell */
+
+      if (fp->Base.OutputsWritten & (1<<FRAG_RESULT_DEPR))
+         wm.wm5.program_computes_depth = 1;
+
+      /* _NEW_COLOR */
+      if (fp->UsesKill ||
+          brw->attribs.Color->AlphaEnabled)
+         wm.wm5.program_uses_killpixel = 1;
+   }
+
+   wm.wm5.enable_16_pix = 1;
+   wm.wm5.thread_dispatch_enable = 1;	/* AKA: color_write */
+   wm.wm5.legacy_line_rast = 0;
+   wm.wm5.legacy_global_depth_bias = 0;
+   wm.wm5.early_depth_test = 1;	        /* never need to disable */
+   wm.wm5.line_aa_region_width = 0;
+   wm.wm5.line_endcap_aa_region_width = 1;
+
+   /* _NEW_POLYGONSTIPPLE */
+   if (brw->attribs.Polygon->StippleFlag)
+      wm.wm5.polygon_stipple = 1;
+
+   /* _NEW_POLYGON */
+   if (brw->attribs.Polygon->OffsetFill) {
+      wm.wm5.depth_offset = 1;
+      /* Something weird going on with legacy_global_depth_bias,
+       * offset_constant, scaling and MRD.  This value passes glean
+       * but gives some odd results elsewhere (eg. the
+       * quad-offset-units test).
+       */
+      wm.global_depth_offset_constant = brw->attribs.Polygon->OffsetUnits * 2;
+
+      /* This is the only value that passes glean:
+       */
+      wm.global_depth_offset_scale = brw->attribs.Polygon->OffsetFactor;
+   }
+
+   /* _NEW_LINE */
+   if (brw->attribs.Line->StippleFlag) {
+      wm.wm5.line_stipple = 1;
+   }
+
+   if (INTEL_DEBUG & DEBUG_STATS || intel->stats_wm)
+      wm.wm4.stats_enable = 1;
+
+   brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm );
+}
+
+const struct brw_tracked_state brw_wm_unit = {	/* dirty flags paired with upload_wm_unit() */
+   .dirty = {
+      .mesa = (_NEW_POLYGON | 
+	       _NEW_POLYGONSTIPPLE | 
+	       _NEW_LINE | 
+	       _NEW_COLOR),
+      /* Each flag in these sets matches a tagged section of upload_wm_unit(). */
+      .brw = (BRW_NEW_FRAGMENT_PROGRAM | 
+	      BRW_NEW_CURBE_OFFSETS |
+	      BRW_NEW_LOCK),
+
+      .cache = (CACHE_NEW_SURFACE | 
+		CACHE_NEW_WM_PROG | 
+		CACHE_NEW_SAMPLER)
+   },
+   .update = upload_wm_unit
+};
+
diff --git a/i965/brw_wm_surface_state.c b/i965/brw_wm_surface_state.c
new file mode 100644
index 0000000..d24c618
--- /dev/null
+++ b/i965/brw_wm_surface_state.c
@@ -0,0 +1,268 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+                   
+
+#include "mtypes.h"
+#include "texformat.h"
+#include "texstore.h"
+
+#include "intel_mipmap_tree.h"
+#include "intel_batchbuffer.h"
+#include "intel_tex.h"
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+
+/* Convert a GL texture target into the BRW_SURFACE_* surface type.
+ *
+ * GL_TEXTURE_RECTANGLE_NV shares the 2D surface type.  Any other
+ * target asserts and, when asserts are compiled out, yields 0.
+ */
+static GLuint translate_tex_target( GLenum target )
+{
+   GLuint surface_type = 0;
+
+   if (target == GL_TEXTURE_1D)
+      surface_type = BRW_SURFACE_1D;
+   else if (target == GL_TEXTURE_2D ||
+            target == GL_TEXTURE_RECTANGLE_NV)
+      surface_type = BRW_SURFACE_2D;
+   else if (target == GL_TEXTURE_3D)
+      surface_type = BRW_SURFACE_3D;
+   else if (target == GL_TEXTURE_CUBE_MAP)
+      surface_type = BRW_SURFACE_CUBE;
+   else
+      assert(0);		/* unexpected target */
+
+   return surface_type;
+}
+
+
+/* Convert a Mesa texture format into the BRW_SURFACEFORMAT_* value used
+ * in the surface state.
+ *
+ * The pairs live in a table that is scanned linearly.  Special cases:
+ *   - MESA_FORMAT_RGB888 is not supported for sampling, so it asserts
+ *     before its table value is returned;
+ *   - a format that is not in the table asserts and returns 0.
+ *
+ * Both FXT1 variants share BRW_SURFACEFORMAT_FXT1, and both DXT1
+ * variants share BRW_SURFACEFORMAT_DXT1_RGB.
+ */
+static GLuint translate_tex_format( GLuint mesa_format )
+{
+   static const struct {
+      GLuint mesa;
+      GLuint brw;
+   } format_map[] = {
+      /* luminance / intensity / alpha */
+      { MESA_FORMAT_L8,           BRW_SURFACEFORMAT_L8_UNORM },
+      { MESA_FORMAT_I8,           BRW_SURFACEFORMAT_I8_UNORM },
+      { MESA_FORMAT_A8,           BRW_SURFACEFORMAT_A8_UNORM },
+      { MESA_FORMAT_AL88,         BRW_SURFACEFORMAT_L8A8_UNORM },
+      /* colour */
+      { MESA_FORMAT_RGB888,       BRW_SURFACEFORMAT_R8G8B8_UNORM },
+      { MESA_FORMAT_ARGB8888,     BRW_SURFACEFORMAT_B8G8R8A8_UNORM },
+      { MESA_FORMAT_RGBA8888_REV, BRW_SURFACEFORMAT_R8G8B8A8_UNORM },
+      { MESA_FORMAT_RGB565,       BRW_SURFACEFORMAT_B5G6R5_UNORM },
+      { MESA_FORMAT_ARGB1555,     BRW_SURFACEFORMAT_B5G5R5A1_UNORM },
+      { MESA_FORMAT_ARGB4444,     BRW_SURFACEFORMAT_B4G4R4A4_UNORM },
+      /* YCbCr */
+      { MESA_FORMAT_YCBCR_REV,    BRW_SURFACEFORMAT_YCRCB_NORMAL },
+      { MESA_FORMAT_YCBCR,        BRW_SURFACEFORMAT_YCRCB_SWAPUVY },
+      /* depth */
+      { MESA_FORMAT_Z16,          BRW_SURFACEFORMAT_L16_UNORM },
+      /* compressed */
+      { MESA_FORMAT_RGB_FXT1,     BRW_SURFACEFORMAT_FXT1 },
+      { MESA_FORMAT_RGBA_FXT1,    BRW_SURFACEFORMAT_FXT1 },
+      { MESA_FORMAT_RGBA_DXT1,    BRW_SURFACEFORMAT_DXT1_RGB },
+      { MESA_FORMAT_RGB_DXT1,     BRW_SURFACEFORMAT_DXT1_RGB }
+   };
+   const GLuint nr = sizeof(format_map) / sizeof(format_map[0]);
+   GLuint i;
+
+   for (i = 0; i < nr; i++) {
+      if (format_map[i].mesa != mesa_format)
+         continue;
+
+      if (mesa_format == MESA_FORMAT_RGB888)
+         assert(0);		/* not supported for sampling */
+
+      return format_map[i].brw;
+   }
+
+   assert(0);
+   return 0;
+}
+
+/* Fill *surf with the surface state for the texture currently bound to
+ * texture unit 'unit'.  *surf is cleared first; the dimensions come
+ * from the image at the texture object's first mipmap level.
+ */
+static
+void brw_update_texture_surface( GLcontext *ctx, 
+				 GLuint unit,
+				 struct brw_surface_state *surf )
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct brw_context *brw = brw_context(ctx);
+   struct gl_texture_object *texObj = brw->attribs.Texture->Unit[unit]._Current;
+   struct intel_texture_object *intelTex = intel_texture_object(texObj);
+   struct gl_texture_image *baseImage = texObj->Image[0][intelTex->firstLevel];
+   const GLuint is_cube = (texObj->Target == GL_TEXTURE_CUBE_MAP);
+
+   memset(surf, 0, sizeof(*surf));
+
+   /* ss0: type, format and layout.  data_return_format is left at the
+    * zero written by the memset above.
+    */
+   surf->ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+   surf->ss0.surface_type = translate_tex_target(texObj->Target);
+   surf->ss0.surface_format = translate_tex_format(baseImage->TexFormat->MesaFormat);
+
+   /* BRW_NEW_LOCK */
+   surf->ss1.base_addr = bmBufferOffset(intel, intelTex->mt->region->buffer);
+
+   /* width, height, depth and pitch are all stored minus one */
+   surf->ss2.mip_count = intelTex->lastLevel - intelTex->firstLevel;
+   surf->ss2.width = baseImage->Width - 1;
+   surf->ss2.height = baseImage->Height - 1;
+   surf->ss3.depth = baseImage->Depth - 1;
+   surf->ss3.pitch = (intelTex->mt->pitch * intelTex->mt->cpp) - 1;
+   surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+   surf->ss3.tiled_surface = intelTex->mt->region->tiled; /* always zero */
+   surf->ss4.min_lod = 0;
+   /* All six face-enable bits are set for cube maps, clear otherwise. */
+   surf->ss0.cube_pos_x = is_cube;
+   surf->ss0.cube_neg_x = is_cube;
+   surf->ss0.cube_pos_y = is_cube;
+   surf->ss0.cube_neg_y = is_cube;
+   surf->ss0.cube_pos_z = is_cube;
+   surf->ss0.cube_neg_z = is_cube;
+}
+
+
+
+#define OFFSET(TYPE, FIELD) ( (GLuint)&(((TYPE *)0)->FIELD) )  /* byte offset of FIELD inside TYPE; not referenced anywhere in this file.  NOTE(review): casts a pointer to GLuint - confirm that is acceptable on 64-bit builds */
+
+
+/* Build the WM surface binding table.  Slot 0 describes the current
+ * draw region; slot i+1 describes the texture enabled on unit i, or 0.
+ * Each surface state goes through brw_cache_data() and its offset is
+ * stored in brw->wm.bind, which is then cached as well.
+ */
+static void upload_wm_surfaces(struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct intel_context *intel = &brw->intel;
+   GLuint i;
+
+   {
+      struct brw_surface_state surf;
+      struct intel_region *region = brw->state.draw_region;
+
+      memset(&surf, 0, sizeof(surf));
+
+      if (region->cpp == 4)
+	 surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+      else 
+	 surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
+
+      surf.ss0.surface_type = BRW_SURFACE_2D;
+
+      /* _NEW_COLOR */
+      surf.ss0.color_blend = (!brw->attribs.Color->_LogicOpEnabled &&
+			      brw->attribs.Color->BlendEnabled);
+
+      /* A cleared ColorMask channel becomes a write-disable bit. */
+      surf.ss0.writedisable_red =   !brw->attribs.Color->ColorMask[0];
+      surf.ss0.writedisable_green = !brw->attribs.Color->ColorMask[1];
+      surf.ss0.writedisable_blue =  !brw->attribs.Color->ColorMask[2];
+      surf.ss0.writedisable_alpha = !brw->attribs.Color->ColorMask[3];
+
+      /* BRW_NEW_LOCK */
+      surf.ss1.base_addr = bmBufferOffset(&brw->intel, region->buffer);
+
+      /* The region pitch stands in for the surface width here. */
+      surf.ss2.width = region->pitch - 1; /* XXX: not really! */
+      surf.ss2.height = region->height - 1;
+      surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+      surf.ss3.tiled_surface = region->tiled;
+      surf.ss3.pitch = (region->pitch * region->cpp) - 1;
+
+      brw->wm.bind.surf_ss_offset[0] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf );
+      brw->wm.nr_surfaces = 1;
+   }
+
+   for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+      struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[i];
+
+      /* _NEW_TEXTURE, BRW_NEW_TEXDATA 
+       */
+      if (texUnit->_ReallyEnabled &&
+	  intel_finalize_mipmap_tree(intel,texUnit->_Current)) {
+
+	 struct brw_surface_state surf;
+
+	 brw_update_texture_surface(ctx, i, &surf);
+
+	 brw->wm.bind.surf_ss_offset[i+1] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf );
+	 brw->wm.nr_surfaces = i+2;
+      }
+      else if( texUnit->_ReallyEnabled &&
+	       texUnit->_Current == intel->frame_buffer_texobj )
+      {
+	 /* The frame buffer texture object reuses the slot 0 surface. */
+	 brw->wm.bind.surf_ss_offset[i+1] = brw->wm.bind.surf_ss_offset[0];
+	 brw->wm.nr_surfaces = i+2;
+      }    
+      else {
+	 brw->wm.bind.surf_ss_offset[i+1] = 0;
+      }
+   }
+   /* nr_surfaces now covers the highest unit that received a surface. */
+   brw->wm.bind_ss_offset = brw_cache_data( &brw->cache[BRW_SS_SURF_BIND],
+					    &brw->wm.bind );
+}
+
+/* Tracked-state atom whose update hook is upload_wm_surfaces(). */
+const struct brw_tracked_state brw_wm_surfaces = {
+   .update = upload_wm_surfaces,
+   .dirty = {
+      .mesa = _NEW_BUFFERS | _NEW_TEXTURE | _NEW_COLOR,
+      .brw = BRW_NEW_LOCK | BRW_NEW_CONTEXT,	/* LOCK: required for bmBufferOffset */
+      .cache = 0
+   }
+};
+
+
+
diff --git a/i965/bufmgr.h b/i965/bufmgr.h
new file mode 100644
index 0000000..e748c0d
--- /dev/null
+++ b/i965/bufmgr.h
@@ -0,0 +1,215 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef BUFMGR_H
+#define BUFMGR_H
+
+#include "intel_context.h"
+
+
+/* The buffer manager context.  Opaque.
+ */
+struct bufmgr;
+struct buffer;
+
+
+struct bufmgr *bm_fake_intel_Attach( struct intel_context *intel ); 
+
+/* Flags for validate and other calls.  If both NO_UPLOAD and NO_EVICT
+ * are specified, ValidateBuffers is essentially a query.
+ */
+#define BM_MEM_LOCAL   0x1
+#define BM_MEM_AGP     0x2
+#define BM_MEM_VRAM    0x4	/* not yet used */
+#define BM_WRITE       0x8	/* not yet used */
+#define BM_READ        0x10	/* not yet used */
+#define BM_NO_UPLOAD   0x20
+#define BM_NO_EVICT    0x40
+#define BM_NO_MOVE     0x80	/* not yet used */
+#define BM_NO_ALLOC    0x100	/* legacy "fixed" buffers only */
+#define BM_CLIENT      0x200	/* for map - pointer will be accessed
+				 * without dri lock */
+
+#define BM_MEM_MASK (BM_MEM_LOCAL|BM_MEM_AGP|BM_MEM_VRAM)
+
+
+
+
+/* Create a pool of a given memory type, from a certain offset and a
+ * certain size.  
+ *
+ * Also passed in is a virtual pointer to the start of the pool.  This
+ * is useful in the faked-out version in i915 so that MapBuffer can
+ * return a pointer to a buffer residing in AGP space.  
+ *
+ * Flags passed into a pool are inherited by all buffers allocated in
+ * that pool.  So pools representing the static front,back,depth
+ * buffer allocations should have MEM_AGP|NO_UPLOAD|NO_EVICT|NO_MOVE to match
+ * the behaviour of the legacy allocations.
+ *
+ * Returns -1 for failure, pool number for success.
+ */
+int bmInitPool( struct intel_context *, 
+		unsigned long low_offset,
+		void *low_virtual,
+		unsigned long size,
+		unsigned flags);
+
+
+/* Stick closely to ARB_vbo semantics - they're well defined and
+ * understood, and drivers can just pass the calls through without too
+ * much thunking.
+ */
+void bmGenBuffers(struct intel_context *, const char *, unsigned n, struct buffer **buffers,
+		  int align );
+void bmDeleteBuffers(struct intel_context *, unsigned n, struct buffer **buffers);
+
+
+/* Hook to inform faked buffer manager about fixed-position
+ * front,depth,back buffers.  These may move to a fully memory-managed
+ * scheme, or they may continue to be managed as is.
+ */
+struct buffer *bmGenBufferStatic(struct intel_context *,
+				 unsigned pool);
+
+/* On evict, buffer manager will call invalidate_cb() to note that the
+ * buffer needs to be reloaded.
+ *
+ * Buffer is uploaded by calling bmMapBuffer() and copying data into
+ * the returned pointer.
+ *
+ * This is basically a big hack to get some more performance by
+ * turning off backing store for buffers where we either have it
+ * already (textures) or don't need it (batch buffers, temporary
+ * vbo's).
+ */
+void bmBufferSetInvalidateCB(struct intel_context *,
+			     struct buffer *buf,
+			     void (*invalidate_cb)( struct intel_context *, void *ptr ),
+			     void *ptr,
+			     GLboolean dont_fence_subdata);
+
+
+/* The driver has more intimate knowledge of the hardware than a GL
+ * client would, so the flags here are more prescriptive than the usage
+ * values in the ARB_vbo interface:
+ */
+int bmBufferData(struct intel_context *, 
+		  struct buffer *buf, 
+		  unsigned size, 
+		  const void *data, 
+		  unsigned flags );
+
+int bmBufferSubData(struct intel_context *, 
+		     struct buffer *buf, 
+		     unsigned offset, 
+		     unsigned size, 
+		     const void *data );
+
+
+int bmBufferDataAUB(struct intel_context *, 
+		     struct buffer *buf, 
+		     unsigned size, 
+		     const void *data, 
+		     unsigned flags,
+		     unsigned aubtype,
+		     unsigned aubsubtype );
+
+int bmBufferSubDataAUB(struct intel_context *, 
+			struct buffer *buf, 
+			unsigned offset, 
+			unsigned size, 
+			const void *data,
+			unsigned aubtype,
+			unsigned aubsubtype );
+
+
+/* In this version, taking the offset will provoke an upload on
+ * buffers not already resident in AGP:
+ */
+unsigned bmBufferOffset(struct intel_context *, 
+			struct buffer *buf);
+
+
+/* Extract data from the buffer:
+ */
+void bmBufferGetSubData(struct intel_context *, 
+			struct buffer *buf, 
+			unsigned offset, 
+			unsigned size, 
+			void *data );
+
+void *bmMapBuffer( struct intel_context *,
+		   struct buffer *buf, 
+		   unsigned access );
+
+void bmUnmapBuffer( struct intel_context *,
+		    struct buffer *buf );
+
+void bmUnmapBufferAUB( struct intel_context *,
+		       struct buffer *buf,
+		       unsigned aubtype,
+		       unsigned aubsubtype );
+
+
+/* Pertains to all buffers whose offset has been taken since the last
+ * fence or release.
+ */
+int bmValidateBuffers( struct intel_context * );
+void bmReleaseBuffers( struct intel_context * );
+
+GLuint bmCtxId( struct intel_context *intel );
+
+
+GLboolean bmError( struct intel_context * );
+void bmEvictAll( struct intel_context * );
+
+void *bmFindVirtual( struct intel_context *intel,
+		     unsigned int offset,
+		     size_t sz );
+
+/* This functionality is used by the buffer manager, not really sure
+ * if we need to be exposing it in this way, probably libdrm will
+ * offer equivalent calls.
+ *
+ * For now they can stay, but will likely change/move before final:
+ */
+unsigned bmSetFence( struct intel_context * );
+unsigned bmSetFenceLock( struct intel_context * );
+unsigned bmLockAndFence( struct intel_context *intel );
+int bmTestFence( struct intel_context *, unsigned fence );
+void bmFinishFence( struct intel_context *, unsigned fence );
+void bmFinishFenceLock( struct intel_context *, unsigned fence );
+
+void bm_fake_NotifyContendedLockTake( struct intel_context * );
+
+extern int INTEL_DEBUG;
+#define DEBUG_BUFMGR 0x10000000
+
+#define DBG(...)  do { if (INTEL_DEBUG & DEBUG_BUFMGR) _mesa_printf(__VA_ARGS__); } while(0)
+
+#endif
diff --git a/i965/bufmgr_fake.c b/i965/bufmgr_fake.c
new file mode 100644
index 0000000..fb4903d
--- /dev/null
+++ b/i965/bufmgr_fake.c
@@ -0,0 +1,1463 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/* Originally a fake version of the buffer manager so that we can
+ * prototype the changes in a driver fairly quickly, has been fleshed
+ * out to a fully functional interim solution.
+ *
+ * Basically wraps the old style memory management in the new
+ * programming interface, but is more expressive and avoids many of
+ * the bugs in the old texture manager.
+ */
+#include "bufmgr.h"
+
+#include "intel_context.h"
+#include "intel_ioctl.h"
+#include "intel_batchbuffer.h"
+
+#include "simple_list.h"
+#include "mm.h"
+#include "imports.h"
+
+#define BM_POOL_MAX 8
+
+/* Internal flags:
+ */
+#define BM_NO_BACKING_STORE   0x2000
+#define BM_NO_FENCE_SUBDATA   0x4000
+
+
+static int check_fenced( struct intel_context *intel );
+
+static int nr_attach = 0;
+
+/* Wrapper around mm.c's mem_block, which understands that you must
+ * wait for fences to expire before memory can be freed.  This is
+ * specific to our use of memcpy for uploads - an upload that was
+ * processed through the command queue wouldn't need to care about
+ * fences.
+ */
+struct block {
+   struct block *next, *prev;	/* list links: a pool's lru list or one of the bufmgr lists */
+   struct pool *pool;		/* BM_MEM_AGP */
+   struct mem_block *mem;	/* BM_MEM_AGP */
+
+   unsigned referenced:1;	/* free_block() asserts if this is still set */
+   unsigned on_hardware:1;	/* cleared by fence_blocks() when a fence is assigned */
+   unsigned fenced:1;		/* set by fence_blocks(), cleared by check_fenced() once the fence tests as passed */
+   
+
+   unsigned fence;		/* BM_MEM_AGP, Split to read_fence, write_fence */
+
+   struct buffer *buf;		/* owning buffer; free_block() sets it to NULL when the block cannot be freed yet */
+   void *virtual;		/* pool->virtual + mem->ofs, set in alloc_from_pool() */
+};
+
+
+struct buffer {
+   unsigned id;			/* debug only */
+   const char *name;		/* label passed to bmGenBuffers(); shown in debug output */
+   unsigned size;		/* rounded up to the alignment when a block is allocated */
+
+   unsigned mapped:1;		
+   unsigned dirty:1;		/* set by set_dirty() */
+   unsigned aub_dirty:1;	
+   unsigned alignment:13;	/* log2 value: alloc_from_pool() uses 1<<alignment */
+   unsigned flags:16;		/* BM_* flags plus the internal BM_NO_BACKING_STORE / BM_NO_FENCE_SUBDATA */
+
+   struct block *block;		/* card memory for this buffer; reset to NULL when the block is evicted */
+   void *backing_store;		/* ALIGN_MALLOC'ed storage, or NULL */
+   void (*invalidate_cb)( struct intel_context *, void * );	/* run by set_dirty() when BM_NO_BACKING_STORE is set */
+   void *invalidate_ptr;	/* argument passed to invalidate_cb */
+};
+
+struct pool {
+   unsigned size;		/* size given to bmInitPool() */
+   unsigned low_offset;		/* first card offset covered by the pool */
+   struct buffer *static_buffer;	/* created by the first bmGenBufferStatic() call for this pool */
+   unsigned flags;		/* BM_* flags given to bmInitPool() */
+   struct mem_block *heap;	/* result of mmInit(low_offset, size) */
+   void *virtual;		/* low_virtual - low_offset, so virtual + card offset is a usable pointer */
+   struct block lru;		/* only allocated, non-fence-pending blocks here */
+};
+
+struct bufmgr {
+   _glthread_Mutex mutex;	/**< for thread safety */
+   struct pool pool[BM_POOL_MAX];
+   unsigned nr_pools;		/* number of pool[] entries in use */
+
+   unsigned buf_nr;		/* for generating ids */
+
+   struct block referenced;	/* after bmBufferOffset */
+   struct block on_hardware;	/* after bmValidateBuffers */
+   struct block fenced;		/* after bmFenceBuffers (mi_flush, emit irq, write dword) */
+                                /* then to pool->lru or free() */
+
+   unsigned ctxId;		/* hHWContext of the context that performed the first attach */
+   unsigned last_fence;		/* last fence value handed to fence_blocks() */
+   unsigned free_on_hardware;	/* bytes of released blocks that were still on_hardware when freed */
+
+   unsigned fail:1;
+   unsigned need_fence:1;	/* wait_quiescent() asserts this before fencing an on_hardware block */
+};
+
+#define MAXFENCE 0x7fffffff
+
+/* Wrap-aware "a <= b" for fence numbers: true when a equals b, or when
+ * b is less than 1<<24 ahead of a, allowing for a wrap at MAXFENCE.
+ */
+static GLboolean FENCE_LTE( unsigned a, unsigned b )
+{
+   unsigned ahead;
+
+   if (a == b)
+      return GL_TRUE;
+
+   ahead = (a < b) ? (b - a) : (MAXFENCE - a + b);
+   return (ahead < (1<<24)) ? GL_TRUE : GL_FALSE;
+}
+
+int bmTestFence( struct intel_context *intel, unsigned fence )
+{
+   /* Fence 0 always tests as passed; wrap-around is left to FENCE_LTE. */
+   if (fence == 0) return 1;
+   return FENCE_LTE(fence, intel->sarea->last_dispatch) != 0;
+}
+
+#define LOCK(bm) \
+  int dolock = nr_attach > 1; \
+  if (dolock) _glthread_LOCK_MUTEX(bm->mutex)
+/* LOCK declares 'dolock' in the caller's scope and takes bm->mutex only when nr_attach > 1; UNLOCK reads that same 'dolock'. */
+#define UNLOCK(bm) \
+  if (dolock) _glthread_UNLOCK_MUTEX(bm->mutex)
+
+
+
+/* Carve card memory for 'buf' out of pool 'pool_nr'.
+ *
+ * On success a new block is appended to the pool's lru list and tied
+ * to the buffer.  Returns GL_FALSE if either allocation fails.
+ */
+static GLboolean alloc_from_pool( struct intel_context *intel,
+				  unsigned pool_nr,
+				  struct buffer *buf )
+{
+   struct pool *home = &intel->bm->pool[pool_nr];
+   const GLuint align_mask = (1<<buf->alignment) - 1;
+   struct block *blk = calloc(1, sizeof(*blk));
+
+   if (blk == NULL)
+      return GL_FALSE;
+
+   /* The size is rounded up to the alignment before it is requested. */
+   blk->mem = mmAllocMem(home->heap,
+                         (buf->size + align_mask) & ~align_mask,
+                         buf->alignment, 0);
+   if (blk->mem == NULL) {
+      free(blk);
+      return GL_FALSE;
+   }
+
+   blk->pool = home;
+   blk->buf = buf;
+   blk->virtual = home->virtual + blk->mem->ofs;
+   buf->block = blk;
+
+   /* New blocks go to the tail of the pool's lru list. */
+   make_empty_list(blk);
+   insert_at_tail(&home->lru, blk);
+
+   return GL_TRUE;
+}
+
+
+
+
+
+
+
+
+/* Release the card storage associated with buf:
+ */
+static void free_block( struct intel_context *intel, struct block *block )
+{
+   DBG("free block %p\n", block);
+   if (block == NULL)
+      return;
+
+   /* Retire fences that have already passed before testing the flags. */
+   check_fenced(intel);
+
+   if (block->referenced) {
+      _mesa_printf("tried to free block on referenced list\n");
+      assert(0);
+      return;
+   }
+
+   if (block->on_hardware || block->fenced) {
+      /* Cannot be freed yet: detach the buffer and leave the block queued. */
+      if (block->on_hardware)
+         intel->bm->free_on_hardware += block->mem->size;
+      block->buf = NULL;
+      return;
+   }
+
+   DBG("    - free immediately\n");
+   remove_from_list(block);
+   mmFreeMem(block->mem);
+   free(block);
+}
+
+
+/* Attach buf->size bytes from ALIGN_MALLOC(size, 64) as the backing store of 'buf'. */
+static void alloc_backing_store( struct intel_context *intel, struct buffer *buf )
+{
+   assert(buf->backing_store == NULL);
+   assert((buf->flags & BM_NO_EVICT) == 0 && (buf->flags & BM_NO_BACKING_STORE) == 0);
+   buf->backing_store = ALIGN_MALLOC(buf->size, 64);
+}
+
+/* Release the backing store of 'buf', if it has one, and clear the pointer. */
+static void free_backing_store( struct intel_context *intel, struct buffer *buf )
+{
+   assert((buf->flags & (BM_NO_EVICT|BM_NO_BACKING_STORE)) == 0);
+   if (buf->backing_store == NULL)
+      return;
+   ALIGN_FREE(buf->backing_store);
+   buf->backing_store = NULL;
+}
+
+
+
+
+
+
+/* Flag 'buf' dirty; a buffer without backing store has its invalidate callback run first. */
+static void set_dirty( struct intel_context *intel,
+			      struct buffer *buf )
+{
+   const unsigned no_backing = buf->flags & BM_NO_BACKING_STORE;
+   if (no_backing != 0)
+      buf->invalidate_cb(intel, buf->invalidate_ptr);
+   assert((buf->flags & BM_NO_EVICT) == 0);
+   DBG("set_dirty - buf %d\n", buf->id);
+   buf->dirty = 1;
+}
+
+
+static int evict_lru( struct intel_context *intel, GLuint max_fence, GLuint *pool )
+{
+   struct bufmgr *bm = intel->bm;
+   struct block *block, *tmp;
+   int i;
+   /* Evicts at most one block: returns 1 and stores the pool index in *pool, otherwise returns 0. */
+   DBG("%s\n", __FUNCTION__);
+
+   for (i = 0; i < bm->nr_pools; i++) {
+      if (!(bm->pool[i].flags & BM_NO_EVICT)) {
+	 foreach_s(block, tmp, &bm->pool[i].lru) {
+	    /* Buffers flagged BM_NO_FENCE_SUBDATA are passed over. */
+	    if (block->buf &&
+		(block->buf->flags & BM_NO_FENCE_SUBDATA))
+	       continue;
+	    /* With a non-zero max_fence, the first block fenced later than it ends the whole search. */
+	    if (block->fence && max_fence &&
+		!FENCE_LTE(block->fence, max_fence))
+	       return 0;
+	    /* NOTE(review): block->buf is NULL-checked above but dereferenced unconditionally here - confirm lru blocks always own a buffer. */
+	    set_dirty(intel, block->buf);
+	    block->buf->block = NULL;
+
+	    free_block(intel, block);
+	    *pool = i;
+	    return 1;
+	 }
+      }
+   }
+
+
+   return 0;
+}
+
+
+#define foreach_s_rev(ptr, t, list)   \
+        for(ptr=(list)->prev,t=(ptr)->prev; list != ptr; ptr=t, t=(t)->prev)  /* walks 'list' through the prev links; 't' holds the next node to visit, fetched before the body runs */
+
+static int evict_mru( struct intel_context *intel, GLuint *pool )
+{
+   struct bufmgr *bm = intel->bm;
+   struct block *block, *tmp;
+   int i;
+   /* Like evict_lru() but walks each lru list backwards and applies no fence limit; evicts at most one block. */
+   DBG("%s\n", __FUNCTION__);
+
+   for (i = 0; i < bm->nr_pools; i++) {
+      if (!(bm->pool[i].flags & BM_NO_EVICT)) {
+	 foreach_s_rev(block, tmp, &bm->pool[i].lru) {
+	    /* Buffers flagged BM_NO_FENCE_SUBDATA are passed over. */
+	    if (block->buf &&
+		(block->buf->flags & BM_NO_FENCE_SUBDATA))
+	       continue;
+	    /* NOTE(review): block->buf is NULL-checked above but dereferenced unconditionally here - confirm lru blocks always own a buffer. */
+	    set_dirty(intel, block->buf);
+	    block->buf->block = NULL;
+
+	    free_block(intel, block);
+	    *pool = i;
+	    return 1;
+	 }
+      }
+   }
+
+
+   return 0;
+}
+
+
+static int check_fenced( struct intel_context *intel )
+{
+   struct bufmgr *bm = intel->bm;
+   struct block *block, *tmp;
+   int ret = 0;
+   /* Retire blocks on the fenced list whose fence tests as passed; returns 1 if at least one was retired. */
+   foreach_s(block, tmp, &bm->fenced ) {
+      assert(block->fenced);
+
+      if (bmTestFence(intel, block->fence)) {
+
+	 block->fenced = 0;
+	 /* A block with no buffer left was released while busy: free it now.  Otherwise it goes back to its pool's lru list. */
+	 if (!block->buf) {
+	    DBG("delayed free: offset %x sz %x\n", block->mem->ofs, block->mem->size);
+	    remove_from_list(block);
+	    mmFreeMem(block->mem);
+	    free(block);
+	 }
+	 else {
+	    DBG("return to lru: offset %x sz %x\n", block->mem->ofs, block->mem->size);
+	    move_to_tail(&block->pool->lru, block);
+	 }
+
+	 ret = 1;
+      }
+      else {
+	 /* Blocks are ordered by fence, so if one fails, all from
+	  * here will fail also:
+	  */
+	 break;
+      }
+   }
+
+   /* Also check the referenced list: blocks stay on that list and
+    * only have their fenced flag cleared; this does not affect ret. */
+   foreach_s(block, tmp, &bm->referenced ) {
+      if (block->fenced &&
+	  bmTestFence(intel, block->fence)) {
+	 block->fenced = 0;
+      }
+   }
+
+   
+   DBG("%s: %d\n", __FUNCTION__, ret);
+   return ret;
+}
+
+
+
+static void fence_blocks( struct intel_context *intel,
+			  unsigned fence )
+{
+   struct bufmgr *bm = intel->bm;
+   struct block *block, *tmp;
+   /* Stamp every on_hardware block with 'fence' and switch it from on_hardware to fenced. */
+   foreach_s (block, tmp, &bm->on_hardware) {
+      DBG("Fence block %p (sz 0x%x buf %p) with fence %d\n", block, 
+	  block->mem->size, block->buf, fence);
+      block->fence = fence;
+
+      block->on_hardware = 0;
+      block->fenced = 1;
+
+      /* Move to tail of pending list here
+       */
+      move_to_tail(&bm->fenced, block);
+   }
+
+   /* Also check the referenced list: these blocks get the same flag
+    * changes but are left on the referenced list. */  
+   foreach_s (block, tmp, &bm->referenced) {
+      if (block->on_hardware) {
+	 DBG("Fence block %p (sz 0x%x buf %p) with fence %d\n", block, 
+	     block->mem->size, block->buf, fence);
+	 
+	 block->fence = fence;
+	 block->on_hardware = 0;
+	 block->fenced = 1;
+      }
+   }
+
+   /* Every block was moved off the on_hardware list by the first loop. */
+   bm->last_fence = fence;
+   assert(is_empty_list(&bm->on_hardware));
+}
+
+
+
+
+/* Try every pool that permits allocation, in index order; nothing is evicted here. */
+static GLboolean alloc_block( struct intel_context *intel,
+			      struct buffer *buf )
+{
+   struct bufmgr *bm = intel->bm;
+   int pool_nr;
+
+   assert(intel->locked);
+   DBG("%s 0x%x bytes (%s)\n", __FUNCTION__, buf->size, buf->name);
+
+   for (pool_nr = 0; pool_nr < bm->nr_pools; pool_nr++) {
+      if (bm->pool[pool_nr].flags & BM_NO_ALLOC)
+         continue;
+      if (!alloc_from_pool(intel, pool_nr, buf))
+         continue;
+
+      DBG("%s --> 0x%x (sz %x)\n", __FUNCTION__,
+          buf->block->mem->ofs, buf->block->mem->size);
+      return GL_TRUE;
+   }
+
+   DBG("%s --> fail\n", __FUNCTION__);
+   return GL_FALSE;
+}
+
+
+static GLboolean evict_and_alloc_block( struct intel_context *intel,
+					struct buffer *buf )
+{
+   GLuint pool;
+   struct bufmgr *bm = intel->bm;
+   /* Allocate a block for 'buf', escalating step by step from free space to eviction and fence waits. */
+   assert(buf->block == NULL);
+
+   /* Put a cap on the amount of free memory we'll allow to accumulate
+    * before emitting a fence.
+    */
+   if (bm->free_on_hardware > 1 * 1024 * 1024) {
+      DBG("fence for free space: %x\n", bm->free_on_hardware);
+      bmSetFence(intel);
+   }
+
+   /* Search for already free memory:
+    */
+   if (alloc_block(intel, buf))
+      return GL_TRUE;
+
+   /* Look for memory that may have become free: 
+    */
+   if (check_fenced(intel) &&
+       alloc_block(intel, buf))
+      return GL_TRUE;
+
+   /* Look for memory blocks not used for >1 frame:
+    */
+   while (evict_lru(intel, intel->second_last_swap_fence, &pool))
+      if (alloc_from_pool(intel, pool, buf))
+	 return GL_TRUE;
+
+   /* If we're not thrashing, allow lru eviction to dig deeper into
+    * recently used textures.  We'll probably be thrashing soon:
+    */
+   if (!intel->thrashing) {
+      while (evict_lru(intel, 0, &pool))
+	 if (alloc_from_pool(intel, pool, buf))
+	    return GL_TRUE;
+   }
+
+   /* Keep thrashing counter alive?
+    */
+   if (intel->thrashing)
+      intel->thrashing = 20;
+
+   /* Wait on any already pending fences - here we are waiting for any
+    * freed memory that has been submitted to hardware and fenced to
+    * become available:
+    */
+   while (!is_empty_list(&bm->fenced)) {
+      GLuint fence = bm->fenced.next->fence;
+      bmFinishFence(intel, fence);
+
+      if (alloc_block(intel, buf))
+	 return GL_TRUE;
+   }
+
+
+   /* Blocks are still on hardware: fence them, wait until the fenced
+    * list is empty, note that we are thrashing, then try once more. */
+   if (!is_empty_list(&bm->on_hardware)) {
+      bmSetFence(intel);
+
+      while (!is_empty_list(&bm->fenced)) {
+	 GLuint fence = bm->fenced.next->fence;
+	 bmFinishFence(intel, fence);
+      }
+
+      if (!intel->thrashing) {	 
+	 DBG("thrashing\n");
+      }
+      intel->thrashing = 20; 
+
+      if (alloc_block(intel, buf))
+	 return GL_TRUE;
+   }
+   /* Last resort: evict walking each lru list backwards (see evict_mru). */
+   while (evict_mru(intel, &pool))
+      if (alloc_from_pool(intel, pool, buf))
+	 return GL_TRUE;
+
+   DBG("%s 0x%x bytes failed\n", __FUNCTION__, buf->size);
+
+   assert(is_empty_list(&bm->on_hardware));
+   assert(is_empty_list(&bm->fenced));
+
+   return GL_FALSE;
+}
+
+
+
+
+
+
+
+
+
+
+/***********************************************************************
+ * Public functions
+ */
+
+
+/* The initialization functions are skewed in the fake implementation.
+ * This call would be to attach to an existing manager, rather than to
+ * create a local one.
+ */
+struct bufmgr *bm_fake_intel_Attach( struct intel_context *intel )
+{
+   _glthread_DECLARE_STATIC_MUTEX(initMutex);   
+   static struct bufmgr bm;
+   /* Every caller receives the same function-static manager; it is set up on the first attach only. */
+   /* This function needs a mutex of its own...
+    */
+   _glthread_LOCK_MUTEX(initMutex);
+
+   if (nr_attach == 0) {
+      _glthread_INIT_MUTEX(bm.mutex);
+
+      make_empty_list(&bm.referenced);
+      make_empty_list(&bm.fenced);
+      make_empty_list(&bm.on_hardware);
+      
+      /* The context id of any of the share group.  This won't be used
+       * in communication with the kernel, so it doesn't matter if
+       * this context is eventually deleted.
+       */
+      bm.ctxId = intel->hHWContext;
+   }
+   /* LOCK() only takes bm.mutex once nr_attach exceeds 1. */
+   nr_attach++;
+
+   _glthread_UNLOCK_MUTEX(initMutex);
+
+   return &bm;
+}
+
+
+
+/* The virtual pointer would go away in a true implementation.
+ */
+/* Register a pool covering 'size' bytes from card offset 'low_offset'
+ * and return its index.  A pool with the same offset and size is
+ * reused, not added twice.  Returns -1 once BM_POOL_MAX pools exist.
+ */
+int bmInitPool( struct intel_context *intel, 
+		unsigned long low_offset,
+		void *low_virtual,
+		unsigned long size,
+		unsigned flags)
+{
+   struct bufmgr *bm = intel->bm;
+   int retval = 0;
+
+   LOCK(bm);
+   {
+      GLuint i;
+
+      for (i = 0; i < bm->nr_pools; i++) {
+	 if (bm->pool[i].low_offset == low_offset &&
+	     bm->pool[i].size == size) {
+	    retval = i;
+	    goto out;
+	 }
+      }
+
+      if (bm->nr_pools >= BM_POOL_MAX)
+	 retval = -1;
+      else {
+	 i = bm->nr_pools++;
+	 /* i is unsigned, low_offset and size are unsigned long: the specifiers must match. */
+	 DBG("bmInitPool %u low_offset %lx sz %lx\n",
+	     i, low_offset, size);
+	 bm->pool[i].low_offset = low_offset;
+	 bm->pool[i].size = size;
+	 bm->pool[i].heap = mmInit( low_offset, size );
+	 bm->pool[i].virtual = low_virtual - low_offset;
+	 bm->pool[i].flags = flags;
+	 make_empty_list(&bm->pool[i].lru);
+	 retval = i;
+      }
+   }
+ out:
+   UNLOCK(bm);
+   return retval;
+}
+
+static struct buffer *do_GenBuffer(struct intel_context *intel, const char *name, int align)
+{
+   /* Returns a zeroed buffer carrying a fresh id, or NULL when calloc fails. */
+   struct buffer *buf = calloc(1, sizeof(*buf));
+   if (!buf)
+      return NULL;
+   buf->id = ++intel->bm->buf_nr;
+   buf->name = name;
+   buf->alignment = align;	/* log2: alloc_from_pool() uses 1<<alignment */
+   buf->flags = BM_MEM_AGP|BM_MEM_VRAM|BM_MEM_LOCAL;
+   return buf;
+}
+
+
+/* Return the pool's virtual address for [offset, offset+sz), or NULL if no pool contains that range. */
+void *bmFindVirtual( struct intel_context *intel,
+		     unsigned int offset,
+		     size_t sz )
+{
+   struct bufmgr *bm = intel->bm;
+   int nr;
+   for (nr = 0; nr < bm->nr_pools; nr++) {
+      const struct pool *p = &bm->pool[nr];
+      if (offset >= p->low_offset && offset + sz <= p->low_offset + p->size)
+         return p->virtual + offset;
+   }
+   return NULL;
+}
+ 
+
+/* Fill buffers[0..n-1] with new buffers created by do_GenBuffer(intel, name, align). */
+void bmGenBuffers(struct intel_context *intel, 
+		  const char *name, unsigned n, 
+		  struct buffer **buffers,
+		  int align )
+{
+   struct bufmgr *bm = intel->bm;
+   LOCK(bm);
+   {
+      struct buffer **slot = buffers;
+      while (slot != buffers + n)
+	 *slot++ = do_GenBuffer(intel, name, align);
+   }
+   UNLOCK(bm);
+}
+
+
+/* Release n buffers (card block, backing store, then the object); NULL entries are skipped. */
+void bmDeleteBuffers(struct intel_context *intel, unsigned n, struct buffer **buffers)
+{
+   struct bufmgr *bm = intel->bm;
+
+   LOCK(bm);
+   {
+      unsigned idx;
+
+      for (idx = 0; idx < n; idx++) {
+	 struct buffer *victim = buffers[idx];
+	 if (victim == NULL)
+	    continue;
+
+	 if (victim->block != NULL)
+	    free_block(intel, victim->block);
+	 if (victim->backing_store != NULL)
+	    free_backing_store(intel, victim);
+	 free(victim);
+      }
+   }
+   UNLOCK(bm);
+}
+
+
+
+
+/* Hook to inform the faked buffer manager about the fixed-position
+ * front, depth and back buffers: returns the one buffer spanning the
+ * whole of the given pool, creating it on first use.  The pool must be
+ * flagged BM_NO_EVICT and BM_NO_MOVE.  A fixed offset may be passed one day.
+ */
+struct buffer *bmGenBufferStatic(struct intel_context *intel,
+				 unsigned pool )
+{
+   struct bufmgr *bm = intel->bm;
+   struct buffer *result;
+   LOCK(bm);
+   {
+      assert(bm->pool[pool].flags & BM_NO_EVICT);
+      assert(bm->pool[pool].flags & BM_NO_MOVE);
+
+      result = bm->pool[pool].static_buffer;
+      if (!result) {
+	 /* First request for this pool: build the buffer covering it. */
+	 result = do_GenBuffer(intel, "static", 12);
+
+	 bm->pool[pool].static_buffer = result;
+	 assert(!result->block);
+
+	 result->size = bm->pool[pool].size;
+	 result->flags = bm->pool[pool].flags;
+	 result->alignment = 12;
+
+	 if (!alloc_from_pool(intel, pool, result))
+	    assert(0);
+      }
+   }
+   UNLOCK(bm);
+   return result;
+}
+
+/* Wait until block is neither on_hardware nor fenced (asserted on exit). */
+static void wait_quiescent(struct intel_context *intel,
+			   struct block *block)
+{
+   if (block->on_hardware) {
+      assert(intel->bm->need_fence);
+      bmSetFence(intel);
+      assert(!block->on_hardware);
+   }
+
+   /* Wait out the block's fence; the assert below expects fenced cleared. */
+   if (block->fenced) {
+      bmFinishFence(intel, block->fence);
+   }
+
+   assert(!block->on_hardware);
+   assert(!block->fenced);
+}
+
+
+
+/* Set buf's size and, when data is non-NULL, its contents.  An existing
+ * block is freed rather than reused if it is busy, a different size, or
+ * data is NULL.  Returns 0, or -1 when no block could be allocated. */
+int bmBufferData(struct intel_context *intel, 
+		 struct buffer *buf, 
+		 unsigned size, 
+		 const void *data, 
+		 unsigned flags )
+{
+   struct bufmgr *bm = intel->bm;
+   int retval = 0;
+   /* NOTE(review): the flags argument is not consulted anywhere below. */
+   LOCK(bm);
+   {
+      DBG("bmBufferData %d sz 0x%x data: %p\n", buf->id, size, data);
+
+      assert(!buf->mapped);
+
+      if (buf->block) {
+	 struct block *block = buf->block;
+
+	 /* Optimistic check to see if we can reuse the block -- not
+	  * required for correctness:
+	  */
+	 if (block->fenced)
+	    check_fenced(intel);
+
+	 if (block->on_hardware ||
+	     block->fenced ||
+	     (buf->size && buf->size != size) || 
+	     (data == NULL)) {
+
+	    assert(!block->referenced);
+
+	    free_block(intel, block);
+	    buf->block = NULL;
+	    buf->dirty = 1;
+	 }
+      }
+
+      buf->size = size;
+      if (buf->block) {
+	 assert (buf->block->mem->size >= size);
+      }
+
+      if (buf->flags & (BM_NO_BACKING_STORE|BM_NO_EVICT)) {
+	 /* Buffers with either flag are written directly in their block. */
+	 assert(intel->locked || data == NULL);
+
+	 if (data != NULL) {
+	    if (!buf->block && !evict_and_alloc_block(intel, buf)) {
+	       bm->fail = 1;
+	       retval = -1;
+	       goto out;
+	    }
+
+	    wait_quiescent(intel, buf->block);
+
+	    DBG("bmBufferData %d offset 0x%x sz 0x%x\n", 
+		buf->id, buf->block->mem->ofs, size);
+
+	    assert(buf->block->virtual == buf->block->pool->virtual + buf->block->mem->ofs);
+
+	    do_memcpy(buf->block->virtual, data, size);
+	 }
+	 buf->dirty = 0;
+      }
+      else {
+	       DBG("%s - set buf %d dirty\n", __FUNCTION__, buf->id);
+	 set_dirty(intel, buf);
+	 free_backing_store(intel, buf);
+	 /* Only the backing store is written here, not the block. */
+	 if (data != NULL) {      
+	    alloc_backing_store(intel, buf);
+	    do_memcpy(buf->backing_store, data, size);
+	 }
+      }
+   }
+ out:
+   UNLOCK(bm);
+   return retval;
+}
+
+
+/* Copy size bytes into buf at offset, in whatever space the buffer is
+ * currently resident.  Returns 0, or -1 when no block could be allocated. */
+int bmBufferSubData(struct intel_context *intel, 
+		     struct buffer *buf, 
+		     unsigned offset, 
+		     unsigned size, 
+		     const void *data )
+{
+   struct bufmgr *bm = intel->bm;
+   int retval = 0;
+
+   if (size == 0) 
+      return 0;
+
+   LOCK(bm); 
+   {
+      DBG("bmBufferSubdata %d offset 0x%x sz 0x%x\n", buf->id, offset, size);
+      
+      assert(offset+size <= buf->size);
+
+      if (buf->flags & (BM_NO_EVICT|BM_NO_BACKING_STORE)) {
+	 /* Buffers with either flag are written directly in their block. */
+	 assert(intel->locked);
+
+	 if (!buf->block && !evict_and_alloc_block(intel, buf)) {
+	    bm->fail = 1;
+	    retval = -1;
+	    goto out;
+	 }
+	 /* BM_NO_FENCE_SUBDATA buffers skip the wait for the hardware. */
+	 if (!(buf->flags & BM_NO_FENCE_SUBDATA))
+	    wait_quiescent(intel, buf->block);
+
+	 buf->dirty = 0;
+
+	 do_memcpy(buf->block->virtual + offset, data, size);
+      }
+      else {
+	 DBG("%s - set buf %d dirty\n", __FUNCTION__, buf->id);
+	 set_dirty(intel, buf);
+
+	 if (buf->backing_store == NULL)
+	    alloc_backing_store(intel, buf);
+
+	 do_memcpy(buf->backing_store + offset, data, size); 
+      }
+   }
+ out:
+   UNLOCK(bm);
+   return retval;
+}
+
+
+
+/* bmBufferData() followed, when an aub file is open, by a dump of the
+ * freshly written block to that file.  Returns bmBufferData()'s result.
+ */
+int bmBufferDataAUB(struct intel_context *intel,
+		     struct buffer *buf,
+		     unsigned size,
+		     const void *data,
+		     unsigned flags,
+		     unsigned aubtype,
+		     unsigned aubsubtype )
+{
+   int status = bmBufferData(intel, buf, size, data, flags);
+
+   if (status != 0 || !data || !intel->aub_file)
+      return status;
+
+   /* This only works because in this version of the buffer manager we
+    * allocate all buffers statically in agp space and so can emit the
+    * uploads to the aub file with the correct offsets as they happen.
+    */
+   if (!buf->block || buf->dirty)
+      return status;
+
+   intel->vtbl.aub_gtt_data(intel, buf->block->mem->ofs,
+			    buf->block->virtual, size,
+			    aubtype, aubsubtype);
+   buf->aub_dirty = 0;
+
+   return status;
+}
+		       
+
+/* bmBufferSubData() plus, when an aub file is open and buf has a block
+ * and is not dirty, a dump of the written range to that file.
+ */
+int bmBufferSubDataAUB(struct intel_context *intel,
+			struct buffer *buf,
+			unsigned offset,
+			unsigned size,
+			const void *data,
+			unsigned aubtype,
+			unsigned aubsubtype )
+{
+   int status = bmBufferSubData(intel, buf, offset, size, data);
+   const char *src;
+
+   /* Offsets in the aub file are only right because this buffer manager
+    * places every buffer statically in agp space.
+    */
+   if (!intel->aub_file || status != 0 || !buf->block || buf->dirty)
+      return status;
+
+   src = ((const char *)buf->block->virtual) + offset;
+   intel->vtbl.aub_gtt_data(intel,
+			    buf->block->mem->ofs + offset, src,
+			    size, aubtype, aubsubtype);
+
+   return status;
+}
+
+/* Unmap buf and, while an aub file is open, flag pool-0 buffers that have
+ * a block and are not dirty as needing a fresh aub dump.
+ */
+void bmUnmapBufferAUB( struct intel_context *intel,
+		       struct buffer *buf,
+		       unsigned aubtype,
+		       unsigned aubsubtype )
+{
+   bmUnmapBuffer(intel, buf);
+
+   /* Hack - only pool 0 is tracked, which excludes the framebuffer
+    * mappings: aubfiles stay small but fallback rendering is not seen.
+    */
+   if (intel->aub_file && buf->block && !buf->dirty &&
+       buf->block->pool == &intel->bm->pool[0])
+      buf->aub_dirty = 1;
+}
+
+/* Make sure buf has a block, put that block on the referenced list and
+ * return its offset.  On allocation failure sets bm->fail and returns ~0.
+ */
+unsigned bmBufferOffset(struct intel_context *intel,
+			struct buffer *buf)
+{
+   struct bufmgr *bm = intel->bm;
+   unsigned offset = 0;
+
+   LOCK(bm);
+   {
+      assert(intel->locked);
+      if (buf->block || evict_and_alloc_block(intel, buf)) {
+	 struct block *blk = buf->block;
+
+	 assert(blk);
+	 assert(blk->buf == buf);
+
+	 DBG("Add buf %d (block %p, dirty %d) to referenced list\n", buf->id, blk,
+	     buf->dirty);
+	 move_to_tail(&bm->referenced, blk);
+	 blk->referenced = 1;
+	 offset = blk->mem->ofs;
+      }
+      else {
+	 bm->fail = 1;
+	 offset = ~0;
+      }
+   }
+   UNLOCK(bm);
+   return offset;
+}
+
+
+
+/* Extract data from the buffer: copy size bytes starting at offset into
+ * data.  Nothing is copied if the buffer has no block / backing store. */
+void bmBufferGetSubData(struct intel_context *intel,
+			struct buffer *buf,
+			unsigned offset,
+			unsigned size,
+			void *data )
+{
+   struct bufmgr *bm = intel->bm;
+
+   LOCK(bm);
+   {
+      /* Log under this function's own name (was "bmBufferSubdata"). */
+      DBG("bmBufferGetSubData %d offset 0x%x sz 0x%x\n", buf->id, offset, size);
+      if (buf->flags & (BM_NO_EVICT|BM_NO_BACKING_STORE)) {
+	 if (buf->block && size) {
+	    wait_quiescent(intel, buf->block);
+	    do_memcpy(data, buf->block->virtual + offset, size);
+	 }
+      }
+      else {
+	 if (buf->backing_store && size) {
+	    do_memcpy(data, buf->backing_store + offset, size);
+	 }
+      }
+   }
+   UNLOCK(bm);
+}
+
+
+/* Return a pointer to whatever space the buffer is currently resident in,
+ * or NULL if it is already mapped or no block could be allocated. */
+void *bmMapBuffer( struct intel_context *intel,
+		   struct buffer *buf, 
+		   unsigned flags )
+{
+   struct bufmgr *bm = intel->bm;
+   void *retval = NULL;
+   /* NOTE(review): the flags argument is not consulted anywhere below. */
+   LOCK(bm);
+   {
+      DBG("bmMapBuffer %d\n", buf->id);
+
+      if (buf->mapped) {
+	 _mesa_printf("%s: already mapped\n", __FUNCTION__);
+	 retval = NULL;
+      }
+      else if (buf->flags & (BM_NO_BACKING_STORE|BM_NO_EVICT)) {
+	 /* Buffers with either flag are mapped directly in their block. */
+	 assert(intel->locked);
+
+	 if (!buf->block && !evict_and_alloc_block(intel, buf)) {
+	    DBG("%s: alloc failed\n", __FUNCTION__);
+	    bm->fail = 1;
+	    retval = NULL;
+	 }
+	 else {
+	    assert(buf->block);
+	    buf->dirty = 0;
+
+	    if (!(buf->flags & BM_NO_FENCE_SUBDATA)) 
+	       wait_quiescent(intel, buf->block);
+
+	    buf->mapped = 1;
+	    retval = buf->block->virtual;
+	 }
+      }
+      else {
+	 DBG("%s - set buf %d dirty\n", __FUNCTION__, buf->id);
+	 set_dirty(intel, buf);
+	 /* Map the backing store, allocating it if there is none yet. */
+	 if (buf->backing_store == 0)
+	    alloc_backing_store(intel, buf);
+
+	 buf->mapped = 1;
+	 retval = buf->backing_store;
+      }
+   }
+   UNLOCK(bm);
+   return retval;
+}
+
+/* Clear the mapped flag that bmMapBuffer set on buf; no other state is
+ * touched. */
+void bmUnmapBuffer( struct intel_context *intel, struct buffer *buf )
+{
+   struct bufmgr *bm = intel->bm;
+
+   LOCK(bm);
+   DBG("bmUnmapBuffer %d\n", buf->id);
+   buf->mapped = 0;
+   UNLOCK(bm);
+}
+
+
+
+
+/* This is the big hack that turns on BM_NO_BACKING_STORE.  It says that
+ * an external party will maintain the backing store, eg Mesa's local
+ * copy of texture data.  Any backing store buf already owns is freed,
+ * the callback is recorded on buf and then invoked once right away.
+ */
+void bmBufferSetInvalidateCB(struct intel_context *intel,
+			     struct buffer *buf,
+			     void (*invalidate_cb)( struct intel_context *, void *ptr ),
+			     void *ptr,
+			     GLboolean dont_fence_subdata)
+{
+   struct bufmgr *bm = intel->bm;
+
+   LOCK(bm);
+   {
+      if (buf->backing_store)
+	 free_backing_store(intel, buf);
+
+      buf->flags |= dont_fence_subdata
+	 ? (BM_NO_BACKING_STORE | BM_NO_FENCE_SUBDATA)
+	 : BM_NO_BACKING_STORE;
+
+      buf->invalidate_cb = invalidate_cb;
+      buf->invalidate_ptr = ptr;
+      buf->dirty = 1;
+      DBG("bmBufferSetInvalidateCB set buf %d dirty\n", buf->id);
+
+      /* The buffer starts out invalid, so report that immediately.  The
+       * bufmgr lock is held across the callback, which therefore must
+       * not make bufmgr calls itself.
+       */
+      invalidate_cb( intel, ptr );
+   }
+   UNLOCK(bm);
+}
+
+
+
+
+
+
+
+/* Upload dirty buffers on the referenced list and hand every block on it
+ * to the on_hardware list; returns -1, doing neither, if bm->fail is set.
+ * Protected against other threads only by the DRI lock and the policy of
+ * flushing all dma before releasing it - otherwise two threads might be
+ * building up a list of buffers to validate at once. */
+int bmValidateBuffers( struct intel_context *intel )
+{
+   struct bufmgr *bm = intel->bm;
+   int retval = 0;
+
+   LOCK(bm);
+   {
+      DBG("%s fail %d\n", __FUNCTION__, bm->fail);
+      assert(intel->locked);
+
+      if (!bm->fail) {
+	 struct block *block, *tmp;
+
+	 foreach_s(block, tmp, &bm->referenced) {
+	    struct buffer *buf = block->buf;
+
+	    DBG("Validate buf %d / block %p / dirty %d\n", buf->id, block, buf->dirty);
+
+	    /* Upload the buffer contents if necessary:
+	     */
+	    if (buf->dirty) {
+	       DBG("Upload dirty buf %d (%s) sz %d offset 0x%x\n", buf->id, 
+		   buf->name, buf->size, block->mem->ofs);
+
+	       assert(!(buf->flags & (BM_NO_BACKING_STORE|BM_NO_EVICT)));
+
+	       wait_quiescent(intel, buf->block);
+
+	       do_memcpy(buf->block->virtual,
+			 buf->backing_store, 
+			 buf->size);
+
+	       if (intel->aub_file) {
+		  intel->vtbl.aub_gtt_data(intel,
+					       buf->block->mem->ofs,
+					       buf->backing_store,
+					       buf->size,
+					       0,
+					       0);
+	       }
+
+	       buf->dirty = 0;
+	       buf->aub_dirty = 0;
+	    }
+	    else if (buf->aub_dirty) {
+	       intel->vtbl.aub_gtt_data(intel,
+					    buf->block->mem->ofs,
+					    buf->block->virtual,
+					    buf->size,
+					    0,
+					    0);
+	       buf->aub_dirty = 0;
+	    }
+	    /* Hand the block over: on_hardware is what wait_quiescent tests. */
+	    block->referenced = 0;
+	    block->on_hardware = 1;
+	    move_to_tail(&bm->on_hardware, block);
+	 }
+
+	 bm->need_fence = 1;
+      }
+
+      retval = bm->fail ? -1 : 0;
+   }
+   UNLOCK(bm);
+
+
+   if (retval != 0)
+      DBG("%s failed\n", __FUNCTION__);
+
+   return retval;
+}
+
+
+
+/* Move every block off the referenced list onto the list for its state. */
+void bmReleaseBuffers( struct intel_context *intel )
+{
+   struct bufmgr *bm = intel->bm;
+
+   LOCK(bm);
+   {
+      struct block *block, *tmp;
+
+      foreach_s (block, tmp, &bm->referenced) {
+
+	 DBG("remove block %p from referenced list\n", block);
+
+	 if (block->on_hardware) {
+	    /* Return to the on-hardware list.
+	     */
+	    move_to_tail(&bm->on_hardware, block);	    
+	 }
+	 else if (block->fenced) {
+	    struct block *s;
+
+	    /* Hmm - have to scan the fenced list to insert the
+	     * buffers in order.  This is O(nm), but rare and the
+	     * numbers are low.
+	     */
+	    foreach (s, &bm->fenced) {
+	       if (FENCE_LTE(block->fence, s->fence))
+		  break;
+	    }
+	    /* s is the first entry with fence >= ours, or the list head. */
+	    move_to_tail(s, block);
+	 }
+	 else {			
+	    /* Return to the lru list:
+	     */
+	    move_to_tail(&block->pool->lru, block);
+	 }
+
+	 block->referenced = 0;
+      }
+   }
+   UNLOCK(bm);
+}
+
+
+/* If a fence is pending (need_fence), emit a flush and an IRQ, record
+ * the IRQ as last_fence and pass it to fence_blocks().  Returns
+ * last_fence either way.  Asserts intel->locked.
+ * Used by the buffer manager; libdrm will probably offer equivalent
+ * calls, so expect this to change/move before final:
+ */
+unsigned bmSetFence( struct intel_context *intel )
+{
+   assert(intel->locked);
+
+   /* Emit MI_FLUSH here:
+    */
+   if (intel->bm->need_fence) {
+
+      /* Emit a flush without using a batchbuffer.  Can't rely on the
+       * batchbuffer at this level really.  Would really prefer that
+       * the IRQ ioctl emitted the flush at the same time.
+       */
+      GLuint dword[2];
+      dword[0] = intel->vtbl.flush_cmd();
+      dword[1] = 0;
+      intel_cmd_ioctl(intel, (char *)&dword, sizeof(dword));
+      
+      intel->bm->last_fence = intelEmitIrqLocked( intel );
+      
+      fence_blocks(intel, intel->bm->last_fence);
+
+      intel->vtbl.note_fence(intel, intel->bm->last_fence);
+      intel->bm->need_fence = 0;
+
+      if (intel->thrashing) {
+	 intel->thrashing--;
+	 if (!intel->thrashing)
+	    DBG("not thrashing\n");
+      }
+      
+      intel->bm->free_on_hardware = 0;
+   }
+   
+   return intel->bm->last_fence;
+}
+/* As bmSetFence(), but takes the bufmgr lock around the call. */
+unsigned bmSetFenceLock( struct intel_context *intel )
+{
+   unsigned fence;
+   LOCK(intel->bm);
+   fence = bmSetFence(intel);
+   UNLOCK(intel->bm);
+   return fence;
+}
+/* Fence pending work, taking the hardware and bufmgr locks to do so. */
+unsigned bmLockAndFence( struct intel_context *intel )
+{
+   if (!intel->bm->need_fence)
+      return intel->bm->last_fence;
+   LOCK_HARDWARE(intel);
+   LOCK(intel->bm);
+   bmSetFence(intel);
+   UNLOCK(intel->bm);
+   UNLOCK_HARDWARE(intel);
+   return intel->bm->last_fence;
+}
+
+/* Wait on the IRQ for fence if it has not passed, then run check_fenced(). */
+void bmFinishFence( struct intel_context *intel, unsigned fence )
+{
+   if (!bmTestFence(intel, fence)) {
+      DBG("...wait on fence %d\n", fence);
+      intelWaitIrq( intel, fence );
+   }
+   assert(bmTestFence(intel, fence));
+   check_fenced(intel);
+}
+/* As bmFinishFence(), but takes the bufmgr lock around the call. */
+void bmFinishFenceLock( struct intel_context *intel, unsigned fence )
+{
+   LOCK(intel->bm);
+   bmFinishFence(intel, fence);
+   UNLOCK(intel->bm);
+}
+
+
+/* Specifically ignore texture memory sharing: wait for idle, then call
+ * set_dirty() on every buffer in a pool without BM_NO_EVICT.  The blocks
+ * themselves stay allocated here; bmEvictAll also frees them.
+ */
+void bm_fake_NotifyContendedLockTake( struct intel_context *intel )
+{
+   struct bufmgr *bm = intel->bm;
+
+   LOCK(bm);
+   {
+      struct block *block, *tmp;
+      GLuint i;
+
+      assert(is_empty_list(&bm->referenced));
+      /* Force a fence even if none was pending, and wait for it. */
+      bm->need_fence = 1;
+      bm->fail = 0;
+      bmFinishFence(intel, bmSetFence(intel));
+
+      assert(is_empty_list(&bm->fenced));
+      assert(is_empty_list(&bm->on_hardware));
+
+      for (i = 0; i < bm->nr_pools; i++) {
+	 if (!(bm->pool[i].flags & BM_NO_EVICT)) {
+	    foreach_s(block, tmp, &bm->pool[i].lru) {
+	       assert(bmTestFence(intel, block->fence));
+	       set_dirty(intel, block->buf);
+	    }
+	 }
+      }
+   }
+   UNLOCK(bm);
+}
+
+
+/* Wait for idle, then free the block of every buffer in an evictable pool. */
+void bmEvictAll( struct intel_context *intel )
+{
+   struct bufmgr *bm = intel->bm;
+
+   LOCK(bm);
+   {
+      struct block *block, *tmp;
+      GLuint i;
+
+      DBG("%s\n", __FUNCTION__);
+
+      assert(is_empty_list(&bm->referenced));
+      /* Force a fence even if none was pending, and wait for it. */
+      bm->need_fence = 1;
+      bm->fail = 0;
+      bmFinishFence(intel, bmSetFence(intel));
+
+      assert(is_empty_list(&bm->fenced));
+      assert(is_empty_list(&bm->on_hardware));
+
+      for (i = 0; i < bm->nr_pools; i++) {
+	 if (!(bm->pool[i].flags & BM_NO_EVICT)) {
+	    foreach_s(block, tmp, &bm->pool[i].lru) {
+	       assert(bmTestFence(intel, block->fence));
+	       set_dirty(intel, block->buf);
+	       block->buf->block = NULL;
+
+	       free_block(intel, block);
+	    }
+	 }
+      }
+   }
+   UNLOCK(bm);
+}
+
+
+/* Report the buffer manager's fail flag, read under the bufmgr lock.
+ */
+GLboolean bmError( struct intel_context *intel )
+{
+   struct bufmgr *bm = intel->bm;
+   GLboolean failed;
+
+   LOCK(bm);
+   failed = bm->fail;
+   UNLOCK(bm);
+
+   return failed;
+}
+
+/* Return the ctxId stored in the buffer manager (read without the lock). */
+GLuint bmCtxId( struct intel_context *intel )
+{
+   return intel->bm->ctxId;
+}
diff --git a/i965/intel_batchbuffer.c b/i965/intel_batchbuffer.c
new file mode 100644
index 0000000..64885ed
--- /dev/null
+++ b/i965/intel_batchbuffer.c
@@ -0,0 +1,243 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "imports.h"
+#include "intel_batchbuffer.h"
+#include "intel_ioctl.h"
+#include "bufmgr.h"
+
+/* Begin the next batch 64-byte aligned after the previous one; unmapped only. */
+static void intel_batchbuffer_reset( struct intel_batchbuffer *batch )
+{
+   assert(batch->map == NULL);
+   /* While unmapped, ptr holds a byte offset rather than an address. */
+   batch->offset = (unsigned long)batch->ptr;
+   batch->offset = (batch->offset + 63) & ~63;
+   batch->ptr = (unsigned char *) batch->offset;
+   /* Under BATCH_REFILL bytes left: respecify the buffer, restart at 0. */
+   if (BATCH_SZ - batch->offset < BATCH_REFILL) {
+      bmBufferData(batch->intel, 
+		   batch->buffer,
+		   BATCH_SZ, 
+		   NULL, 
+		   0); 
+      batch->offset = 0;
+      batch->ptr = NULL;
+   }
+		
+   batch->flags = 0;
+}
+/* Invalidate callback for the batch buffer: forget all batch state. */
+static void intel_batchbuffer_reset_cb( struct intel_context *intel,
+					void *ptr )
+{
+   struct intel_batchbuffer *batch = ptr;
+   assert(batch->map == NULL);
+   batch->ptr = NULL;
+   batch->offset = 0;
+   batch->flags = 0;
+}
+/* Map the batch if needed, rebasing ptr from offset to address; returns map. */
+GLubyte *intel_batchbuffer_map( struct intel_batchbuffer *batch )
+{
+   if (!batch->map) {
+      batch->map = bmMapBuffer(batch->intel, batch->buffer, 
+			       BM_MEM_AGP|BM_MEM_LOCAL|BM_CLIENT|BM_WRITE);
+      batch->ptr += (unsigned long)batch->map;
+   }
+
+   return batch->map;
+}
+/* Unmap the batch if mapped, turning ptr back into an offset from map. */
+void intel_batchbuffer_unmap( struct intel_batchbuffer *batch )
+{
+   if (batch->map) {
+      batch->ptr -= (unsigned long)batch->map;
+      batch->map = NULL;
+      bmUnmapBuffer(batch->intel, batch->buffer);
+   }
+}
+
+
+
+/*======================================================================
+ * Public functions
+ */
+/* Create a batch buffer of BATCH_SZ bytes; returns NULL if out of memory. */
+struct intel_batchbuffer *intel_batchbuffer_alloc( struct intel_context *intel )
+{
+   struct intel_batchbuffer *batch = calloc(1, sizeof(*batch));
+
+   if (!batch)
+      return NULL;		/* was dereferenced without a check */
+   batch->intel = intel;
+   bmGenBuffers(intel, "batch", 1, &batch->buffer, 12);
+
+   bmBufferSetInvalidateCB(intel, batch->buffer,
+			   intel_batchbuffer_reset_cb,
+			   batch,
+			   GL_TRUE);
+
+   bmBufferData(batch->intel,
+		batch->buffer,
+		BATCH_SZ,
+		NULL,
+		0);
+   return batch;
+}
+/* Unmap the batch if mapped, delete its buffer and free the batch itself. */
+void intel_batchbuffer_free( struct intel_batchbuffer *batch )
+{
+   if (batch->map) 
+      bmUnmapBuffer(batch->intel, batch->buffer);
+   
+   bmDeleteBuffers(batch->intel, 1, &batch->buffer);
+   free(batch);
+}
+
+
+#define MI_BATCH_BUFFER_END 	(0xA<<23)
+
+/* Submit the batched commands and start a new batch; GL_FALSE if validation fails. */
+GLboolean intel_batchbuffer_flush( struct intel_batchbuffer *batch )
+{
+   struct intel_context *intel = batch->intel;
+   GLuint used = batch->ptr - (batch->map + batch->offset);
+   GLuint offset;
+   GLint retval = GL_TRUE;
+
+   assert(intel->locked);
+
+   if (used == 0) {
+      bmReleaseBuffers( batch->intel );
+      return GL_TRUE;
+   }
+
+   /* Add the MI_BATCH_BUFFER_END, preceded by a zero dword when that is
+    * needed to make the batch length a multiple of 8 bytes.
+    */
+   if (used & 4) {
+      ((int *)batch->ptr)[0] = MI_BATCH_BUFFER_END;
+      batch->ptr += 4;
+      used += 4;
+   }
+   else {
+      ((int *)batch->ptr)[0] = 0;
+      ((int *)batch->ptr)[1] = MI_BATCH_BUFFER_END;
+
+      batch->ptr += 8;
+      used += 8;
+   }
+
+   intel_batchbuffer_unmap(batch);
+
+   /* Get the batch buffer offset: Must call bmBufferOffset() before
+    * bmValidateBuffers(), otherwise the buffer won't be on the inuse
+    * list.
+    */
+   offset = bmBufferOffset(batch->intel, batch->buffer);
+
+   if (bmValidateBuffers( batch->intel ) != 0) {
+      assert(intel->locked);
+      bmReleaseBuffers( batch->intel );
+      retval = GL_FALSE;
+      goto out;
+   }
+
+
+   if (intel->aub_file) {
+      /* Send buffered commands to aubfile as a single packet. 
+       */
+      intel_batchbuffer_map(batch);
+      ((int *)batch->ptr)[-1] = intel->vtbl.flush_cmd();
+      intel->vtbl.aub_commands(intel,
+			       offset, /* Fulsim weirdness - don't adjust */
+			       batch->map + batch->offset,
+			       used);
+      ((int *)batch->ptr)[-1] = MI_BATCH_BUFFER_END;
+      intel_batchbuffer_unmap(batch);
+   }
+
+
+   /* Fire the batch buffer, which was uploaded above:
+    */
+   intel_batch_ioctl(batch->intel, 
+		     offset + batch->offset,
+		     used);
+
+   if (intel->aub_file && 
+       intel->ctx.DrawBuffer->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT)
+      intel->vtbl.aub_dump_bmp( intel, 0 );
+
+   /* Reset the buffer (also reached on validation failure), then remap:
+    */
+ out:
+   intel_batchbuffer_reset( batch );
+   intel_batchbuffer_map( batch );
+
+   if (!retval)
+      DBG("%s failed\n", __FUNCTION__);
+
+   return retval;
+}
+
+
+
+/* Pad the batch with zeros so batch->ptr is a multiple of align (a power
+ * of two), flushing instead if the padding plus sz bytes would not fit. */
+void intel_batchbuffer_align( struct intel_batchbuffer *batch,
+			      GLuint align,
+			      GLuint sz )
+{
+   unsigned long ptr = (unsigned long) batch->ptr;
+   /* Round up with align-1: adding a full align wasted a whole unit of
+    * padding whenever ptr was already aligned. */
+   unsigned long aptr = (ptr + align - 1) & ~((unsigned long)align-1);
+   GLuint fixup = aptr - ptr;
+
+   if (intel_batchbuffer_space(batch) < fixup + sz)
+      intel_batchbuffer_flush(batch);
+   else {
+      memset(batch->ptr, 0, fixup);
+      batch->ptr += fixup;
+   }
+}
+
+
+
+/* Append bytes (a multiple of 4) of data to the batch, flushing first if needed. */
+void intel_batchbuffer_data(struct intel_batchbuffer *batch,
+			    const void *data,
+			    GLuint bytes,
+			    GLuint flags)
+{
+   assert((bytes & 3) == 0);
+   intel_batchbuffer_require_space(batch, bytes, flags);
+   __memcpy(batch->ptr, data, bytes);
+   batch->ptr += bytes;
+}
+
diff --git a/i965/intel_batchbuffer.h b/i965/intel_batchbuffer.h
new file mode 100644
index 0000000..25e0a65
--- /dev/null
+++ b/i965/intel_batchbuffer.h
@@ -0,0 +1,127 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_BATCHBUFFER_H
+#define INTEL_BATCHBUFFER_H
+
+#include "mtypes.h"
+#include "bufmgr.h"
+
+struct intel_context;
+
+#define BATCH_SZ (16 * 1024)
+#define BATCH_REFILL 4096
+#define BATCH_RESERVED 16
+
+#define INTEL_BATCH_NO_CLIPRECTS 0x1
+#define INTEL_BATCH_CLIPRECTS    0x2
+
+struct intel_batchbuffer {
+   struct intel_context *intel;	/* context this batch was allocated for */
+   /* Buffer-manager buffer that holds the commands. */
+   struct buffer *buffer;
+   /* INTEL_BATCH_* bits accumulated by intel_batchbuffer_require_space(). */
+   GLuint flags;
+   unsigned long offset;	/* byte offset in buffer where this batch starts */
+   /* map is NULL while unmapped; ptr then holds an offset, not an address. */
+   GLubyte *map;
+   GLubyte *ptr; 
+};
+
+struct intel_batchbuffer *intel_batchbuffer_alloc( struct intel_context *intel );
+
+void intel_batchbuffer_free( struct intel_batchbuffer *batch );
+
+
+GLboolean intel_batchbuffer_flush( struct intel_batchbuffer *batch );
+
+void intel_batchbuffer_unmap( struct intel_batchbuffer *batch );
+GLubyte *intel_batchbuffer_map( struct intel_batchbuffer *batch );
+
+
+/* Unlike bmBufferData, this currently requires the buffer be mapped.
+ * Consider it a convenience function wrapping multiple
+ * intel_batchbuffer_emit_dword() calls.
+ */
+void intel_batchbuffer_data(struct intel_batchbuffer *batch,
+			    const void *data,
+			    GLuint bytes,
+			    GLuint flags);
+
+void intel_batchbuffer_release_space(struct intel_batchbuffer *batch,
+				   GLuint bytes);
+
+
+/* Inline functions - might actually be better off with these
+ * non-inlined.  Certainly better off switching all command packets to
+ * be passed as structs rather than dwords, but that's a little bit of
+ * work...
+ */
+static inline GLuint
+intel_batchbuffer_space( struct intel_batchbuffer *batch )
+{
+   GLuint used = batch->ptr - (batch->map + batch->offset);	/* bytes written so far */
+   return (BATCH_SZ - BATCH_RESERVED) - used;
+}
+/* Append one dword to the mapped batch; the caller must have reserved space. */
+static inline void 
+intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch,
+			     GLuint dword)
+{
+   assert(batch->map);
+   assert(intel_batchbuffer_space(batch) >= 4);
+   *(GLuint *)(batch->ptr) = dword;
+   batch->ptr += 4;
+}
+/* Flush first if sz bytes do not fit or flags conflict with the batch's. */
+static inline void 
+intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
+				GLuint sz,
+				GLuint flags)
+{
+   assert(sz < BATCH_SZ - 8);
+   if (intel_batchbuffer_space(batch) < sz ||
+       (batch->flags != 0 && flags != 0 && batch->flags != flags))
+      intel_batchbuffer_flush(batch);
+   
+   batch->flags |= flags;
+}
+
+void intel_batchbuffer_align( struct intel_batchbuffer *batch,
+			      GLuint align,
+			      GLuint sz );
+
+
+/* Crusty old macros, to be removed.  They expect a local `intel`; n is a
+ * dword count, parenthesized so an argument like a+b is scaled as a whole. */
+#define BATCH_LOCALS 
+#define BEGIN_BATCH(n, flags) intel_batchbuffer_require_space(intel->batch, (n)*4, flags)
+#define OUT_BATCH(d)  intel_batchbuffer_emit_dword(intel->batch, d)
+#define ADVANCE_BATCH() do { } while(0)
+
+
+#endif
diff --git a/i965/intel_blit.c b/i965/intel_blit.c
new file mode 100644
index 0000000..f88cbb2
--- /dev/null
+++ b/i965/intel_blit.c
@@ -0,0 +1,617 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include <stdio.h>
+#include <errno.h>
+
+#include "mtypes.h"
+#include "context.h"
+#include "enums.h"
+#include "vblank.h"
+
+#include "intel_reg.h"
+#include "intel_batchbuffer.h"
+#include "intel_context.h"
+#include "intel_blit.h"
+#include "intel_regions.h"
+#include "intel_structs.h"
+
+#include "bufmgr.h"
+
+
+
+
+/*
+ * Copy the back buffer to the front buffer. 
+ */
+void intelCopyBuffer( const __DRIdrawablePrivate *dPriv,
+		      const drm_clip_rect_t *rect ) 
+{
+   struct intel_context *intel;
+   GLboolean   missed_target;
+   int64_t ust;
+
+   DBG("%s\n", __FUNCTION__);
+
+   assert(dPriv);
+   assert(dPriv->driContextPriv);
+   assert(dPriv->driContextPriv->driverPrivate);
+
+   intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate;
+   intelFlush( &intel->ctx );
+
+   /* Finish the fence recorded by the previous swap (see last_swap_fence below). */
+   bmFinishFenceLock(intel, intel->last_swap_fence);
+
+   /* The LOCK_HARDWARE is required for the cliprects.  Buffer offsets
+    * should work regardless.
+    */
+   LOCK_HARDWARE( intel );
+   /* rect == NULL: wait for vblank with the lock dropped; missed_target is only passed out here. */
+   if (!rect)
+   {
+       UNLOCK_HARDWARE( intel );
+       driWaitForVBlank( dPriv, &intel->vbl_seq, intel->vblank_flags, & missed_target );
+       LOCK_HARDWARE( intel );
+   }
+
+   {
+      intelScreenPrivate *intelScreen = intel->intelScreen;
+      __DRIdrawablePrivate *dPriv = intel->driDrawable;   /* NOTE: shadows the parameter */
+      int nbox = dPriv->numClipRects;
+      drm_clip_rect_t *pbox = dPriv->pClipRects;
+      int cpp = intelScreen->cpp;
+      struct intel_region *src, *dst;
+      int BR13, CMD;
+      int i;
+      int src_pitch, dst_pitch;
+
+      if (intel->sarea->pf_current_page == 0) {
+	 dst = intel->front_region;
+	 src = intel->back_region;
+      }
+      else {
+	 assert(0);   /* a non-zero current page trips this in assert-enabled builds */
+	 src = intel->front_region;
+	 dst = intel->back_region;
+      }
+
+      src_pitch = src->pitch * src->cpp;
+      dst_pitch = dst->pitch * dst->cpp;
+
+      if (cpp == 2) {
+	 BR13 = (0xCC << 16) | (1<<24);   /* 0xCC is the code translate_raster_op() gives for GL_COPY */
+	 CMD = XY_SRC_COPY_BLT_CMD;
+      } 
+      else {
+	 BR13 = (0xCC << 16) | (1<<24) | (1<<25);
+	 CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA |
+		XY_SRC_COPY_BLT_WRITE_RGB);
+      }
+
+      if (src->tiled) {
+	 CMD |= XY_SRC_TILED;
+	 src_pitch /= 4;
+      }
+      
+      if (dst->tiled) {
+	 CMD |= XY_DST_TILED;
+ 	 dst_pitch /= 4;
+      }
+  
+      for (i = 0 ; i < nbox; i++, pbox++) 
+      {
+	 drm_clip_rect_t tmp = *pbox;
+
+	 if (rect) {
+	    if (!intel_intersect_cliprects(&tmp, &tmp, rect))
+	       continue;
+	 }
+
+	 /* Skip inverted boxes and boxes reaching past the screen size. */
+	 if (tmp.x1 > tmp.x2 ||
+	     tmp.y1 > tmp.y2 ||
+	     tmp.x2 > intelScreen->width ||
+	     tmp.y2 > intelScreen->height)
+	    continue;
+ 
+	 BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS);
+	 OUT_BATCH( CMD );
+	 OUT_BATCH( dst_pitch | BR13 );
+	 OUT_BATCH( (tmp.y1 << 16) | tmp.x1 );
+	 OUT_BATCH( (tmp.y2 << 16) | tmp.x2 );
+	 OUT_BATCH( bmBufferOffset(intel, dst->buffer) );
+	 OUT_BATCH( (tmp.y1 << 16) | tmp.x1 );
+	 OUT_BATCH( src_pitch );
+	 OUT_BATCH( bmBufferOffset(intel, src->buffer) ); 
+	 ADVANCE_BATCH();
+      }
+   }
+
+   intel_batchbuffer_flush( intel->batch );
+   intel->second_last_swap_fence = intel->last_swap_fence;
+   intel->last_swap_fence = bmSetFenceLock( intel );
+   UNLOCK_HARDWARE( intel );
+   /* Swap statistics are only updated for full (rect == NULL) swaps. */
+   if (!rect)
+   {
+       intel->swap_count++;
+       (*dri_interface->getUST)(&ust);
+       if (missed_target) {
+	   intel->swap_missed_count++;
+	   intel->swap_missed_ust = ust -  intel->swap_ust;
+       }
+   
+       intel->swap_ust = ust;
+   }
+
+}
+
+
+
+
+/* Queue a 6-dword XY_COLOR_BLT covering (x, y) .. (x+w, y+h) with 'color'. */
+void intelEmitFillBlit( struct intel_context *intel,
+			GLuint cpp,
+			GLshort dst_pitch,
+			struct buffer *dst_buffer,
+			GLuint dst_offset,
+			GLboolean dst_tiled,
+			GLshort x, GLshort y, 
+			GLshort w, GLshort h,
+			GLuint color )
+{
+   GLuint BR13, CMD;
+   BATCH_LOCALS;
+   dst_pitch *= cpp;   /* presumably pixels -> bytes; confirm with callers */
+
+   switch(cpp) {
+   case 1: 
+   case 2: 
+   case 3: 
+      BR13 = (0xF0 << 16) | (1<<24);
+      CMD = XY_COLOR_BLT_CMD;
+      break;
+   case 4:
+      BR13 = (0xF0 << 16) | (1<<24) | (1<<25);
+      CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA |
+	     XY_COLOR_BLT_WRITE_RGB);
+      break;
+   default:
+      return;   /* any other cpp: nothing is emitted */
+   }
+
+   if (dst_tiled) {
+      CMD |= XY_DST_TILED;
+      dst_pitch /= 4;
+   }
+
+   BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
+   OUT_BATCH( CMD );
+   OUT_BATCH( dst_pitch | BR13 );
+   OUT_BATCH( (y << 16) | x );
+   OUT_BATCH( ((y+h) << 16) | (x+w) );
+   OUT_BATCH( bmBufferOffset(intel, dst_buffer) + dst_offset );
+   OUT_BATCH( color );
+   ADVANCE_BATCH();
+}
+
+/* Translate a GL logic op into the 8-bit raster-op code that callers shift
+ * into BR13.  Anything other than the sixteen ops listed here yields 0. */
+static GLuint translate_raster_op(GLenum logicop)
+{
+   if (logicop == GL_CLEAR)         return 0x00;
+   if (logicop == GL_AND)           return 0x88;
+   if (logicop == GL_AND_REVERSE)   return 0x44;
+   if (logicop == GL_COPY)          return 0xCC;
+   if (logicop == GL_AND_INVERTED)  return 0x22;
+   if (logicop == GL_NOOP)          return 0xAA;
+   if (logicop == GL_XOR)           return 0x66;
+   if (logicop == GL_OR)            return 0xEE;
+   if (logicop == GL_NOR)           return 0x11;
+   if (logicop == GL_EQUIV)         return 0x99;
+   if (logicop == GL_INVERT)        return 0x55;
+   if (logicop == GL_OR_REVERSE)    return 0xDD;
+   if (logicop == GL_COPY_INVERTED) return 0x33;
+   if (logicop == GL_OR_INVERTED)   return 0xBB;
+   if (logicop == GL_NAND)          return 0x77;
+   if (logicop == GL_SET)           return 0xFF;
+   return 0;
+}
+
+
+/* Copy BitBlt: queue an 8-dword XY_SRC_COPY_BLT of a w x h rectangle from
+ * src to dst, using logic_op as the raster op.  Emits nothing for other cpp. */
+void intelEmitCopyBlit( struct intel_context *intel,
+			GLuint cpp,
+			GLshort src_pitch,
+			struct buffer *src_buffer,
+			GLuint  src_offset,
+			GLboolean src_tiled,
+			GLshort dst_pitch,
+			struct buffer *dst_buffer,
+			GLuint  dst_offset,
+			GLboolean dst_tiled,
+			GLshort src_x, GLshort src_y,
+			GLshort dst_x, GLshort dst_y,
+			GLshort w, GLshort h,
+			GLenum logic_op )
+{
+   GLuint CMD, BR13;
+   int dst_y2 = dst_y + h;
+   int dst_x2 = dst_x + w;
+   BATCH_LOCALS;
+
+   /* The buffer arguments are pointers, so they are printed with %p. */
+   DBG("%s src:buf(%p)/%d %d,%d dst:buf(%p)/%d %d,%d sz:%dx%d op:%d\n",
+       __FUNCTION__,
+       (void *)src_buffer, src_pitch, src_x, src_y,
+       (void *)dst_buffer, dst_pitch, dst_x, dst_y,
+       w,h,logic_op);
+
+   assert( logic_op - GL_CLEAR >= 0 );
+   assert( logic_op - GL_CLEAR < 0x10 );
+   /* Scale both pitches by cpp (presumably pixels -> bytes; confirm with callers). */
+   src_pitch *= cpp;
+   dst_pitch *= cpp;
+
+   switch(cpp) {
+   case 1: 
+   case 2: 
+   case 3: 
+      BR13 = (translate_raster_op(logic_op) << 16) | (1<<24);
+      CMD = XY_SRC_COPY_BLT_CMD;
+      break;
+   case 4:
+      BR13 = (translate_raster_op(logic_op) << 16) | (1<<24) |
+	  (1<<25);
+      CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA |
+	     XY_SRC_COPY_BLT_WRITE_RGB);
+      break;
+   default:
+      return;
+   }
+
+   if (src_tiled) {
+      CMD |= XY_SRC_TILED;
+      src_pitch /= 4;
+   }
+   
+   if (dst_tiled) {
+      CMD |= XY_DST_TILED;
+      dst_pitch /= 4;
+   }
+   /* A negative width or height leaves nothing to copy. */
+   if (dst_y2 < dst_y ||
+       dst_x2 < dst_x) {
+      return;
+   }
+
+   dst_pitch &= 0xffff;
+   src_pitch &= 0xffff;
+
+   /* Initial y values don't seem to work with negative pitches.  If
+    * we adjust the offsets manually (below), it seems to work fine.
+    *
+    * On the other hand, if we always adjust, the hardware doesn't
+    * know which blit directions to use, so overlapping copypixels get
+    * the wrong result.
+    */
+   if (dst_pitch > 0 && src_pitch > 0) {
+      BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS);
+      OUT_BATCH( CMD );
+      OUT_BATCH( dst_pitch | BR13 );
+      OUT_BATCH( (dst_y << 16) | dst_x );
+      OUT_BATCH( (dst_y2 << 16) | dst_x2 );
+      OUT_BATCH( bmBufferOffset(intel, dst_buffer) + dst_offset );
+      OUT_BATCH( (src_y << 16) | src_x );
+      OUT_BATCH( src_pitch );
+      OUT_BATCH( bmBufferOffset(intel, src_buffer) + src_offset );
+      ADVANCE_BATCH();
+   }
+   else {
+      BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS);
+      OUT_BATCH( CMD );
+      OUT_BATCH( (dst_pitch & 0xffff) | BR13 );
+      OUT_BATCH( (0 << 16) | dst_x );
+      OUT_BATCH( (h << 16) | dst_x2 );
+      OUT_BATCH( bmBufferOffset(intel, dst_buffer) + dst_offset + dst_y * dst_pitch );
+      OUT_BATCH( (0 << 16) | src_x );
+      OUT_BATCH( (src_pitch & 0xffff) );
+      OUT_BATCH( bmBufferOffset(intel, src_buffer) + src_offset + src_y * src_pitch );
+      ADVANCE_BATCH();
+   }
+}
+
+
+
+/* Clear the buffers selected in 'flags' by queueing one XY_COLOR_BLT per cliprect and buffer. */
+void intelClearWithBlit(GLcontext *ctx, GLbitfield flags)
+{
+   struct intel_context *intel = intel_context( ctx );
+   intelScreenPrivate *intelScreen = intel->intelScreen;
+   GLuint clear_depth, clear_color;
+   GLint cx, cy, cw, ch;
+   GLint cpp = intelScreen->cpp;
+   GLboolean all;
+   GLint i;
+   struct intel_region *front = intel->front_region;
+   struct intel_region *back = intel->back_region;
+   struct intel_region *depth = intel->depth_region;
+   GLuint BR13, FRONT_CMD, BACK_CMD, DEPTH_CMD;
+   GLuint front_pitch;
+   GLuint back_pitch;
+   GLuint depth_pitch;
+   BATCH_LOCALS;
+   
+   clear_color = intel->ClearColor;
+   clear_depth = 0;
+   /* Depth and stencil share one clear dword; stencil occupies bits 24..31. */
+   if (flags & BUFFER_BIT_DEPTH) {
+      clear_depth = (GLuint)(ctx->Depth.Clear * intel->ClearDepth);
+   }
+
+   if (flags & BUFFER_BIT_STENCIL) {
+      clear_depth |= (ctx->Stencil.Clear & 0xff) << 24;
+   }
+
+   switch(cpp) {
+   case 2: 
+      BR13 = (0xF0 << 16) | (1<<24);
+      BACK_CMD  = FRONT_CMD = XY_COLOR_BLT_CMD;
+      DEPTH_CMD = XY_COLOR_BLT_CMD;
+      break;
+   case 4:
+      BR13 = (0xF0 << 16) | (1<<24) | (1<<25);
+      BACK_CMD = FRONT_CMD = (XY_COLOR_BLT_CMD |
+			      XY_COLOR_BLT_WRITE_ALPHA | 
+			      XY_COLOR_BLT_WRITE_RGB);
+      DEPTH_CMD = XY_COLOR_BLT_CMD;
+      if (flags & BUFFER_BIT_DEPTH) DEPTH_CMD |= XY_COLOR_BLT_WRITE_RGB;
+      if (flags & BUFFER_BIT_STENCIL) DEPTH_CMD |= XY_COLOR_BLT_WRITE_ALPHA;
+      break;
+   default:
+      return;
+   }
+
+   /* Flush queued rendering, then hold the hardware lock while the blits are emitted. */
+
+   intelFlush( &intel->ctx );
+   LOCK_HARDWARE( intel );
+   {
+      /* get clear bounds after locking */
+      cx = ctx->DrawBuffer->_Xmin;
+      cy = ctx->DrawBuffer->_Ymin;
+      ch = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin;
+      cw = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin;
+      all = (cw == ctx->DrawBuffer->Width && ch == ctx->DrawBuffer->Height);
+
+      /* flip top to bottom */
+      cy = intel->driDrawable->h - cy - ch;
+      cx = cx + intel->drawX;
+      cy += intel->drawY;
+
+      /* adjust for page flipping */
+      if ( intel->sarea->pf_current_page == 0 ) {
+	 front = intel->front_region;
+	 back = intel->back_region;
+      } 
+      else {
+	 back = intel->front_region;
+	 front = intel->back_region;
+      }
+      
+      front_pitch = front->pitch * front->cpp;
+      back_pitch = back->pitch * back->cpp;
+      depth_pitch = depth->pitch * depth->cpp;
+      
+      if (front->tiled) {
+	 FRONT_CMD |= XY_DST_TILED;
+	 front_pitch /= 4;
+      }
+
+      if (back->tiled) {
+	 BACK_CMD |= XY_DST_TILED;
+	 back_pitch /= 4;
+      }
+
+      if (depth->tiled) {
+	 DEPTH_CMD |= XY_DST_TILED;
+	 depth_pitch /= 4;
+      }
+
+      for (i = 0 ; i < intel->numClipRects ; i++) 
+      { 	 
+	 drm_clip_rect_t *box = &intel->pClipRects[i];	 
+	 drm_clip_rect_t b;
+
+	 if (!all) {
+	    GLint x = box->x1;
+	    GLint y = box->y1;
+	    GLint w = box->x2 - x;
+	    GLint h = box->y2 - y;
+
+	    if (x < cx) w -= cx - x, x = cx; 
+	    if (y < cy) h -= cy - y, y = cy;
+	    if (x + w > cx + cw) w = cx + cw - x;
+	    if (y + h > cy + ch) h = cy + ch - y;
+	    if (w <= 0) continue;
+	    if (h <= 0) continue;
+
+	    b.x1 = x;
+	    b.y1 = y;
+	    b.x2 = x + w;
+	    b.y2 = y + h;      
+	 } else {
+	    b = *box;
+	 }
+
+	 /* Skip inverted boxes and boxes reaching past the screen size. */
+	 if (b.x1 > b.x2 ||
+	     b.y1 > b.y2 ||
+	     b.x2 > intelScreen->width ||
+	     b.y2 > intelScreen->height)
+	    continue;
+
+	 if ( flags & BUFFER_BIT_FRONT_LEFT ) {	    
+	    BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
+	    OUT_BATCH( FRONT_CMD );
+	    OUT_BATCH( front_pitch | BR13 );
+	    OUT_BATCH( (b.y1 << 16) | b.x1 );
+	    OUT_BATCH( (b.y2 << 16) | b.x2 );
+	    OUT_BATCH( bmBufferOffset(intel, front->buffer) );
+	    OUT_BATCH( clear_color );
+	    ADVANCE_BATCH();
+	 }
+
+	 if ( flags & BUFFER_BIT_BACK_LEFT ) {
+	    BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); 
+	    OUT_BATCH( BACK_CMD );
+	    OUT_BATCH( back_pitch | BR13 );
+	    OUT_BATCH( (b.y1 << 16) | b.x1 );
+	    OUT_BATCH( (b.y2 << 16) | b.x2 );
+	    OUT_BATCH( bmBufferOffset(intel, back->buffer) );
+	    OUT_BATCH( clear_color );
+	    ADVANCE_BATCH();
+	 }
+
+	 if ( flags & (BUFFER_BIT_STENCIL | BUFFER_BIT_DEPTH) ) {
+	    BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
+	    OUT_BATCH( DEPTH_CMD );
+	    OUT_BATCH( depth_pitch | BR13 );
+	    OUT_BATCH( (b.y1 << 16) | b.x1 );
+	    OUT_BATCH( (b.y2 << 16) | b.x2 );
+	    OUT_BATCH( bmBufferOffset(intel, depth->buffer) );
+	    OUT_BATCH( clear_depth );
+	    ADVANCE_BATCH();
+	 }      
+      }
+   }
+   intel_batchbuffer_flush( intel->batch );
+   UNLOCK_HARDWARE( intel );
+}
+
+
+
+#define BR13_565  0x1
+#define BR13_8888 0x3
+
+
+/* Queue an XY_SETUP_BLT followed by an XY_TEXT_IMMEDIATE_BLT whose bitmap
+ * (src_bits) is copied inline into the batch.  No-op when w or h is negative. */
+void
+intelEmitImmediateColorExpandBlit(struct intel_context *intel,
+				  GLuint cpp,
+				  GLubyte *src_bits, GLuint src_size,
+				  GLuint fg_color,
+				  GLshort dst_pitch,
+				  struct buffer *dst_buffer,
+				  GLuint dst_offset,
+				  GLboolean dst_tiled,
+				  GLshort x, GLshort y, 
+				  GLshort w, GLshort h,
+				  GLenum logic_op)
+{
+   struct xy_setup_blit setup;
+   struct xy_text_immediate_blit text;
+   int dwords = ((src_size + 7) & ~7) / 4;
+   /* dwords: src_size rounded up to a multiple of 8 bytes, counted in dwords. */
+   assert( logic_op - GL_CLEAR >= 0 );
+   assert( logic_op - GL_CLEAR < 0x10 );
+
+   if (w < 0 || h < 0) 
+      return;
+
+   dst_pitch *= cpp;
+
+   if (dst_tiled) 
+      dst_pitch /= 4;
+
+   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
+       __FUNCTION__,
+       (void *)dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);
+   memset(&setup, 0, sizeof(setup));
+   setup.br0.client = CLIENT_2D;
+   setup.br0.opcode = OPCODE_XY_SETUP_BLT;
+   setup.br0.write_alpha = (cpp == 4);
+   setup.br0.write_rgb = (cpp == 4);
+   setup.br0.dst_tiled = dst_tiled;
+   setup.br0.length = (sizeof(setup) / sizeof(int)) - 2;
+      
+   setup.br13.dest_pitch = dst_pitch;
+   setup.br13.rop = translate_raster_op(logic_op);
+   setup.br13.color_depth = (cpp == 4) ? BR13_8888 : BR13_565;
+   setup.br13.clipping_enable = 0;
+   setup.br13.mono_source_transparency = 1;
+   setup.dw2.clip_y1 = 0;
+   setup.dw2.clip_x1 = 0;
+   setup.dw3.clip_y2 = 100;
+   setup.dw3.clip_x2 = 100;
+
+   setup.dest_base_addr = bmBufferOffset(intel, dst_buffer) + dst_offset;
+   setup.background_color = 0;
+   setup.foreground_color = fg_color;
+   setup.pattern_base_addr = 0;
+
+   memset(&text, 0, sizeof(text));
+   text.dw0.client = CLIENT_2D;
+   text.dw0.opcode = OPCODE_XY_TEXT_IMMEDIATE_BLT;
+   text.dw0.pad0 = 0;
+   text.dw0.byte_packed = 1;	/* ?maybe? */
+   text.dw0.pad1 = 0;
+   text.dw0.dst_tiled = dst_tiled;
+   text.dw0.pad2 = 0;
+   text.dw0.length = (sizeof(text)/sizeof(int)) - 2 + dwords;
+   text.dw1.dest_y1 = y;	/* duplicates info in setup blit */
+   text.dw1.dest_x1 = x;
+   text.dw2.dest_y2 = y + h;
+   text.dw2.dest_x2 = x + w;
+   /* Reserve room for all three pieces at once.  The size is in bytes, so
+    * the payload counts as dwords * 4 (it was previously passed as dwords). */
+   intel_batchbuffer_require_space( intel->batch,
+				    sizeof(setup) + 
+				    sizeof(text) + 
+				    dwords * 4,
+				    INTEL_BATCH_NO_CLIPRECTS );
+
+   intel_batchbuffer_data( intel->batch,
+			   &setup,
+			   sizeof(setup),
+			   INTEL_BATCH_NO_CLIPRECTS );
+
+   intel_batchbuffer_data( intel->batch,
+			   &text,
+			   sizeof(text),
+			   INTEL_BATCH_NO_CLIPRECTS );
+   /* NOTE(review): copies dwords * 4 bytes, up to 7 more than src_size; confirm callers pad src_bits. */
+   intel_batchbuffer_data( intel->batch,
+			   src_bits,
+			   dwords * 4,
+			   INTEL_BATCH_NO_CLIPRECTS );
+}
+
diff --git a/i965/intel_blit.h b/i965/intel_blit.h
new file mode 100644
index 0000000..e361545
--- /dev/null
+++ b/i965/intel_blit.h
@@ -0,0 +1,78 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_BLIT_H
+#define INTEL_BLIT_H
+
+#include "intel_context.h"
+#include "intel_ioctl.h"
+
+struct buffer;
+
+extern void intelCopyBuffer( const __DRIdrawablePrivate *dpriv,
+			     const drm_clip_rect_t *rect );
+extern void intelClearWithBlit(GLcontext *ctx, GLbitfield mask);
+
+extern void intelEmitCopyBlit( struct intel_context *intel,
+			       GLuint cpp,
+			       GLshort src_pitch,
+			       struct buffer *src_buffer,
+			       GLuint  src_offset,
+			       GLboolean src_tiled,
+			       GLshort dst_pitch,
+			       struct buffer *dst_buffer,
+			       GLuint  dst_offset,
+			       GLboolean dst_tiled,
+			       GLshort srcx, GLshort srcy,
+			       GLshort dstx, GLshort dsty,
+			       GLshort w, GLshort h,
+			       GLenum logic_op );
+
+extern void intelEmitFillBlit( struct intel_context *intel,
+			       GLuint cpp,
+			       GLshort dst_pitch,
+			       struct buffer *dst_buffer,
+			       GLuint dst_offset,
+			       GLboolean dst_tiled,
+			       GLshort x, GLshort y, 
+			       GLshort w, GLshort h,
+			       GLuint color );
+
+void
+intelEmitImmediateColorExpandBlit(struct intel_context *intel,
+				  GLuint cpp,
+				  GLubyte *src_bits, GLuint src_size,
+				  GLuint fg_color,
+				  GLshort dst_pitch,
+				  struct buffer *dst_buffer,
+				  GLuint dst_offset,
+				  GLboolean dst_tiled,
+				  GLshort dst_x, GLshort dst_y, 
+				  GLshort w, GLshort h,
+				  GLenum logic_op );
+
+#endif
diff --git a/i965/intel_buffer_objects.c b/i965/intel_buffer_objects.c
new file mode 100644
index 0000000..015e433
--- /dev/null
+++ b/i965/intel_buffer_objects.c
@@ -0,0 +1,207 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "imports.h"
+#include "mtypes.h"
+#include "bufferobj.h"
+
+#include "intel_context.h"
+#include "intel_buffer_objects.h"
+#include "bufmgr.h"
+
+
+/**
+ * There is some duplication between mesa's bufferobjects and our
+ * bufmgr buffers.  Both have an integer handle and a hashtable to
+ * lookup an opaque structure.  It would be nice if the handles and
+ * internal structure were somehow shared.
+ */
+static struct gl_buffer_object *intel_bufferobj_alloc( GLcontext *ctx, 
+						       GLuint name, 
+						       GLenum target )
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *obj = MALLOC_STRUCT(intel_buffer_object);
+   if (!obj)
+      return NULL;   /* allocation failed: report it instead of dereferencing NULL */
+   _mesa_initialize_buffer_object(&obj->Base, name, target);
+   /* XXX:  We generate our own handle, which is different to 'name' above.
+    */
+   bmGenBuffers(intel, "bufferobj", 1, &obj->buffer, 6);
+   assert(obj->buffer);
+
+   return &obj->Base;
+}
+
+
+/**
+ * Deallocate/free a vertex/pixel buffer object.
+ * Called via glDeleteBuffersARB().
+ */
+static void intel_bufferobj_free( GLcontext *ctx, 
+				  struct gl_buffer_object *obj )
+{ 
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *bo = intel_buffer_object(obj);
+
+   assert(bo);
+   /* Give back the bufmgr buffer, if there is one, before the wrapper. */
+   if (bo->buffer != NULL) {
+      bmDeleteBuffers( intel, 1, &bo->buffer );
+   }
+   _mesa_free(bo);
+}
+
+
+
+/**
+ * Allocate space for and store data in a buffer object.  Any data that was
+ * previously stored in the buffer object is lost.  If data is NULL,
+ * memory will be allocated, but no copy will occur.
+ * Called via glBufferDataARB().
+ */
+static void intel_bufferobj_data( GLcontext *ctx, 
+				  GLenum target, 
+				  GLsizeiptrARB size,
+				  const GLvoid *data, 
+				  GLenum usage,
+				  struct gl_buffer_object *obj )
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *bo = intel_buffer_object(obj);
+
+   assert(bo);
+
+   /* Remember the usage hint and the new size on the GL object. */
+   obj->Usage = usage;
+   obj->Size = size;
+
+   /* XXX: 'usage' is not yet turned into bufmgr flags; the three
+    * trailing arguments are all passed as zero.
+    */
+   bmBufferDataAUB(intel, bo->buffer, size, data, 0, 0, 0);
+}
+
+
+/**
+ * Replace data in a subrange of buffer object.  If the data range
+ * specified by size + offset extends beyond the end of the buffer or
+ * if data is NULL, no copy is performed.
+ * Called via glBufferSubDataARB().
+ */
+static void intel_bufferobj_subdata( GLcontext *ctx, GLenum target,
+				     GLintptrARB offset, GLsizeiptrARB size,
+				     const GLvoid * data, struct gl_buffer_object * obj )
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *bo = intel_buffer_object(obj);
+   struct buffer *buf;
+
+   assert(bo);
+   buf = bo->buffer;
+   /* Hand the range straight to the buffer manager; both trailing args are 0. */
+   bmBufferSubDataAUB(intel, buf, offset, size, data, 0, 0);
+}
+
+
+/**
+ * Called via glGetBufferSubDataARB().
+ */
+static void intel_bufferobj_get_subdata( GLcontext *ctx, GLenum target,
+					 GLintptrARB offset, GLsizeiptrARB size,
+					 GLvoid * data, struct gl_buffer_object * obj )
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *bo = intel_buffer_object(obj);
+   struct buffer *buf;
+
+   assert(bo);
+   buf = bo->buffer;
+   /* Hand the request straight to the buffer manager. */
+   bmBufferGetSubData(intel, buf, offset, size, data);
+}
+
+
+
+/**
+ * Called via glMapBufferARB().
+ */
+static void *intel_bufferobj_map( GLcontext *ctx, GLenum target,
+				  GLenum access, struct gl_buffer_object *obj )
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *bo = intel_buffer_object(obj);
+   void *mapping;
+
+   assert(bo);
+   assert(bo->buffer);
+
+   /* XXX: 'access' is not translated yet; the flags argument is always 0. */
+   mapping = bmMapBuffer(intel, bo->buffer, 0);
+   obj->Pointer = mapping;
+   return mapping;
+}
+
+
+/**
+ * Called via glUnmapBufferARB().
+ */
+static GLboolean intel_bufferobj_unmap( GLcontext *ctx, GLenum target,
+					struct gl_buffer_object *obj )
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *bo = intel_buffer_object(obj);
+
+   assert(bo);
+   assert(bo->buffer);
+   assert(obj->Pointer);
+
+   /* Release the mapping, then clear the pointer kept on the GL object. */
+   bmUnmapBufferAUB(intel, bo->buffer, 0, 0);
+   obj->Pointer = NULL;
+   return GL_TRUE;
+}
+
+struct buffer *intel_bufferobj_buffer( const struct intel_buffer_object *intel_obj )
+{
+   assert(intel_obj->Base.Name);   /* only named GL buffer objects are expected here */
+   assert(intel_obj->buffer);      /* and they must already carry a bufmgr buffer */
+   return intel_obj->buffer;
+}  
+
+void intel_bufferobj_init( struct intel_context *intel )
+{
+   /* Point the context's buffer-object driver hooks at the
+    * implementations in this file. */
+   intel->ctx.Driver.NewBufferObject = intel_bufferobj_alloc;
+   intel->ctx.Driver.DeleteBuffer = intel_bufferobj_free;
+   intel->ctx.Driver.BufferData = intel_bufferobj_data;
+   intel->ctx.Driver.BufferSubData = intel_bufferobj_subdata;
+   intel->ctx.Driver.GetBufferSubData = intel_bufferobj_get_subdata;
+   intel->ctx.Driver.MapBuffer = intel_bufferobj_map;
+   intel->ctx.Driver.UnmapBuffer = intel_bufferobj_unmap;
+}
diff --git a/i965/intel_buffer_objects.h b/i965/intel_buffer_objects.h
new file mode 100644
index 0000000..4b38803
--- /dev/null
+++ b/i965/intel_buffer_objects.h
@@ -0,0 +1,70 @@
+ /**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_BUFFEROBJ_H
+#define INTEL_BUFFEROBJ_H
+
+#include "mtypes.h"
+
+struct intel_context;
+struct gl_buffer_object;
+
+
+/**
+ * Intel vertex/pixel buffer object, derived from Mesa's gl_buffer_object.
+ */
+struct intel_buffer_object {
+   struct gl_buffer_object Base;   /* first member: intel_buffer_object() casts a gl_buffer_object pointer to this struct */
+   struct buffer *buffer;   /* the low-level buffer manager's buffer handle */
+};
+
+
+/* Get the bm buffer associated with a GL bufferobject:
+ */
+struct buffer *intel_bufferobj_buffer( const struct intel_buffer_object *obj );
+
+/* Hook the bufferobject implementation into mesa: 
+ */
+void intel_bufferobj_init( struct intel_context *intel );
+
+
+
+/* Are the obj->Name tests necessary?  Unfortunately yes, mesa
+ * allocates a couple of gl_buffer_object structs statically, and
+ * the Name == 0 test is the only way to identify them and avoid
+ * casting them erroneously to our structs.
+ */
+static inline struct intel_buffer_object *
+intel_buffer_object( struct gl_buffer_object *obj )
+{
+   /* Name == 0 marks one of mesa's statically allocated objects. */
+   if (!obj->Name)
+      return NULL;
+   return (struct intel_buffer_object *)obj;
+}
+
+#endif
diff --git a/i965/intel_buffers.c b/i965/intel_buffers.c
new file mode 100644
index 0000000..de6a867
--- /dev/null
+++ b/i965/intel_buffers.c
@@ -0,0 +1,581 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_blit.h"
+#include "intel_regions.h"
+#include "intel_batchbuffer.h"
+#include "context.h"
+#include "utils.h"
+#include "framebuffer.h"
+#include "vblank.h"
+#include "macros.h"
+#include "swrast/swrast.h"
+
+GLboolean intel_intersect_cliprects( drm_clip_rect_t *dst,
+				     const drm_clip_rect_t *a,
+				     const drm_clip_rect_t *b )
+{
+   dst->x1 = MAX2(a->x1, b->x1);   /* dst is always written, even when the result is empty */
+   dst->x2 = MIN2(a->x2, b->x2);
+   dst->y1 = MAX2(a->y1, b->y1);
+   dst->y2 = MIN2(a->y2, b->y2);
+   /* True when dst is a valid rectangle; '<=' also accepts an overlap of zero width or height. */
+   return (dst->x1 <= dst->x2 &&
+	   dst->y1 <= dst->y2);
+}
+
+struct intel_region *intel_drawbuf_region( struct intel_context *intel )
+{
+   switch (intel->ctx.DrawBuffer->_ColorDrawBufferMask[0]) {
+   case BUFFER_BIT_FRONT_LEFT:
+      return intel->front_region;
+   case BUFFER_BIT_BACK_LEFT:
+      return intel->back_region;
+   default:
+      /* Not necessary to fallback - could handle either NONE or
+       * FRONT_AND_BACK cases below.
+       */
+      return NULL;		
+   }
+}
+
+struct intel_region *intel_readbuf_region( struct intel_context *intel )
+{
+   GLcontext *ctx = &intel->ctx;
+
+   /* This will have to change to support EXT_fbo's, but is correct
+    * for now:
+    */
+   switch (ctx->ReadBuffer->_ColorReadBufferIndex) {
+   case BUFFER_FRONT_LEFT:
+      return intel->front_region;
+   case BUFFER_BACK_LEFT:
+      return intel->back_region;
+   default:
+      assert(0);
+      return NULL;
+   }
+}
+
+
+
+static void intelBufferSize(GLframebuffer *buffer,	/* unused: the size comes from the current context */
+			    GLuint *width, 
+			    GLuint *height)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct intel_context *intel = intel_context(ctx);
+   /* Need to lock to make sure the driDrawable is up to date.  This
+    * information is used to resize Mesa's software buffers, so it has
+    * to be correct.
+    */
+   LOCK_HARDWARE(intel);
+   if (intel->driDrawable) {
+      *width = intel->driDrawable->w;
+      *height = intel->driDrawable->h;
+   }
+   else {	/* no drawable bound: report a 0x0 buffer */
+      *width = 0;
+      *height = 0;
+   }
+   UNLOCK_HARDWARE(intel);
+}
+
+
+static void intelSetFrontClipRects( struct intel_context *intel )
+{
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;
+
+   if (!dPriv) return;
+
+   intel->numClipRects = dPriv->numClipRects;
+   intel->pClipRects = dPriv->pClipRects;
+   intel->drawX = dPriv->x;
+   intel->drawY = dPriv->y;
+}
+
+
+static void intelSetBackClipRects( struct intel_context *intel )
+{
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;
+
+   if (!dPriv) return;
+
+   if (intel->sarea->pf_enabled == 0 && dPriv->numBackClipRects == 0) {
+      intel->numClipRects = dPriv->numClipRects;
+      intel->pClipRects = dPriv->pClipRects;
+      intel->drawX = dPriv->x;
+      intel->drawY = dPriv->y;
+   } else {
+      intel->numClipRects = dPriv->numBackClipRects;
+      intel->pClipRects = dPriv->pBackClipRects;
+      intel->drawX = dPriv->backX;
+      intel->drawY = dPriv->backY;
+      
+      if (dPriv->numBackClipRects == 1 &&
+	  dPriv->x == dPriv->backX &&
+	  dPriv->y == dPriv->backY) {
+      
+	 /* Repeat the calculation of the back cliprect dimensions here
+	  * as early versions of dri.a in the Xserver are incorrect.  Try
+	  * very hard not to restrict future versions of dri.a which
+	  * might eg. allocate truly private back buffers.
+	  */
+	 int x1, y1;
+	 int x2, y2;
+	 
+	 x1 = dPriv->x;
+	 y1 = dPriv->y;      
+	 x2 = dPriv->x + dPriv->w;
+	 y2 = dPriv->y + dPriv->h;
+	 
+	 if (x1 < 0) x1 = 0;
+	 if (y1 < 0) y1 = 0;
+	 if (x2 > intel->intelScreen->width) x2 = intel->intelScreen->width;
+	 if (y2 > intel->intelScreen->height) y2 = intel->intelScreen->height;
+
+	 if (x1 == dPriv->pBackClipRects[0].x1 &&
+	     y1 == dPriv->pBackClipRects[0].y1) {
+
+	    dPriv->pBackClipRects[0].x2 = x2;
+	    dPriv->pBackClipRects[0].y2 = y2;
+	 }
+      }
+   }
+}
+
+
+void intelWindowMoved( struct intel_context *intel )
+{
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;   /* NOTE(review): dereferenced unchecked below, yet the cliprect helpers test it for NULL -- confirm it cannot be NULL here */
+   /* Refresh what is derived from the drawable: cliprects, the vblank pipe flag, framebuffer size, scissor/depth-range state. */
+   if (!intel->ctx.DrawBuffer) {
+      intelSetFrontClipRects( intel );
+   }
+   else {
+      switch (intel->ctx.DrawBuffer->_ColorDrawBufferMask[0]) {
+      case BUFFER_BIT_FRONT_LEFT:
+	 intelSetFrontClipRects( intel );
+	 break;
+      case BUFFER_BIT_BACK_LEFT:
+	 intelSetBackClipRects( intel );
+	 break;
+      default:
+	 /* glDrawBuffer(GL_NONE or GL_FRONT_AND_BACK): software fallback */
+	 intelSetFrontClipRects( intel );
+      }
+   }
+   /* Set or clear VBLANK_FLAG_SECONDARY from how the drawable overlaps the pipe A / pipe B rectangles in the SAREA (ddxMinor >= 7 only). */
+   {
+      if (intel->intelScreen->driScrnPriv->ddxMinor >= 7) {
+	 volatile drmI830Sarea *sarea = intel->sarea;
+	 drm_clip_rect_t drw_rect = { .x1 = dPriv->x, .x2 = dPriv->x + dPriv->w,
+				      .y1 = dPriv->y, .y2 = dPriv->y + dPriv->h 
+	 };
+	 drm_clip_rect_t pipeA_rect = { .x1 = sarea->pipeA_x,
+					.x2 = sarea->pipeA_x + sarea->pipeA_w,
+					.y1 = sarea->pipeA_y,
+                                        .y2 = sarea->pipeA_y + sarea->pipeA_h };
+         drm_clip_rect_t pipeB_rect = { .x1 = sarea->pipeB_x,
+                                        .x2 = sarea->pipeB_x + sarea->pipeB_w,
+                                        .y1 = sarea->pipeB_y,
+                                        .y2 = sarea->pipeB_y + sarea->pipeB_h };
+         GLint areaA = driIntersectArea( drw_rect, pipeA_rect );
+         GLint areaB = driIntersectArea( drw_rect, pipeB_rect );
+         GLuint flags = intel->vblank_flags;
+	 /* SECONDARY is chosen when B overlaps more, and also whenever both pipes overlap the drawable. */
+         if (areaB > areaA || (areaA > 0 && areaB > 0)) {
+            flags = intel->vblank_flags | VBLANK_FLAG_SECONDARY;
+         } else {
+            flags = intel->vblank_flags & ~VBLANK_FLAG_SECONDARY;
+         }
+	 /* Only when the flags changed: store them and refresh vbl_seq through driGetCurrentVBlank(). */
+         if (flags != intel->vblank_flags) {
+            intel->vblank_flags = flags;
+            driGetCurrentVBlank(dPriv, intel->vblank_flags, &intel->vbl_seq);
+         }
+      } else {
+         intel->vblank_flags &= ~VBLANK_FLAG_SECONDARY;
+      }
+   }
+   _mesa_resize_framebuffer(&intel->ctx,
+   			    (GLframebuffer*)dPriv->driverPrivate,
+			    dPriv->w, dPriv->h);
+
+   /* Set state we know depends on drawable parameters:
+    */
+   {
+      GLcontext *ctx = &intel->ctx;
+
+      if (ctx->Driver.Scissor)
+	 ctx->Driver.Scissor( ctx, ctx->Scissor.X, ctx->Scissor.Y,
+			      ctx->Scissor.Width, ctx->Scissor.Height );
+      
+      if (ctx->Driver.DepthRange)
+	 ctx->Driver.DepthRange( ctx, 
+				 ctx->Viewport.Near,
+				 ctx->Viewport.Far );
+
+      intel->NewGLState |= _NEW_SCISSOR;
+   }
+
+   /* This works because the lock is always grabbed before emitting
+    * commands and commands are always flushed prior to releasing
+    * the lock.
+    */
+   intel->NewGLState |= _NEW_WINDOW_POS; 
+}
+
+
+
+/* A true meta version of this would be very simple and additionally
+ * machine independent.  Maybe we'll get there one day.
+ */
+static void intelClearWithTris(struct intel_context *intel, 
+			       GLbitfield mask)
+{
+   GLcontext *ctx = &intel->ctx;
+   drm_clip_rect_t clear;
+   GLint cx, cy, cw, ch;
+
+   if (INTEL_DEBUG & DEBUG_DRI)
+      _mesa_printf("%s %x\n", __FUNCTION__, mask);
+
+   {
+
+      intel->vtbl.install_meta_state(intel);
+
+      /* Get clear bounds after locking */
+      cx = ctx->DrawBuffer->_Xmin;
+      cy = ctx->DrawBuffer->_Ymin;
+      cw = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin;
+      ch = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin;
+
+      clear.x1 = cx;
+      clear.y1 = cy;
+      clear.x2 = cx + cw;
+      clear.y2 = cy + ch;
+
+      /* Back and stencil cliprects are the same.  Try and do both
+       * buffers at once:
+       */
+      if (mask & (BUFFER_BIT_BACK_LEFT|BUFFER_BIT_STENCIL|BUFFER_BIT_DEPTH)) { 
+	 intel->vtbl.meta_draw_region(intel, 
+				      intel->back_region,
+				      intel->depth_region );
+
+	 if (mask & BUFFER_BIT_BACK_LEFT)
+	    intel->vtbl.meta_color_mask(intel, GL_TRUE );
+	 else
+	    intel->vtbl.meta_color_mask(intel, GL_FALSE );
+
+	 if (mask & BUFFER_BIT_STENCIL) 
+	    intel->vtbl.meta_stencil_replace( intel, 
+					      intel->ctx.Stencil.WriteMask[0], 
+					      intel->ctx.Stencil.Clear);
+	 else
+	    intel->vtbl.meta_no_stencil_write(intel);
+
+	 if (mask & BUFFER_BIT_DEPTH) 
+	    intel->vtbl.meta_depth_replace( intel );
+	 else
+	    intel->vtbl.meta_no_depth_write(intel);
+      
+	 /* XXX: Using INTEL_BATCH_NO_CLIPRECTS here is dangerous as the
+	  * drawing origin may not be correctly emitted.
+	  */
+	 intel->vtbl.meta_draw_quad(intel, 
+				    clear.x1, clear.x2, 
+				    clear.y1, clear.y2, 
+				    intel->ctx.Depth.Clear,
+				    intel->clear_chan[0], 
+				    intel->clear_chan[1], 
+				    intel->clear_chan[2], 
+				    intel->clear_chan[3], 
+				    0, 0, 0, 0);
+      }
+
+      /* Front may have different cliprects: 
+       */
+      if (mask & BUFFER_BIT_FRONT_LEFT) {
+	 intel->vtbl.meta_no_depth_write(intel);
+	 intel->vtbl.meta_no_stencil_write(intel);
+	 intel->vtbl.meta_color_mask(intel, GL_TRUE );
+	 intel->vtbl.meta_draw_region(intel, 
+				      intel->front_region,
+				      intel->depth_region);
+
+	 /* XXX: Using INTEL_BATCH_NO_CLIPRECTS here is dangerous as the
+	  * drawing origin may not be correctly emitted.
+	  */
+	 intel->vtbl.meta_draw_quad(intel, 
+				    clear.x1, clear.x2, 
+				    clear.y1, clear.y2, 
+				    0,
+				    intel->clear_chan[0], 
+				    intel->clear_chan[1], 
+				    intel->clear_chan[2], 
+				    intel->clear_chan[3], 
+				    0, 0, 0, 0);
+      }
+
+      intel->vtbl.leave_meta_state( intel );
+   }
+}
+
+
+
+
+
+static void intelClear(GLcontext *ctx, GLbitfield mask)
+{
+   struct intel_context *intel = intel_context( ctx );
+   const GLuint colorMask = *((GLuint *) &ctx->Color.ColorMask);   /* ColorMask reinterpreted as one GLuint */
+   GLbitfield tri_mask = 0;
+   GLbitfield blit_mask = 0;
+   GLbitfield swrast_mask = 0;
+   /* Sort the requested buffers into three clear paths: blit, triangles (meta ops) and swrast. */
+   if (INTEL_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "%s %x\n", __FUNCTION__, mask);
+
+   /* Color buffers: blit when colorMask is all ones, otherwise use the triangle path. */
+   if (mask & BUFFER_BIT_FRONT_LEFT) {
+      if (colorMask == ~0) {
+	 blit_mask |= BUFFER_BIT_FRONT_LEFT;
+      } 
+      else {
+	 tri_mask |= BUFFER_BIT_FRONT_LEFT;
+      }
+   }
+
+   if (mask & BUFFER_BIT_BACK_LEFT) {
+      if (colorMask == ~0) {
+	 blit_mask |= BUFFER_BIT_BACK_LEFT;
+      } 
+      else {
+	 tri_mask |= BUFFER_BIT_BACK_LEFT;
+      }
+   }
+
+   /* Stencil: swrast without hw_stencil; triangles for a partial write mask or a tiled depth region; else blit. */
+   if (mask & BUFFER_BIT_STENCIL) {
+      if (!intel->hw_stencil) {
+	 swrast_mask |= BUFFER_BIT_STENCIL;
+      }
+      else if ((ctx->Stencil.WriteMask[0] & 0xff) != 0xff ||
+	       intel->depth_region->tiled) {
+	 tri_mask |= BUFFER_BIT_STENCIL;
+      } 
+      else {
+	 blit_mask |= BUFFER_BIT_STENCIL;
+      }
+   }
+
+   /* Do depth with stencil if possible to avoid 2nd pass over the
+    * same buffer.
+    */
+   if (mask & BUFFER_BIT_DEPTH) {
+      if ((tri_mask & BUFFER_BIT_STENCIL) ||
+	  intel->depth_region->tiled)
+	 tri_mask |= BUFFER_BIT_DEPTH;
+      else 
+	 blit_mask |= BUFFER_BIT_DEPTH;
+   }
+   /* Accumulation buffer clears always go to swrast. */
+   swrast_mask |= (mask & BUFFER_BIT_ACCUM);
+
+   intelFlush( ctx );
+   /* Run each path that was given at least one buffer. */
+   if (blit_mask)
+      intelClearWithBlit( ctx, blit_mask );
+
+   if (tri_mask) 
+      intelClearWithTris( intel, tri_mask );
+
+   if (swrast_mask)
+      _swrast_Clear( ctx, swrast_mask );
+}
+
+
+
+
+
+
+
+/* Flip the front & back buffers
+ */
+static void intelPageFlip( const __DRIdrawablePrivate *dPriv )
+{
+#if 0
+   struct intel_context *intel;
+   int tmp, ret;
+
+   if (INTEL_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   assert(dPriv);
+   assert(dPriv->driContextPriv);
+   assert(dPriv->driContextPriv->driverPrivate);
+
+   intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate;
+
+   intelFlush( &intel->ctx );
+   LOCK_HARDWARE( intel );
+
+   if (dPriv->pClipRects) {
+      *(drm_clip_rect_t *)intel->sarea->boxes = dPriv->pClipRects[0];
+      intel->sarea->nbox = 1;
+   }
+
+   ret = drmCommandNone(intel->driFd, DRM_I830_FLIP); 
+   if (ret) {
+      fprintf(stderr, "%s: %d\n", __FUNCTION__, ret);
+      UNLOCK_HARDWARE( intel );
+      exit(1);
+   }
+
+   tmp = intel->sarea->last_enqueue;
+   intelRefillBatchLocked( intel );
+   UNLOCK_HARDWARE( intel );
+
+
+   intelSetDrawBuffer( &intel->ctx, intel->ctx.Color.DriverDrawBuffer );
+#endif
+}
+
+
+void intelSwapBuffers( __DRIdrawablePrivate *dPriv )   /* does nothing for single-buffered visuals */
+{
+   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+      struct intel_context *intel;
+      GLcontext *ctx;
+      intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate;
+      ctx = &intel->ctx;
+      if (ctx->Visual.doubleBufferMode) {
+	 _mesa_notifySwapBuffers( ctx );  /* flush pending rendering commands */
+	 if ( 0 /*intel->doPageFlip*/ ) { /* doPageFlip is never set !!! */
+	    intelPageFlip( dPriv );
+	 } else {
+	    intelCopyBuffer( dPriv, NULL );   /* the path always taken; NULL rect presumably means the whole drawable */
+	 }
+	 if (intel->aub_file) {
+	    intelFlush(ctx);
+	    intel->vtbl.aub_dump_bmp( intel, 1 );
+
+	    intel->aub_wrap = 1;
+	 }
+      }
+   } else {
+      /* XXX this shouldn't be an error but we can't handle it for now */
+      fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__);
+   }
+}
+
+void intelCopySubBuffer( __DRIdrawablePrivate *dPriv,
+			 int x, int y, int w, int h )
+{
+   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+      struct intel_context *intel = dPriv->driContextPriv->driverPrivate;
+      GLcontext *ctx = &intel->ctx;
+      /* Offset the sub-rectangle by the drawable position; y is flipped (it appears to be measured from the bottom edge). */
+      if (ctx->Visual.doubleBufferMode) {
+	 drm_clip_rect_t rect;
+	 rect.x1 = x + dPriv->x;
+	 rect.y1 = (dPriv->h - y - h) + dPriv->y;
+	 rect.x2 = rect.x1 + w;
+	 rect.y2 = rect.y1 + h;
+	 _mesa_notifySwapBuffers( ctx );  /* flush pending rendering commands */
+	 intelCopyBuffer( dPriv, &rect );
+      }
+   } else {
+      /* XXX this shouldn't be an error but we can't handle it for now */
+      fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__);
+   }
+}
+
+
+static void intelDrawBuffer(GLcontext *ctx, GLenum mode )
+{
+   struct intel_context *intel = intel_context(ctx);
+   int front = 0;
+   /* 'mode' is not read; the target comes from the framebuffer's draw-buffer mask. */
+   if (!ctx->DrawBuffer)
+      return;
+   /* Exactly front-left or back-left clears the DRAW_BUFFER fallback; any other mask sets it and returns. */
+   switch ( ctx->DrawBuffer->_ColorDrawBufferMask[0] ) {
+   case BUFFER_BIT_FRONT_LEFT:
+      front = 1;
+      FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE );
+      break;
+   case BUFFER_BIT_BACK_LEFT:
+      front = 0;
+      FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE );
+      break;
+   default:
+      FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_TRUE );
+      return;
+   }
+   /* When the SAREA reports page 1 as current, front and back are exchanged. */
+   if ( intel->sarea->pf_current_page == 1 ) 
+      front ^= 1;
+   /* NOTE(review): front cliprects are installed even when the back buffer was selected -- confirm this is intended. */
+   intelSetFrontClipRects( intel );
+
+   /* Re-point draw_region only when the target region actually differs. */
+   if (front) {
+      if (intel->draw_region != intel->front_region) {
+	 intel_region_release(intel, &intel->draw_region);
+	 intel_region_reference(&intel->draw_region, intel->front_region);
+      }
+   } else {
+      if (intel->draw_region != intel->back_region) {
+	 intel_region_release(intel, &intel->draw_region);
+	 intel_region_reference(&intel->draw_region, intel->back_region);
+      }
+   }
+
+   intel->vtbl.set_draw_region( intel, 
+				intel->draw_region,
+				intel->depth_region);
+}
+
+static void intelReadBuffer( GLcontext *ctx, GLenum mode )
+{
+   /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */
+}
+
+
+
+void intelInitBufferFuncs( struct dd_function_table *functions )
+{
+   functions->Clear = intelClear;
+   functions->GetBufferSize = intelBufferSize;
+   functions->DrawBuffer = intelDrawBuffer;
+   functions->ReadBuffer = intelReadBuffer;
+}
diff --git a/i965/intel_context.c b/i965/intel_context.c
new file mode 100644
index 0000000..5ee5282
--- /dev/null
+++ b/i965/intel_context.c
@@ -0,0 +1,716 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "glheader.h"
+#include "context.h"
+#include "matrix.h"
+#include "simple_list.h"
+#include "extensions.h"
+#include "framebuffer.h"
+#include "imports.h"
+#include "points.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+#include "vbo/vbo.h"
+
+#include "tnl/t_pipeline.h"
+#include "tnl/t_vertex.h"
+
+#include "drivers/common/driverfuncs.h"
+
+#include "intel_screen.h"
+
+#include "i830_dri.h"
+#include "i830_common.h"
+
+#include "intel_tex.h"
+#include "intel_span.h"
+#include "intel_ioctl.h"
+#include "intel_batchbuffer.h"
+#include "intel_blit.h"
+#include "intel_regions.h"
+#include "intel_buffer_objects.h"
+
+#include "bufmgr.h"
+
+#include "utils.h"
+#include "vblank.h"
+#ifndef INTEL_DEBUG
+int INTEL_DEBUG = (0);
+#endif
+
+#define need_GL_ARB_multisample
+#define need_GL_ARB_point_parameters
+#define need_GL_ARB_texture_compression
+#define need_GL_ARB_vertex_buffer_object
+#define need_GL_ARB_vertex_program
+#define need_GL_ARB_window_pos
+#define need_GL_ARB_occlusion_query
+#define need_GL_EXT_blend_color
+#define need_GL_EXT_blend_equation_separate
+#define need_GL_EXT_blend_func_separate
+#define need_GL_EXT_blend_minmax
+#define need_GL_EXT_cull_vertex
+#define need_GL_EXT_fog_coord
+#define need_GL_EXT_multi_draw_arrays
+#define need_GL_EXT_secondary_color
+#include "extension_helper.h"
+
+#ifndef VERBOSE
+int VERBOSE = 0;
+#endif
+
+/***************************************
+ * Mesa's Driver Functions
+ ***************************************/
+
+#define DRIVER_VERSION                     "4.1.3002"
+
+static const GLubyte *intelGetString( GLcontext *ctx, GLenum name )
+{
+   const char * chipset;
+   static char buffer[128];	/* static: a pointer to it is returned to the caller */
+   /* GetString driver hook: answers GL_VENDOR and GL_RENDERER, returns NULL for any other name. */
+   switch (name) {
+   case GL_VENDOR:
+      return (GLubyte *)"Tungsten Graphics, Inc";
+
+   /* Renderer: chipset name picked from the screen's PCI device id, then formatted into 'buffer'. */
+   case GL_RENDERER:
+      switch (intel_context(ctx)->intelScreen->deviceID) {
+      case PCI_CHIP_I965_Q:
+	 chipset = "Intel(R) 965Q";
+         break;
+      case PCI_CHIP_I965_G:
+      case PCI_CHIP_I965_G_1:
+	 chipset = "Intel(R) 965G";
+         break;
+      case PCI_CHIP_I946_GZ:
+	 chipset = "Intel(R) 946GZ";
+         break;
+      case PCI_CHIP_I965_GM:
+	 chipset = "Intel(R) 965GM";
+         break;
+      case PCI_CHIP_IGD_GM:
+	 chipset = "Intel(R) Integrated Graphics Device";
+         break;
+      default:
+	 chipset = "Unknown Intel Chipset"; break;
+      }
+
+      (void) driGetRendererString( buffer, chipset, DRIVER_VERSION, 0 );
+      return (GLubyte *) buffer;
+
+   default:
+      return NULL;
+   }
+}
+
+
+/**
+ * Extension strings exported by the intel driver.
+ *
+ * \note
+ * It appears that ARB_texture_env_crossbar has "disappeared" compared to the
+ * old i830-specific driver.
+ */
+const struct dri_extension card_extensions[] =
+{
+    { "GL_ARB_multisample",                GL_ARB_multisample_functions },
+    { "GL_ARB_multitexture",               NULL },
+    { "GL_ARB_point_parameters",           GL_ARB_point_parameters_functions },
+    { "GL_ARB_texture_border_clamp",       NULL },
+    { "GL_ARB_texture_compression",        GL_ARB_texture_compression_functions },
+    { "GL_ARB_texture_cube_map",           NULL },
+    { "GL_ARB_texture_env_add",            NULL },
+    { "GL_ARB_texture_env_combine",        NULL },
+    { "GL_ARB_texture_env_dot3",           NULL },
+    { "GL_ARB_texture_mirrored_repeat",    NULL },
+    { "GL_ARB_texture_non_power_of_two",   NULL },
+    { "GL_ARB_texture_rectangle",          NULL },
+    /* NV and EXT names for rectangle textures: */
+    { "GL_NV_texture_rectangle",           NULL },
+    { "GL_EXT_texture_rectangle",          NULL },
+    { "GL_ARB_vertex_buffer_object",       GL_ARB_vertex_buffer_object_functions },
+    { "GL_ARB_vertex_program",             GL_ARB_vertex_program_functions },
+    { "GL_ARB_window_pos",                 GL_ARB_window_pos_functions },
+    { "GL_EXT_blend_color",                GL_EXT_blend_color_functions },
+    { "GL_EXT_blend_equation_separate",    GL_EXT_blend_equation_separate_functions },
+    { "GL_EXT_blend_func_separate",        GL_EXT_blend_func_separate_functions },
+    { "GL_EXT_blend_minmax",               GL_EXT_blend_minmax_functions },
+    { "GL_EXT_blend_logic_op",             NULL },
+    { "GL_EXT_blend_subtract",             NULL },
+    { "GL_EXT_cull_vertex",                GL_EXT_cull_vertex_functions },
+    { "GL_EXT_fog_coord",                  GL_EXT_fog_coord_functions },
+    { "GL_EXT_multi_draw_arrays",          GL_EXT_multi_draw_arrays_functions },
+    { "GL_EXT_secondary_color",            GL_EXT_secondary_color_functions },
+    { "GL_EXT_stencil_wrap",               NULL },
+    { "GL_EXT_texture_edge_clamp",         NULL },
+    { "GL_EXT_texture_env_combine",        NULL },
+    { "GL_EXT_texture_env_dot3",           NULL },
+    { "GL_EXT_texture_filter_anisotropic", NULL },
+    { "GL_EXT_texture_lod_bias",           NULL },
+    { "GL_3DFX_texture_compression_FXT1",  NULL },
+    { "GL_APPLE_client_storage",           NULL },
+    { "GL_MESA_pack_invert",               NULL },
+    { "GL_MESA_ycbcr_texture",             NULL },
+    { "GL_NV_blend_square",                NULL },
+    { "GL_SGIS_generate_mipmap",           NULL },
+    { NULL,                                NULL }
+};
+
+const struct dri_extension arb_oc_extension = 
+    { "GL_ARB_occlusion_query",            GL_ARB_occlusion_query_functions};
+
+void intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging)
+{	     
+	struct intel_context *intel = ctx?intel_context(ctx):NULL;	/* ctx may be NULL; intel is then NULL too */
+	driInitExtensions(ctx, card_extensions, enable_imaging);
+	if (!ctx || intel->intelScreen->drmMinor >= 8)	/* '!ctx' is tested first, so intel is never dereferenced when NULL */
+		driInitSingleExtension (ctx, &arb_oc_extension);	/* occlusion query: needs drmMinor >= 8 when a context is given */
+}
+
+static const struct dri_debug_control debug_control[] =
+{
+    { "fall",  DEBUG_FALLBACKS },
+    { "tex",   DEBUG_TEXTURE },
+    { "ioctl", DEBUG_IOCTL },
+    { "prim",  DEBUG_PRIMS },
+    { "vert",  DEBUG_VERTS },
+    { "state", DEBUG_STATE },
+    { "verb",  DEBUG_VERBOSE },
+    { "dri",   DEBUG_DRI },
+    { "dma",   DEBUG_DMA },
+    { "san",   DEBUG_SANITY },
+    { "sync",  DEBUG_SYNC },
+    { "sleep", DEBUG_SLEEP },
+    { "pix",   DEBUG_PIXEL },
+    { "buf",   DEBUG_BUFMGR },
+    { "stats", DEBUG_STATS },
+    { "tile",  DEBUG_TILE },
+    { "sing",  DEBUG_SINGLE_THREAD },
+    { "thre",  DEBUG_SINGLE_THREAD },
+    { "wm",    DEBUG_WM },
+    { "vs",    DEBUG_VS },
+    { NULL,    0 }
+};
+
+
+static void intelInvalidateState( GLcontext *ctx, GLuint new_state )
+{
+   struct intel_context *intel = intel_context(ctx);
+
+   _swrast_InvalidateState( ctx, new_state );
+   _swsetup_InvalidateState( ctx, new_state );
+   _vbo_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+   _tnl_invalidate_vertex_state( ctx, new_state );
+   
+   intel->NewGLState |= new_state;
+
+   if (intel->vtbl.invalidate_state)
+      intel->vtbl.invalidate_state( intel, new_state );
+}
+
+
+void intelFlush( GLcontext *ctx )
+{
+   struct intel_context *intel = intel_context( ctx );
+
+   bmLockAndFence(intel);
+}
+
+void intelFinish( GLcontext *ctx ) 
+{
+   struct intel_context *intel = intel_context( ctx );
+
+   bmFinishFence(intel, bmLockAndFence(intel));
+}
+
+static void
+intelBeginQuery(GLcontext *ctx, GLenum target, struct gl_query_object *q)	/* 'target' is not read */
+{
+	struct intel_context *intel = intel_context( ctx );
+	drmI830MMIO io = {
+		.read_write = MMIO_READ,
+		.reg = MMIO_REGS_PS_DEPTH_COUNT,
+		.data = &q->Result 	/* the starting counter is presumably written straight into q->Result by the ioctl */
+	};
+	intel->stats_wm++;	/* matched by the decrement in intelEndQuery() */
+	intelFinish(&intel->ctx);	/* intelFinish() runs before the register is sampled */
+	drmCommandWrite(intel->driFd, DRM_I830_MMIO, &io, sizeof(io));	/* NOTE(review): return value is ignored */
+}
+
+static void
+intelEndQuery(GLcontext *ctx, GLenum target, struct gl_query_object *q)	/* 'target' is not read */
+{
+	struct intel_context *intel = intel_context( ctx );
+	GLuint64EXT tmp = q->Result;	/* seeded so that a failed register read yields a result of 0, not garbage */
+	drmI830MMIO io = {
+		.read_write = MMIO_READ,
+		.reg = MMIO_REGS_PS_DEPTH_COUNT,
+		.data = &tmp
+	};
+	intelFinish(&intel->ctx);	/* intelFinish() runs before the register is sampled */
+	drmCommandWrite(intel->driFd, DRM_I830_MMIO, &io, sizeof(io));	/* return value still ignored; tmp keeps its seed on failure */
+	q->Result = tmp - q->Result;	/* counter delta since intelBeginQuery() stored the start value */
+	q->Ready = GL_TRUE;
+	intel->stats_wm--;	/* undoes the increment made in intelBeginQuery() */
+}
+
+
+void intelInitDriverFunctions( struct dd_function_table *functions )
+{
+   _mesa_init_driver_functions( functions );
+
+   functions->Flush = intelFlush;
+   functions->Finish = intelFinish;
+   functions->GetString = intelGetString;
+   functions->UpdateState = intelInvalidateState;
+   functions->BeginQuery = intelBeginQuery;
+   functions->EndQuery = intelEndQuery;
+
+   /* CopyPixels can be accelerated even with the current memory
+    * manager:
+    */
+   if (!getenv("INTEL_NO_BLIT")) {
+      functions->CopyPixels = intelCopyPixels;
+      functions->Bitmap = intelBitmap;
+   }
+
+   intelInitTextureFuncs( functions );
+   intelInitStateFuncs( functions );
+   intelInitBufferFuncs( functions );
+}
+
+
+
+GLboolean intelInitContext( struct intel_context *intel,
+			    const __GLcontextModes *mesaVis,
+			    __DRIcontextPrivate *driContextPriv,
+			    void *sharedContextPrivate,
+			    struct dd_function_table *functions )
+{
+   GLcontext *ctx = &intel->ctx;
+   GLcontext *shareCtx = (GLcontext *) sharedContextPrivate;
+   __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
+   intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private;
+   volatile drmI830Sarea *saPriv = (volatile drmI830Sarea *)
+      (((GLubyte *)sPriv->pSAREA)+intelScreen->sarea_priv_offset);
+
+   if (!_mesa_initialize_context(&intel->ctx,
+				 mesaVis, shareCtx, 
+				 functions,
+				 (void*) intel)) {
+      _mesa_printf("%s: failed to init mesa context\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   driContextPriv->driverPrivate = intel;
+   intel->intelScreen = intelScreen;
+   intel->driScreen = sPriv;
+   intel->sarea = saPriv;
+
+   driParseConfigFiles (&intel->optionCache, &intelScreen->optionCache,
+		   intel->driScreen->myNum, "i965");
+
+   intel->vblank_flags = (intel->intelScreen->irq_active != 0)
+	   ? driGetDefaultVBlankFlags(&intel->optionCache) : VBLANK_FLAG_NO_IRQ;
+
+   ctx->Const.MaxTextureMaxAnisotropy = 2.0;
+
+   if (getenv("INTEL_STRICT_CONFORMANCE")) {
+      intel->strict_conformance = 1;
+   }
+
+   if (intel->strict_conformance) {
+      ctx->Const.MinLineWidth = 1.0;
+      ctx->Const.MinLineWidthAA = 1.0;
+      ctx->Const.MaxLineWidth = 1.0;
+      ctx->Const.MaxLineWidthAA = 1.0;
+      ctx->Const.LineWidthGranularity = 1.0;
+   }
+   else {
+      ctx->Const.MinLineWidth = 1.0;
+      ctx->Const.MinLineWidthAA = 1.0;
+      ctx->Const.MaxLineWidth = 5.0;
+      ctx->Const.MaxLineWidthAA = 5.0;
+      ctx->Const.LineWidthGranularity = 0.5;
+   }
+
+   ctx->Const.MinPointSize = 1.0;
+   ctx->Const.MinPointSizeAA = 1.0;
+   ctx->Const.MaxPointSize = 255.0;
+   ctx->Const.MaxPointSizeAA = 3.0;
+   ctx->Const.PointSizeGranularity = 1.0;
+
+   /* Reinitialize the context point state:
+    * it depends on the constants in __GLcontextRec::Const.
+    */
+   _mesa_init_point(ctx);
+
+   /* Initialize the software rasterizer and helper modules. */
+   _swrast_CreateContext( ctx );
+   _vbo_CreateContext( ctx );
+   _tnl_CreateContext( ctx );
+   _swsetup_CreateContext( ctx );
+
+   TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
+
+   /* Configure swrast to match hardware characteristics: */
+   _swrast_allow_pixel_fog( ctx, GL_FALSE );
+   _swrast_allow_vertex_fog( ctx, GL_TRUE );
+
+   /* Dri stuff */
+   intel->hHWContext = driContextPriv->hHWContext;
+   intel->driFd = sPriv->fd;
+   intel->driHwLock = (drmLock *) &sPriv->pSAREA->lock;
+
+   intel->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24;
+   intel->hw_stipple = 1;
+
+   switch(mesaVis->depthBits) {
+   case 0:			/* what to do in this case? */
+   case 16:
+      intel->depth_scale = 1.0/0xffff;
+      intel->polygon_offset_scale = 1.0/0xffff;
+      intel->depth_clear_mask = ~0;
+      intel->ClearDepth = 0xffff;
+      break;
+   case 24:
+      intel->depth_scale = 1.0/0xffffff;
+      intel->polygon_offset_scale = 2.0/0xffffff; /* req'd to pass glean */
+      intel->depth_clear_mask = 0x00ffffff;
+      intel->stencil_clear_mask = 0xff000000;
+      intel->ClearDepth = 0x00ffffff;
+      break;
+   default:
+      assert(0); 
+      break;
+   }
+
+   /* Initialize swrast, tnl driver tables: */
+   intelInitSpanFuncs( ctx );
+
+   intel->no_hw = getenv("INTEL_NO_HW") != NULL;
+
+   if (!intel->intelScreen->irq_active) {
+      _mesa_printf("IRQs not active.  Exiting\n");
+      exit(1);
+   }
+   intelInitExtensions(ctx, GL_TRUE); 
+
+   INTEL_DEBUG  = driParseDebugString( getenv( "INTEL_DEBUG" ),
+				       debug_control );
+
+
+   /* Buffer manager: 
+    */
+   intel->bm = bm_fake_intel_Attach( intel );
+
+
+   bmInitPool(intel,
+	      intel->intelScreen->tex.offset, /* low offset */
+	      intel->intelScreen->tex.map, /* low virtual */
+	      intel->intelScreen->tex.size,
+	      BM_MEM_AGP);
+
+   /* These are still static, but create regions for them.  
+    */
+   intel->front_region = 
+      intel_region_create_static(intel,
+				 BM_MEM_AGP,
+				 intelScreen->front.offset,
+				 intelScreen->front.map,
+				 intelScreen->cpp,
+				 intelScreen->front.pitch / intelScreen->cpp,
+				 intelScreen->height,
+				 intelScreen->front.size,
+				 intelScreen->front.tiled != 0);
+
+   intel->back_region = 
+      intel_region_create_static(intel,
+				 BM_MEM_AGP,
+				 intelScreen->back.offset,
+				 intelScreen->back.map,
+				 intelScreen->cpp,
+				 intelScreen->back.pitch / intelScreen->cpp,
+				 intelScreen->height,
+				 intelScreen->back.size,
+                                 intelScreen->back.tiled != 0);
+
+   /* Still assuming front.cpp == depth.cpp
+    *
+    * XXX: Setting tiling to false because Depth tiling only supports
+    * YMAJOR but the blitter only supports XMAJOR tiling.  Have to
+    * resolve later.
+    */
+   intel->depth_region = 
+      intel_region_create_static(intel,
+				 BM_MEM_AGP,
+				 intelScreen->depth.offset,
+				 intelScreen->depth.map,
+				 intelScreen->cpp,
+				 intelScreen->depth.pitch / intelScreen->cpp,
+				 intelScreen->height,
+				 intelScreen->depth.size,
+                                 intelScreen->depth.tiled != 0);
+   
+   intel_bufferobj_init( intel );
+   intel->batch = intel_batchbuffer_alloc( intel );
+
+   if (intel->ctx.Mesa_DXTn) {
+      _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
+      _mesa_enable_extension( ctx, "GL_S3_s3tc" );
+   }
+   else if (driQueryOptionb (&intel->optionCache, "force_s3tc_enable")) {
+      _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
+   }
+
+/*    driInitTextureObjects( ctx, & intel->swapped, */
+/* 			  DRI_TEXMGR_DO_TEXTURE_1D | */
+/* 			  DRI_TEXMGR_DO_TEXTURE_2D |  */
+/* 			  DRI_TEXMGR_DO_TEXTURE_RECT ); */
+
+
+   if (getenv("INTEL_NO_RAST")) {
+      fprintf(stderr, "disabling 3D rasterization\n");
+      intel->no_rast = 1;
+   }
+
+
+   return GL_TRUE;
+}
+/* Destroy hook: tears down everything hanging off driContextPriv->driverPrivate (driver vtbl state, sw pipeline modules, batchbuffer, Mesa context) and clears that pointer. */
+void intelDestroyContext(__DRIcontextPrivate *driContextPriv)
+{
+   struct intel_context *intel = (struct intel_context *) driContextPriv->driverPrivate;
+
+   assert(intel); /* should never be null; the if() below still guards builds where assert() is compiled out */
+   if (intel) {
+      GLboolean   release_texture_heaps;
+
+
+      intel->vtbl.destroy( intel );
+
+      release_texture_heaps = (intel->ctx.Shared->RefCount == 1); /* true when this is the last context using the shared state */
+      _swsetup_DestroyContext (&intel->ctx);
+      _tnl_DestroyContext (&intel->ctx);
+      _vbo_DestroyContext (&intel->ctx);
+
+      _swrast_DestroyContext (&intel->ctx);
+      intel->Fallback = 0;	/* don't call _swrast_Flush later */
+      intel_batchbuffer_free(intel->batch);
+      intel->batch = NULL;
+      
+
+      if ( release_texture_heaps ) {
+         /* This share group is about to go away, free our private
+          * texture object data.  (Nothing is actually freed here yet.)
+          */
+
+	 /* XXX: destroy the shared bufmgr struct here?
+	  */
+      }
+
+      /* Free the regions created to describe front/back/depth
+       * buffers (currently compiled out, so the regions are not released here):
+       */
+#if 0
+      intel_region_release(intel, &intel->front_region);
+      intel_region_release(intel, &intel->back_region);
+      intel_region_release(intel, &intel->depth_region);
+      intel_region_release(intel, &intel->draw_region);
+#endif
+
+      /* free the Mesa context */
+      _mesa_destroy_context(&intel->ctx);
+   }
+
+   driContextPriv->driverPrivate = NULL;
+}
+/* Unbind hook: does no work and ignores driContextPriv; always returns GL_TRUE. */
+GLboolean intelUnbindContext(__DRIcontextPrivate *driContextPriv)
+{
+   return GL_TRUE;
+}
+/* Make-current hook: binds the draw/read drawables to the context, or unbinds everything when driContextPriv is NULL.  Always returns GL_TRUE. */
+GLboolean intelMakeCurrent(__DRIcontextPrivate *driContextPriv,
+			  __DRIdrawablePrivate *driDrawPriv,
+			  __DRIdrawablePrivate *driReadPriv)
+{
+
+   if (driContextPriv) {
+      struct intel_context *intel = (struct intel_context *) driContextPriv->driverPrivate;
+
+      if (intel->driReadDrawable != driReadPriv) {
+          intel->driReadDrawable = driReadPriv;
+      }
+
+      if ( intel->driDrawable != driDrawPriv ) {
+	 /* Shouldn't the readbuffer be stored also? */
+	 driDrawableInitVBlank( driDrawPriv, intel->vblank_flags,
+		      &intel->vbl_seq );
+
+	 intel->driDrawable = driDrawPriv;
+	 intelWindowMoved( intel ); /* new drawable: refresh window-dependent state */
+      }
+
+      _mesa_make_current(&intel->ctx,
+			 (GLframebuffer *) driDrawPriv->driverPrivate,
+			 (GLframebuffer *) driReadPriv->driverPrivate); /* NOTE: both drawables are dereferenced without a NULL check */
+
+      intel->ctx.Driver.DrawBuffer( &intel->ctx, intel->ctx.Color.DrawBuffer[0] );
+   } else {
+      _mesa_make_current(NULL, NULL, NULL);
+   }
+
+   return GL_TRUE;
+}
+
+/* Slow path of LOCK_HARDWARE, used when the DRM_CAS fast path fails: takes the lock via drmGetLock(), then revalidates drawable, hardware-context and texture ownership. */
+static void intelContendedLock( struct intel_context *intel, GLuint flags )
+{
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;
+   __DRIscreenPrivate *sPriv = intel->driScreen;
+   volatile drmI830Sarea * sarea = intel->sarea;
+   int me = intel->hHWContext;
+   int my_bufmgr = bmCtxId(intel);
+
+   drmGetLock(intel->driFd, intel->hHWContext, flags);
+
+   /* If the window moved, may need to set a new cliprect now.
+    *
+    * NOTE: This releases and regains the hw lock, so all state
+    * checking must be done *after* this call:
+    */
+   if (dPriv)
+      DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv);
+
+
+   intel->locked = 1;
+   intel->need_flush = 1;
+
+   /* Lost context?  Another context owned the hardware since we last
+    * held the lock, so claim it back and let the driver re-emit state. */
+   if (sarea->ctxOwner != me) {
+      DBG("Lost Context: sarea->ctxOwner %x me %x\n", sarea->ctxOwner, me);
+      sarea->ctxOwner = me;
+      intel->vtbl.lost_hardware( intel );
+   }
+
+   /* As above, but don't evict the texture data on transitions
+    * between contexts which all share a local buffer manager.
+    */
+   if (sarea->texAge != my_bufmgr) {
+      DBG("Lost Textures: sarea->texAge %x my_bufmgr %x\n", sarea->texAge, my_bufmgr); /* print the value that was actually compared */
+      sarea->texAge = my_bufmgr;
+      bm_fake_NotifyContendedLockTake( intel ); 
+   }
+
+   /* Drawable changed?
+    */
+   if (dPriv && intel->lastStamp != dPriv->lastStamp) {
+      intelWindowMoved( intel );
+      intel->lastStamp = dPriv->lastStamp;
+   }
+}
+
+_glthread_DECLARE_STATIC_MUTEX(lockMutex);
+
+/* Lock the hardware and validate our state: takes lockMutex, then the DRM lock (DRM_CAS fast path,
+ * intelContendedLock() otherwise), handles aub wrap / bufmgr errors and maps the batchbuffer.  Paired with UNLOCK_HARDWARE(). */
+void LOCK_HARDWARE( struct intel_context *intel )
+{
+    char __ret=0;
+
+    _glthread_LOCK_MUTEX(lockMutex);
+    assert(!intel->locked);
+
+
+    DRM_CAS(intel->driHwLock, intel->hHWContext,
+	    (DRM_LOCK_HELD|intel->hHWContext), __ret);
+    if (__ret)
+        intelContendedLock( intel, 0 );
+
+   intel->locked = 1;
+
+   if (intel->aub_wrap) {
+      bm_fake_NotifyContendedLockTake( intel ); 
+      intel->vtbl.lost_hardware( intel );
+      intel->vtbl.aub_wrap(intel);
+      intel->aub_wrap = 0;
+   }
+
+   if (bmError(intel)) {
+      bmEvictAll(intel);
+      intel->vtbl.lost_hardware( intel );
+   }
+
+   /* Make sure nothing has been emitted prior to getting the lock: 
+    */
+   assert(intel->batch->map == 0);
+
+   /* XXX: postpone, may not be needed:
+    */
+   if (!intel_batchbuffer_map(intel->batch)) {
+      bmEvictAll(intel);
+      intel->vtbl.lost_hardware( intel );
+
+      /* This could only fail if the batchbuffer was greater in size
+       * than the available texture memory:
+       */
+      if (!intel_batchbuffer_map(intel->batch)) {
+	 _mesa_printf("double failure to map batchbuffer\n");
+	 assert(0);
+      }
+   }
+}
+ 
+  
+/* Unlock the hardware for the given context: unmaps the batchbuffer, calls vtbl.note_unlock,
+ * then drops the DRM lock and lockMutex taken by LOCK_HARDWARE(). */
+void UNLOCK_HARDWARE( struct intel_context *intel )
+{
+   /* Make sure everything has been released: the batch write pointer
+    * must be back at map + offset before the lock is dropped. */
+   assert(intel->batch->ptr == intel->batch->map + intel->batch->offset);
+
+   intel_batchbuffer_unmap(intel->batch);
+   intel->vtbl.note_unlock( intel );
+   intel->locked = 0;
+
+
+
+   DRM_UNLOCK(intel->driFd, intel->driHwLock, intel->hHWContext);
+   _glthread_UNLOCK_MUTEX(lockMutex); 
+}
+
+
diff --git a/i965/intel_context.h b/i965/intel_context.h
new file mode 100644
index 0000000..a244757
--- /dev/null
+++ b/i965/intel_context.h
@@ -0,0 +1,531 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTELCONTEXT_INC
+#define INTELCONTEXT_INC
+
+
+
+#include "mtypes.h"
+#include "drm.h"
+#include "texmem.h"
+
+#include "intel_screen.h"
+#include "i830_common.h"
+#include "tnl/t_vertex.h"
+
+#define TAG(x) intel##x
+#include "tnl_dd/t_dd_vertex.h"
+#undef TAG
+
+#define DV_PF_555  (1<<8)
+#define DV_PF_565  (2<<8)
+#define DV_PF_8888 (3<<8)
+
+struct intel_region;
+struct intel_context;
+
+typedef void (*intel_tri_func)(struct intel_context *, intelVertex *, intelVertex *,
+							  intelVertex *);
+typedef void (*intel_line_func)(struct intel_context *, intelVertex *, intelVertex *);
+typedef void (*intel_point_func)(struct intel_context *, intelVertex *);
+
+#define INTEL_FALLBACK_DRAW_BUFFER	 0x1
+#define INTEL_FALLBACK_READ_BUFFER	 0x2
+#define INTEL_FALLBACK_USER		 0x4
+#define INTEL_FALLBACK_RENDERMODE	 0x8
+#define INTEL_FALLBACK_TEXTURE   	 0x10
+
+extern void intelFallback( struct intel_context *intel, GLuint bit, GLboolean mode );
+#define FALLBACK( intel, bit, mode ) intelFallback( intel, bit, mode )
+
+
+/* Driver texture object: a gl_texture_object extended with mipmap-tree bookkeeping. */
+struct intel_texture_object
+{
+   struct gl_texture_object base; /* The "parent" object; first member, so intel_texture_object() can cast to it */
+
+   /* The mipmap tree must include at least these levels once
+    * validated:
+    */
+   GLuint firstLevel;
+   GLuint lastLevel;
+
+   GLuint dirty_images[6]; /* presumably one entry per cube face - TODO confirm against users */
+   GLuint dirty;
+
+   /* On validation any active images held in main memory or in other
+    * regions will be copied to this region and the old storage freed.
+    */
+   struct intel_mipmap_tree *mt;
+};
+
+
+/* Per-context driver state: the Mesa GLcontext followed by the driver's hook table, lock state, regions and DRI handles. */
+struct intel_context
+{
+   GLcontext ctx;		/* the parent class; first member, so intel_context() can cast a GLcontext* to this */
+
+   struct { /* hook table, invoked as intel->vtbl.xxx( intel, ... ) */
+      void (*destroy)( struct intel_context *intel ); 
+      void (*emit_state)( struct intel_context *intel );
+      void (*emit_invarient_state)( struct intel_context *intel );
+      void (*lost_hardware)( struct intel_context *intel );
+      void (*note_fence)( struct intel_context *intel, GLuint fence );
+      void (*note_unlock)( struct intel_context *intel );
+      void (*update_texture_state)( struct intel_context *intel );
+
+      void (*render_start)( struct intel_context *intel );
+      void (*set_draw_region)( struct intel_context *intel, 
+			       struct intel_region *draw_region,
+			       struct intel_region *depth_region );
+
+      GLuint (*flush_cmd)( void );
+
+      void (*emit_flush)( struct intel_context *intel,
+			  GLuint unused );
+
+      void (*aub_commands)( struct intel_context *intel, 
+			    GLuint offset,
+			    const void *buf,
+			    GLuint sz );
+      void (*aub_dump_bmp)( struct intel_context *intel, GLuint buffer );
+      void (*aub_wrap)( struct intel_context *intel );
+      void (*aub_gtt_data)( struct intel_context *intel, 
+			    GLuint offset,
+			    const void *src,
+			    GLuint size,
+			    GLuint aubtype, 
+			    GLuint aubsubtype);
+
+
+      void (*reduced_primitive_state)( struct intel_context *intel, GLenum rprim );
+
+      GLboolean (*check_vertex_size)( struct intel_context *intel, GLuint expected );
+
+      void (*invalidate_state)( struct intel_context *intel, GLuint new_state );
+
+      /* Metaops: 
+       */
+      void (*install_meta_state)( struct intel_context *intel );
+      void (*leave_meta_state)( struct intel_context *intel );
+
+      void (*meta_draw_region)( struct intel_context *intel,
+				struct intel_region *draw_region,
+				struct intel_region *depth_region );
+
+      void (*meta_color_mask)( struct intel_context *intel,
+			       GLboolean );
+      
+      void (*meta_stencil_replace)( struct intel_context *intel,
+				    GLuint mask,
+				    GLuint clear );
+
+      void (*meta_depth_replace)( struct intel_context *intel );
+
+      void (*meta_texture_blend_replace) (struct intel_context * intel);
+      
+      void (*meta_no_stencil_write)( struct intel_context *intel );
+      void (*meta_no_depth_write)( struct intel_context *intel );
+      void (*meta_no_texture)( struct intel_context *intel );
+      void (*meta_import_pixel_state) (struct intel_context * intel);
+      void (*meta_frame_buffer_texture)( struct intel_context *intel,
+					 GLint xoff, GLint yoff );
+
+      void (*meta_draw_quad)(struct intel_context *intel, 
+			     GLfloat x0, GLfloat x1,
+			     GLfloat y0, GLfloat y1, 
+			     GLfloat z,
+			     GLubyte red, GLubyte green,
+			     GLubyte blue, GLubyte alpha,
+			     GLfloat s0, GLfloat s1,
+			     GLfloat t0, GLfloat t1);
+
+
+
+   } vtbl;
+
+   GLint refcount;   
+   GLuint Fallback; /* presumably a mask of INTEL_FALLBACK_* bits - TODO confirm */
+   GLuint NewGLState;
+   
+   GLuint last_swap_fence;
+   GLuint second_last_swap_fence;
+   
+   GLboolean aub_wrap; /* when set, LOCK_HARDWARE calls vtbl.aub_wrap and clears it */
+   GLuint stats_wm;
+
+   struct intel_batchbuffer *batch;
+
+   GLubyte clear_chan[4];
+   GLuint ClearColor;
+   GLuint ClearDepth;
+
+   GLfloat depth_scale;
+   GLfloat polygon_offset_scale; /* dependent on depth_scale, bpp */
+   GLuint depth_clear_mask;
+   GLuint stencil_clear_mask;
+
+   GLboolean hw_stencil;
+   GLboolean hw_stipple;
+   GLboolean depth_buffer_is_float;
+   GLboolean no_hw; /* set when INTEL_NO_HW is in the environment; the ioctl helpers then skip the kernel calls */
+   GLboolean no_rast; /* set when INTEL_NO_RAST is in the environment */
+   GLboolean thrashing;
+   GLboolean locked; /* set in LOCK_HARDWARE, cleared in UNLOCK_HARDWARE */
+   GLboolean strict_conformance;
+   GLboolean need_flush;
+
+
+   
+   /* AGP memory buffer manager:
+    */
+   struct bufmgr *bm;
+
+
+   /* State for intelvb.c and inteltris.c.
+    */
+   GLenum render_primitive;
+   GLenum reduced_primitive;
+
+   struct intel_region *front_region;
+   struct intel_region *back_region;
+   struct intel_region *draw_region;
+   struct intel_region *depth_region;
+
+   /* These refer to the current draw (front vs. back) buffer:
+    */
+   int drawX;			/* origin of drawable in draw buffer */
+   int drawY;
+   GLuint numClipRects;		/* cliprects for that buffer */
+   drm_clip_rect_t *pClipRects;
+   struct gl_texture_object *frame_buffer_texobj;
+
+   GLboolean scissor;
+   drm_clip_rect_t draw_rect;
+   drm_clip_rect_t scissor_rect;
+
+   drm_context_t hHWContext;
+   drmLock *driHwLock;
+   int driFd;
+
+   __DRIdrawablePrivate *driDrawable;
+   __DRIdrawablePrivate *driReadDrawable;
+   __DRIscreenPrivate *driScreen;
+   intelScreenPrivate *intelScreen; 
+   volatile drmI830Sarea *sarea; 
+   
+   FILE *aub_file;
+
+   GLuint lastStamp; /* last driDrawable->lastStamp handled by the contended-lock path */
+
+   /**
+    * Configuration cache
+    */
+   driOptionCache optionCache;
+
+   /* VBI
+    */
+   GLuint vbl_seq;
+   GLuint vblank_flags;
+
+   int64_t swap_ust;
+   int64_t swap_missed_ust;
+
+   GLuint swap_count;
+   GLuint swap_missed_count;
+};
+
+/* These are functions now:
+ */
+void LOCK_HARDWARE( struct intel_context *intel );
+void UNLOCK_HARDWARE( struct intel_context *intel );
+
+
+#define SUBPIXEL_X 0.125
+#define SUBPIXEL_Y 0.125
+
+/* ================================================================
+ * Color packing:
+ */
+
+#define INTEL_PACKCOLOR4444(r,g,b,a) \
+  ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4))
+/* 1555: top 5 bits of r/g/b plus a single alpha bit set when a is non-zero. */
+#define INTEL_PACKCOLOR1555(r,g,b,a) \
+  ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \
+    ((a) ? 0x8000 : 0))
+/* 565: top 5/6/5 bits of r/g/b, no alpha. */
+#define INTEL_PACKCOLOR565(r,g,b) \
+  ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3))
+/* 8888: a:r:g:b bytes.  Arguments are parenthesized and widened to GLuint so expression arguments and a >= 0x80 pack correctly. */
+#define INTEL_PACKCOLOR8888(r,g,b,a) \
+  (((GLuint)(a)<<24) | ((GLuint)(r)<<16) | ((GLuint)(g)<<8) | (GLuint)(b))
+
+/* Pack r,g,b,a for the DV_PF_* format given; evaluates to 0 for an unknown format. */
+#define INTEL_PACKCOLOR(format, r,  g,  b, a)		\
+((format) == DV_PF_555 ? INTEL_PACKCOLOR1555(r,g,b,a) :	\
+ ((format) == DV_PF_565 ? INTEL_PACKCOLOR565(r,g,b) :	\
+  ((format) == DV_PF_8888 ? INTEL_PACKCOLOR8888(r,g,b,a) :	\
+   0)))
+
+
+
+/* ================================================================
+ * __memcpy: from linux kernel i386 header files, copes with odd sizes better
+ * than COPY_DWORDS would.  Copies n/4 dwords with "rep movsl", then a 2-byte and a
+ * 1-byte tail as needed, and returns 'to'.  Non-i386 builds map it straight onto memcpy. */
+#if defined(i386) || defined(__i386__)
+static inline void * __memcpy(void * to, const void * from, size_t n)
+{
+   int d0, d1, d2;
+   __asm__ __volatile__(
+      "rep ; movsl\n\t"
+      "testb $2,%b4\n\t"
+      "je 1f\n\t"
+      "movsw\n"
+      "1:\ttestb $1,%b4\n\t"
+      "je 2f\n\t"
+      "movsb\n"
+      "2:"
+      : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+      :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
+      : "memory");
+   return (to);
+}
+#else
+#define __memcpy(a,b,c) memcpy(a,b,c)
+#endif
+
+
+/* Pick a copy routine based on pointer alignment.
+ *
+ * The system memcpy (at least on ubuntu 5.10) shows a 4x performance
+ * falloff when copying to agp (writecombined) memory unless the
+ * source is 64-byte aligned.
+ *
+ * The x86 __memcpy does not suffer from that, but is slightly
+ * slower (10%-ish) than the system memcpy otherwise.
+ *
+ * The sse_memcpy seems to have a slight cliff at 64/32 bytes, but
+ * isn't much faster than x86_memcpy for agp copies.
+ * TODO: switch dynamically. */
+static inline void *do_memcpy( void *dest, const void *src, size_t n )
+{
+   /* Non-zero when either pointer is off a 64-byte boundary. */
+   const unsigned long misaligned =
+      (((unsigned long)src) | ((unsigned long)dest)) & 63;
+   if (misaligned)
+      return __memcpy(dest, src, n);
+   return memcpy(dest, src, n);
+}
+
+
+
+
+
+/* ================================================================
+ * Debugging:
+ */
+extern int INTEL_DEBUG;
+
+#define DEBUG_TEXTURE	0x1
+#define DEBUG_STATE	0x2
+#define DEBUG_IOCTL	0x4
+#define DEBUG_PRIMS	0x8
+#define DEBUG_VERTS	0x10
+#define DEBUG_FALLBACKS	0x20
+#define DEBUG_VERBOSE	0x40
+#define DEBUG_DRI       0x80
+#define DEBUG_DMA       0x100
+#define DEBUG_SANITY    0x200
+#define DEBUG_SYNC      0x400
+#define DEBUG_SLEEP     0x800
+#define DEBUG_PIXEL     0x1000
+#define DEBUG_STATS     0x2000
+#define DEBUG_TILE      0x4000
+#define DEBUG_SINGLE_THREAD   0x8000
+#define DEBUG_WM        0x10000
+#define DEBUG_URB       0x20000
+#define DEBUG_VS        0x40000
+
+
+#define PCI_CHIP_845_G			0x2562
+#define PCI_CHIP_I830_M			0x3577
+#define PCI_CHIP_I855_GM		0x3582
+#define PCI_CHIP_I865_G			0x2572
+#define PCI_CHIP_I915_G			0x2582
+#define PCI_CHIP_I915_GM		0x2592
+#define PCI_CHIP_I945_G			0x2772
+#define PCI_CHIP_I965_G			0x29A2
+#define PCI_CHIP_I965_Q			0x2992
+#define PCI_CHIP_I965_G_1		0x2982
+#define PCI_CHIP_I946_GZ		0x2972
+#define PCI_CHIP_I965_GM                0x2A02
+
+#define PCI_CHIP_IGD_GM       0x2A42
+
+
+/* ================================================================
+ * intel_context.c:
+ */
+
+extern GLboolean intelInitContext( struct intel_context *intel, 
+				   const __GLcontextModes *mesaVis,
+				   __DRIcontextPrivate *driContextPriv,
+				   void *sharedContextPrivate,
+				   struct dd_function_table *functions );
+
+extern void intelGetLock(struct intel_context *intel, GLuint flags);
+
+extern void intelInitState( GLcontext *ctx );
+extern void intelFinish( GLcontext *ctx );
+extern void intelFlush( GLcontext *ctx );
+
+extern void intelInitDriverFunctions( struct dd_function_table *functions );
+
+
+/* ================================================================
+ * intel_state.c:
+ */
+extern void intelInitStateFuncs( struct dd_function_table *functions );
+
+#define COMPAREFUNC_ALWAYS		0
+#define COMPAREFUNC_NEVER		0x1
+#define COMPAREFUNC_LESS		0x2
+#define COMPAREFUNC_EQUAL		0x3
+#define COMPAREFUNC_LEQUAL		0x4
+#define COMPAREFUNC_GREATER		0x5
+#define COMPAREFUNC_NOTEQUAL		0x6
+#define COMPAREFUNC_GEQUAL		0x7
+
+#define STENCILOP_KEEP			0
+#define STENCILOP_ZERO			0x1
+#define STENCILOP_REPLACE		0x2
+#define STENCILOP_INCRSAT		0x3
+#define STENCILOP_DECRSAT		0x4
+#define STENCILOP_INCR			0x5
+#define STENCILOP_DECR			0x6
+#define STENCILOP_INVERT		0x7
+
+#define LOGICOP_CLEAR			0
+#define LOGICOP_NOR			0x1
+#define LOGICOP_AND_INV 		0x2
+#define LOGICOP_COPY_INV		0x3
+#define LOGICOP_AND_RVRSE		0x4
+#define LOGICOP_INV			0x5
+#define LOGICOP_XOR			0x6
+#define LOGICOP_NAND			0x7
+#define LOGICOP_AND			0x8
+#define LOGICOP_EQUIV			0x9
+#define LOGICOP_NOOP			0xa
+#define LOGICOP_OR_INV			0xb
+#define LOGICOP_COPY			0xc
+#define LOGICOP_OR_RVRSE		0xd
+#define LOGICOP_OR			0xe
+#define LOGICOP_SET			0xf
+
+#define BLENDFACT_ZERO			0x01
+#define BLENDFACT_ONE			0x02
+#define BLENDFACT_SRC_COLR		0x03
+#define BLENDFACT_INV_SRC_COLR 		0x04
+#define BLENDFACT_SRC_ALPHA		0x05
+#define BLENDFACT_INV_SRC_ALPHA 	0x06
+#define BLENDFACT_DST_ALPHA		0x07
+#define BLENDFACT_INV_DST_ALPHA 	0x08
+#define BLENDFACT_DST_COLR		0x09
+#define BLENDFACT_INV_DST_COLR		0x0a
+#define BLENDFACT_SRC_ALPHA_SATURATE	0x0b
+#define BLENDFACT_CONST_COLOR		0x0c
+#define BLENDFACT_INV_CONST_COLOR	0x0d
+#define BLENDFACT_CONST_ALPHA		0x0e
+#define BLENDFACT_INV_CONST_ALPHA	0x0f
+#define BLENDFACT_MASK          	0x0f
+
+extern int intel_translate_shadow_compare_func( GLenum func );
+extern int intel_translate_compare_func( GLenum func );
+extern int intel_translate_stencil_op( GLenum op );
+extern int intel_translate_blend_factor( GLenum factor );
+extern int intel_translate_logic_op( GLenum opcode );
+
+
+/* ================================================================
+ * intel_buffers.c:
+ */
+void intelInitBufferFuncs( struct dd_function_table *functions );
+
+struct intel_region *intel_readbuf_region( struct intel_context *intel );
+struct intel_region *intel_drawbuf_region( struct intel_context *intel );
+
+extern void intelWindowMoved( struct intel_context *intel );
+
+extern GLboolean intel_intersect_cliprects( drm_clip_rect_t *dest,
+					    const drm_clip_rect_t *a,
+					    const drm_clip_rect_t *b );
+
+
+/* ================================================================
+ * intel_pixel_copy.c:
+ */
+void intelCopyPixels(GLcontext * ctx,
+                     GLint srcx, GLint srcy,
+                     GLsizei width, GLsizei height,
+                     GLint destx, GLint desty, GLenum type);
+
+GLboolean intel_check_blit_fragment_ops(GLcontext * ctx);
+
+void intelBitmap(GLcontext * ctx,
+		 GLint x, GLint y,
+		 GLsizei width, GLsizei height,
+		 const struct gl_pixelstore_attrib *unpack,
+		 const GLubyte * pixels);
+
+void intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging);
+#define _NEW_WINDOW_POS 0x40000000
+
+
+/*======================================================================
+ * Inline conversion functions.  
+ * These are better-typed than the macros used previously:
+ */
+static inline struct intel_context *intel_context( GLcontext *ctx )
+{
+   return (struct intel_context *)ctx; /* plain downcast: GLcontext ctx is the first member of struct intel_context */
+}
+/* Downcast a gl_texture_object to the driver's intel_texture_object ('base' is its first member). */
+static inline struct intel_texture_object *intel_texture_object( struct gl_texture_object *obj )
+{
+   return (struct intel_texture_object *)obj;
+}
+/* Downcast a gl_texture_image to struct intel_texture_image; assumes the gl struct is its first member (definition not in this header - TODO confirm). */
+static inline struct intel_texture_image *intel_texture_image( struct gl_texture_image *img )
+{
+   return (struct intel_texture_image *)img;
+}
+
+#endif
+
diff --git a/i965/intel_ioctl.c b/i965/intel_ioctl.c
new file mode 100644
index 0000000..0a8e976
--- /dev/null
+++ b/i965/intel_ioctl.c
@@ -0,0 +1,205 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sched.h>
+
+#include "mtypes.h"
+#include "context.h"
+#include "swrast/swrast.h"
+
+#include "intel_context.h"
+#include "intel_ioctl.h"
+#include "intel_batchbuffer.h"
+#include "intel_blit.h"
+#include "intel_regions.h"
+#include "drm.h"
+#include "bufmgr.h"
+/* Set a fence and wait for it; returns the bmTestFence() result, or 1 straight away when a wait is already in progress. */
+static int intelWaitIdleLocked( struct intel_context *intel )
+{
+   static int in_wait_idle = 0;
+   unsigned int fence;
+
+   /* Re-entered while an outer call is still waiting: do nothing. */
+   if (in_wait_idle)
+      return 1;
+
+   if (INTEL_DEBUG & DEBUG_SYNC)
+      fprintf(stderr, "waiting for idle\n");
+
+   in_wait_idle = 1;
+   fence = bmSetFence(intel);
+   intelWaitIrq(intel, fence);
+   in_wait_idle = 0;
+
+   /* Hand the fence test result back to the caller. */
+   return bmTestFence(intel, fence);
+}
+/* Issue DRM_I830_IRQ_EMIT and return the sequence number the kernel wrote back; returns 1 with no ioctl when intel->no_hw is set.  Exits the process if the ioctl fails. */
+int intelEmitIrqLocked( struct intel_context *intel )
+{
+   int seq = 1;
+
+   if (!intel->no_hw) {
+      drmI830IrqEmit ie;
+      int ret;
+      
+      assert(((*(int *)intel->driHwLock) & ~DRM_LOCK_CONT) == 
+	     (DRM_LOCK_HELD|intel->hHWContext)); /* caller must hold the hardware lock for this context */
+
+      ie.irq_seq = &seq; /* the ioctl stores the new sequence number through this pointer */
+
+      ret = drmCommandWriteRead( intel->driFd, DRM_I830_IRQ_EMIT, 
+				 &ie, sizeof(ie) );
+      if ( ret ) {
+	 fprintf( stderr, "%s: drmI830IrqEmit: %d\n", __FUNCTION__, ret );
+	 exit(1);
+      }   
+
+      if (0) /* disabled debug trace */
+	 fprintf(stderr, "%s -->  %d\n", __FUNCTION__, seq );
+   }
+
+   return seq;
+}
+/* Block in DRM_I830_IRQ_WAIT until sequence number 'seq' has been dispatched; no-op when intel->no_hw is set.  Exits the process on an unrecoverable ioctl error. */
+void intelWaitIrq( struct intel_context *intel, int seq )
+{
+   if (!intel->no_hw) {
+      drmI830IrqWait iw;
+      int ret, lastdispatch;
+      
+      if (0) /* disabled debug trace */
+	 fprintf(stderr, "%s %d\n", __FUNCTION__, seq );
+
+      iw.irq_seq = seq;
+	
+      do {
+	 lastdispatch = intel->sarea->last_dispatch;
+	 ret = drmCommandWrite( intel->driFd, DRM_I830_IRQ_WAIT, &iw, sizeof(iw) );
+
+	 /* This seems quite often to return before it should!?!  So retry on -EAGAIN/-EINTR, on -EBUSY while last_dispatch moved,
+	  * on success while last_dispatch is still below seq, or at least 1<<24 past it (presumably counter wrap - TODO confirm). */
+      } while (ret == -EAGAIN || ret == -EINTR || (ret == -EBUSY && lastdispatch != intel->sarea->last_dispatch) || (ret == 0 && seq > intel->sarea->last_dispatch)
+	       || (ret == 0 && intel->sarea->last_dispatch - seq >= (1 << 24)));
+      
+
+      if ( ret ) {
+	 fprintf( stderr, "%s: drmI830IrqWait: %d\n", __FUNCTION__, ret );
+
+	 if (intel->aub_file) { /* dump a bitmap through the aub hook before bailing out */
+	    intel->vtbl.aub_dump_bmp( intel, intel->ctx.Visual.doubleBufferMode ? 1 : 0 );
+	 }
+
+	 exit(1);
+      }
+   }
+}
+
+/* Submit 'used' bytes of batchbuffer starting at start_offset via DRM_I830_BATCHBUFFER (no cliprects).  The hardware lock must be held; exits the process if the ioctl fails. */
+void intel_batch_ioctl( struct intel_context *intel, 
+			GLuint start_offset,
+			GLuint used)
+{
+   drmI830BatchBuffer batch;
+
+   assert(intel->locked);
+   assert(used);
+
+   if (0) /* disabled debug trace */
+      fprintf(stderr, "%s used %d offset %x..%x\n",
+	      __FUNCTION__, 
+	      used, 
+	      start_offset,
+	      start_offset + used);
+
+   batch.start = start_offset;
+   batch.used = used;
+   batch.cliprects = NULL;
+   batch.num_cliprects = 0;
+   batch.DR1 = 0;
+   batch.DR4 = 0;
+      
+   if (INTEL_DEBUG & DEBUG_DMA)
+      fprintf(stderr, "%s: 0x%x..0x%x\n",
+	      __FUNCTION__, 
+	      batch.start, 
+	      batch.start + batch.used); /* 'used' is already a byte count, so no scaling */
+
+   if (!intel->no_hw) {
+      if (drmCommandWrite (intel->driFd, DRM_I830_BATCHBUFFER, &batch, 
+			   sizeof(batch))) {
+	 fprintf(stderr, "DRM_I830_BATCHBUFFER: %d\n",  -errno);
+	 UNLOCK_HARDWARE(intel);
+	 exit(1);
+      }
+
+      if (INTEL_DEBUG & DEBUG_SYNC) { /* debug aid: wait for each submission to complete */
+	intelWaitIdleLocked(intel);
+      }
+   }
+}
+/* Submit 'used' bytes of commands held in 'buf' via DRM_I830_CMDBUFFER.  The hardware lock must be held; exits the process if the ioctl fails. */
+void intel_cmd_ioctl( struct intel_context *intel, 
+		      char *buf,
+		      GLuint used)
+{
+   drmI830CmdBuffer cmd;
+
+   assert(intel->locked);
+   assert(used);
+
+   cmd.buf = buf;
+   cmd.sz = used;
+   cmd.cliprects = intel->pClipRects; /* pointer is passed, but the count below is 0 */
+   cmd.num_cliprects = 0;
+   cmd.DR1 = 0;
+   cmd.DR4 = 0;
+      
+   if (INTEL_DEBUG & DEBUG_DMA)
+      fprintf(stderr, "%s: 0x%x..0x%x\n",
+	      __FUNCTION__, 
+	      0, 
+	      0 + cmd.sz);
+
+   if (!intel->no_hw) {
+      if (drmCommandWrite (intel->driFd, DRM_I830_CMDBUFFER, &cmd, 
+			   sizeof(cmd))) {
+	 fprintf(stderr, "DRM_I830_CMDBUFFER: %d\n",  -errno);
+	 UNLOCK_HARDWARE(intel);
+	 exit(1);
+      }
+
+      if (INTEL_DEBUG & DEBUG_SYNC) { /* debug aid: wait for each submission to complete */
+	intelWaitIdleLocked(intel);
+      }
+   }
+}
diff --git a/i965/intel_ioctl.h b/i965/intel_ioctl.h
new file mode 100644
index 0000000..df27659
--- /dev/null
+++ b/i965/intel_ioctl.h
@@ -0,0 +1,44 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_IOCTL_H
+#define INTEL_IOCTL_H
+
+#include "intel_context.h"
+
+void intelWaitIrq( struct intel_context *intel, int seq );
+int intelEmitIrqLocked( struct intel_context *intel );
+
+void intel_batch_ioctl( struct intel_context *intel, 
+			GLuint start_offset,
+			GLuint used);
+
+void intel_cmd_ioctl( struct intel_context *intel, 
+		      char *buf,
+		      GLuint used);
+
+#endif
diff --git a/i965/intel_mipmap_tree.c b/i965/intel_mipmap_tree.c
new file mode 100644
index 0000000..8548bc8
--- /dev/null
+++ b/i965/intel_mipmap_tree.c
@@ -0,0 +1,270 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "intel_context.h"
+#include "intel_mipmap_tree.h"
+#include "intel_regions.h"
+#include "bufmgr.h"
+#include "enums.h"
+#include "imports.h"
+
+/* Fold every cube-map face enum onto GL_TEXTURE_CUBE_MAP_ARB. */
+static GLenum target_to_target( GLenum target )
+{
+   if (target == GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB ||
+       target == GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB ||
+       target == GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB ||
+       target == GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB ||
+       target == GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB ||
+       target == GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB)
+      return GL_TEXTURE_CUBE_MAP_ARB;
+
+   /* every other target is handed back unchanged */
+   return target;
+}
+
+/* Build a miptree: lay it out, then back it with a region.  NULL on failure. */
+struct intel_mipmap_tree *intel_miptree_create( struct intel_context *intel,
+						GLenum target,
+						GLenum internal_format,
+						GLuint first_level,
+						GLuint last_level,
+						GLuint width0,
+						GLuint height0,
+						GLuint depth0,
+						GLuint cpp,
+						GLboolean compressed)
+{
+   GLboolean ok;
+   GLuint i;
+   struct intel_mipmap_tree *mt = calloc(1, sizeof(*mt));
+
+   if (!mt)			/* out of memory: there is nothing to fill in */
+      return NULL;
+   if (INTEL_DEBUG & DEBUG_TEXTURE)
+      _mesa_printf("%s target %s format %s level %d..%d\n", __FUNCTION__,
+		   _mesa_lookup_enum_by_nr(target),
+		   _mesa_lookup_enum_by_nr(internal_format), first_level, last_level);
+
+   mt->target = target_to_target(target);	/* cube faces share one tree */
+   mt->internal_format = internal_format;
+   mt->first_level = first_level;
+   mt->last_level = last_level;
+   mt->width0 = width0;
+   mt->height0 = height0;
+   mt->depth0 = depth0;
+   mt->cpp = compressed ? 2 : cpp;	/* caller's cpp is ignored when compressed */
+   mt->compressed = compressed;
+
+   switch (intel->intelScreen->deviceID) {
+#if 0
+   case PCI_CHIP_I945_G:
+      ok = i945_miptree_layout( mt );
+      break;
+   case PCI_CHIP_I915_G:
+   case PCI_CHIP_I915_GM:
+      ok = i915_miptree_layout( mt );
+      break;
+#endif
+   default:
+      if (INTEL_DEBUG & DEBUG_TEXTURE)
+	 _mesa_printf("assuming BRW texture layouts\n");
+      ok = brw_miptree_layout( intel, mt );
+      break;
+   }
+
+   if (ok)
+      mt->region = intel_region_alloc( intel, mt->cpp, mt->pitch, mt->total_height );
+   if (mt->region)
+      return mt;
+
+   /* Failure: release any offset tables the layout set up (NULL is harmless). */
+   for (i = 0; i < MAX_TEXTURE_LEVELS; i++)
+      free(mt->level[i].image_offset);
+   free(mt);
+   return NULL;
+}
+
+
+/**
+ * intel_miptree_pitch_align:
+ *
+ * @intel: intel context pointer
+ *
+ * @mt: the miptree to compute pitch alignment for
+ *
+ * @pitch: the natural pitch value
+ *
+ * Given @pitch, compute a larger value which accounts for
+ * any necessary alignment required by the device
+ */
+
+int intel_miptree_pitch_align (struct intel_context *intel,
+			       struct intel_mipmap_tree *mt,
+			       int pitch)
+{
+   if (mt->compressed)
+      return pitch;		/* compressed trees keep the natural pitch */
+   /* round pitch * cpp up to a multiple of 4, then divide the cpp back out */
+   return ((pitch * mt->cpp + 3) & ~3) / mt->cpp;
+}
+
+
+/* Release the region, the offset tables and the tree; a NULL tree is ignored. */
+void intel_miptree_destroy( struct intel_context *intel,
+			    struct intel_mipmap_tree *mt )
+{
+   GLuint lvl;
+
+   if (!mt)
+      return;
+
+   intel_region_release(intel, &(mt->region));
+   /* free(NULL) is a no-op, so levels without a table need no test */
+   for (lvl = 0; lvl < MAX_TEXTURE_LEVELS; lvl++)
+      free(mt->level[lvl].image_offset);
+   free(mt);
+}
+
+
+
+
+/* Record one level's size/placement and (re)create its image offset table. */
+void intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
+				  GLuint level,
+				  GLuint nr_images,
+				  GLuint x, GLuint y,
+				  GLuint w, GLuint h, GLuint d)
+{
+   mt->level[level].width = w;
+   mt->level[level].height = h;
+   mt->level[level].depth = d;
+   mt->level[level].level_offset = (x + y * mt->pitch) * mt->cpp;
+   mt->level[level].nr_images = nr_images;
+
+   if (INTEL_DEBUG & DEBUG_TEXTURE)
+      _mesa_printf("%s level %d img size: %d,%d level_offset 0x%x\n", __FUNCTION__, level, w, h, 
+		   mt->level[level].level_offset);
+
+   /* Discard the table left by any earlier call; free(NULL) is a no-op. */
+   free(mt->level[level].image_offset);
+   mt->level[level].image_offset = NULL;
+
+   if (nr_images > 1) {	/* single-image levels get no table at all */
+      mt->level[level].image_offset = malloc(nr_images * sizeof(GLuint));
+      /* NOTE(review): malloc may fail; later readers of the table don't check. */
+      if (mt->level[level].image_offset)
+	 mt->level[level].image_offset[0] = 0;
+   }
+}
+
+
+
+/* Store the offset of image @img within @level, relative to the level origin. */
+void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
+				    GLuint level,
+				    GLuint img,
+				    GLuint x, GLuint y)
+{
+   if (INTEL_DEBUG & DEBUG_TEXTURE)
+      _mesa_printf("%s level %d img %d pos %d,%d\n", __FUNCTION__, level, img, x, y);
+   if (img == 0) {		/* image 0 must sit at the origin; nothing is stored */
+      assert(x == 0 && y == 0);
+      return;
+   }
+   mt->level[level].image_offset[img] = (x + y * mt->pitch) * mt->cpp;
+}
+
+
+/* Although we use the image_offset[] array to store relative offsets
+ * to cube faces, Mesa doesn't know anything about this and expects
+ * each cube face to be treated as a separate image.
+ *
+ * These functions present that view to mesa:
+ */
+/* Per-slice offsets for @level: the stored table for multi-image 3D levels. */
+const GLuint *intel_miptree_depth_offsets(struct intel_mipmap_tree *mt,
+					  GLuint level)
+{
+   static const GLuint zero = 0;
+   const GLboolean has_slices = (mt->target == GL_TEXTURE_3D &&
+				 mt->level[level].nr_images != 1);
+
+   /* everything else is presented as one image at offset 0 */
+   return has_slices ? mt->level[level].image_offset : &zero;
+}
+
+
+GLuint intel_miptree_image_offset(struct intel_mipmap_tree *mt,
+				  GLuint face,
+				  GLuint level)
+{
+   GLuint offset = mt->level[level].level_offset;
+   /* only cube maps add a per-face entry on top of the level offset */
+   if (mt->target == GL_TEXTURE_CUBE_MAP_ARB)
+      offset += mt->level[level].image_offset[face];
+   return offset;
+}
+
+
+
+
+
+
+/* Upload data for a particular image.
+ */
+GLboolean intel_miptree_image_data(struct intel_context *intel, 
+				   struct intel_mipmap_tree *dst,
+				   GLuint face,
+				   GLuint level,
+				   const void *src, 
+				   GLuint src_row_pitch,
+				   GLuint src_image_pitch)
+{
+   const GLubyte *slice = src;	/* byte cursor: ISO C has no void * arithmetic */
+   GLuint depth = dst->level[level].depth;
+   GLuint dst_offset = intel_miptree_image_offset(dst, face, level);
+   const GLuint *dst_depth_offset = intel_miptree_depth_offsets(dst, level);
+   GLuint i;
+
+   DBG("%s\n", __FUNCTION__);
+   for (i = 0; i < depth; i++) {	/* one intel_region_data() call per depth slice */
+      if (!intel_region_data(intel,
+			     dst->region, 
+			     dst_offset + dst_depth_offset[i],
+			     0, 0,
+			     slice,
+			     src_row_pitch,
+			     0, 0,	/* source x,y */
+			     dst->level[level].width,
+			     dst->level[level].height))
+	 return GL_FALSE;	/* stop at the first slice that fails */
+      slice += src_image_pitch;	/* step to the next source slice */
+   }
+   return GL_TRUE;
+}
+
diff --git a/i965/intel_mipmap_tree.h b/i965/intel_mipmap_tree.h
new file mode 100644
index 0000000..c67c726
--- /dev/null
+++ b/i965/intel_mipmap_tree.h
@@ -0,0 +1,171 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_MIPMAP_TREE_H
+#define INTEL_MIPMAP_TREE_H
+
+#include "intel_regions.h"
+
+/* A layer on top of the intel_regions code which adds:
+ *
+ * - Code to size and layout a region to hold a set of mipmaps.
+ * - Query to determine if a new image fits in an existing tree.
+ *
+ * The fixed mipmap layout of intel hardware where one offset
+ * specifies the position of all images in a mipmap hierarchy
+ * complicates the implementation of GL texture image commands,
+ * compared to hardware where each image is specified with an
+ * independent offset.
+ *
+ * In an ideal world, each texture object would be associated with a
+ * single bufmgr buffer or 2d intel_region, and all the images within
+ * the texture object would slot into the tree as they arrive.  The
+ * reality can be a little messier, as images can arrive from the user
+ * with sizes that don't fit in the existing tree, or in an order
+ * where the tree layout cannot be guessed immediately.  
+ * 
+ * This structure encodes an idealized mipmap tree.  The GL image
+ * commands build these where possible, otherwise store the images in
+ * temporary system buffers.
+ */
+
+
+struct intel_mipmap_level {
+   GLuint level_offset;	/* (x + y * pitch) * cpp, set by intel_miptree_set_level_info() */
+   GLuint width;
+   GLuint height;
+   GLuint depth;
+   GLuint nr_images;	/* image_offset[] is only allocated when this is > 1 */
+
+   /* Explicitly store the offset of each image for each cube face or
+    * depth value.  Pretty much have to accept that hardware formats
+    * are going to be so diverse that there is no unified way to
+    * compute the offsets of depth/cube images within a mipmap level,
+    * so have to store them as a lookup table:
+    */
+   GLuint *image_offset;	/* malloc'd table, relative to level_offset; entry 0 is 0 */
+};
+
+struct intel_mipmap_tree {
+   /* Effectively the key:
+    */
+   GLenum target;	/* cube face enums are folded to GL_TEXTURE_CUBE_MAP_ARB on create */
+   GLenum internal_format;
+
+   GLuint first_level;
+   GLuint last_level;
+
+   GLuint width0, height0, depth0;
+   GLuint cpp;		/* intel_miptree_create() stores 2 here for compressed trees */
+   GLboolean compressed;
+
+   /* Derived from the above:
+    */   
+   GLuint pitch;	/* row stride used in (x + y * pitch) * cpp offset math */
+   GLuint depth_pitch;		/* per-image on i945? */
+   GLuint total_height;	/* passed as the height to intel_region_alloc() */
+   
+   /* Includes image offset tables:
+    */
+   struct intel_mipmap_level level[MAX_TEXTURE_LEVELS];
+
+   /* The data is held here:
+    */
+   struct intel_region *region;
+
+   /* These are also refcounted:
+    */
+   GLuint refcount;
+};
+
+
+
+struct intel_mipmap_tree *intel_miptree_create( struct intel_context *intel,
+						GLenum target,
+						GLenum internal_format,
+						GLuint first_level,
+						GLuint last_level,
+						GLuint width0,
+						GLuint height0,
+						GLuint depth0,
+						GLuint cpp,
+						GLboolean compressed);
+
+int intel_miptree_pitch_align (struct intel_context *intel,
+			       struct intel_mipmap_tree *mt,
+			       int pitch);
+
+void intel_miptree_destroy( struct intel_context *intel,
+			    struct intel_mipmap_tree *mt );
+
+
+/* Return the linear offset of an image relative to the start of the
+ * tree:
+ */
+GLuint intel_miptree_image_offset( struct intel_mipmap_tree *mt,
+				   GLuint face,
+				   GLuint level );
+
+/* Return pointers to each 2d slice within an image.  Indexed by depth
+ * value.
+ */
+const GLuint *intel_miptree_depth_offsets(struct intel_mipmap_tree *mt,
+					  GLuint level);
+
+
+void intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
+				  GLuint level,
+				  GLuint nr_images,
+				  GLuint x, GLuint y,
+				  GLuint w, GLuint h, GLuint d);
+
+void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
+				    GLuint level,
+				    GLuint img,
+				    GLuint x, GLuint y);
+
+
+/* Upload an image into a tree
+ */
+GLboolean intel_miptree_image_data(struct intel_context *intel, 
+				   struct intel_mipmap_tree *dst,
+				   GLuint face,
+				   GLuint level,
+				   const void *src, 
+				   GLuint src_row_pitch,
+				   GLuint src_image_pitch);
+
+/* i915_mipmap_tree.c:
+ */
+GLboolean i915_miptree_layout( struct intel_mipmap_tree *mt );
+GLboolean i945_miptree_layout( struct intel_mipmap_tree *mt );
+GLboolean brw_miptree_layout( struct intel_context *intel,
+			      struct intel_mipmap_tree *mt );
+
+
+
+#endif
diff --git a/i965/intel_pixel_bitmap.c b/i965/intel_pixel_bitmap.c
new file mode 100644
index 0000000..df9d688
--- /dev/null
+++ b/i965/intel_pixel_bitmap.c
@@ -0,0 +1,362 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "enums.h"
+#include "image.h"
+#include "colormac.h"
+#include "mtypes.h"
+#include "macros.h"
+#include "bufferobj.h"
+#include "swrast/swrast.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_ioctl.h"
+#include "intel_batchbuffer.h"
+#include "intel_blit.h"
+#include "intel_regions.h"
+#include "intel_buffer_objects.h"
+
+
+
+#define FILE_DEBUG_FLAG DEBUG_PIXEL
+
+
+/* Unlike the other intel_pixel_* functions, the expectation here is
+ * that the incoming data is not in a PBO.  With the XY_TEXT blit
+ * method, there's no benefit having it in a PBO, but we could
+ * implement a path based on XY_MONO_SRC_COPY_BLIT which might benefit
+ * PBO bitmaps.  I think they are probably pretty rare though - I
+ * wonder if Xgl uses them?
+ */
+/* Validate and map the unpack PBO; returns the bitmap's address or NULL. */
+static const GLubyte *map_pbo( GLcontext *ctx,
+			       GLsizei width, GLsizei height,
+			       const struct gl_pixelstore_attrib *unpack,
+			       const GLubyte *bitmap )
+{
+   GLubyte *mapped;
+
+   if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
+				  GL_COLOR_INDEX, GL_BITMAP, (GLvoid *) bitmap)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,"glBitmap(invalid PBO access)");
+      return NULL;
+   }
+
+   mapped = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
+					      GL_READ_ONLY_ARB, unpack->BufferObj);
+   if (mapped == NULL) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)");
+      return NULL;
+   }
+
+   /* "bitmap" is combined with the base address of the mapping here */
+   return ADD_POINTERS(mapped, bitmap);
+}
+
+static GLboolean test_bit( const GLubyte *src,
+			    GLuint bit )
+{
+   return (src[bit >> 3] >> (bit & 7)) & 1;	/* bit 0 is the LSB of src[0] */
+}
+
+static void set_bit( GLubyte *dest,
+			  GLuint bit )
+{
+   dest[bit >> 3] |= 1 << (bit & 7);	/* bit 0 is the LSB of dest[0] */
+}
+
+static int align(int x, int align)
+{
+   return (x + (align - 1)) & -align;	/* -align == ~(align - 1) in two's complement */
+}
+
+/* Extract a w x h rectangle of the bitmap into dest, which must already be
+ * zeroed.  Called per-cliprect chunk; returns the number of bits that were set.
+ */
+static GLuint get_bitmap_rect(GLsizei width, GLsizei height,
+			      const struct gl_pixelstore_attrib *unpack,
+			      const GLubyte *bitmap,
+			      GLuint x, GLuint y, 
+			      GLuint w, GLuint h,
+			      GLubyte *dest,
+			      GLuint row_align,
+			      GLboolean invert)
+{
+   GLuint src_offset = (x + unpack->SkipPixels) & 0x7;	/* bit offset inside the first source byte */
+   GLuint mask = unpack->LsbFirst ? 0 : 7;	/* XOR with 7 reverses bit order within a byte */
+   GLuint bit = 0;	/* write cursor into dest, counted in bits */
+   GLint row, col;
+   GLint first, last;
+   GLint incr;
+   GLuint count = 0;	/* number of set bits copied; this is the return value */
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      _mesa_printf("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n",
+		   __FUNCTION__, x,y,w,h,width,height,unpack->SkipPixels, src_offset, mask);
+
+   if (invert) {	/* walk the source rows from h-1 down to 0 */
+      first = h-1;
+      last = 0;
+      incr = -1;
+   }
+   else {
+      first = 0;
+      last = h-1;
+      incr = 1;
+   }
+
+   /* Require that dest be pre-zero'd: set_bit() only ever ORs bits in.
+    */
+   for (row = first; row != (last+incr); row += incr) {
+      const GLubyte *rowsrc = _mesa_image_address2d(unpack, bitmap, 
+						    width, height, 
+						    GL_COLOR_INDEX, GL_BITMAP, 
+						    y + row, x);
+
+      for (col = 0; col < w; col++, bit++) {
+	 if (test_bit(rowsrc, (col + src_offset) ^ mask)) {
+	    set_bit(dest, bit ^ 7);	/* dest is filled most-significant bit first */
+	    count++;
+	 }
+      }
+
+      if (row_align)	/* round the cursor up to a multiple of row_align (mask math: power of two) */
+	 bit = (bit + row_align - 1) & ~(row_align - 1);
+   }
+
+   return count;
+}
+
+
+
+
+/* Render a bitmap with immediate colour-expand blits, one per 32x32 (or
+ * smaller) chunk of each cliprect.  GL_FALSE asks the caller to fall back.
+ */
+static GLboolean
+do_blit_bitmap( GLcontext *ctx,
+		GLint dstx, GLint dsty,
+		GLsizei width, GLsizei height,
+		const struct gl_pixelstore_attrib *unpack,
+		const GLubyte *bitmap )
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_region *dst = intel_drawbuf_region(intel);
+   GLfloat tmpColor[4];
+
+   union {
+      GLuint ui;
+      GLubyte ub[4];
+   } color;
+
+   if (!dst)
+       return GL_FALSE;
+
+   COPY_4V(tmpColor, ctx->Current.RasterColor);
+
+   if (NEED_SECONDARY_COLOR(ctx)) {
+       ADD_3V(tmpColor, tmpColor, ctx->Current.RasterSecondaryColor);
+   }
+
+   UNCLAMPED_FLOAT_TO_CHAN(color.ub[0], tmpColor[2]);	/* bytes are stored B,G,R,A */
+   UNCLAMPED_FLOAT_TO_CHAN(color.ub[1], tmpColor[1]);
+   UNCLAMPED_FLOAT_TO_CHAN(color.ub[2], tmpColor[0]);
+   UNCLAMPED_FLOAT_TO_CHAN(color.ub[3], tmpColor[3]);
+
+   /* Does zoom apply to bitmaps? */
+   if (!intel_check_blit_fragment_ops(ctx) ||
+       ctx->Pixel.ZoomX != 1.0F ||
+       ctx->Pixel.ZoomY != 1.0F)
+      return GL_FALSE;
+
+   /* Map the PBO only now: once mapped, every path reaches UnmapBuffer below. */
+   if (unpack->BufferObj->Name) {
+      bitmap = map_pbo(ctx, width, height, unpack, bitmap);
+      if (bitmap == NULL)
+	 return GL_TRUE;	/* even though this is an error, we're done */
+   }
+
+   LOCK_HARDWARE(intel);
+
+   if (intel->driDrawable->numClipRects) {
+      __DRIdrawablePrivate *dPriv = intel->driDrawable;
+      drm_clip_rect_t *box = dPriv->pClipRects;
+      drm_clip_rect_t dest_rect;
+      GLint nbox = dPriv->numClipRects;
+      GLint srcx = 0, srcy = 0;
+      GLint orig_screen_x1, orig_screen_y2;
+      GLuint i;
+
+      /* Unclipped bitmap origin in screen space, kept for the srcx/srcy math: */
+      orig_screen_x1 = dPriv->x + dstx;
+      orig_screen_y2 = dPriv->y + (dPriv->h - dsty);
+
+      /* Do scissoring in GL coordinates:
+       */
+      if (ctx->Scissor.Enabled)
+      {
+	 GLint x = ctx->Scissor.X;
+	 GLint y = ctx->Scissor.Y;
+	 GLuint w = ctx->Scissor.Width;
+	 GLuint h = ctx->Scissor.Height;
+
+         if (!_mesa_clip_to_region(x, y, x+w-1, y+h-1, &dstx, &dsty, &width, &height))
+            goto out;
+      }
+
+      /* Convert from GL to hardware coordinates:
+       */
+      dsty = dPriv->y + (dPriv->h - dsty - height);
+      dstx = dPriv->x + dstx;
+
+      dest_rect.x1 = dstx;
+      dest_rect.y1 = dsty;
+      dest_rect.x2 = dstx + width;
+      dest_rect.y2 = dsty + height;
+
+      for (i = 0; i < nbox; i++) {
+         drm_clip_rect_t rect;
+	 int box_w, box_h;
+	 GLint px, py;
+	 GLuint stipple[32];	/* 128 bytes: room for one 32x32 1bpp chunk */
+
+         if (!intel_intersect_cliprects(&rect, &dest_rect, &box[i]))
+            continue;
+
+	 /* Now go back to GL coordinates to figure out what subset of
+	  * the bitmap we are uploading for this cliprect:
+	  */
+	 box_w = rect.x2 - rect.x1;
+	 box_h = rect.y2 - rect.y1;
+	 srcx = rect.x1 - orig_screen_x1;
+	 srcy = orig_screen_y2 - rect.y2;
+
+
+#define DY 32
+#define DX 32
+
+	 /* Then, finally, chop it all into chunks that can be
+	  * digested by hardware:
+	  */
+	 for (py = 0; py < box_h; py += DY) {
+	    for (px = 0; px < box_w; px += DX) {
+	       int h = MIN2(DY, box_h - py);
+	       int w = MIN2(DX, box_w - px);
+	       GLuint sz = align(align(w,8) * h, 64)/8;	/* bytes: rows padded to 8 bits, total to 64 */
+	       GLenum logic_op = ctx->Color.ColorLogicOpEnabled ?
+		  ctx->Color.LogicOp : GL_COPY;
+
+	       assert(sz <= sizeof(stipple));
+	       memset(stipple, 0, sz);
+
+	       /* May need to adjust this when padding has been introduced in
+		* sz above:
+		*/
+	       if (get_bitmap_rect(width, height, unpack,
+				   bitmap,
+				   srcx + px, srcy + py, w, h,
+				   (GLubyte *)stipple,
+				   8,
+				   GL_TRUE) == 0)
+		  continue;	/* chunk has no set bits: nothing to draw */
+
+	       /* Expand the 1bpp chunk to color.ui at its hardware position:
+		*/
+	       intelEmitImmediateColorExpandBlit( intel,
+						  dst->cpp,
+						  (GLubyte *)stipple,
+						  sz,
+						  color.ui,
+						  dst->pitch,
+						  dst->buffer,
+						  0,
+						  dst->tiled,
+						  rect.x1 + px,
+						  rect.y2 - (py + h),
+						  w, h,
+						  logic_op);
+	    }
+	 }
+      }
+      intel->need_flush = GL_TRUE;
+   out:
+      intel_batchbuffer_flush(intel->batch);
+   }
+   UNLOCK_HARDWARE(intel);
+
+
+   if (unpack->BufferObj->Name) {
+      /* done with PBO so unmap it now */
+      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
+                              unpack->BufferObj);
+   }
+
+   return GL_TRUE;
+}
+
+
+
+
+
+/* There are a large number of possible ways to implement bitmap on
+ * this hardware, most of them have some sort of drawback.  Here are a
+ * few that spring to mind:
+ * 
+ * Blit:
+ *    - XY_MONO_SRC_BLT_CMD
+ *         - use XY_SETUP_CLIP_BLT for cliprect clipping.
+ *    - XY_TEXT_BLT
+ *    - XY_TEXT_IMMEDIATE_BLT
+ *         - blit per cliprect, subject to maximum immediate data size.
+ *    - XY_COLOR_BLT 
+ *         - per pixel or run of pixels
+ *    - XY_PIXEL_BLT
+ *         - good for sparse bitmaps
+ *
+ * 3D engine:
+ *    - Point per pixel
+ *    - Translate bitmap to an alpha texture and render as a quad
+ *    - Chop bitmap up into 32x32 squares and render w/polygon stipple.
+ */
+void
+intelBitmap(GLcontext * ctx,
+	    GLint x, GLint y,
+	    GLsizei width, GLsizei height,
+	    const struct gl_pixelstore_attrib *unpack,
+	    const GLubyte * pixels)
+{
+   /* A true result means the blit path has finished with this bitmap;
+    * only a false result hands the request on to swrast. */
+   if (!do_blit_bitmap(ctx, x, y, width, height, unpack, pixels)) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+	 _mesa_printf("%s: fallback to swrast\n", __FUNCTION__);
+
+      _swrast_Bitmap(ctx, x, y, width, height, unpack, pixels);
+   }
+}
diff --git a/i965/intel_pixel_copy.c b/i965/intel_pixel_copy.c
new file mode 100644
index 0000000..3bdf2fb
--- /dev/null
+++ b/i965/intel_pixel_copy.c
@@ -0,0 +1,344 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "enums.h"
+#include "image.h"
+#include "mtypes.h"
+#include "macros.h"
+#include "state.h"
+#include "swrast/swrast.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_ioctl.h"
+#include "intel_batchbuffer.h"
+#include "intel_blit.h"
+#include "intel_regions.h"
+
+
+/* Region that glCopyPixels of @type reads from, or NULL when unsupported. */
+static struct intel_region *
+copypix_src_region(struct intel_context *intel, GLenum type)
+{
+   struct intel_region *src = NULL;
+
+   switch (type) {
+   case GL_COLOR:
+      src = intel_readbuf_region(intel);
+      break;
+   case GL_DEPTH:
+      /* only thought possible at 16bpp, when we have no stencil */
+      if (intel->depth_region && intel->depth_region->cpp == 2)
+         src = intel->depth_region;
+      break;
+   case GL_DEPTH_STENCIL_EXT:
+      /* Does it matter whether it is stencil/depth or depth/stencil? */
+      src = intel->depth_region;
+      break;
+   default:
+      /* GL_STENCIL and unknown types have no usable source */
+      break;
+   }
+   return src;
+}
+
+
+
+
+/**
+ * Check if any fragment operations are in effect which might affect
+ * glDraw/CopyPixels.
+ */
+GLboolean
+intel_check_blit_fragment_ops(GLcontext * ctx)
+{
+   if (ctx->NewState)
+      _mesa_update_state(ctx);
+
+   /* any enabled per-fragment or pixel-transfer stage rules the blit out */
+   if (ctx->_ImageTransferState || ctx->RenderMode != GL_RENDER ||
+       ctx->Color.AlphaEnabled || ctx->Color.BlendEnabled ||
+       ctx->Depth.Test || ctx->Stencil.Enabled || ctx->Fog.Enabled ||
+       ctx->Texture._EnabledUnits || ctx->FragmentProgram._Enabled)
+      return GL_FALSE;
+
+   /* so does a masked-off colour channel (the alpha mask "can do this!") */
+   if (!ctx->Color.ColorMask[0] || !ctx->Color.ColorMask[1] ||
+       !ctx->Color.ColorMask[2] || !ctx->Color.ColorMask[3])
+      return GL_FALSE;
+
+   return GL_TRUE;
+}
+
+/* CopyPixels through the 3D engine (meta state).  Doesn't work for overlapping
+ * regions (GL_FALSE is returned).  Could do a double copy or just fallback.
+ */
+static GLboolean
+do_texture_copypixels(GLcontext * ctx,
+                      GLint srcx, GLint srcy,
+                      GLsizei width, GLsizei height,
+                      GLint dstx, GLint dsty, GLenum type)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_region *dst = intel_drawbuf_region(intel);
+   struct intel_region *src = copypix_src_region(intel, type);
+   GLenum src_format;	/* NOTE(review): assigned below but never read in this function */
+   GLenum src_type;	/* NOTE(review): assigned below but never read in this function */
+
+   DBG("%s %d,%d %dx%d --> %d,%d\n", __FUNCTION__, 
+       srcx, srcy, width, height, dstx, dsty);
+
+   if (!src || !dst || type != GL_COLOR ||
+       ctx->_ImageTransferState ||
+       ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F ||
+       ctx->RenderMode != GL_RENDER ||
+       ctx->Texture._EnabledUnits ||
+       ctx->FragmentProgram._Enabled ||
+       src != dst )	/* NOTE(review): bails out unless read and draw regions are the same object */
+       return GL_FALSE;
+   
+   /* Can't handle overlapping regions.  Don't have sufficient control
+    * over rasterization to pull it off in-place.  Punt on these for
+    * now.
+    * 
+    * XXX: do a copy to a temporary. 
+    */
+   if (src->buffer == dst->buffer) {
+      drm_clip_rect_t srcbox;
+      drm_clip_rect_t dstbox;
+      drm_clip_rect_t tmp;
+
+      srcbox.x1 = srcx;
+      srcbox.y1 = srcy;
+      srcbox.x2 = srcx + width - 1;	/* boxes are built with inclusive maxima */
+      srcbox.y2 = srcy + height - 1;
+
+      dstbox.x1 = dstx;
+      dstbox.y1 = dsty;
+      dstbox.x2 = dstx + width - 1;
+      dstbox.y2 = dsty + height - 1;
+
+      DBG("src %d,%d %d,%d\n", srcbox.x1, srcbox.y1, srcbox.x2, srcbox.y2);
+      DBG("dst %d,%d %d,%d (%dx%d) (%f,%f)\n", dstbox.x1, dstbox.y1, dstbox.x2, dstbox.y2,
+	  width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY);
+
+      if (intel_intersect_cliprects(&tmp, &srcbox, &dstbox)) {
+         DBG("%s: regions overlap\n", __FUNCTION__);
+         return GL_FALSE;
+      }
+   }
+
+   intelFlush(&intel->ctx);
+
+   intel->vtbl.install_meta_state(intel);	/* paired with leave_meta_state() below */
+
+   /* Is this true?  Also will need to turn depth testing on according
+    * to state:
+    */
+   intel->vtbl.meta_no_stencil_write(intel);
+   intel->vtbl.meta_no_depth_write(intel);
+
+   /* Set the 3d engine to draw into the destination region:
+    */
+   intel->vtbl.meta_draw_region(intel, dst, intel->depth_region);
+
+   intel->vtbl.meta_import_pixel_state(intel);
+
+   if (src->cpp == 2) {
+      src_format = GL_RGB;
+      src_type = GL_UNSIGNED_SHORT_5_6_5;
+   }
+   else {
+      src_format = GL_BGRA;
+      src_type = GL_UNSIGNED_BYTE;
+   }
+
+   /* Set the frontbuffer up as a large rectangular texture.
+    */
+   intel->vtbl.meta_frame_buffer_texture( intel, srcx - dstx, srcy - dsty );
+
+   intel->vtbl.meta_texture_blend_replace(intel);
+   
+   if (intel->driDrawable->numClipRects)	/* the quad is only drawn when cliprects exist */
+      intel->vtbl.meta_draw_quad( intel,
+				  dstx, dstx + width,
+				  dsty, dsty + height,
+				  ctx->Current.RasterPos[ 2 ],
+				  0, 0, 0, 0, 0.0, 0.0, 0.0, 0.0 );
+   
+   intel->vtbl.leave_meta_state( intel );
+   
+   DBG("%s: success\n", __FUNCTION__);
+   return GL_TRUE;
+}
+
+/**
+ * CopyPixels with the blitter.  Don't support zooming, pixel transfer, etc.  Returns GL_FALSE to request a fallback.
+ */
+static GLboolean
+do_blit_copypixels(GLcontext * ctx,
+                   GLint srcx, GLint srcy,
+                   GLsizei width, GLsizei height,
+                   GLint dstx, GLint dsty, GLenum type)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_region *dst = intel_drawbuf_region(intel);
+   struct intel_region *src = copypix_src_region(intel, type);
+
+   /* Copypixels can be more than a straight copy.  Ensure all the
+    * extra operations are disabled:
+    */
+   if (!intel_check_blit_fragment_ops(ctx) ||
+       ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F)
+      return GL_FALSE;
+
+   if (!src || !dst)
+      return GL_FALSE;
+
+
+
+   intelFlush(&intel->ctx);
+
+/*    intel->vtbl.render_start(intel); */
+/*    intel->vtbl.emit_state(intel); */
+
+   LOCK_HARDWARE(intel);
+
+   if (intel->driDrawable->numClipRects) {
+      __DRIdrawablePrivate *dPriv = intel->driDrawable;
+      __DRIdrawablePrivate *dReadPriv = intel->driReadDrawable;
+      drm_clip_rect_t *box = dPriv->pClipRects;
+      drm_clip_rect_t dest_rect;
+      GLint nbox = dPriv->numClipRects;
+      GLint delta_x = 0;
+      GLint delta_y = 0;
+      GLuint i;
+
+      /* Do scissoring in GL coordinates:
+       */
+      if (ctx->Scissor.Enabled)
+      {
+	 GLint x = ctx->Scissor.X;
+	 GLint y = ctx->Scissor.Y;
+	 GLuint w = ctx->Scissor.Width;
+	 GLuint h = ctx->Scissor.Height;
+	 GLint dx = dstx - srcx;
+         GLint dy = dsty - srcy;
+
+         if (!_mesa_clip_to_region(x, y, x+w-1, y+h-1, &dstx, &dsty, &width, &height))
+            goto out;
+	 
+         srcx = dstx - dx;	/* keep the source at the same displacement from the clipped dst */
+         srcy = dsty - dy;
+      }
+
+      /* Convert from GL to hardware coordinates:
+       */
+      dsty = dPriv->h - dsty - height;  
+      srcy = dPriv->h - srcy - height;  /* NOTE(review): uses dPriv->h, not dReadPriv->h - confirm */
+      dstx += dPriv->x;
+      dsty += dPriv->y;
+      srcx += dReadPriv->x;
+      srcy += dReadPriv->y;
+
+      /* Clip against the source region.  This is the only source
+       * clipping we do.  Dst is clipped with cliprects below.
+       */
+      {
+         delta_x = srcx - dstx;
+         delta_y = srcy - dsty;
+
+         if (!_mesa_clip_to_region(0, 0, src->pitch, src->height,
+                                   &srcx, &srcy, &width, &height))
+            goto out;
+
+         dstx = srcx - delta_x;	/* re-derive dst from the clipped source */
+         dsty = srcy - delta_y;
+      }
+
+      dest_rect.x1 = dstx;
+      dest_rect.y1 = dsty;
+      dest_rect.x2 = dstx + width;
+      dest_rect.y2 = dsty + height;
+
+/*       intel->vtbl.emit_flush(intel, 0); */
+
+      /* Could do slightly more clipping: Eg, take the intersection of
+       * the existing set of cliprects and those cliprects translated
+       * by delta_x, delta_y:
+       * 
+       * This code will not overwrite other windows, but will
+       * introduce garbage when copying from obscured window regions.
+       */
+      for (i = 0; i < nbox; i++) {
+         drm_clip_rect_t rect;
+
+         if (!intel_intersect_cliprects(&rect, &dest_rect, &box[i]))
+            continue;
+
+
+         intelEmitCopyBlit(intel, 
+			   dst->cpp, 
+			   src->pitch, src->buffer, 0, src->tiled,
+			   dst->pitch, dst->buffer, 0, dst->tiled,
+			   rect.x1 + delta_x, 
+			   rect.y1 + delta_y,       /* srcx, srcy */
+                           rect.x1, rect.y1,    /* dstx, dsty */
+                           rect.x2 - rect.x1, rect.y2 - rect.y1,
+			   ctx->Color.ColorLogicOpEnabled ?
+			   ctx->Color.LogicOp : GL_COPY);
+      }
+
+      intel->need_flush = GL_TRUE;
+   out:
+      intel_batchbuffer_flush(intel->batch);	/* reached by the clip-rejected paths too */
+   }
+   UNLOCK_HARDWARE(intel);
+   return GL_TRUE;	/* also returned when there were no cliprects to draw into */
+}
+
+void
+intelCopyPixels(GLcontext * ctx,
+                GLint srcx, GLint srcy,
+                GLsizei width, GLsizei height,
+                GLint destx, GLint desty, GLenum type)
+{
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   /* Try the blitter first, then the 3D engine; || stops at the first
+    * path that reports success. */
+   if (do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type) ||
+       do_texture_copypixels(ctx, srcx, srcy, width, height, destx, desty, type))
+      return;
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      _mesa_printf("fallback to _swrast_CopyPixels\n");
+
+   _swrast_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type);
+}
diff --git a/i965/intel_reg.h b/i965/intel_reg.h
new file mode 100644
index 0000000..3c448b3
--- /dev/null
+++ b/i965/intel_reg.h
@@ -0,0 +1,91 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#ifndef _INTEL_REG_H_
+#define _INTEL_REG_H_
+
+
+
+#define CMD_3D (0x3<<29)
+
+/* _3DPRIMITIVE = CMD_3D | (0x1f<<24).  PRIM_* are single-bit flags (23, 17). */
+#define _3DPRIMITIVE         ((0x3<<29)|(0x1f<<24))
+#define PRIM_INDIRECT            (1<<23)
+#define PRIM_INLINE              (0<<23)
+#define PRIM_INDIRECT_SEQUENTIAL (0<<17)
+#define PRIM_INDIRECT_ELTS       (1<<17)
+/* Primitive type codes; all lie inside the 5-bit PRIM3D_MASK (bits 18-22). */
+#define PRIM3D_TRILIST		(0x0<<18)
+#define PRIM3D_TRISTRIP 	(0x1<<18)
+#define PRIM3D_TRISTRIP_RVRSE	(0x2<<18)
+#define PRIM3D_TRIFAN		(0x3<<18)
+#define PRIM3D_POLY		(0x4<<18)
+#define PRIM3D_LINELIST 	(0x5<<18)
+#define PRIM3D_LINESTRIP	(0x6<<18)
+#define PRIM3D_RECTLIST 	(0x7<<18)
+#define PRIM3D_POINTLIST	(0x8<<18)
+#define PRIM3D_DIB		(0x9<<18)
+#define PRIM3D_MASK		(0x1f<<18)
+
+#define I915PACKCOLOR4444(r,g,b,a) \
+  ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4))  /* 4 bits per channel */
+/* 1555: top 5 bits of r, g and b; bit 15 is set whenever a is non-zero. */
+#define I915PACKCOLOR1555(r,g,b,a) \
+  ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \
+    ((a) ? 0x8000 : 0))
+/* 565: top 5/6/5 bits of r/g/b. */
+#define I915PACKCOLOR565(r,g,b) \
+  ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3))
+/* 8888: a,r,g,b from high byte to low; arguments parenthesized like the rest. */
+#define I915PACKCOLOR8888(r,g,b,a) \
+  (((a)<<24) | ((r)<<16) | ((g)<<8) | (b))
+
+
+
+
+#define BR00_BITBLT_CLIENT   0x40000000
+#define BR00_OP_COLOR_BLT    0x10000000
+#define BR00_OP_SRC_COPY_BLT 0x10C00000
+#define BR13_SOLID_PATTERN   0x80000000
+/* (2<<29) below equals BR00_BITBLT_CLIENT; WRITE_* are bits 21 and 20. */
+#define XY_COLOR_BLT_CMD		((2<<29)|(0x50<<22)|0x4)
+#define XY_COLOR_BLT_WRITE_ALPHA	(1<<21)
+#define XY_COLOR_BLT_WRITE_RGB		(1<<20)
+/* Same layout as XY_COLOR_BLT, with 0x53 in bits 22+ and 6 in the low bits. */
+#define XY_SRC_COPY_BLT_CMD             ((2<<29)|(0x53<<22)|6)
+#define XY_SRC_COPY_BLT_WRITE_ALPHA     (1<<21)
+#define XY_SRC_COPY_BLT_WRITE_RGB       (1<<20)
+/* Tiling bits: source is bit 15, destination bit 11 (presumably OR'ed into an XY_* command - confirm). */
+#define XY_SRC_TILED  (1<<15)
+#define XY_DST_TILED  (1<<11)
+/* FENCE_* are plain enumerated values 0..2, not shifted bit fields. */
+#define FENCE_LINEAR 0
+#define FENCE_XMAJOR 1
+#define FENCE_YMAJOR 2
+
+#endif
diff --git a/i965/intel_regions.c b/i965/intel_regions.c
new file mode 100644
index 0000000..835ecdd
--- /dev/null
+++ b/i965/intel_regions.c
@@ -0,0 +1,295 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/* Provide additional functionality on top of bufmgr buffers:
+ *   - 2d semantics and blit operations
+ *   - refcounting of buffers for multiple images in a buffer.
+ *   - refcounting of buffer mappings.
+ *   - some logic for moving the buffers to the best memory pools for
+ *     given operations.
+ *
+ * Most of this is to make it easier to implement the fixed-layout
+ * mipmap tree required by intel hardware in the face of GL's
+ * programming interface where each image can be specified in random
+ * order and it isn't clear what layout the tree should have until the
+ * last moment.
+ */
+
+#include "intel_context.h"
+#include "intel_regions.h"
+#include "intel_blit.h"
+#include "bufmgr.h"
+#include "imports.h"
+
+/* XXX: Thread safety?
+ */
+GLubyte *intel_region_map(struct intel_context *intel, struct intel_region *region)
+{
+   DBG("%s\n", __FUNCTION__);
+   if (region->map_refcount == 0) {	/* first mapper does the real map */
+      region->map = bmMapBuffer(intel, region->buffer, 0);
+      if (region->map == NULL)
+	 return NULL;			/* failed: leave the count at zero */
+   }
+   region->map_refcount++;
+   return region->map;
+}
+
+void intel_region_unmap(struct intel_context *intel, 
+			struct intel_region *region)
+{
+   DBG("%s\n", __FUNCTION__);
+   if (--region->map_refcount != 0)
+      return;				/* other mappers still hold the pointer */
+   bmUnmapBufferAUB(intel, region->buffer, 0, 0);
+   region->map = NULL;
+}
+
+struct intel_region *intel_region_alloc( struct intel_context *intel, 
+					 GLuint cpp,
+					 GLuint pitch, 
+					 GLuint height )
+{
+   struct intel_region *region = calloc(1, sizeof(*region));
+   /* Returns a zeroed region holding one reference, or NULL if calloc fails. */
+   DBG("%s %dx%dx%d == 0x%x bytes\n", __FUNCTION__,
+       cpp, pitch, height, cpp*pitch*height);
+   if (!region)
+      return NULL;		/* out of memory: no buffer was generated */
+   region->cpp = cpp;
+   region->pitch = pitch;	/* in pixels; the byte size is pitch * cpp * height */
+   region->height = height; 	/* needed? */
+   region->refcount = 1;
+   bmGenBuffers(intel, "tex", 1, &region->buffer, 6);
+   bmBufferData(intel, region->buffer, pitch * cpp * height, NULL, 0);
+
+   return region;
+}
+
+void intel_region_reference( struct intel_region **dst,
+			     struct intel_region *src)
+{
+   src->refcount += 1;
+   assert(!*dst);		/* the destination slot must be empty */
+   *dst = src;
+}
+
+void intel_region_release( struct intel_context *intel,
+			   struct intel_region **region )
+{
+   struct intel_region *r = *region;
+
+   if (r == NULL)
+      return;
+   DBG("%s %d\n", __FUNCTION__, r->refcount-1);
+   if (--r->refcount == 0) {		/* last reference: destroy */
+      assert(r->map_refcount == 0);	/* must not still be mapped */
+      bmDeleteBuffers(intel, 1, &r->buffer);
+      free(r);
+   }
+   *region = NULL;			/* the caller's pointer is cleared either way */
+}
+
+
+struct intel_region *intel_region_create_static( struct intel_context *intel, 
+						 GLuint mem_type,	/* currently unused */
+						 GLuint offset,
+						 void *virtual,
+						 GLuint cpp,
+						 GLuint pitch, 
+						 GLuint height,
+						 GLuint size,
+						 GLboolean tiled )
+{
+   struct intel_region *region = calloc(1, sizeof(*region));
+   GLint pool;
+
+   DBG("%s\n", __FUNCTION__);
+   if (!region)
+      return NULL;		/* out of memory; no pool has been created yet */
+   region->cpp = cpp;
+   region->pitch = pitch;
+   region->height = height; 	/* needed? */
+   region->refcount = 1;
+   region->tiled = tiled;
+
+   /* Recipe for creating a static buffer - create a static pool with
+    * the right offset and size, generate a buffer and use a special
+    * call to bind it to all of the memory in that pool.
+    */
+   pool = bmInitPool(intel, offset, virtual, size, 
+		     (BM_MEM_AGP |
+		      BM_NO_UPLOAD | 
+		      BM_NO_EVICT | 
+		      BM_NO_MOVE));
+   if (pool < 0) {
+      _mesa_printf("bmInitPool failed for static region\n");
+      exit(1);
+   }
+
+   region->buffer = bmGenBufferStatic(intel, pool);
+   return region;
+}
+
+
+
+
+void _mesa_copy_rect( GLubyte *dst,
+		      GLuint cpp,
+		      GLuint dst_pitch,
+		      GLuint dst_x, 
+		      GLuint dst_y,
+		      GLuint width,
+		      GLuint height,
+		      const GLubyte *src,
+		      GLuint src_pitch,
+		      GLuint src_x,
+		      GLuint src_y )
+{
+   GLuint i;
+   /* Pitches, x offsets and width arrive in pixels; cpp converts them to bytes. */
+   dst_pitch *= cpp;
+   src_pitch *= cpp;
+   dst += dst_x * cpp;
+   src += src_x * cpp;
+   dst += dst_y * dst_pitch;
+   src += src_y * src_pitch;	/* source rows are stepped by the source pitch */
+   width *= cpp;
+   /* Rows contiguous in both buffers: one copy.  Otherwise copy row by row. */
+   if (width == dst_pitch && 
+       width == src_pitch)
+      do_memcpy(dst, src, height * width);
+   else {
+      for (i = 0; i < height; i++) {
+	 do_memcpy(dst, src, width);
+	 dst += dst_pitch;
+	 src += src_pitch;
+      }
+   }
+}
+
+
+/* Upload data to a rectangular sub-region.  Lots of choices how to do this:
+ *
+ * - memcpy by span to current destination
+ * - upload data as new buffer and blit
+ *
+ * Currently always memcpy.
+ */
+GLboolean intel_region_data(struct intel_context *intel, 
+			    struct intel_region *dst,
+			    GLuint dst_offset,
+			    GLuint dstx, GLuint dsty,
+			    const void *src, GLuint src_pitch,
+			    GLuint srcx, GLuint srcy,
+			    GLuint width, GLuint height)
+{
+   DBG("%s\n", __FUNCTION__);
+   /* Fast path: full rows and full height, so the whole buffer is replaced.  NOTE(review): dstx/dsty are not tested. */
+   if (width == dst->pitch && 
+       width == src_pitch &&
+       dst_offset == 0 &&
+       height == dst->height &&
+       srcx == 0 &&
+       srcy == 0) 
+   {
+      return (bmBufferDataAUB(intel,
+			      dst->buffer,
+			      dst->cpp * width * dst->height,
+			      src, 0, 0, 0) == 0);
+   }
+   else {
+      GLubyte *map = intel_region_map(intel, dst);
+      /* Slow path: map the region and copy the rectangle with the CPU. */
+      if (map) {
+	 assert (dst_offset + dstx + width + 
+		 (dsty + height - 1) * dst->pitch * dst->cpp <= 
+		 dst->pitch * dst->cpp * dst->height);
+	 /* NOTE(review): dstx + width are pixel counts, the other terms bytes - confirm the bound. */
+	 _mesa_copy_rect(map + dst_offset,
+			 dst->cpp,
+			 dst->pitch,
+			 dstx, dsty,
+			 width, height,
+			 src,
+			 src_pitch,
+			 srcx, srcy);      
+	 
+	 intel_region_unmap(intel, dst);
+	 return GL_TRUE;
+      }
+      else 
+	 return GL_FALSE;
+   }
+}
+			  
+/* Copy rectangular sub-regions. Need better logic about when to
+ * push buffers into AGP - will currently do so whenever possible.
+ */
+void intel_region_copy( struct intel_context *intel,
+			struct intel_region *dst,
+			GLuint dst_offset,
+			GLuint dstx, GLuint dsty,
+			struct intel_region *src,
+			GLuint src_offset,
+			GLuint srcx, GLuint srcy,
+			GLuint width, GLuint height )
+{
+   DBG("%s\n", __FUNCTION__);
+   /* Only dst->cpp is handed to the blit, so both regions must share it. */
+   assert(src->cpp == dst->cpp);
+
+   intelEmitCopyBlit(intel,
+		     dst->cpp,
+		     src->pitch, src->buffer, src_offset, src->tiled,
+		     dst->pitch, dst->buffer, dst_offset, dst->tiled,
+		     srcx, srcy,
+		     dstx, dsty,
+		     width, height,
+		     GL_COPY );
+}
+
+/* Fill a rectangular sub-region.  Need better logic about when to
+ * push buffers into AGP - will currently do so whenever possible.
+ */
+void intel_region_fill( struct intel_context *intel,
+			struct intel_region *dst,
+			GLuint dst_offset,
+			GLuint dstx, GLuint dsty,
+			GLuint width, GLuint height,
+			GLuint color )
+{
+   DBG("%s\n", __FUNCTION__);
+   /* Thin wrapper: forwards the region's cpp, pitch, buffer and tiling. */
+   intelEmitFillBlit(intel,
+		     dst->cpp,
+		     dst->pitch, dst->buffer, dst_offset, dst->tiled,
+		     dstx, dsty,
+		     width, height,
+		     color );
+}
+
diff --git a/i965/intel_regions.h b/i965/intel_regions.h
new file mode 100644
index 0000000..d2235f1
--- /dev/null
+++ b/i965/intel_regions.h
@@ -0,0 +1,140 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_REGIONS_H
+#define INTEL_REGIONS_H
+
+#include "mtypes.h"
+#include "bufmgr.h"		/* for DBG! */
+struct intel_context;
+
+/* A layer on top of the bufmgr buffers that adds a few useful things:
+ *
+ * - Refcounting for local buffer references.
+ * - Refcounting for buffer maps
+ * - Buffer dimensions - pitch and height.
+ * - Blitter commands for copying 2D regions between buffers.
+ */
+struct intel_region {
+   struct buffer *buffer;	/* bufmgr buffer backing this region */
+   GLuint refcount;		/* references held; freed when it reaches 0 */
+   GLuint cpp;			/* multiplier from pixels to bytes */
+   GLuint pitch;		/* row length in pixels (bytes = pitch * cpp) */
+   GLuint height;		/* number of rows */
+   GLboolean tiled;		/* forwarded to the blit helpers */
+   GLubyte *map;		/* CPU pointer while mapped, otherwise NULL */
+   GLuint map_refcount;		/* outstanding intel_region_map() calls */
+};
+
+/* Allocate a refcounted region.  Pointers to regions should only be
+ * copied by calling intel_region_reference().
+ *
+ * No support for dynamically allocating tiled regions at this point.
+ */
+struct intel_region *intel_region_alloc( struct intel_context *intel,
+					 GLuint cpp,
+					 GLuint pitch, 
+					 GLuint height );
+
+void intel_region_reference( struct intel_region **dst, 
+			     struct intel_region *src );
+
+void intel_region_release(struct intel_context *intel,
+			  struct intel_region **ib );
+
+/* Static regions may be tiled.  The assumption is that the X server
+ * has set up fence registers to define tiled zones in agp and these
+ * buffers are within those zones.  Tiling regions without fence
+ * registers is more work.
+ */
+struct intel_region *intel_region_create_static( struct intel_context *intel,
+						 GLuint mem_type,
+						 GLuint offset,
+						 void *virtual,
+						 GLuint cpp,
+						 GLuint pitch,
+						 GLuint height,
+						 GLuint size,
+						 GLboolean tiled );
+
+/* Map/unmap regions.  This is refcounted also: 
+ */
+GLubyte *intel_region_map(struct intel_context *intel, 
+		       struct intel_region *ib);
+
+void intel_region_unmap(struct intel_context *intel,
+			struct intel_region *ib);
+
+
+/* Upload data to a rectangular sub-region
+ */
+GLboolean intel_region_data(struct intel_context *intel, 
+			    struct intel_region *dest,
+			    GLuint dest_offset,
+			    GLuint destx, GLuint desty,
+			    const void *src, GLuint src_stride,
+			    GLuint srcx, GLuint srcy,
+			    GLuint width, GLuint height);
+			  
+/* Copy rectangular sub-regions
+ */
+void intel_region_copy( struct intel_context *intel,
+			struct intel_region *dest,
+			GLuint dest_offset,
+			GLuint destx, GLuint desty,
+			struct intel_region *src,
+			GLuint src_offset,
+			GLuint srcx, GLuint srcy,
+			GLuint width, GLuint height );
+
+/* Fill a rectangular sub-region
+ */
+void intel_region_fill( struct intel_context *intel,
+			struct intel_region *dest,
+			GLuint dest_offset,
+			GLuint destx, GLuint desty,
+			GLuint width, GLuint height,
+			GLuint color );
+
+
+/***********************************************************************
+ * Misc utilities: move to somewhere generic
+ */
+void _mesa_copy_rect( GLubyte *dst,
+		      GLuint cpp,
+		      GLuint dst_pitch,
+		      GLuint dst_x, 
+		      GLuint dst_y,
+		      GLuint width,
+		      GLuint height,
+		      const GLubyte *src,
+		      GLuint src_pitch,
+		      GLuint src_x,
+		      GLuint src_y );
+
+
+#endif
diff --git a/i965/intel_screen.c b/i965/intel_screen.c
new file mode 100644
index 0000000..5dac50d
--- /dev/null
+++ b/i965/intel_screen.c
@@ -0,0 +1,701 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "context.h"
+#include "framebuffer.h"
+#include "matrix.h"
+#include "renderbuffer.h"
+#include "simple_list.h"
+#include "utils.h"
+#include "vblank.h"
+#include "xmlpool.h"
+
+
+#include "intel_screen.h"
+
+#include "intel_context.h"
+#include "intel_tex.h"
+#include "intel_span.h"
+#include "intel_ioctl.h"
+
+#include "i830_dri.h"
+
+PUBLIC const char __driConfigOptions[] =
+DRI_CONF_BEGIN
+    DRI_CONF_SECTION_PERFORMANCE
+       DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) 
+       DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
+    DRI_CONF_SECTION_END
+    DRI_CONF_SECTION_QUALITY
+       DRI_CONF_FORCE_S3TC_ENABLE(false)
+       DRI_CONF_ALLOW_LARGE_TEXTURES(1)
+      DRI_CONF_SECTION_END
+DRI_CONF_END;
+const GLuint __driNConfigOptions = 4;	/* two performance + two quality options above */
+
+#ifdef USE_NEW_INTERFACE
+static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;
+#endif /*USE_NEW_INTERFACE*/
+
+/**
+ * Map all the memory regions described by the screen.
+ * \return GL_TRUE if success, GL_FALSE if error.
+ */
+GLboolean
+intelMapScreenRegions(__DRIscreenPrivate *sPriv)
+{
+   intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private;
+   /* Front buffer: mapped by handle when one is set, else taken from pFB. */
+   if (intelScreen->front.handle) {
+      if (drmMap(sPriv->fd,
+                 intelScreen->front.handle,
+                 intelScreen->front.size,
+                 (drmAddress *)&intelScreen->front.map) != 0) {
+         _mesa_problem(NULL, "drmMap(frontbuffer) failed!");
+         return GL_FALSE;
+      }
+   } else {
+      /* Use the old static allocation method if the server isn't setting up
+       * a movable handle for us.  Add in the front buffer offset from
+       * framebuffer start, as our span routines (unlike other drivers) expect
+       * the renderbuffer address to point to the beginning of the
+       * renderbuffer.
+       */
+      intelScreen->front.map = (char *)sPriv->pFB;  /* NOTE(review): no offset is added here, unlike the comment says - confirm */
+      if (intelScreen->front.map == NULL) {
+	 fprintf(stderr, "Failed to find framebuffer mapping\n");
+	 return GL_FALSE;
+      }
+   }
+   /* Back, depth, tex: always mapped by handle; on failure undo earlier maps. */
+   if (drmMap(sPriv->fd,
+              intelScreen->back.handle,
+              intelScreen->back.size,
+              (drmAddress *)&intelScreen->back.map) != 0) {
+      intelUnmapScreenRegions(intelScreen);
+      return GL_FALSE;
+   }
+
+   if (drmMap(sPriv->fd,
+              intelScreen->depth.handle,
+              intelScreen->depth.size,
+              (drmAddress *)&intelScreen->depth.map) != 0) {
+      intelUnmapScreenRegions(intelScreen);
+      return GL_FALSE;
+   }
+
+   if (drmMap(sPriv->fd,
+              intelScreen->tex.handle,
+              intelScreen->tex.size,
+              (drmAddress *)&intelScreen->tex.map) != 0) {
+      intelUnmapScreenRegions(intelScreen);
+      return GL_FALSE;
+   }
+
+   if (0)
+      printf("Mappings:  front: %p  back: %p  depth: %p  tex: %p\n",
+          intelScreen->front.map,
+          intelScreen->back.map,
+          intelScreen->depth.map,
+          intelScreen->tex.map);
+   return GL_TRUE;
+}
+
+
+void
+intelUnmapScreenRegions(intelScreenPrivate *intelScreen)
+{
+#define REALLY_UNMAP 1
+   /* Only a front buffer mapped through its handle is ours to unmap; the
+    * static fallback is left alone.  As for the other regions, a NULL map
+    * means there is nothing to do, so calling this twice is harmless.
+    */
+   if (intelScreen->front.handle && intelScreen->front.map) {
+#if REALLY_UNMAP
+      if (drmUnmap(intelScreen->front.map, intelScreen->front.size) != 0)
+         printf("drmUnmap front failed!\n");
+#endif
+      intelScreen->front.map = NULL;
+   }
+   if (intelScreen->back.map) {
+#if REALLY_UNMAP
+      if (drmUnmap(intelScreen->back.map, intelScreen->back.size) != 0)
+         printf("drmUnmap back failed!\n");
+#endif
+      intelScreen->back.map = NULL;
+   }
+   if (intelScreen->depth.map) {
+#if REALLY_UNMAP
+      drmUnmap(intelScreen->depth.map, intelScreen->depth.size);
+#endif
+      intelScreen->depth.map = NULL;
+   }
+   if (intelScreen->tex.map) {
+#if REALLY_UNMAP
+      drmUnmap(intelScreen->tex.map, intelScreen->tex.size);
+#endif
+      intelScreen->tex.map = NULL;
+   }
+}
+
+
+static void  /* debug dump to stderr; only called under "if (0)" in intelInitDriver; sPriv is unused */
+intelPrintDRIInfo(intelScreenPrivate *intelScreen,
+                  __DRIscreenPrivate *sPriv,
+                  I830DRIPtr gDRIPriv)
+{
+   fprintf(stderr, "*** Front size:   0x%x  offset: 0x%x  pitch: %d\n",
+           intelScreen->front.size, intelScreen->front.offset,
+           intelScreen->front.pitch);
+   fprintf(stderr, "*** Back size:    0x%x  offset: 0x%x  pitch: %d\n",
+           intelScreen->back.size, intelScreen->back.offset,
+           intelScreen->back.pitch);
+   fprintf(stderr, "*** Depth size:   0x%x  offset: 0x%x  pitch: %d\n",
+           intelScreen->depth.size, intelScreen->depth.offset,
+           intelScreen->depth.pitch);
+   fprintf(stderr, "*** Rotated size: 0x%x  offset: 0x%x  pitch: %d\n",
+           intelScreen->rotated.size, intelScreen->rotated.offset,
+           intelScreen->rotated.pitch);
+   fprintf(stderr, "*** Texture size: 0x%x  offset: 0x%x\n",
+           intelScreen->tex.size, intelScreen->tex.offset);
+   fprintf(stderr, "*** Memory : 0x%x\n", gDRIPriv->mem);
+}
+
+
+static void  /* debug dump of the SAREA fields to stderr; only called under "if (0)" in intelUpdateScreenFromSAREA */
+intelPrintSAREA(volatile drmI830Sarea *sarea)
+{
+   fprintf(stderr, "SAREA: sarea width %d  height %d\n", sarea->width, sarea->height);
+   fprintf(stderr, "SAREA: pitch: %d\n", sarea->pitch);
+   fprintf(stderr,
+           "SAREA: front offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
+           sarea->front_offset, sarea->front_size,
+           (unsigned) sarea->front_handle);
+   fprintf(stderr,
+           "SAREA: back  offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
+           sarea->back_offset, sarea->back_size,
+           (unsigned) sarea->back_handle);
+   fprintf(stderr, "SAREA: depth offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
+           sarea->depth_offset, sarea->depth_size,
+           (unsigned) sarea->depth_handle);
+   fprintf(stderr, "SAREA: tex   offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
+           sarea->tex_offset, sarea->tex_size,
+           (unsigned) sarea->tex_handle);
+   fprintf(stderr, "SAREA: rotation: %d\n", sarea->rotation);
+   fprintf(stderr,
+           "SAREA: rotated offset: 0x%08x  size: 0x%x\n",
+           sarea->rotated_offset, sarea->rotated_size);
+   fprintf(stderr, "SAREA: rotated pitch: %d\n", sarea->rotated_pitch);
+}
+
+
+/**
+ * A number of the screen parameters are obtained/computed from
+ * information in the SAREA.  This function updates those parameters.
+ */
+void
+intelUpdateScreenFromSAREA(intelScreenPrivate *intelScreen,
+                           volatile drmI830Sarea *sarea)
+{
+   intelScreen->width = sarea->width;
+   intelScreen->height = sarea->height;
+   /* front, back and depth all use sarea->pitch, scaled by the screen's cpp */
+   intelScreen->front.offset = sarea->front_offset;
+   intelScreen->front.pitch = sarea->pitch * intelScreen->cpp;
+   intelScreen->front.handle = sarea->front_handle;
+   intelScreen->front.size = sarea->front_size;
+   intelScreen->front.tiled = sarea->front_tiled;
+
+   intelScreen->back.offset = sarea->back_offset;
+   intelScreen->back.pitch = sarea->pitch * intelScreen->cpp;
+   intelScreen->back.handle = sarea->back_handle;
+   intelScreen->back.size = sarea->back_size;
+   intelScreen->back.tiled = sarea->back_tiled;
+
+   intelScreen->depth.offset = sarea->depth_offset;
+   intelScreen->depth.pitch = sarea->pitch * intelScreen->cpp;
+   intelScreen->depth.handle = sarea->depth_handle;
+   intelScreen->depth.size = sarea->depth_size;
+   intelScreen->depth.tiled = sarea->depth_tiled;
+
+   intelScreen->tex.offset = sarea->tex_offset;
+   intelScreen->logTextureGranularity = sarea->log_tex_granularity;
+   intelScreen->tex.handle = sarea->tex_handle;
+   intelScreen->tex.size = sarea->tex_size;
+   /* the rotated buffer has its own pitch; no handle is recorded for it */
+   intelScreen->rotated.offset = sarea->rotated_offset;
+   intelScreen->rotated.pitch = sarea->rotated_pitch * intelScreen->cpp;
+   intelScreen->rotated.size = sarea->rotated_size;
+   intelScreen->rotated.tiled = sarea->rotated_tiled;
+   intelScreen->current_rotation = sarea->rotation;
+#if 0
+   matrix23Rotate(&intelScreen->rotMatrix,
+                  sarea->width, sarea->height, sarea->rotation);
+#endif
+   intelScreen->rotatedWidth = sarea->virtualX;
+   intelScreen->rotatedHeight = sarea->virtualY;
+
+   if (0)
+      intelPrintSAREA(sarea);
+}
+
+
+static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv)
+{
+   intelScreenPrivate *intelScreen;
+   I830DRIPtr         gDRIPriv = (I830DRIPtr)sPriv->pDevPriv;
+   PFNGLXSCRENABLEEXTENSIONPROC glx_enable_extension =
+     (PFNGLXSCRENABLEEXTENSIONPROC) (*dri_interface->getProcAddress("glxEnableExtension"));
+   void * const psc = sPriv->psc->screenConfigs;
+   volatile drmI830Sarea *sarea;
+   /* Sets up sPriv->private; every failure path undoes what was done so far. */
+   if (sPriv->devPrivSize != sizeof(I830DRIRec)) {
+      fprintf(stderr,"\nERROR!  sizeof(I830DRIRec) (%lu) does not match passed size from device driver (%d)\n", (unsigned long)sizeof(I830DRIRec), sPriv->devPrivSize);
+      return GL_FALSE;
+   }
+
+   /* Allocate the private area */
+   intelScreen = (intelScreenPrivate *)CALLOC(sizeof(intelScreenPrivate));
+   if (!intelScreen) {
+      fprintf(stderr,"\nERROR!  Allocating private area failed\n");
+      return GL_FALSE;
+   }
+   /* parse information in __driConfigOptions */
+   driParseOptionInfo (&intelScreen->optionCache,
+		       __driConfigOptions, __driNConfigOptions);
+
+   intelScreen->driScrnPriv = sPriv;
+   sPriv->private = (void *)intelScreen;
+   intelScreen->sarea_priv_offset = gDRIPriv->sarea_priv_offset;
+   sarea = (volatile drmI830Sarea *)
+         (((GLubyte *)sPriv->pSAREA)+intelScreen->sarea_priv_offset);
+
+   intelScreen->deviceID = gDRIPriv->deviceID;
+   intelScreen->mem = gDRIPriv->mem;
+   intelScreen->cpp = gDRIPriv->cpp;
+
+   switch (gDRIPriv->bitsPerPixel) {
+   case 15: intelScreen->fbFormat = DV_PF_555; break;
+   case 16: intelScreen->fbFormat = DV_PF_565; break;
+   case 32: intelScreen->fbFormat = DV_PF_8888; break;
+   }				/* other depths leave fbFormat as allocated */
+   intelUpdateScreenFromSAREA(intelScreen, sarea);
+   if (0)
+      intelPrintDRIInfo(intelScreen, sPriv, gDRIPriv);
+
+   if (!intelMapScreenRegions(sPriv)) {
+      fprintf(stderr,"\nERROR!  mapping regions\n");
+      goto fail_free;
+   }
+
+   intelScreen->drmMinor = sPriv->drmMinor;
+
+   /* Determine if IRQs are active? */
+   {
+      int ret;
+      drmI830GetParam gp;
+
+      gp.param = I830_PARAM_IRQ_ACTIVE;
+      gp.value = &intelScreen->irq_active;
+      ret = drmCommandWriteRead( sPriv->fd, DRM_I830_GETPARAM,
+				 &gp, sizeof(gp));
+      if (ret) {
+	 fprintf(stderr, "drmI830GetParam: %d\n", ret);
+	 goto fail_unmap;
+      }
+   }
+
+   /* Determine if batchbuffers are allowed */
+   {
+      int ret;
+      drmI830GetParam gp;
+
+      gp.param = I830_PARAM_ALLOW_BATCHBUFFER;
+      gp.value = &intelScreen->allow_batchbuffer;
+      ret = drmCommandWriteRead( sPriv->fd, DRM_I830_GETPARAM,
+				 &gp, sizeof(gp));
+      if (ret) {
+	 fprintf(stderr, "drmI830GetParam: (%d) %d\n", gp.param, ret);
+	 goto fail_unmap;
+      }
+   }
+
+   if (glx_enable_extension != NULL) {
+      (*glx_enable_extension)( psc, "GLX_SGI_swap_control" );
+      (*glx_enable_extension)( psc, "GLX_SGI_video_sync" );
+      (*glx_enable_extension)( psc, "GLX_MESA_swap_control" );
+      (*glx_enable_extension)( psc, "GLX_MESA_swap_frame_usage" );
+      (*glx_enable_extension)( psc, "GLX_SGI_make_current_read" );
+      (*glx_enable_extension)( psc, "GLX_MESA_copy_sub_buffer" );
+   }
+   return GL_TRUE;
+
+fail_unmap:			/* regions were mapped: release them first */
+   intelUnmapScreenRegions(intelScreen);
+fail_free:			/* nothing mapped (or already unmapped) */
+   _mesa_free(intelScreen);
+   sPriv->private = NULL;
+   return GL_FALSE;
+}
+
+
+static void intelDestroyScreen(__DRIscreenPrivate *sPriv)
+{
+   /* Unmap the regions, free the screen record, then clear the hook. */
+   intelScreenPrivate *screen = (intelScreenPrivate *) sPriv->private;
+   intelUnmapScreenRegions(screen);
+   FREE(screen);
+   sPriv->private = NULL;
+}
+
+static GLboolean intelCreateBuffer( __DRIscreenPrivate *driScrnPriv,
+				    __DRIdrawablePrivate *driDrawPriv,
+				    const __GLcontextModes *mesaVis,
+				    GLboolean isPixmap )
+{
+   intelScreenPrivate *screen = (intelScreenPrivate *) driScrnPriv->private;
+   /* Builds the drawable's framebuffer from the screen's mapped regions. */
+   if (isPixmap) {
+      return GL_FALSE; /* not implemented */
+   } else {
+      GLboolean swStencil = (mesaVis->stencilBits > 0 && 
+			     mesaVis->depthBits != 24);
+      struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis);
+
+      if (!fb)
+         return GL_FALSE;	/* no framebuffer to attach renderbuffers to */
+      {
+         driRenderbuffer *frontRb
+            = driNewRenderbuffer(GL_RGBA,
+                                 screen->front.map,
+                                 screen->cpp,
+                                 screen->front.offset, screen->front.pitch,
+                                 driDrawPriv);
+         intelSetSpanFunctions(frontRb, mesaVis);
+         _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontRb->Base);
+      }
+
+      if (mesaVis->doubleBufferMode) {
+         driRenderbuffer *backRb
+            = driNewRenderbuffer(GL_RGBA,
+                                 screen->back.map,
+                                 screen->cpp,
+                                 screen->back.offset, screen->back.pitch,
+                                 driDrawPriv);
+         intelSetSpanFunctions(backRb, mesaVis);
+         _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backRb->Base);
+      }
+
+      if (mesaVis->depthBits == 16) {
+         driRenderbuffer *depthRb
+            = driNewRenderbuffer(GL_DEPTH_COMPONENT16,
+                                 screen->depth.map,
+                                 screen->cpp,
+                                 screen->depth.offset, screen->depth.pitch,
+                                 driDrawPriv);
+         intelSetSpanFunctions(depthRb, mesaVis);
+         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
+      }
+      else if (mesaVis->depthBits == 24) {
+         driRenderbuffer *depthRb
+            = driNewRenderbuffer(GL_DEPTH_COMPONENT24,
+                                 screen->depth.map,
+                                 screen->cpp,
+                                 screen->depth.offset, screen->depth.pitch,
+                                 driDrawPriv);
+         intelSetSpanFunctions(depthRb, mesaVis);
+         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
+      }
+      /* Hardware stencil shares the depth region; otherwise swStencil adds a soft one. */
+      if (mesaVis->stencilBits > 0 && !swStencil) {
+         driRenderbuffer *stencilRb
+            = driNewRenderbuffer(GL_STENCIL_INDEX8_EXT,
+                                 screen->depth.map,
+                                 screen->cpp,
+                                 screen->depth.offset, screen->depth.pitch,
+                                 driDrawPriv);
+         intelSetSpanFunctions(stencilRb, mesaVis);
+         _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencilRb->Base);
+      }
+
+      _mesa_add_soft_renderbuffers(fb,
+                                   GL_FALSE, /* color */
+                                   GL_FALSE, /* depth */
+                                   swStencil,
+                                   mesaVis->accumRedBits > 0,
+                                   GL_FALSE, /* alpha */
+                                   GL_FALSE /* aux */);
+      driDrawPriv->driverPrivate = (void *) fb;
+      return (driDrawPriv->driverPrivate != NULL);
+   }
+}
+
+static void intelDestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
+{  /* Release the framebuffer reference kept in the drawable's driverPrivate slot. */
+   _mesa_unreference_framebuffer((GLframebuffer **) &driDrawPriv->driverPrivate);
+}
+
+
+/**
+ * Report statistics about the buffer swaps already performed on a drawable.
+ * Copies the swap counters of the context bound to \p dPriv into \p sInfo.
+ * \return 0 on success, -1 if an argument or the bound context is missing.
+ */
+static int
+intelGetSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo )
+{
+   struct intel_context *ictx;
+   if ( !dPriv || !dPriv->driContextPriv || !sInfo )
+      return -1;
+
+   ictx = dPriv->driContextPriv->driverPrivate;
+   if ( !ictx )
+      return -1;
+
+   sInfo->swap_count = ictx->swap_count;
+   sInfo->swap_ust = ictx->swap_ust;
+   sInfo->swap_missed_count = ictx->swap_missed_count;
+   if ( sInfo->swap_missed_count != 0 )
+      sInfo->swap_missed_usage = driCalculateSwapUsage( dPriv, 0, ictx->swap_missed_ust );
+   else
+      sInfo->swap_missed_usage = 0.0;
+   return 0;
+}
+
+
+/* Per-chip context constructors, declared here rather than via a header.
+ * There are probably better ways to do this, such as an init-designated
+ * function to register chipids and createcontext functions.
+ */
+extern GLboolean i830CreateContext( const __GLcontextModes *mesaVis,
+				    __DRIcontextPrivate *driContextPriv,
+				    void *sharedContextPrivate);
+
+extern GLboolean i915CreateContext( const __GLcontextModes *mesaVis,
+				   __DRIcontextPrivate *driContextPriv,
+				   void *sharedContextPrivate);
+
+extern GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
+				   __DRIcontextPrivate *driContextPriv,
+				   void *sharedContextPrivate);
+
+/* Create the driver-side context for a new GL context.                   */
+/* The per-deviceID dispatch below is compiled out (#if 0); every request */
+/* is currently forwarded to brwCreateContext().                          */
+static GLboolean intelCreateContext( const __GLcontextModes *mesaVis,
+				   __DRIcontextPrivate *driContextPriv,
+				   void *sharedContextPrivate)
+{
+#if 0
+   __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
+   intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private;
+   switch (intelScreen->deviceID) {
+   case PCI_CHIP_845_G:
+   case PCI_CHIP_I830_M:
+   case PCI_CHIP_I855_GM:
+   case PCI_CHIP_I865_G:
+      return i830CreateContext( mesaVis, driContextPriv, 
+				sharedContextPrivate );
+
+   case PCI_CHIP_I915_G:
+   case PCI_CHIP_I915_GM:
+   case PCI_CHIP_I945_G:
+   case PCI_CHIP_I945_GM:
+      return i915CreateContext( mesaVis, driContextPriv, 
+			       sharedContextPrivate );
+ 
+   default:
+      fprintf(stderr, "Unrecognized deviceID %x\n", intelScreen->deviceID);
+      return GL_FALSE;
+   }
+#else
+   return brwCreateContext( mesaVis, driContextPriv, 
+			    sharedContextPrivate );
+#endif
+}
+
+/* Driver entry-point table; its address is passed to __driUtilCreateNewScreen(). */
+static const struct __DriverAPIRec intelAPI = {
+   .InitDriver      = intelInitDriver,
+   .DestroyScreen   = intelDestroyScreen,
+   .CreateContext   = intelCreateContext,
+   .DestroyContext  = intelDestroyContext,
+   .CreateBuffer    = intelCreateBuffer,
+   .DestroyBuffer   = intelDestroyBuffer,
+   .SwapBuffers     = intelSwapBuffers,
+   .MakeCurrent     = intelMakeCurrent,
+   .UnbindContext   = intelUnbindContext,
+   .GetSwapInfo     = intelGetSwapInfo,
+   .GetMSC          = driGetMSC32,
+   .WaitForMSC      = driWaitForMSC32,
+   .WaitForSBC      = NULL,   /* no handler supplied */
+   .SwapBuffersMSC  = NULL,   /* no handler supplied */
+   .CopySubBuffer   = intelCopySubBuffer
+};
+
+
+/**
+ * Build the __GLcontextModes list advertised for one framebuffer layout.
+ * Each depth/stencil and back-buffer combination is emitted once as
+ * GLX_TRUE_COLOR and once as GLX_DIRECT_COLOR; modes carrying stencil bits
+ * other than \p stencil_bits are rated GLX_SLOW_CONFIG.
+ * \return list head, or NULL on failure (nothing is left allocated).
+ */
+static __GLcontextModes *
+intelFillInModes( unsigned pixel_bits, unsigned depth_bits,
+		 unsigned stencil_bits, GLboolean have_back_buffer )
+{
+   /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't
+    * support pageflipping at all.
+    */
+   static const GLenum back_buffer_modes[] = {
+      GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML
+   };
+   static const int visual_types[2] = { GLX_TRUE_COLOR, GLX_DIRECT_COLOR };
+
+   __GLcontextModes * modes;
+   __GLcontextModes * m;
+   u_int8_t depth_bits_array[3];
+   u_int8_t stencil_bits_array[3];
+   unsigned depth_buffer_factor, back_buffer_factor, num_modes, i;
+   GLenum fb_format, fb_type;
+
+   depth_bits_array[0] = 0;
+   depth_bits_array[1] = depth_bits;
+   depth_bits_array[2] = depth_bits;
+
+   /* Just like with the accumulation buffer, always provide some modes
+    * with a stencil buffer.  It will be a sw fallback, but some apps won't
+    * care about that.
+    */
+   stencil_bits_array[0] = 0;
+   stencil_bits_array[1] = 0;
+   stencil_bits_array[2] = (stencil_bits == 0) ? 8 : stencil_bits;
+
+   depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 3 : 1;
+   back_buffer_factor  = (have_back_buffer) ? 3 : 1;
+   num_modes = depth_buffer_factor * back_buffer_factor * 4;
+
+   if ( pixel_bits == 16 ) {
+      fb_format = GL_RGB;
+      fb_type = GL_UNSIGNED_SHORT_5_6_5;
+   }
+   else {
+      fb_format = GL_BGRA;
+      fb_type = GL_UNSIGNED_INT_8_8_8_8_REV;
+   }
+
+   modes = (*dri_interface->createContextModes)( num_modes, sizeof( __GLcontextModes ) );
+   if ( modes == NULL ) {   /* allocation failed; there is nothing to fill in */
+      fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", __func__, __LINE__ );
+      return NULL;
+   }
+
+   m = modes;
+   for ( i = 0 ; i < 2 ; i++ ) {
+      if ( ! driFillInModes( & m, fb_format, fb_type,
+			     depth_bits_array, stencil_bits_array, depth_buffer_factor,
+			     back_buffer_modes, back_buffer_factor,
+			     visual_types[i] ) ) {
+	 fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", __func__, __LINE__ );
+	 (*dri_interface->destroyContextModes)( modes );   /* don't leak the list */
+	 return NULL;
+      }
+   }
+
+   /* Mark the visual as slow if there are "fake" stencil bits. */
+   for ( m = modes ; m != NULL ; m = m->next ) {
+      if ( (m->stencilBits != 0) && (m->stencilBits != stencil_bits) ) {
+	 m->visualRating = GLX_SLOW_CONFIG;
+      }
+   }
+
+   return modes;
+}
+
+
+/**
+ * This is the bootstrap function for the driver.  libGL supplies all of the
+ * requisite information about the system, and the driver initializes itself.
+ * This routine also fills in the linked list pointed to by \c driver_modes
+ * with the \c __GLcontextModes that the driver can support for windows or
+ * pbuffers.
+ * Returns early if the versions fail driCheckDriDdxDrmVersions2().
+ * \return A pointer to a \c __DRIscreenPrivate on success, or \c NULL on 
+ *         failure.
+ */
+PUBLIC
+void * __driCreateNewScreen_20050727( __DRInativeDisplay *dpy, int scrn, __DRIscreen *psc,
+			     const __GLcontextModes * modes,
+			     const __DRIversion * ddx_version,
+			     const __DRIversion * dri_version,
+			     const __DRIversion * drm_version,
+			     const __DRIframebuffer * frame_buffer,
+			     drmAddress pSAREA, int fd, 
+			     int internal_api_version,
+			     const __DRIinterfaceMethods * interface,
+			     __GLcontextModes ** driver_modes )
+			     
+{
+   __DRIscreenPrivate *psp;
+   static const __DRIversion ddx_expected = { 1, 6, 0 };
+   static const __DRIversion dri_expected = { 4, 0, 0 };
+   static const __DRIversion drm_expected = { 1, 3, 0 };
+
+   dri_interface = interface;   /* kept for later use by intelFillInModes() */
+
+   if ( ! driCheckDriDdxDrmVersions2( "i915",
+				      dri_version, & dri_expected,
+				      ddx_version, & ddx_expected,
+				      drm_version, & drm_expected ) ) {
+      return NULL;
+   }
+
+   psp = __driUtilCreateNewScreen(dpy, scrn, psc, NULL,
+				  ddx_version, dri_version, drm_version,
+				  frame_buffer, pSAREA, fd,
+				  internal_api_version, &intelAPI);
+   if ( psp != NULL ) {
+      I830DRIPtr dri_priv = (I830DRIPtr) psp->pDevPriv;
+      *driver_modes = intelFillInModes( dri_priv->cpp * 8,               /* pixel_bits */
+					(dri_priv->cpp == 2) ? 16 : 24,  /* depth_bits */
+					(dri_priv->cpp == 2) ? 0  : 8,   /* stencil_bits */
+					GL_TRUE );                       /* have_back_buffer */
+      /* Calling driInitExtensions here, with a NULL context pointer, does not actually
+       * enable the extensions.  It just makes sure that all the dispatch offsets for all
+       * the extensions that *might* be enabled are known.  This is needed because the
+       * dispatch offsets need to be known when _mesa_context_create is called, but we can't
+       * enable the extensions until we have a context pointer.
+       *
+       * Hello chicken.  Hello egg.  How are you two today?
+       */
+      intelInitExtensions(NULL, GL_FALSE);
+   }
+
+   return (void *) psp;
+}
diff --git a/i965/intel_screen.h b/i965/intel_screen.h
new file mode 100644
index 0000000..bf9a716
--- /dev/null
+++ b/i965/intel_screen.h
@@ -0,0 +1,114 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef _INTEL_INIT_H_
+#define _INTEL_INIT_H_
+
+#include <sys/time.h>
+#include "dri_util.h"
+#include "xmlconfig.h"
+#include "i830_common.h"
+
+/* XXX: change name or eliminate to avoid conflict with "struct
+ * intel_region"!!!  Describes one mapped screen memory area.
+ */
+typedef struct {
+   drm_handle_t handle;
+   drmSize size;        /* region size in bytes */
+   char *map;           /* memory map */
+   int offset;          /* from start of video mem, in bytes */
+   int pitch;           /* row stride, in pixels; NOTE(review): intel_span.c applies the renderbuffer pitch to a char pointer - confirm units */
+   unsigned int tiled; 
+} intelRegion;
+
+typedef struct   /* per-screen driver state */
+{
+   intelRegion front;
+   intelRegion back;
+   intelRegion rotated;
+   intelRegion depth;
+   intelRegion tex;
+   
+   int deviceID;    /* compared against the PCI_CHIP_* ids */
+   int width;
+   int height;
+   int mem;         /* unused */
+
+   int cpp;         /* for front and back buffers */
+   int fbFormat;    /* format selector handed to INTEL_PACKCOLOR() */
+
+   int logTextureGranularity;
+   
+   __DRIscreenPrivate *driScrnPriv;
+   unsigned int sarea_priv_offset;
+
+   int drmMinor;
+
+   int irq_active;
+   int allow_batchbuffer;
+
+/*    struct matrix23 rotMatrix; */
+
+   int current_rotation;  /* 0, 90, 180 or 270 */
+   int rotatedWidth, rotatedHeight;
+
+   /**
+    * Configuration cache with default values for all contexts 
+    */
+   driOptionCache optionCache;
+} intelScreenPrivate;
+
+/* Screen helpers plus the context/drawable entry points installed in the intelAPI table. */
+extern GLboolean
+intelMapScreenRegions(__DRIscreenPrivate *sPriv);
+
+extern void
+intelUnmapScreenRegions(intelScreenPrivate *intelScreen);
+
+extern void
+intelUpdateScreenFromSAREA(intelScreenPrivate *intelScreen,
+                           volatile drmI830Sarea *sarea);
+
+extern void
+intelDestroyContext(__DRIcontextPrivate *driContextPriv);
+
+extern GLboolean
+intelUnbindContext(__DRIcontextPrivate *driContextPriv);
+
+extern GLboolean
+intelMakeCurrent(__DRIcontextPrivate *driContextPriv,
+                 __DRIdrawablePrivate *driDrawPriv,
+                 __DRIdrawablePrivate *driReadPriv);
+
+extern void
+intelSwapBuffers(__DRIdrawablePrivate *dPriv);
+
+extern void 
+intelCopySubBuffer( __DRIdrawablePrivate *dPriv,
+		    int x, int y, int w, int h );
+
+#endif
diff --git a/i965/intel_span.c b/i965/intel_span.c
new file mode 100644
index 0000000..60fbecc
--- /dev/null
+++ b/i965/intel_span.c
@@ -0,0 +1,283 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "macros.h"
+#include "mtypes.h"
+#include "colormac.h"
+
+#include "intel_screen.h"
+#include "intel_regions.h"
+#include "intel_span.h"
+#include "intel_ioctl.h"
+#include "intel_tex.h"
+#include "intel_batchbuffer.h"
+#include "swrast/swrast.h"
+
+#undef DBG
+#define DBG 0
+/* Locals set up for the 16-bit color span code below ('p' is a GLushort scratch pixel). */
+#define LOCAL_VARS						\
+   struct intel_context *intel = intel_context(ctx);                    \
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;		\
+   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
+   GLuint pitch = drb->pitch;					\
+   GLuint height = dPriv->h;					\
+   char *buf = (char *) drb->Base.Data +			\
+			dPriv->x * drb->cpp +			\
+			dPriv->y * pitch;			\
+   GLushort p;							\
+   (void) buf; (void) p
+/* Locals for the depth/stencil code: same addressing, no scratch pixel. */
+#define LOCAL_DEPTH_VARS					\
+   struct intel_context *intel = intel_context(ctx);                    \
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;		\
+   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
+   GLuint pitch = drb->pitch;					\
+   GLuint height = dPriv->h;					\
+   char *buf = (char *) drb->Base.Data +			\
+			dPriv->x * drb->cpp +			\
+			dPriv->y * pitch
+
+#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS 
+
+#define INIT_MONO_PIXEL(p,color)\
+	 p = INTEL_PACKCOLOR565(color[0],color[1],color[2])
+/* Mirror a row index against the drawable height; the argument is parenthesized so any expression is safe. */
+#define Y_FLIP(_y) (height - (_y) - 1)
+/* Per-span lock hooks are empty: intelSpanRenderStart()/intelSpanRenderFinish() hold the lock. */
+#define HW_LOCK()
+
+#define HW_UNLOCK()
+
+/* 16 bit, 565 rgb color spanline and pixel functions.  Macro arguments are
+ * parenthesized, matching the depth macros further down. */
+#define WRITE_RGBA( _x, _y, r, g, b, a )				\
+   *(GLushort *)(buf + (_x)*2 + (_y)*pitch)  = ( (((int)(r) & 0xf8) << 8) |	\
+		                               (((int)(g) & 0xfc) << 3) |	\
+		                               (((int)(b) & 0xf8) >> 3))
+#define WRITE_PIXEL( _x, _y, p )  \
+   *(GLushort *)(buf + (_x)*2 + (_y)*pitch) = (p)
+
+#define READ_RGBA( rgba, _x, _y )				\
+do {								\
+   GLushort p = *(GLushort *)(buf + (_x)*2 + (_y)*pitch);	\
+   (rgba)[0] = (((p >> 11) & 0x1f) * 255) / 31;			\
+   (rgba)[1] = (((p >>  5) & 0x3f) * 255) / 63;			\
+   (rgba)[2] = (((p >>  0) & 0x1f) * 255) / 31;			\
+   (rgba)[3] = 255;						\
+} while(0)
+/* Instantiate the span template with names of the form intel<name>_565. */
+#define TAG(x) intel##x##_565
+#include "spantmp.h"
+
+/* 15 bit, 555 rgb color spanline and pixel functions.  Macro arguments are
+ * parenthesized, matching the depth macros further down. */
+#define WRITE_RGBA( _x, _y, r, g, b, a )			\
+   *(GLushort *)(buf + (_x)*2 + (_y)*pitch)  = ((((r) & 0xf8) << 7) |	\
+		                              (((g) & 0xf8) << 3) |	\
+                         		      (((b) & 0xf8) >> 3))
+
+#define WRITE_PIXEL( _x, _y, p )  \
+   *(GLushort *)(buf + (_x)*2 + (_y)*pitch)  = (p)
+
+#define READ_RGBA( rgba, _x, _y )				\
+do {								\
+   GLushort p = *(GLushort *)(buf + (_x)*2 + (_y)*pitch);	\
+   (rgba)[0] = (p >> 7) & 0xf8;					\
+   (rgba)[1] = (p >> 3) & 0xf8;					\
+   (rgba)[2] = (p << 3) & 0xf8;					\
+   (rgba)[3] = 255;						\
+} while(0)
+/* Instantiate the span template with names of the form intel<name>_555. */
+#define TAG(x) intel##x##_555
+#include "spantmp.h"
+
+/* 16 bit depthbuffer functions.  Both macros carry their own trailing ';'.
+ */
+#define WRITE_DEPTH( _x, _y, d ) \
+   *(GLushort *)(buf + (_x)*2 + (_y)*pitch)  = d;
+
+#define READ_DEPTH( d, _x, _y )	\
+   d = *(GLushort *)(buf + (_x)*2 + (_y)*pitch);	 
+
+/* Instantiate the depth template with names of the form intel<name>_z16. */
+#define TAG(x) intel##x##_z16
+#include "depthtmp.h"
+
+
+#undef LOCAL_VARS
+#define LOCAL_VARS						\
+   struct intel_context *intel = intel_context(ctx);			\
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;		\
+   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
+   GLuint pitch = drb->pitch;					\
+   GLuint height = dPriv->h;					\
+   char *buf = (char *)drb->Base.Data +				\
+			dPriv->x * drb->cpp +			\
+			dPriv->y * pitch;			\
+   GLuint p;							\
+   (void) buf; (void) p
+
+#undef INIT_MONO_PIXEL
+#define INIT_MONO_PIXEL(p,color)\
+	 p = INTEL_PACKCOLOR8888(color[0],color[1],color[2],color[3])
+
+/* 32 bit, 8888 argb color spanline and pixel functions.  Channels are widened
+ * to GLuint before shifting so that (a << 24) cannot overflow a signed int. */
+#define WRITE_RGBA(_x, _y, r, g, b, a)					\
+    *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = (((GLuint)(r) << 16) |	\
+					      ((GLuint)(g) << 8)  |	\
+					      ((GLuint)(b) << 0)  |	\
+					      ((GLuint)(a) << 24) )
+
+#define WRITE_PIXEL(_x, _y, p)			\
+    *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = (p)
+
+
+#define READ_RGBA(rgba, _x, _y)					\
+    do {							\
+	GLuint p = *(GLuint *)(buf + (_x)*4 + (_y)*pitch);	\
+	(rgba)[0] = (p >> 16) & 0xff;				\
+	(rgba)[1] = (p >> 8)  & 0xff;				\
+	(rgba)[2] = (p >> 0)  & 0xff;				\
+	(rgba)[3] = (p >> 24) & 0xff;				\
+    } while (0)
+/* Instantiate the span template with names of the form intel<name>_8888. */
+#define TAG(x) intel##x##_8888
+#include "spantmp.h"
+
+
+/* 24/8 bit interleaved depth/stencil functions: depth occupies the low
+ * 24 bits of each 32-bit word and stencil the top 8 bits. */
+#define WRITE_DEPTH( _x, _y, d ) {			\
+   GLuint tmp = *(GLuint *)(buf + (_x)*4 + (_y)*pitch);	\
+   tmp &= 0xff000000;					\
+   tmp |= (d) & 0xffffff;				\
+   *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = tmp;		\
+}
+
+#define READ_DEPTH( d, _x, _y )		\
+   d = *(GLuint *)(buf + (_x)*4 + (_y)*pitch) & 0xffffff;
+
+
+#define TAG(x) intel##x##_z24_s8
+#include "depthtmp.h"
+/* The stencil value is widened to GLuint before shifting: a promoted int shifted by 24 can overflow. */
+#define WRITE_STENCIL( _x, _y, d ) {			\
+   GLuint tmp = *(GLuint *)(buf + (_x)*4 + (_y)*pitch);	\
+   tmp &= 0xffffff;					\
+   tmp |= ((GLuint)(d) << 24);				\
+   *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = tmp;		\
+}
+
+#define READ_STENCIL( d, _x, _y )			\
+   d = *(GLuint *)(buf + (_x)*4 + (_y)*pitch) >> 24;
+
+#define TAG(x) intel##x##_z24_s8
+#include "stenciltmp.h"
+
+
+/* Move locking out to get reasonable span performance: the hardware lock
+ * taken here stays held until intelSpanRenderFinish() releases it. */
+void intelSpanRenderStart( GLcontext *ctx )
+{
+   struct intel_context *intel = intel_context(ctx);
+
+   if (intel->need_flush) {   /* submit pending work before touching the buffers */
+      LOCK_HARDWARE(intel);
+      intel->vtbl.emit_flush(intel, 0);
+      intel_batchbuffer_flush(intel->batch);
+      intel->need_flush = 0;
+      UNLOCK_HARDWARE(intel);
+      intelFinish(&intel->ctx);   /* called only after the lock is dropped */
+   }
+
+/* Take the lock for the duration of the software span pass. */
+   LOCK_HARDWARE(intel);
+
+   /* Just map the framebuffer and all textures.  Bufmgr code will
+    * take care of waiting on the necessary fences:
+    */
+   intel_region_map(intel, intel->front_region);
+   intel_region_map(intel, intel->back_region);
+   intel_region_map(intel, intel->depth_region);
+}
+/* Counterpart of intelSpanRenderStart(): flush swrast, unmap the regions, drop the lock. */
+void intelSpanRenderFinish( GLcontext *ctx )
+{
+   struct intel_context *intel = intel_context( ctx );
+
+   _swrast_flush( ctx );
+
+   /* Now unmap the framebuffer:
+    */
+   intel_region_unmap(intel, intel->front_region);
+   intel_region_unmap(intel, intel->back_region);
+   intel_region_unmap(intel, intel->depth_region);
+
+   UNLOCK_HARDWARE( intel );   /* pairs with the final LOCK_HARDWARE in intelSpanRenderStart() */
+}
+
+void intelInitSpanFuncs( GLcontext *ctx )
+{  /* Hook the span start/finish callbacks into this context's swrast driver table. */
+   struct swrast_device_driver *hooks = _swrast_GetDeviceDriverReference(ctx);
+   hooks->SpanRenderFinish = intelSpanRenderFinish;
+   hooks->SpanRenderStart = intelSpanRenderStart;
+}
+
+
+/**
+ * Plug in the Get/Put routines for the given driRenderbuffer.
+ *
+ * GL_RGBA buffers choose the 555, 565 or 8888 routines from the visual's
+ * channel sizes; depth and stencil buffers are selected by internal
+ * format alone.  Any other internal format leaves \p drb untouched.
+ */
+void
+intelSetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis)
+{
+   switch (drb->Base.InternalFormat) {
+   case GL_RGBA:
+      if (vis->redBits == 5 && vis->greenBits == 5 && vis->blueBits == 5) {
+         intelInitPointers_555(&drb->Base);
+      } else if (vis->redBits == 5 && vis->greenBits == 6 && vis->blueBits == 5) {
+         intelInitPointers_565(&drb->Base);
+      } else {
+         assert(vis->redBits == 8);
+         assert(vis->greenBits == 8);
+         assert(vis->blueBits == 8);
+         intelInitPointers_8888(&drb->Base);
+      }
+      break;
+   case GL_DEPTH_COMPONENT16:  { intelInitDepthPointers_z16(&drb->Base);      break; }
+   case GL_DEPTH_COMPONENT24:  { intelInitDepthPointers_z24_s8(&drb->Base);   break; }
+   case GL_STENCIL_INDEX8_EXT: { intelInitStencilPointers_z24_s8(&drb->Base); break; }
+   default:
+      break;
+   }
+}
diff --git a/i965/intel_span.h b/i965/intel_span.h
new file mode 100644
index 0000000..2d4f858
--- /dev/null
+++ b/i965/intel_span.h
@@ -0,0 +1,41 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef _INTEL_SPAN_H
+#define _INTEL_SPAN_H
+
+#include "drirenderbuffer.h"
+/* Installs the two span render hooks declared below into swrast. */
+extern void intelInitSpanFuncs( GLcontext *ctx );
+
+extern void intelSpanRenderFinish( GLcontext *ctx );
+extern void intelSpanRenderStart( GLcontext *ctx );
+/* Chooses the span Get/Put routines for a renderbuffer. */
+extern void
+intelSetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis);
+
+#endif
diff --git a/i965/intel_state.c b/i965/intel_state.c
new file mode 100644
index 0000000..701b30c
--- /dev/null
+++ b/i965/intel_state.c
@@ -0,0 +1,320 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "glheader.h"
+#include "context.h"
+#include "macros.h"
+#include "enums.h"
+#include "colormac.h"
+#include "dd.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_regions.h"
+#include "swrast/swrast.h"
+
+/**
+ * Translate a GL comparison enum into the COMPAREFUNC_* code used for
+ * shadow compares.  Unlike intel_translate_compare_func() this is not the
+ * identity mapping (e.g. GL_LESS yields COMPAREFUNC_LEQUAL).
+ * NOTE(review): presumably the hardware test has the opposite sense - confirm.
+ * Unknown values are reported on stderr and yield COMPAREFUNC_NEVER.
+ */
+int intel_translate_shadow_compare_func( GLenum func )
+{
+   switch(func) {
+   case GL_NEVER:    return COMPAREFUNC_ALWAYS;
+   case GL_LESS:     return COMPAREFUNC_LEQUAL;
+   case GL_LEQUAL:   return COMPAREFUNC_LESS;
+   case GL_GREATER:  return COMPAREFUNC_GEQUAL;
+   case GL_GEQUAL:   return COMPAREFUNC_GREATER;
+   case GL_NOTEQUAL: return COMPAREFUNC_EQUAL;
+   case GL_EQUAL:    return COMPAREFUNC_NOTEQUAL;
+   case GL_ALWAYS:   return COMPAREFUNC_NEVER;
+   default:
+      break;
+   }
+   fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func);
+   return COMPAREFUNC_NEVER;
+}
+
+/**
+ * Translate a GL comparison enum into the COMPAREFUNC_* code of the
+ * same name (GL_LESS -> COMPAREFUNC_LESS, and so on).
+ * Unknown values are reported on stderr and yield
+ * COMPAREFUNC_ALWAYS.
+ */
+int intel_translate_compare_func( GLenum func )
+{
+   switch(func) {
+   case GL_NEVER:    return COMPAREFUNC_NEVER;
+   case GL_LESS:     return COMPAREFUNC_LESS;
+   case GL_LEQUAL:   return COMPAREFUNC_LEQUAL;
+   case GL_GREATER:  return COMPAREFUNC_GREATER;
+   case GL_GEQUAL:   return COMPAREFUNC_GEQUAL;
+   case GL_NOTEQUAL: return COMPAREFUNC_NOTEQUAL;
+   case GL_EQUAL:    return COMPAREFUNC_EQUAL;
+   case GL_ALWAYS:   return COMPAREFUNC_ALWAYS;
+   default:
+      break;
+   }
+
+   fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func);
+   return COMPAREFUNC_ALWAYS;
+}
+
+/**
+ * Translate a GL stencil operation into the STENCILOP_* code.
+ * GL_INCR/GL_DECR select the saturating codes, the *_WRAP enums the plain
+ * INCR/DECR codes.  Unrecognised values silently yield STENCILOP_ZERO.
+ */
+int intel_translate_stencil_op( GLenum op )
+{
+   int hw_op = STENCILOP_ZERO;   /* also the answer for unrecognised enums */
+
+   switch(op) {
+   case GL_KEEP:      hw_op = STENCILOP_KEEP;    break;
+   case GL_ZERO:      hw_op = STENCILOP_ZERO;    break;
+   case GL_REPLACE:   hw_op = STENCILOP_REPLACE; break;
+   case GL_INCR:      hw_op = STENCILOP_INCRSAT; break;
+   case GL_DECR:      hw_op = STENCILOP_DECRSAT; break;
+   case GL_INCR_WRAP: hw_op = STENCILOP_INCR;    break;
+   case GL_DECR_WRAP: hw_op = STENCILOP_DECR;    break;
+   case GL_INVERT:    hw_op = STENCILOP_INVERT;  break;
+   default:           break;
+   }
+
+   return hw_op;
+}
+
+/**
+ * Translate a GL blend factor enum into the hardware BLENDFACT_* code.
+ * Unrecognised values are reported on stderr and mapped to
+ * BLENDFACT_ZERO.
+ */
+int intel_translate_blend_factor( GLenum factor )
+{
+   static const struct {
+      GLenum gl;
+      int hw;
+   } map[] = {
+      { GL_ZERO,                     BLENDFACT_ZERO },
+      { GL_SRC_ALPHA,                BLENDFACT_SRC_ALPHA },
+      { GL_ONE,                      BLENDFACT_ONE },
+      { GL_SRC_COLOR,                BLENDFACT_SRC_COLR },
+      { GL_ONE_MINUS_SRC_COLOR,      BLENDFACT_INV_SRC_COLR },
+      { GL_DST_COLOR,                BLENDFACT_DST_COLR },
+      { GL_ONE_MINUS_DST_COLOR,      BLENDFACT_INV_DST_COLR },
+      { GL_ONE_MINUS_SRC_ALPHA,      BLENDFACT_INV_SRC_ALPHA },
+      { GL_DST_ALPHA,                BLENDFACT_DST_ALPHA },
+      { GL_ONE_MINUS_DST_ALPHA,      BLENDFACT_INV_DST_ALPHA },
+      { GL_SRC_ALPHA_SATURATE,       BLENDFACT_SRC_ALPHA_SATURATE },
+      { GL_CONSTANT_COLOR,           BLENDFACT_CONST_COLOR },
+      { GL_ONE_MINUS_CONSTANT_COLOR, BLENDFACT_INV_CONST_COLOR },
+      { GL_CONSTANT_ALPHA,           BLENDFACT_CONST_ALPHA },
+      { GL_ONE_MINUS_CONSTANT_ALPHA, BLENDFACT_INV_CONST_ALPHA }
+   };
+   unsigned i;
+
+   for (i = 0; i < sizeof(map) / sizeof(map[0]); i++) {
+      if (map[i].gl == factor)
+         return map[i].hw;
+   }
+
+   fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, factor);
+   return BLENDFACT_ZERO;
+}
+
+/**
+ * Translate a GL logic-op enum into the hardware LOGICOP_* code.
+ *
+ * The sixteen GL opcodes are looked up in a small table.  Anything that
+ * is not listed there silently yields LOGICOP_SET, the same code that
+ * GL_SET itself maps to.
+ */
+int intel_translate_logic_op( GLenum opcode )
+{
+   static const struct {
+      GLenum gl;
+      int hw;
+   } map[] = {
+      { GL_CLEAR,         LOGICOP_CLEAR },
+      { GL_AND,           LOGICOP_AND },
+      { GL_AND_REVERSE,   LOGICOP_AND_RVRSE },
+      { GL_COPY,          LOGICOP_COPY },
+      { GL_COPY_INVERTED, LOGICOP_COPY_INV },
+      { GL_AND_INVERTED,  LOGICOP_AND_INV },
+      { GL_NOOP,          LOGICOP_NOOP },
+      { GL_XOR,           LOGICOP_XOR },
+      { GL_OR,            LOGICOP_OR },
+      { GL_OR_INVERTED,   LOGICOP_OR_INV },
+      { GL_NOR,           LOGICOP_NOR },
+      { GL_EQUIV,         LOGICOP_EQUIV },
+      { GL_INVERT,        LOGICOP_INV },
+      { GL_OR_REVERSE,    LOGICOP_OR_RVRSE },
+      { GL_NAND,          LOGICOP_NAND },
+      { GL_SET,           LOGICOP_SET }
+   };
+   unsigned i;
+
+   for (i = 0; i < sizeof(map) / sizeof(map[0]); i++) {
+      if (map[i].gl == opcode)
+         return map[i].hw;
+   }
+
+   return LOGICOP_SET;
+}
+
+
+/* ClearColor hook: cache the clear color as channel values and packed
+ * for the screen's framebuffer format.
+ */
+static void intelClearColor(GLcontext *ctx, const GLfloat color[4])
+{
+   struct intel_context *ictx = intel_context(ctx);
+
+   UNCLAMPED_FLOAT_TO_RGBA_CHAN(ictx->clear_chan, color);
+
+   ictx->ClearColor = INTEL_PACKCOLOR(ictx->intelScreen->fbFormat,
+				      ictx->clear_chan[0], ictx->clear_chan[1],
+				      ictx->clear_chan[2], ictx->clear_chan[3]);
+}
+
+
+
+/* Fallback to swrast for select and feedback: the RENDERMODE fallback
+ * condition passed to FALLBACK() is (mode != GL_RENDER). */
+static void intelRenderMode( GLcontext *ctx, GLenum mode )
+{
+   struct intel_context *intel = intel_context(ctx);
+   FALLBACK( intel, INTEL_FALLBACK_RENDERMODE, (mode != GL_RENDER) );
+}
+
+
+void intelInitStateFuncs( struct dd_function_table *functions )
+{  /* Install the two state hooks implemented in this file. */
+   functions->ClearColor = intelClearColor;
+   functions->RenderMode = intelRenderMode;
+}
+
+
+
+
+void intelInitState( GLcontext *ctx )
+{
+   /* Mesa should do this for us: push the context's current state values
+    * through the corresponding ctx->Driver hooks. */
+   ctx->Driver.AlphaFunc( ctx, 
+			  ctx->Color.AlphaFunc,
+			  ctx->Color.AlphaRef);
+
+   ctx->Driver.BlendColor( ctx,
+			   ctx->Color.BlendColor );
+
+   ctx->Driver.BlendEquationSeparate( ctx, 
+				      ctx->Color.BlendEquationRGB,
+				      ctx->Color.BlendEquationA);
+
+   ctx->Driver.BlendFuncSeparate( ctx,
+				  ctx->Color.BlendSrcRGB,
+				  ctx->Color.BlendDstRGB,
+				  ctx->Color.BlendSrcA,
+				  ctx->Color.BlendDstA);
+
+   ctx->Driver.ColorMask( ctx, 
+			  ctx->Color.ColorMask[RCOMP],
+			  ctx->Color.ColorMask[GCOMP],
+			  ctx->Color.ColorMask[BCOMP],
+			  ctx->Color.ColorMask[ACOMP]);
+
+   ctx->Driver.CullFace( ctx, ctx->Polygon.CullFaceMode );
+   ctx->Driver.DepthFunc( ctx, ctx->Depth.Func );
+   ctx->Driver.DepthMask( ctx, ctx->Depth.Mask );
+
+   ctx->Driver.Enable( ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled );
+   ctx->Driver.Enable( ctx, GL_BLEND, ctx->Color.BlendEnabled );
+   ctx->Driver.Enable( ctx, GL_COLOR_LOGIC_OP, ctx->Color.ColorLogicOpEnabled );
+   ctx->Driver.Enable( ctx, GL_COLOR_SUM, ctx->Fog.ColorSumEnabled );
+   ctx->Driver.Enable( ctx, GL_CULL_FACE, ctx->Polygon.CullFlag );
+   ctx->Driver.Enable( ctx, GL_DEPTH_TEST, ctx->Depth.Test );
+   ctx->Driver.Enable( ctx, GL_DITHER, ctx->Color.DitherFlag );
+   ctx->Driver.Enable( ctx, GL_FOG, ctx->Fog.Enabled );
+   ctx->Driver.Enable( ctx, GL_LIGHTING, ctx->Light.Enabled );
+   ctx->Driver.Enable( ctx, GL_LINE_SMOOTH, ctx->Line.SmoothFlag );
+   ctx->Driver.Enable( ctx, GL_POLYGON_STIPPLE, ctx->Polygon.StippleFlag );
+   ctx->Driver.Enable( ctx, GL_SCISSOR_TEST, ctx->Scissor.Enabled );
+   ctx->Driver.Enable( ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled );
+   ctx->Driver.Enable( ctx, GL_TEXTURE_1D, GL_FALSE );   /* texture targets are passed GL_FALSE, not context state */
+   ctx->Driver.Enable( ctx, GL_TEXTURE_2D, GL_FALSE );
+   ctx->Driver.Enable( ctx, GL_TEXTURE_RECTANGLE_NV, GL_FALSE );
+   ctx->Driver.Enable( ctx, GL_TEXTURE_3D, GL_FALSE );
+   ctx->Driver.Enable( ctx, GL_TEXTURE_CUBE_MAP, GL_FALSE );
+
+   ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color );
+   ctx->Driver.Fogfv( ctx, GL_FOG_MODE, 0 );   /* null params pointer; NOTE(review): presumably the hook reads the mode from ctx - confirm */
+   ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density );
+   ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start );
+   ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End );
+
+   ctx->Driver.FrontFace( ctx, ctx->Polygon.FrontFace );
+
+   {
+      GLfloat f = (GLfloat)ctx->Light.Model.ColorControl;   /* the hook takes a GLfloat pointer */
+      ctx->Driver.LightModelfv( ctx, GL_LIGHT_MODEL_COLOR_CONTROL, &f );
+   }
+
+   ctx->Driver.LineWidth( ctx, ctx->Line.Width );
+   ctx->Driver.LogicOpcode( ctx, ctx->Color.LogicOp );
+   ctx->Driver.PointSize( ctx, ctx->Point.Size );
+   ctx->Driver.PolygonStipple( ctx, (const GLubyte *)ctx->PolygonStipple );
+   ctx->Driver.Scissor( ctx, ctx->Scissor.X, ctx->Scissor.Y,
+			ctx->Scissor.Width, ctx->Scissor.Height );
+   ctx->Driver.ShadeModel( ctx, ctx->Light.ShadeModel );
+   ctx->Driver.StencilFuncSeparate( ctx, GL_FRONT,   /* index 0 goes with GL_FRONT, index 1 with GL_BACK */
+                                    ctx->Stencil.Function[0],
+                                    ctx->Stencil.Ref[0],
+                                    ctx->Stencil.ValueMask[0] );
+   ctx->Driver.StencilFuncSeparate( ctx, GL_BACK,
+                                    ctx->Stencil.Function[1],
+                                    ctx->Stencil.Ref[1],
+                                    ctx->Stencil.ValueMask[1] );
+   ctx->Driver.StencilMaskSeparate( ctx, GL_FRONT, ctx->Stencil.WriteMask[0] );
+   ctx->Driver.StencilMaskSeparate( ctx, GL_BACK, ctx->Stencil.WriteMask[1] );
+   ctx->Driver.StencilOpSeparate( ctx, GL_FRONT,
+                                  ctx->Stencil.FailFunc[0],
+                                  ctx->Stencil.ZFailFunc[0],
+                                  ctx->Stencil.ZPassFunc[0]);
+   ctx->Driver.StencilOpSeparate( ctx, GL_BACK,
+                                  ctx->Stencil.FailFunc[1],
+                                  ctx->Stencil.ZFailFunc[1],
+                                  ctx->Stencil.ZPassFunc[1]);
+
+
+   ctx->Driver.DrawBuffer( ctx, ctx->Color.DrawBuffer[0] );
+}
diff --git a/i965/intel_structs.h b/i965/intel_structs.h
new file mode 100644
index 0000000..522e3bd
--- /dev/null
+++ b/i965/intel_structs.h
@@ -0,0 +1,132 @@
+#ifndef INTEL_STRUCTS_H
+#define INTEL_STRUCTS_H
+
+struct br0 {                 /* leading dword of the blit packets below */
+   GLuint length:8;
+   GLuint pad0:3;
+   GLuint dst_tiled:1;
+   GLuint pad1:8;
+   GLuint write_rgb:1;
+   GLuint write_alpha:1;
+   GLuint opcode:7;          /* presumably an OPCODE_XY_* value - confirm */
+   GLuint client:3;          /* presumably CLIENT_2D for blits - confirm */
+};                           /* field widths: 8+3+1+8+1+1+7+3 = 32 bits */
+
+/* Second header dword of the blit packets: 16+8+2+3+1+1+1 = 32 bits. */
+struct br13 {
+   GLint dest_pitch:16;               /* the only signed field here */
+   GLuint rop:8;
+   GLuint color_depth:2;
+   GLuint pad1:3;
+   GLuint mono_source_transparency:1;
+   GLuint clipping_enable:1;
+   GLuint pad0:1;
+};
+
+
+
+/* This is an attempt to move some of the 2D interaction in this
+ * driver to using structs for packets rather than a bunch of #defines
+ * and dwords.  Each packet struct below lists its dwords in order.
+ */
+struct xy_color_blit {
+   struct br0 br0;
+   struct br13 br13;
+
+   struct {
+      GLuint dest_x1:16;
+      GLuint dest_y1:16;
+   } dw2;                    /* first destination (x, y) pair */
+
+   struct {
+      GLuint dest_x2:16;
+      GLuint dest_y2:16;
+   } dw3;                    /* second destination (x, y) pair */
+   
+   GLuint dest_base_addr;
+   GLuint color;
+};
+
+struct xy_src_copy_blit {   /* matches xy_color_blit through dest_base_addr, then source fields */
+   struct br0 br0;
+   struct br13 br13;
+
+   struct {
+      GLuint dest_x1:16;
+      GLuint dest_y1:16;
+   } dw2;
+
+   struct {
+      GLuint dest_x2:16;
+      GLuint dest_y2:16;
+   } dw3;
+   
+   GLuint dest_base_addr;
+
+   struct {
+      GLuint src_x1:16;
+      GLuint src_y1:16;
+   } dw5;
+
+   struct {
+      GLint src_pitch:16;    /* signed, like br13.dest_pitch */
+      GLuint pad:16;
+   } dw6;
+   
+   GLuint src_base_addr;
+};
+
+struct xy_setup_blit {      /* header dwords, two clip_* coordinate pairs, four full dwords */
+   struct br0 br0;
+   struct br13 br13;
+
+   struct {
+      GLuint clip_x1:16;
+      GLuint clip_y1:16;
+   } dw2;
+
+   struct {
+      GLuint clip_x2:16;
+      GLuint clip_y2:16;
+   } dw3;
+      
+   GLuint dest_base_addr;
+   GLuint background_color;
+   GLuint foreground_color;
+   GLuint pattern_base_addr;
+};
+
+/* Uses its own dw0 (8+3+1+4+1+5+7+3 = 32 bits) instead of br0/br13. */
+struct xy_text_immediate_blit {
+   struct {
+      GLuint length:8;
+      GLuint pad2:3;
+      GLuint dst_tiled:1;
+      GLuint pad1:4;
+      GLuint byte_packed:1;
+      GLuint pad0:5;
+      GLuint opcode:7;
+      GLuint client:3;
+   } dw0;
+
+   struct {
+      GLuint dest_x1:16;
+      GLuint dest_y1:16;
+   } dw1;
+
+   struct {
+      GLuint dest_x2:16;
+      GLuint dest_y2:16;
+   } dw2;   
+
+   /* Src bitmap data follows as inline dwords.
+    */
+};
+
+/* Presumably the values for the client and opcode bitfields above - confirm. */
+#define CLIENT_2D 0x2
+#define OPCODE_XY_SETUP_BLT 0x1
+#define OPCODE_XY_COLOR_BLT 0x50
+#define OPCODE_XY_TEXT_IMMEDIATE_BLT 0x31
+
+#endif
diff --git a/i965/intel_tex.c b/i965/intel_tex.c
new file mode 100644
index 0000000..4523969
--- /dev/null
+++ b/i965/intel_tex.c
@@ -0,0 +1,315 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "mtypes.h"
+#include "image.h"
+#include "texstore.h"
+#include "texformat.h"
+#include "teximage.h"
+#include "texobj.h"
+#include "swrast/swrast.h"
+
+
+#include "intel_context.h"
+#include "intel_tex.h"
+#include "intel_mipmap_tree.h"
+
+
+/* Cube-map face targets map to face index 0..5; every other target is face 0. */
+static GLuint target_to_face( GLenum target )
+{
+   const GLuint first_face = (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+   const GLuint last_face = (GLuint) GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB;
+   const GLuint t = (GLuint) target;
+
+   /* The six face enums are consecutive, so a range test selects the
+    * same targets as listing each one.
+    */
+   if (t < first_face || t > last_face)
+      return 0;
+
+   return t - first_face;
+}
+
+/* TexImage1D hook: Mesa stores the texels, then the level is marked dirty. */
+static void intelTexImage1D( GLcontext *ctx, GLenum target, GLint level,
+                             GLint internalFormat, GLint width, GLint border,
+                             GLenum format, GLenum type, const GLvoid *pixels,
+                             const struct gl_pixelstore_attrib *packing,
+                             struct gl_texture_object *texObj,
+                             struct gl_texture_image *texImage )
+{
+   struct intel_texture_object *tex = intel_texture_object(texObj);
+
+   _mesa_store_teximage1d( ctx, target, level, internalFormat, width, border,
+                           format, type, pixels, packing, texObj, texImage );
+
+   /* Only face slot 0 is used by this hook. */
+   tex->dirty_images[0] |= (1 << level);
+   tex->dirty |= (1 << 0);
+}
+
+/* TexSubImage1D hook.  There is no sub-range tracking: a partial update
+ * marks the whole level dirty, exactly like a full upload.
+ */
+static void intelTexSubImage1D( GLcontext *ctx, GLenum target, GLint level,
+                                GLint xoffset, GLsizei width,
+                                GLenum format, GLenum type,
+                                const GLvoid *pixels,
+                                const struct gl_pixelstore_attrib *packing,
+                                struct gl_texture_object *texObj,
+                                struct gl_texture_image *texImage )
+{
+   struct intel_texture_object *tex = intel_texture_object(texObj);
+
+   _mesa_store_texsubimage1d( ctx, target, level, xoffset, width,
+                              format, type, pixels, packing,
+                              texObj, texImage );
+
+   tex->dirty_images[0] |= (1 << level);
+   tex->dirty |= (1 << 0);
+}
+
+
+/* TexImage2D hook, shared by the 2D, CUBE and RECT targets.
+ * The level is marked dirty on the face selected by target_to_face().
+ */
+static void intelTexImage2D( GLcontext *ctx, GLenum target, GLint level,
+                             GLint internalFormat,
+                             GLint width, GLint height, GLint border,
+                             GLenum format, GLenum type, const GLvoid *pixels,
+                             const struct gl_pixelstore_attrib *packing,
+                             struct gl_texture_object *texObj,
+                             struct gl_texture_image *texImage )
+{
+   const GLuint cube_face = target_to_face(target);
+   struct intel_texture_object *tex = intel_texture_object(texObj);
+
+   _mesa_store_teximage2d( ctx, target, level, internalFormat, width, height,
+                           border, format, type, pixels, packing,
+                           texObj, texImage );
+   tex->dirty_images[cube_face] |= (1 << level);
+   tex->dirty |= (1 << cube_face);
+}
+
+/* TexSubImage2D hook; marks the touched level dirty on its face. */
+static void intelTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
+                                GLint xoffset, GLint yoffset,
+                                GLsizei width, GLsizei height,
+                                GLenum format, GLenum type,
+                                const GLvoid *pixels,
+                                const struct gl_pixelstore_attrib *packing,
+                                struct gl_texture_object *texObj,
+                                struct gl_texture_image *texImage )
+{
+   const GLuint cube_face = target_to_face(target);
+   struct intel_texture_object *tex = intel_texture_object(texObj);
+
+   _mesa_store_texsubimage2d( ctx, target, level, xoffset, yoffset,
+                              width, height, format, type, pixels, packing,
+                              texObj, texImage );
+
+   /* Same bookkeeping as a full upload: whole level, one face. */
+   tex->dirty_images[cube_face] |= (1 << level);
+   tex->dirty |= (1 << cube_face);
+}
+
+/* CompressedTexImage2D hook: store via Mesa, then mark level/face dirty. */
+static void intelCompressedTexImage2D( GLcontext *ctx, GLenum target,
+                                       GLint level, GLint internalFormat,
+                                       GLint width, GLint height, GLint border,
+                                       GLsizei imageSize, const GLvoid *data,
+                                       struct gl_texture_object *texObj,
+                                       struct gl_texture_image *texImage )
+{
+   const GLuint cube_face = target_to_face(target);
+   struct intel_texture_object *tex = intel_texture_object(texObj);
+
+   _mesa_store_compressed_teximage2d( ctx, target, level, internalFormat,
+         width, height, border, imageSize, data, texObj, texImage );
+   tex->dirty_images[cube_face] |= (1 << level);
+   tex->dirty |= (1 << cube_face);
+}
+
+/* CompressedTexSubImage2D hook: store via Mesa, mark level/face dirty. */
+static void intelCompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
+                                          GLint xoffset, GLint yoffset,
+                                          GLsizei width, GLsizei height,
+                                          GLenum format,
+                                          GLsizei imageSize, const GLvoid *data,
+                                          struct gl_texture_object *texObj,
+                                          struct gl_texture_image *texImage )
+{
+   const GLuint cube_face = target_to_face(target);
+   struct intel_texture_object *tex = intel_texture_object(texObj);
+
+   _mesa_store_compressed_texsubimage2d( ctx, target, level, xoffset, yoffset,
+         width, height, format, imageSize, data, texObj, texImage );
+
+   tex->dirty_images[cube_face] |= (1 << level);
+   tex->dirty |= (1 << cube_face);
+}
+
+/* TexImage3D hook.  Forwards the caller's unpack state (packing) to
+ * Mesa, as every other upload hook here does; dirties face slot 0.
+ */
+static void intelTexImage3D( GLcontext *ctx, GLenum target, GLint level,
+                            GLint internalFormat,
+                            GLint width, GLint height, GLint depth,
+                            GLint border,
+                            GLenum format, GLenum type, const GLvoid *pixels,
+                            const struct gl_pixelstore_attrib *packing,
+                            struct gl_texture_object *texObj,
+                            struct gl_texture_image *texImage )
+{
+   struct intel_texture_object *intelObj = intel_texture_object(texObj);
+
+   _mesa_store_teximage3d(ctx, target, level, internalFormat,
+			  width, height, depth, border,
+			  format, type, pixels, packing, texObj, texImage);
+   intelObj->dirty_images[0] |= (1 << level);
+   intelObj->dirty |= 1 << 0;
+}
+
+/* TexSubImage3D hook: store via Mesa, then mark the level dirty (slot 0). */
+static void
+intelTexSubImage3D( GLcontext *ctx, GLenum target, GLint level,
+                    GLint xoffset, GLint yoffset, GLint zoffset,
+                    GLsizei width, GLsizei height, GLsizei depth,
+                    GLenum format, GLenum type,
+                    const GLvoid *pixels,
+                    const struct gl_pixelstore_attrib *packing,
+                    struct gl_texture_object *texObj,
+                    struct gl_texture_image *texImage )
+{
+   struct intel_texture_object *tex = intel_texture_object(texObj);
+
+   _mesa_store_texsubimage3d( ctx, target, level,
+                              xoffset, yoffset, zoffset, width, height, depth,
+                              format, type, pixels, packing, texObj, texImage );
+
+   tex->dirty_images[0] |= (1 << level);
+   tex->dirty |= (1 << 0);
+}
+
+
+/* NewTextureObject hook.  Returns NULL when the allocation fails. */
+static struct gl_texture_object *intelNewTextureObject( GLcontext *ctx,
+                                                        GLuint name,
+                                                        GLenum target )
+{
+   struct intel_texture_object *obj = CALLOC_STRUCT(intel_texture_object);
+
+   if (!obj)
+      return NULL;   /* nothing to initialise; let the caller report it */
+   _mesa_initialize_texture_object(&obj->base, name, target);
+   return &obj->base;
+}
+/* IsTextureResident hook; the real check is compiled out, so it always returns 1. */
+static GLboolean intelIsTextureResident(GLcontext *ctx,
+                                      struct gl_texture_object *texObj)
+{
+#if 0   /* disabled: residency test based on the miptree's region */
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_texture_object *intelObj = intel_texture_object(texObj);
+   
+   return 
+      intelObj->mt && 
+      intelObj->mt->region && 
+      intel_is_region_resident(intel, intelObj->mt->region);
+#endif
+   return 1;   /* every texture is reported resident */
+}
+
+/* TexParameter hook.  Parameters that feed the firstLevel/lastLevel
+ * calculation can invalidate the miptree, so they flag the object
+ * dirty; every other pname is ignored here.
+ */
+static void intelTexParameter( GLcontext *ctx,
+                               GLenum target,
+                               struct gl_texture_object *texObj,
+                               GLenum pname,
+                               const GLfloat *params )
+{
+   struct intel_texture_object *tex = intel_texture_object(texObj);
+
+   const GLboolean affects_levels = (pname == GL_TEXTURE_MIN_FILTER ||
+                                     pname == GL_TEXTURE_MAG_FILTER ||
+                                     pname == GL_TEXTURE_BASE_LEVEL ||
+                                     pname == GL_TEXTURE_MAX_LEVEL ||
+                                     pname == GL_TEXTURE_MIN_LOD ||
+                                     pname == GL_TEXTURE_MAX_LOD);
+
+   if (!affects_levels)
+      return;
+
+   /* Any non-zero dirty value makes intel_finalize_mipmap_tree()
+    * recompute the level range on its next run.
+    */
+   tex->dirty |= 1;
+}
+
+/* DeleteTexture hook: release the miptree (if any), then the Mesa object. */
+static void
+intel_delete_texture_object( GLcontext *ctx, struct gl_texture_object *texObj )
+{
+   struct intel_texture_object *tex = intel_texture_object(texObj);
+
+   if (tex->mt != NULL) {
+      intel_miptree_destroy(intel_context(ctx), tex->mt);
+   }
+
+   _mesa_delete_texture_object( ctx, texObj );
+}
+
+/* Install the texture-related driver hooks into a dd_function_table. */
+void intelInitTextureFuncs( struct dd_function_table *functions )
+{
+   /* Object lifetime, queries and parameters. */
+   functions->NewTextureObject        = intelNewTextureObject;
+   functions->DeleteTexture           = intel_delete_texture_object;
+   functions->IsTextureResident       = intelIsTextureResident;
+   functions->TexParameter            = intelTexParameter;
+   functions->TestProxyTexImage       = _mesa_test_proxy_teximage;
+   functions->UpdateTexturePalette    = NULL;
+   /* Uploads: driver wrappers around the Mesa store functions. */
+   functions->TexImage1D              = intelTexImage1D;
+   functions->TexImage2D              = intelTexImage2D;
+   functions->TexImage3D              = intelTexImage3D;
+   functions->TexSubImage1D           = intelTexSubImage1D;
+   functions->TexSubImage2D           = intelTexSubImage2D;
+   functions->TexSubImage3D           = intelTexSubImage3D;
+   functions->CompressedTexImage2D    = intelCompressedTexImage2D;
+   functions->CompressedTexSubImage2D = intelCompressedTexSubImage2D;
+   /* CopyTex* entry points are served by the swrast implementations. */
+   functions->CopyTexImage1D          = _swrast_copy_teximage1d;
+   functions->CopyTexImage2D          = _swrast_copy_teximage2d;
+   functions->CopyTexSubImage1D       = _swrast_copy_texsubimage1d;
+   functions->CopyTexSubImage2D       = _swrast_copy_texsubimage2d;
+   functions->CopyTexSubImage3D       = _swrast_copy_texsubimage3d;
+}
+
diff --git a/i965/intel_tex.h b/i965/intel_tex.h
new file mode 100644
index 0000000..e389d52
--- /dev/null
+++ b/i965/intel_tex.h
@@ -0,0 +1,42 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTELTEX_INC
+#define INTELTEX_INC
+
+#include "mtypes.h"
+#include "intel_context.h"
+
+/* Fills in the texture entries of a dd_function_table (intel_tex.c). */
+void intelInitTextureFuncs( struct dd_function_table *functions );
+
+/* Brings tObj's miptree up to date; GL_FALSE means it could not (intel_tex_validate.c). */
+GLuint intel_finalize_mipmap_tree( struct intel_context *intel,
+				   struct gl_texture_object *tObj );
+
+
+#endif
diff --git a/i965/intel_tex_validate.c b/i965/intel_tex_validate.c
new file mode 100644
index 0000000..cb23b9d
--- /dev/null
+++ b/i965/intel_tex_validate.c
@@ -0,0 +1,256 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "mtypes.h"
+#include "macros.h"
+
+#include "intel_context.h"
+#include "intel_mipmap_tree.h"
+#include "intel_tex.h"
+#include "bufmgr.h"
+
+/**
+ * Work out which mipmap levels have to be sent to the hardware and
+ * record them in intelObj->firstLevel / intelObj->lastLevel.
+ * Only the minification filter, GL_TEXTURE_BASE_LEVEL,
+ * GL_TEXTURE_MAX_LEVEL and the base image size are consulted; the LOD
+ * clamps are not.  Targets not handled below leave the stored range
+ * untouched.
+ */
+static void intel_calculate_first_last_level( struct intel_texture_object *intelObj )
+{
+   struct gl_texture_object *tObj = &intelObj->base;
+   const struct gl_texture_image * const baseImage =
+       tObj->Image[0][tObj->BaseLevel];
+   const GLboolean no_mipmapping = (tObj->MinFilter == GL_NEAREST ||
+                                    tObj->MinFilter == GL_LINEAR);
+   int firstLevel;   /* signed, like lastLevel */
+   int lastLevel;
+
+   switch (tObj->Target) {
+   case GL_TEXTURE_RECTANGLE_NV:
+   case GL_TEXTURE_4D_SGIS:
+      /* Only level 0 is used for these targets. */
+      firstLevel = 0;
+      lastLevel = 0;
+      break;
+
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_3D:
+   case GL_TEXTURE_CUBE_MAP:
+      firstLevel = tObj->BaseLevel;
+      if (no_mipmapping) {
+         /* GL_NEAREST and GL_LINEAR only care about the base level. */
+         lastLevel = firstLevel;
+         break;
+      }
+
+      /* Min/max LOD are not taken into account here: they are
+       * programmed as sampler state elsewhere and the same levels are
+       * uploaded regardless, so an application cannot use them to
+       * reduce texture memory pressure.
+       */
+      lastLevel = MIN2(tObj->BaseLevel + baseImage->MaxLog2,
+                       tObj->MaxLevel);
+      if (lastLevel < firstLevel)
+         lastLevel = firstLevel;   /* need at least one level */
+      break;
+
+   default:
+      return;
+   }
+
+   intelObj->firstLevel = firstLevel;
+   intelObj->lastLevel = lastLevel;
+}
+
+/* Hand one texture image to the object's miptree.  The last argument
+ * passed along is RowStride * Height * TexelBytes.
+ */
+static GLboolean copy_image_data_to_tree( struct intel_context *intel,
+                                          struct intel_texture_object *intelObj,
+                                          struct gl_texture_image *texImage,
+                                          GLuint face,
+                                          GLuint level)
+{
+   return intel_miptree_image_data(intel, intelObj->mt, face, level,
+                                   texImage->Data,
+                                   texImage->RowStride,
+                                   (texImage->RowStride *
+                                    texImage->Height *
+                                    texImage->TexFormat->TexelBytes));
+}
+
+/* Flag the object, and every level of each of its faces, as dirty. */
+static void intel_texture_invalidate( struct intel_texture_object *intelObj )
+{
+   const GLint face_count = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+   GLint f;
+   for (f = face_count - 1; f >= 0; f--)
+      intelObj->dirty_images[f] = ~0;
+   intelObj->dirty = ~0;
+}
+
+/* Buffer-manager invalidate callback; ptr is the texture object. */
+static void
+intel_texture_invalidate_cb( struct intel_context *intel, void *ptr )
+{
+   intel_texture_invalidate( ptr );
+}
+
+/* Bring the texture object's miptree up to date with its images.
+ * Returns GL_TRUE when the tree can be used; GL_FALSE when it cannot
+ * (frame-buffer texture, bordered image, tree creation or upload failure).
+ */
+GLuint intel_finalize_mipmap_tree( struct intel_context *intel,
+				   struct gl_texture_object *tObj )
+{
+   struct intel_texture_object *intelObj = intel_texture_object(tObj);
+   GLuint face, i;
+   GLuint nr_faces = 0;
+   struct gl_texture_image *firstImage;
+
+   if( tObj == intel->frame_buffer_texobj )
+      return GL_FALSE;
+   
+   /* We know/require this is true by now: 
+    */
+   assert(intelObj->base.Complete);
+
+   /* What levels must the tree include at a minimum?
+    */
+   if (intelObj->dirty) {
+      intel_calculate_first_last_level( intelObj );
+   }
+
+   firstImage = intelObj->base.Image[0][intelObj->firstLevel];
+
+   /* Fallback case:
+    */
+   if (firstImage->Border) {
+      if (intelObj->mt) {
+	 intel_miptree_destroy(intel, intelObj->mt);
+	 intelObj->mt = NULL;
+	 /* Set all images dirty:
+	  */
+	 intel_texture_invalidate(intelObj);
+      }
+      return GL_FALSE;
+   }
+
+   /* Check tree can hold all active levels.  Check tree matches
+    * target, imageFormat, etc.
+    */
+   if (intelObj->mt &&
+       (intelObj->mt->target != intelObj->base.Target ||
+	intelObj->mt->internal_format != firstImage->InternalFormat ||
+	intelObj->mt->first_level != intelObj->firstLevel ||
+	intelObj->mt->last_level != intelObj->lastLevel ||
+	intelObj->mt->width0 != firstImage->Width ||
+	intelObj->mt->height0 != firstImage->Height ||
+	intelObj->mt->depth0 != firstImage->Depth ||
+	intelObj->mt->cpp != firstImage->TexFormat->TexelBytes ||
+	intelObj->mt->compressed != firstImage->IsCompressed)) 
+   {
+      intel_miptree_destroy(intel, intelObj->mt);
+      intelObj->mt = NULL;
+      
+      /* Set all images dirty:
+       */
+      intel_texture_invalidate(intelObj);
+   }
+      
+
+   /* May need to create a new tree:
+    */
+   if (!intelObj->mt) {
+      intelObj->mt = intel_miptree_create(intel,
+					  intelObj->base.Target,
+					  firstImage->InternalFormat,
+					  intelObj->firstLevel,
+					  intelObj->lastLevel,
+					  firstImage->Width,
+					  firstImage->Height,
+					  firstImage->Depth,
+					  firstImage->TexFormat->TexelBytes,
+					  firstImage->IsCompressed);
+      if (!intelObj->mt)
+	 return GL_FALSE;   /* creation failed; dirty flags stay set for a retry */
+
+      /* Tell the buffer manager that we will manage the backing
+       * store, but we still want it to do fencing for us.
+       */
+      bmBufferSetInvalidateCB(intel, 
+			      intelObj->mt->region->buffer,
+			      intel_texture_invalidate_cb,
+			      intelObj,
+			      GL_FALSE);
+   }
+
+   /* Pull in any images not in the object's tree:
+    */
+   if (intelObj->dirty) {
+      nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+      for (face = 0; face < nr_faces; face++) {
+	 if (intelObj->dirty_images[face]) {
+	    for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) {
+	       struct gl_texture_image *texImage = intelObj->base.Image[face][i];
+
+	       /* Need to import images in main memory or held in other trees.
+		*/
+	       if (intelObj->dirty_images[face] & (1<<i) &&
+		   texImage) {
+
+		  if (INTEL_DEBUG & DEBUG_TEXTURE)
+		     _mesa_printf("copy data from image %d (%p) into object miptree\n",
+				  i,
+				  texImage->Data);
+
+		  if (!copy_image_data_to_tree(intel,
+					       intelObj,
+					       texImage,
+					       face,
+					       i))
+		     return GL_FALSE;
+
+	       }
+	    }
+	 }
+      }
+
+      /* Only clear the dirty flags if everything went ok:
+       */
+      for (face = 0; face < nr_faces; face++) {
+	 intelObj->dirty_images[face] = 0;
+      }
+
+      intelObj->dirty = 0;
+   }
+
+   return GL_TRUE;
+}
diff --git a/i965/server/i830_common.h b/i965/server/i830_common.h
new file mode 100644
index 0000000..fe2b8e8
--- /dev/null
+++ b/i965/server/i830_common.h
@@ -0,0 +1,232 @@
+/**************************************************************************
+
+Copyright 2001 VA Linux Systems Inc., Fremont, California.
+Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/i810/i830_common.h,v 1.1 2002/09/11 00:29:32 dawes Exp $ */
+
+#ifndef _I830_COMMON_H_
+#define _I830_COMMON_H_
+
+
+#define I830_NR_TEX_REGIONS 255	/* maximum due to use of chars for next/prev */
+#define I830_LOG_MIN_TEX_REGION_SIZE 14
+
+
+/* Driver specific DRM command indices
+ * NOTE: these are not OS specific, but they are driver specific
+ */
+#define DRM_I830_INIT                     0x00
+#define DRM_I830_FLUSH                    0x01
+#define DRM_I830_FLIP                     0x02
+#define DRM_I830_BATCHBUFFER              0x03
+#define DRM_I830_IRQ_EMIT                 0x04
+#define DRM_I830_IRQ_WAIT                 0x05
+#define DRM_I830_GETPARAM                 0x06
+#define DRM_I830_SETPARAM                 0x07
+#define DRM_I830_ALLOC                    0x08
+#define DRM_I830_FREE                     0x09
+#define DRM_I830_INIT_HEAP                0x0a
+#define DRM_I830_CMDBUFFER                0x0b
+#define DRM_I830_DESTROY_HEAP             0x0c
+#define DRM_I830_MMIO		       	  0x10
+
+typedef struct {   /* presumably the argument block for DRM_I830_INIT - confirm */
+   enum {
+      I830_INIT_DMA = 0x01,
+      I830_CLEANUP_DMA = 0x02,
+      I830_RESUME_DMA = 0x03
+   } func;   /* one of the three I830_*_DMA values above */
+   unsigned int mmio_offset;
+   int sarea_priv_offset;   /* the only signed member */
+   unsigned int ring_start;
+   unsigned int ring_end;
+   unsigned int ring_size;
+   unsigned int front_offset;
+   unsigned int back_offset;
+   unsigned int depth_offset;
+   unsigned int w;
+   unsigned int h;
+   unsigned int pitch;
+   unsigned int pitch_bits;
+   unsigned int back_pitch;
+   unsigned int depth_pitch;
+   unsigned int cpp;
+   unsigned int chipset;
+} drmI830Init;
+
+typedef struct {   /* presumably the driver-private shared-area layout - confirm */
+	drmTextureRegion texList[I830_NR_TEX_REGIONS+1];   /* 255 + 1 = 256 entries */
+        int last_upload;	/* last time texture was uploaded */
+        int last_enqueue;	/* last time a buffer was enqueued */
+        volatile int last_dispatch;	/* age of the most recently dispatched buffer */
+	int ctxOwner;		/* last context to upload state */
+	int texAge;
+        int pf_enabled;		/* is pageflipping allowed? */
+        int pf_active;               
+        int pf_current_page;	/* which buffer is being displayed? */
+        int perf_boxes;	        /* performance boxes to be displayed; I830_BOX_* flags */
+	int width, height;      /* screen size in pixels */
+
+	drm_handle_t front_handle;
+	int front_offset;
+	int front_size;
+
+	drm_handle_t back_handle;
+	int back_offset;
+	int back_size;
+
+	drm_handle_t depth_handle;
+	int depth_offset;
+	int depth_size;
+
+	drm_handle_t tex_handle;
+	int tex_offset;
+	int tex_size;
+	int log_tex_granularity;
+	int pitch;
+	int rotation;           /* 0, 90, 180 or 270 */
+	int rotated_offset;
+	int rotated_size;
+	int rotated_pitch;
+	int virtualX, virtualY;
+
+	unsigned int front_tiled;
+        unsigned int back_tiled;
+        unsigned int depth_tiled;
+        unsigned int rotated_tiled;
+        unsigned int rotated2_tiled;
+
+	int pipeA_x;
+	int pipeA_y;
+	int pipeA_w;
+	int pipeA_h;
+	int pipeB_x;
+	int pipeB_y;
+	int pipeB_w;
+	int pipeB_h;
+
+} drmI830Sarea;
+
+/* Flags for perf_boxes
+ */
+#define I830_BOX_RING_EMPTY    0x1 /* populated by kernel */
+#define I830_BOX_FLIP          0x2 /* populated by kernel */
+#define I830_BOX_WAIT          0x4 /* populated by kernel & client */
+#define I830_BOX_TEXTURE_LOAD  0x8 /* populated by kernel */
+#define I830_BOX_LOST_CONTEXT  0x10 /* populated by client */
+
+
+typedef struct {   /* presumably the argument block for DRM_I830_BATCHBUFFER - confirm */
+   	int start;		/* agp offset */
+	int used;		/* nr bytes in use */
+	int DR1;		/* hw flags for GFX_OP_DRAWRECT_INFO */
+        int DR4;		/* window origin for GFX_OP_DRAWRECT_INFO*/
+	int num_cliprects;	/* multipass with multiple cliprects? */
+        drm_clip_rect_t *cliprects; /* pointer to userspace cliprects */
+} drmI830BatchBuffer;
+
+typedef struct {   /* presumably the argument block for DRM_I830_CMDBUFFER - confirm */
+   	char *buf;		/* command data pointer - NOTE(review): comment used to say "agp offset", confirm */
+	int sz; 		/* nr bytes in use */
+	int DR1;		/* hw flags for GFX_OP_DRAWRECT_INFO */
+        int DR4;		/* window origin for GFX_OP_DRAWRECT_INFO*/
+	int num_cliprects;	/* multipass with multiple cliprects? */
+        drm_clip_rect_t *cliprects; /* pointer to userspace cliprects */
+} drmI830CmdBuffer;
+ 
+typedef struct {   /* presumably for DRM_I830_IRQ_EMIT - confirm */
+	int *irq_seq;   /* a pointer here, a plain value in drmI830IrqWait */
+} drmI830IrqEmit;
+
+typedef struct {   /* presumably for DRM_I830_IRQ_WAIT - confirm */
+	int irq_seq;
+} drmI830IrqWait;
+
+typedef struct {   /* presumably for DRM_I830_GETPARAM - confirm */
+	int param;      /* presumably one of the I830_PARAM_* values below */
+	int *value;     /* a pointer here, a plain value in drmI830SetParam */
+} drmI830GetParam;
+
+#define I830_PARAM_IRQ_ACTIVE     1
+#define I830_PARAM_ALLOW_BATCHBUFFER   2 
+
+typedef struct {   /* presumably for DRM_I830_SETPARAM - confirm */
+	int param;      /* presumably one of the I830_SETPARAM_* values below */
+	int value;
+} drmI830SetParam;
+
+#define I830_SETPARAM_USE_MI_BATCHBUFFER_START  1
+#define I830_SETPARAM_TEX_LRU_LOG_GRANULARITY   2
+#define I830_SETPARAM_ALLOW_BATCHBUFFER         3
+
+
+/* A memory manager for regions of shared memory:
+ */
+#define I830_MEM_REGION_AGP 1   /* the only region value defined in this header */
+
+typedef struct {   /* presumably for DRM_I830_ALLOC - confirm */
+	int region;
+	int alignment;
+	int size;
+	int *region_offset;	/* offset from start of fb or agp */
+} drmI830MemAlloc;
+
+typedef struct {   /* presumably for DRM_I830_FREE - confirm */
+	int region;
+	int region_offset;   /* a plain value here, a pointer in drmI830MemAlloc */
+} drmI830MemFree;
+
+typedef struct {   /* presumably for DRM_I830_INIT_HEAP - confirm */
+	int region;
+	int size;
+	int start;	
+} drmI830MemInitHeap;
+
+typedef struct {   /* presumably for DRM_I830_DESTROY_HEAP - confirm */
+	int region;
+} drmI830MemDestroyHeap;
+
+#define MMIO_READ  0   /* presumably the values for drmI830MMIO.read_write - confirm */
+#define MMIO_WRITE 1
+
+#define MMIO_REGS_IA_PRIMATIVES_COUNT           0   /* "PRIMATIVES" (sic): the spelling is part of the name */
+#define MMIO_REGS_IA_VERTICES_COUNT             1
+#define MMIO_REGS_VS_INVOCATION_COUNT           2
+#define MMIO_REGS_GS_PRIMITIVES_COUNT           3
+#define MMIO_REGS_GS_INVOCATION_COUNT           4
+#define MMIO_REGS_CL_PRIMITIVES_COUNT           5
+#define MMIO_REGS_CL_INVOCATION_COUNT           6
+#define MMIO_REGS_PS_INVOCATION_COUNT           7
+#define MMIO_REGS_PS_DEPTH_COUNT                8
+
+typedef struct {   /* presumably for DRM_I830_MMIO - confirm */
+        unsigned int read_write:1;
+        unsigned int reg:31;   /* presumably a MMIO_REGS_* index; 1 + 31 = 32 bits */
+        void __user *data;     /* NOTE(review): __user is not defined in this header */
+} drmI830MMIO;
+
+#endif /* _I830_COMMON_H_ */
diff --git a/i965/server/i830_dri.h b/i965/server/i830_dri.h
new file mode 100644
index 0000000..2295181
--- /dev/null
+++ b/i965/server/i830_dri.h
@@ -0,0 +1,63 @@
+/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/i810/i830_dri.h,v 1.4 2002/10/30 12:52:18 alanh Exp $ */
+
+#ifndef _I830_DRI_H
+#define _I830_DRI_H
+
+#include "xf86drm.h"
+#include "i830_common.h"
+
+#define I830_MAX_DRAWABLES 256
+
+#define I830_MAJOR_VERSION 1
+#define I830_MINOR_VERSION 3
+#define I830_PATCHLEVEL 0
+
+#define I830_REG_SIZE 0x80000
+
+typedef struct _I830DRIRec {
+   drm_handle_t regs;
+   drmSize regsSize;
+   /* unusedN slots: the trailing comment names the field each one replaced. */
+   drmSize unused1; /* backbufferSize */
+   drm_handle_t unused2; /* backbuffer */
+
+   drmSize unused3; /* depthbufferSize */
+   drm_handle_t unused4; /* depthbuffer */
+
+   drmSize unused5; /* rotatedSize */
+   drm_handle_t unused6; /* rotatedbuffer */
+
+   drm_handle_t unused7; /* textures */
+   int unused8; /* textureSize */
+
+   drm_handle_t unused9; /* agp_buffers */
+   drmSize unused10; /* agp_buf_size */
+
+   int deviceID;
+   int width;
+   int height;
+   int mem;
+   int cpp;
+   int bitsPerPixel;
+
+   int unused11[8]; /* was front/back/depth/rotated offset/pitch */
+
+   int unused12; /* logTextureGranularity */
+   int unused13; /* textureOffset */
+
+   int irq;
+   int sarea_priv_offset;
+} I830DRIRec, *I830DRIPtr;
+
+typedef struct {
+   /* Nothing here yet */
+   int dummy;
+} I830ConfigPrivRec, *I830ConfigPrivPtr;
+
+typedef struct {
+   /* Nothing here yet */
+   int dummy;
+} I830DRIContextRec, *I830DRIContextPtr;
+
+
+#endif
diff --git a/i965/server/intel.h b/i965/server/intel.h
new file mode 100644
index 0000000..d7858a2
--- /dev/null
+++ b/i965/server/intel.h
@@ -0,0 +1,328 @@
+#ifndef _INTEL_H_
+#define _INTEL_H_
+
+#include "xf86drm.h"		/* drm_handle_t, etc */
+
+/* Intel */
+#ifndef PCI_CHIP_I810
+#define PCI_CHIP_I810              0x7121
+#define PCI_CHIP_I810_DC100        0x7123
+#define PCI_CHIP_I810_E            0x7125
+#define PCI_CHIP_I815              0x1132
+#define PCI_CHIP_I810_BRIDGE       0x7120
+#define PCI_CHIP_I810_DC100_BRIDGE 0x7122
+#define PCI_CHIP_I810_E_BRIDGE     0x7124
+#define PCI_CHIP_I815_BRIDGE       0x1130
+#endif
+
+#define PCI_CHIP_845_G			0x2562
+#define PCI_CHIP_I830_M			0x3577
+
+#ifndef PCI_CHIP_I855_GM
+#define PCI_CHIP_I855_GM	   0x3582
+#define PCI_CHIP_I855_GM_BRIDGE	   0x3580
+#endif
+
+#ifndef PCI_CHIP_I865_G
+#define PCI_CHIP_I865_G		   0x2572
+#define PCI_CHIP_I865_G_BRIDGE	   0x2570
+#endif
+
+#ifndef PCI_CHIP_I915_G
+#define PCI_CHIP_I915_G		   0x2582
+#define PCI_CHIP_I915_G_BRIDGE	   0x2580
+#endif
+
+#ifndef PCI_CHIP_I915_GM
+#define PCI_CHIP_I915_GM	   0x2592
+#define PCI_CHIP_I915_GM_BRIDGE	   0x2590
+#endif
+
+#ifndef PCI_CHIP_E7221_G
+#define PCI_CHIP_E7221_G	   0x258A
+/* Same as I915_G_BRIDGE */
+#define PCI_CHIP_E7221_G_BRIDGE	   0x2580
+#endif
+
+#ifndef PCI_CHIP_I945_G
+#define PCI_CHIP_I945_G        0x2772
+#define PCI_CHIP_I945_G_BRIDGE 0x2770
+#endif
+
+#ifndef PCI_CHIP_I945_GM
+#define PCI_CHIP_I945_GM        0x27A2
+#define PCI_CHIP_I945_GM_BRIDGE 0x27A0
+#endif
+
+#define IS_I810(pI810) ((pI810)->Chipset == PCI_CHIP_I810 ||	\
+			(pI810)->Chipset == PCI_CHIP_I810_DC100 || \
+			(pI810)->Chipset == PCI_CHIP_I810_E)
+#define IS_I815(pI810) ((pI810)->Chipset == PCI_CHIP_I815)
+#define IS_I830(pI810) ((pI810)->Chipset == PCI_CHIP_I830_M)
+#define IS_845G(pI810) ((pI810)->Chipset == PCI_CHIP_845_G)
+#define IS_I85X(pI810)  ((pI810)->Chipset == PCI_CHIP_I855_GM)
+#define IS_I852(pI810)  ((pI810)->Chipset == PCI_CHIP_I855_GM && ((pI810)->variant == I852_GM || (pI810)->variant == I852_GME))
+#define IS_I855(pI810)  ((pI810)->Chipset == PCI_CHIP_I855_GM && ((pI810)->variant == I855_GM || (pI810)->variant == I855_GME))
+#define IS_I865G(pI810) ((pI810)->Chipset == PCI_CHIP_I865_G)
+/* The argument is parenthesized in every test so any pointer expression works. */
+#define IS_I915G(pI810) ((pI810)->Chipset == PCI_CHIP_I915_G || (pI810)->Chipset == PCI_CHIP_E7221_G)
+#define IS_I915GM(pI810) ((pI810)->Chipset == PCI_CHIP_I915_GM)
+#define IS_I945G(pI810) ((pI810)->Chipset == PCI_CHIP_I945_G)
+#define IS_I945GM(pI810) ((pI810)->Chipset == PCI_CHIP_I945_GM)
+#define IS_I9XX(pI810) (IS_I915G(pI810) || IS_I915GM(pI810) || IS_I945G(pI810) || IS_I945GM(pI810))
+/* Union of the I830, I85X, I915GM and I945GM tests. */
+#define IS_MOBILE(pI810) (IS_I830(pI810) || IS_I85X(pI810) || IS_I915GM(pI810) || IS_I945GM(pI810))
+
+#define I830_GMCH_CTRL		0x52
+
+
+#define I830_GMCH_GMS_MASK			0x70
+#define I830_GMCH_GMS_DISABLED		0x00
+#define I830_GMCH_GMS_LOCAL			0x10
+#define I830_GMCH_GMS_STOLEN_512	0x20
+#define I830_GMCH_GMS_STOLEN_1024	0x30
+#define I830_GMCH_GMS_STOLEN_8192	0x40
+
+#define I855_GMCH_GMS_MASK			(0x7 << 4)
+#define I855_GMCH_GMS_DISABLED			0x00
+#define I855_GMCH_GMS_STOLEN_1M			(0x1 << 4)
+#define I855_GMCH_GMS_STOLEN_4M			(0x2 << 4)
+#define I855_GMCH_GMS_STOLEN_8M			(0x3 << 4)
+#define I855_GMCH_GMS_STOLEN_16M		(0x4 << 4)
+#define I855_GMCH_GMS_STOLEN_32M		(0x5 << 4)
+#define I915G_GMCH_GMS_STOLEN_48M		(0x6 << 4)
+#define I915G_GMCH_GMS_STOLEN_64M		(0x7 << 4)
+
+typedef unsigned char Bool;
+#define TRUE 1
+#define FALSE 0
+
+#define PIPE_NONE	(0<<0)	/* bit flags; parenthesized so they stay intact inside larger expressions */
+#define PIPE_CRT	(1<<0)
+#define PIPE_TV		(1<<1)
+#define PIPE_DFP	(1<<2)
+#define PIPE_LFP	(1<<3)
+#define PIPE_CRT2	(1<<4)
+#define PIPE_TV2	(1<<5)
+#define PIPE_DFP2	(1<<6)
+#define PIPE_LFP2	(1<<7)
+
+typedef struct _I830MemPool *I830MemPoolPtr;
+typedef struct _I830MemRange *I830MemRangePtr;
+typedef struct _I830MemRange {
+   long Start;
+   long End;
+   long Size;			/* AllocFromPool() stores End - Start here */
+   unsigned long Physical;
+   unsigned long Offset;		/* Offset of AGP-allocated portion */
+   unsigned long Alignment;
+   drm_handle_t Key;		/* -1 means no AGP handle; BindAgpRange() then skips binding */
+   unsigned long Pitch;		/* added field; MakeTiles() multiplies it by cpp to get a byte pitch */
+   I830MemPoolPtr Pool;		/* pool the range came from; NULL when AllocFromAGP() filled it */
+} I830MemRange;
+
+typedef struct _I830MemPool {
+   I830MemRange Total;
+   I830MemRange Free;
+   I830MemRange Fixed;
+   I830MemRange Allocated;
+} I830MemPool;
+
+typedef struct {
+   int tail_mask;		/* I830AllocateMemory() sets this to mem.Size - 1 */
+   I830MemRange mem;		/* memory range backing the ring */
+   unsigned char *virtual_start;
+   int head;			/* RING_HEAD masked with I830_HEAD_MASK, read in I830SetRingRegs() */
+   int tail;			/* RING_TAIL as read back in I830SetRingRegs() */
+   int space;			/* head - (tail + 8), plus mem.Size when that is negative */
+} I830RingBuffer;
+
+typedef struct _I830Rec {
+   unsigned char *MMIOBase;
+   unsigned char *FbBase;
+   int cpp;
+
+   unsigned int bios_version;
+
+   /* These are set in PreInit and never changed. */
+   long FbMapSize;
+   long TotalVideoRam;
+   I830MemRange StolenMemory;		/* pre-allocated memory */
+   long BIOSMemorySize;			/* min stolen pool size */
+   int BIOSMemSizeLoc;
+
+   /* These change according to what has been allocated. */
+   long FreeMemory;
+   I830MemRange MemoryAperture;
+   I830MemPool StolenPool;
+   long allocatedMemory;
+
+   /* Regions allocated either from the above pools, or from agpgart. */
+   /* for single and dual head configurations */
+   I830MemRange FrontBuffer;
+   I830MemRange FrontBuffer2;
+   I830MemRange Scratch;
+   I830MemRange Scratch2;
+
+   I830RingBuffer *LpRing;
+
+   I830MemRange BackBuffer;
+   I830MemRange DepthBuffer;
+   I830MemRange TexMem;
+   int TexGranularity;
+   I830MemRange ContextMem;
+   int drmMinor;
+   Bool have3DWindows;
+
+   Bool NeedRingBufferLow;
+   Bool allowPageFlip;
+   Bool disableTiling;
+
+   int Chipset;
+   unsigned long LinearAddr;
+   unsigned long MMIOAddr;
+
+   drmSize           registerSize;     /**< \brief MMIO register map size */
+   drm_handle_t         registerHandle;   /**< \brief MMIO register map handle */
+  //   IOADDRESS ioBase;
+   int               irq;              /**< \brief IRQ number */
+   int GttBound;
+
+   drm_handle_t ring_map;
+   unsigned int Fence[8];
+
+} I830Rec;
+
+/*
+ * 12288 is set as the maximum, chosen because it is enough for
+ * 1920x1440@32bpp with a 2048 pixel line pitch with some to spare.
+ */
+#define I830_MAXIMUM_VBIOS_MEM		12288
+#define I830_DEFAULT_VIDEOMEM_2D	(MB(32) / 1024)
+#define I830_DEFAULT_VIDEOMEM_3D	(MB(64) / 1024)
+
+/* Flags for memory allocation function */
+#define FROM_ANYWHERE			0x00000000
+#define FROM_POOL_ONLY			0x00000001
+#define FROM_NEW_ONLY			0x00000002
+#define FROM_MASK			0x0000000f
+
+#define ALLOCATE_AT_TOP			0x00000010
+#define ALLOCATE_AT_BOTTOM		0x00000020
+#define FORCE_GAPS			0x00000040
+
+#define NEED_PHYSICAL_ADDR		0x00000100
+#define ALIGN_BOTH_ENDS			0x00000200
+#define FORCE_LOW			0x00000400
+
+#define ALLOC_NO_TILING			0x00001000
+#define ALLOC_INITIAL			0x00002000
+
+#define ALLOCATE_DRY_RUN		0x80000000
+
+/* Chipset registers for VIDEO BIOS memory RW access */
+#define _855_DRAM_RW_CONTROL 0x58
+#define _845_DRAM_RW_CONTROL 0x90
+#define DRAM_WRITE    0x33330000
+
+#define KB(x) ((x) * 1024)
+#define MB(x) ((x) * KB(1024))
+
+#define GTT_PAGE_SIZE			KB(4)
+#define ROUND_TO(x, y)			(((x) + (y) - 1) / (y) * (y))
+#define ROUND_DOWN_TO(x, y)		((x) / (y) * (y))
+#define ROUND_TO_PAGE(x)		ROUND_TO((x), GTT_PAGE_SIZE)
+#define ROUND_TO_MB(x)			ROUND_TO((x), MB(1))
+#define PRIMARY_RINGBUFFER_SIZE		KB(128)
+
+
+/* Ring buffer registers, p277, overview p19
+ */
+#define LP_RING     0x2030
+#define HP_RING     0x2040
+
+#define RING_TAIL      0x00
+#define TAIL_ADDR           0x000FFFF8
+#define I830_TAIL_MASK	    0x001FFFF8
+
+#define RING_HEAD      0x04
+#define HEAD_WRAP_COUNT     0xFFE00000
+#define HEAD_WRAP_ONE       0x00200000
+#define HEAD_ADDR           0x001FFFFC
+#define I830_HEAD_MASK      0x001FFFFC
+
+#define RING_START     0x08
+#define START_ADDR          0x03FFFFF8
+#define I830_RING_START_MASK	0xFFFFF000
+
+#define RING_LEN       0x0C
+#define RING_NR_PAGES       0x001FF000 
+#define I830_RING_NR_PAGES	0x001FF000
+#define RING_REPORT_MASK    0x00000006
+#define RING_REPORT_64K     0x00000002
+#define RING_REPORT_128K    0x00000004
+#define RING_NO_REPORT      0x00000000
+#define RING_VALID_MASK     0x00000001
+#define RING_VALID          0x00000001
+#define RING_INVALID        0x00000000
+
+
+/* Fence/Tiling ranges [0..7]
+ */
+#define FENCE            0x2000
+#define FENCE_NR         8
+
+#define I915G_FENCE_START_MASK	0x0ff00000
+
+#define I830_FENCE_START_MASK	0x07f80000
+
+#define FENCE_START_MASK    0x03F80000
+#define FENCE_X_MAJOR       0x00000000
+#define FENCE_Y_MAJOR       0x00001000
+#define FENCE_SIZE_MASK     0x00000700
+#define FENCE_SIZE_512K     0x00000000
+#define FENCE_SIZE_1M       0x00000100
+#define FENCE_SIZE_2M       0x00000200
+#define FENCE_SIZE_4M       0x00000300
+#define FENCE_SIZE_8M       0x00000400
+#define FENCE_SIZE_16M      0x00000500
+#define FENCE_SIZE_32M      0x00000600
+#define FENCE_SIZE_64M	    0x00000700
+#define I915G_FENCE_SIZE_1M       0x00000000
+#define I915G_FENCE_SIZE_2M       0x00000100
+#define I915G_FENCE_SIZE_4M       0x00000200
+#define I915G_FENCE_SIZE_8M       0x00000300
+#define I915G_FENCE_SIZE_16M      0x00000400
+#define I915G_FENCE_SIZE_32M      0x00000500
+#define I915G_FENCE_SIZE_64M	0x00000600
+#define I915G_FENCE_SIZE_128M	0x00000700
+#define FENCE_PITCH_1       0x00000000
+#define FENCE_PITCH_2       0x00000010
+#define FENCE_PITCH_4       0x00000020
+#define FENCE_PITCH_8       0x00000030
+#define FENCE_PITCH_16      0x00000040
+#define FENCE_PITCH_32      0x00000050
+#define FENCE_PITCH_64	    0x00000060
+#define FENCE_VALID         0x00000001
+
+#include <mmio.h>
+
+/* Expansions are fully parenthesized so each one behaves as a single expression. */
+#  define MMIO_IN8(base, offset) \
+	(*(volatile unsigned char *)(((unsigned char*)(base)) + (offset)))
+#  define MMIO_IN32(base, offset) read_MMIO_LE32(base, offset)
+#  define MMIO_OUT8(base, offset, val) \
+	(*(volatile unsigned char *)(((unsigned char*)(base)) + (offset)) = (val))
+#  define MMIO_OUT32(base, offset, val) \
+	(*(volatile unsigned int *)(void *)(((unsigned char*)(base)) + (offset)) = CPU_TO_LE32(val))
+
+
+				/* Memory mapped register access macros */
+#define INREG8(addr)        MMIO_IN8(MMIO, addr)
+#define INREG(addr)         MMIO_IN32(MMIO, addr)
+#define OUTREG8(addr, val)  MMIO_OUT8(MMIO, addr, val)
+#define OUTREG(addr, val)   MMIO_OUT32(MMIO, addr, val)
+
+#define DSPABASE		0x70184
+
+#endif
diff --git a/i965/server/intel_dri.c b/i965/server/intel_dri.c
new file mode 100644
index 0000000..169fdbe
--- /dev/null
+++ b/i965/server/intel_dri.c
@@ -0,0 +1,1282 @@
+/**
+ * \file server/intel_dri.c
+ * \brief File to perform the device-specific initialization tasks typically
+ * done in the X server.
+ *
+ * Here they are converted to run in the client (or perhaps a standalone
+ * process), and to work with the frame buffer device rather than the X
+ * server infrastructure.
+ * 
+ * Copyright (C) 2006 Dave Airlie (airlied@linux.ie)
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sub license, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial portions
+ of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND/OR THEIR SUPPLIERS BE LIABLE FOR
+ ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include "driver.h"
+#include "drm.h"
+
+#include "intel.h"
+#include "i830_dri.h"
+
+#include "memops.h"
+#include "pciaccess.h"
+
+static size_t drm_page_size;
+static int nextTile = 0;
+#define xf86DrvMsg(...) do {} while(0)
+
+static const int pitches[] = {
+  128 * 8,
+  128 * 16,
+  128 * 32,
+  128 * 64,
+  0
+};
+
+static Bool I830DRIDoMappings(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea);
+
+static unsigned long
+GetBestTileAlignment(unsigned long size)
+{
+   /* Smallest value 512KB * 2^k that is >= size, capped at 64MB. */
+   unsigned long i;
+
+   /* Stopping at the cap gives the same result as clamping afterwards, but
+    * the shift can no longer wrap to 0 and loop forever for a huge size. */
+   for (i = KB(512); i < size && i < MB(64); i <<= 1)
+      ;
+
+   return i;
+}
+
+static void SetFenceRegs(const DRIDriverContext *ctx, I830Rec *pI830)
+{
+  /* OUTREG() expands to an access through a local that must be named MMIO. */
+  unsigned char *MMIO = ctx->MMIOAddress;
+  int slot;
+  /* Write each cached fence value to its register and log it. */
+  for (slot = 0; slot != 8; ++slot) {
+    OUTREG(FENCE + slot * 4, pI830->Fence[slot]);
+    fprintf(stderr,"Fence Register : %x\n", pI830->Fence[slot]);
+  }
+}
+
+/* Compute the value for fence (tiling) register 'nr' and cache it in
+ * pI830->Fence[nr]; no hardware register is written here.  An out-of-range
+ * 'nr' changes nothing; any other rejected argument leaves the slot at 0.
+ *
+ * Tiled memory is good; to miss out on it less often, the front buffer could
+ * move off the first segment's 'guaranteed' alignment or be discontiguous.
+ */
+static void
+SetFence(const DRIDriverContext *ctx, I830Rec *pI830,
+	 int nr, unsigned int start, unsigned int pitch,
+         unsigned int size)
+{
+   unsigned int val;
+   unsigned int fence_mask = 0;
+   unsigned int fence_pitch;
+
+   if (nr < 0 || nr > 7) {
+      fprintf(stderr,
+		 "SetFence: fence %d out of range\n",nr);
+      return;
+   }
+
+   pI830->Fence[nr] = 0;
+   /* Start-address bits outside the chipset's start mask must be clear. */
+   if (IS_I9XX(pI830))
+   	fence_mask = ~I915G_FENCE_START_MASK;
+   else
+   	fence_mask = ~I830_FENCE_START_MASK;
+
+   if (start & fence_mask) {
+      fprintf(stderr,
+		 "SetFence: %d: start (0x%08x) is not %s aligned\n",
+		 nr, start, (IS_I9XX(pI830)) ? "1MB" : "512k");
+      return;
+   }
+   /* The start must also be a multiple of the fence size. */
+   if (start % size) {
+      fprintf(stderr,
+		 "SetFence: %d: start (0x%08x) is not size (%dk) aligned\n",
+		 nr, start, size / 1024);
+      return;
+   }
+
+   if (pitch & 127) {
+      fprintf(stderr,
+		 "SetFence: %d: pitch (%d) not a multiple of 128 bytes\n",
+		 nr, pitch);
+      return;
+   }
+   /* Base value: start address, X-major flag, valid bit. */
+   val = (start | FENCE_X_MAJOR | FENCE_VALID);
+   /* Encode the size; the I9XX and older chipsets use different codes. */
+   if (IS_I9XX(pI830)) {
+   	switch (size) {
+	   case MB(1):
+      		val |= I915G_FENCE_SIZE_1M;
+      		break;
+   	   case MB(2):
+      		val |= I915G_FENCE_SIZE_2M;
+      		break;
+   	   case MB(4):
+      		val |= I915G_FENCE_SIZE_4M;
+      		break;
+   	   case MB(8):
+      		val |= I915G_FENCE_SIZE_8M;
+      		break;
+   	   case MB(16):
+      		val |= I915G_FENCE_SIZE_16M;
+      		break;
+   	   case MB(32):
+      		val |= I915G_FENCE_SIZE_32M;
+      		break;
+   	   case MB(64):
+      		val |= I915G_FENCE_SIZE_64M;
+      		break;
+   	   default:
+      		fprintf(stderr,
+		 "SetFence: %d: illegal size (%d kByte)\n", nr, size / 1024);
+      		return;
+   	}
+    } else {
+   	switch (size) {
+	   case KB(512):
+      		val |= FENCE_SIZE_512K;
+      		break;
+	   case MB(1):
+      		val |= FENCE_SIZE_1M;
+      		break;
+   	   case MB(2):
+      		val |= FENCE_SIZE_2M;
+      		break;
+   	   case MB(4):
+      		val |= FENCE_SIZE_4M;
+      		break;
+   	   case MB(8):
+      		val |= FENCE_SIZE_8M;
+      		break;
+   	   case MB(16):
+      		val |= FENCE_SIZE_16M;
+      		break;
+   	   case MB(32):
+      		val |= FENCE_SIZE_32M;
+      		break;
+   	   case MB(64):
+      		val |= FENCE_SIZE_64M;
+      		break;
+   	   default:
+      		fprintf(stderr,
+		 "SetFence: %d: illegal size (%d kByte)\n", nr, size / 1024);
+      		return;
+   	}
+   }
+   /* The pitch is counted in 512-byte units on I9XX, 128-byte units otherwise. */
+   if (IS_I9XX(pI830))
+	fence_pitch = pitch / 512;
+   else
+	fence_pitch = pitch / 128;
+
+   switch (fence_pitch) {
+   case 1:
+      val |= FENCE_PITCH_1;
+      break;
+   case 2:
+      val |= FENCE_PITCH_2;
+      break;
+   case 4:
+      val |= FENCE_PITCH_4;
+      break;
+   case 8:
+      val |= FENCE_PITCH_8;
+      break;
+   case 16:
+      val |= FENCE_PITCH_16;
+      break;
+   case 32:
+      val |= FENCE_PITCH_32;
+      break;
+   case 64:
+      val |= FENCE_PITCH_64;
+      break;
+   default:
+      fprintf(stderr,
+		 "SetFence: %d: illegal pitch (%d)\n", nr, pitch);
+      return;
+   }
+   /* Every check passed: store the finished value. */
+   pI830->Fence[nr] = val;
+}
+
+static Bool
+MakeTiles(const DRIDriverContext *ctx, I830Rec *pI830, I830MemRange *pMem)
+{
+   /* Pitch handed to SetFence(): pMem->Pitch scaled by ctx->cpp. */
+   int bytePitch = pMem->Pitch * ctx->cpp;
+   /* Number of alignment-sized pieces needed to cover the region. */
+   int pieces = ROUND_TO(pMem->Size, pMem->Alignment) / pMem->Alignment;
+   int n = 0;
+
+   /* A region that needs four or more pieces is not tiled at all. */
+   if (pieces >= 4)
+      return FALSE;
+   /* One fence per piece, numbered by the file-wide nextTile counter. */
+   while (n < pieces) {
+      SetFence(ctx, pI830, nextTile, pMem->Start + n * pMem->Alignment,
+               bytePitch, pMem->Alignment);
+      n++;
+      nextTile++;
+   }
+   return TRUE;
+}
+
+static void I830SetupMemoryTiling(const DRIDriverContext *ctx, I830Rec *pI830)
+{
+  /*
+   * Recompute the cached fence values from scratch: clear all of them,
+   * restart fence numbering, then try to tile the back and depth buffers.
+   * Only buffers aligned to at least 512KB are considered.
+   */
+  memset(pI830->Fence, 0, sizeof(pI830->Fence));
+  nextTile = 0;
+
+  if (pI830->BackBuffer.Alignment >= KB(512)) {
+    if (!MakeTiles(ctx, pI830, &pI830->BackBuffer)) {
+      fprintf(stderr,
+		 "MakeTiles failed for the back buffer.\n");
+      /* Page flipping is turned off when the back buffer is not tiled. */
+      pI830->allowPageFlip = FALSE;
+    } else {
+      fprintf(stderr,
+		 "Activating tiled memory for the back buffer.\n");
+    }
+  }
+
+  /* The depth buffer is handled the same way, but failure only logs. */
+  if (pI830->DepthBuffer.Alignment >= KB(512)) {
+    if (!MakeTiles(ctx, pI830, &pI830->DepthBuffer)) {
+      fprintf(stderr,
+		 "MakeTiles failed for the depth buffer.\n");
+    } else {
+      fprintf(stderr,
+		 "Activating tiled memory for the depth buffer.\n");
+    }
+  }
+}
+
+static int I830DetectMemory(const DRIDriverContext *ctx, I830Rec *pI830)
+{
+  /* Decode the GMCH control word into the stolen-memory size in bytes. */
+  struct pci_device host_bridge;
+  uint32_t gmch_ctrl = 0;
+  int memsize = 0;
+  int range;
+
+  memset(&host_bridge, 0, sizeof(host_bridge));
+  /* A failed config read must not leave an indeterminate value to decode;
+   * 0 matches no stolen-size case, so "no video memory" gets reported. */
+  if (pci_device_cfg_read_u32(&host_bridge, &gmch_ctrl, I830_GMCH_CTRL))
+    gmch_ctrl = 0;
+
+  /* We need to reduce the stolen size, by the GTT and the popup.
+   * The GTT varies according to the FbMapSize and the popup is 4KB */
+  range = (ctx->shared.fbSize / (1024*1024)) + 4;
+
+   if (IS_I85X(pI830) || IS_I865G(pI830) || IS_I9XX(pI830)) {
+      switch (gmch_ctrl & I830_GMCH_GMS_MASK) {
+      case I855_GMCH_GMS_STOLEN_1M:
+	 memsize = MB(1) - KB(range);
+	 break;
+      case I855_GMCH_GMS_STOLEN_4M:
+	 memsize = MB(4) - KB(range);
+	 break;
+      case I855_GMCH_GMS_STOLEN_8M:
+	 memsize = MB(8) - KB(range);
+	 break;
+      case I855_GMCH_GMS_STOLEN_16M:
+	 memsize = MB(16) - KB(range);
+	 break;
+      case I855_GMCH_GMS_STOLEN_32M:
+	 memsize = MB(32) - KB(range);
+	 break;
+      case I915G_GMCH_GMS_STOLEN_48M:
+	 if (IS_I9XX(pI830))
+	    memsize = MB(48) - KB(range);
+	 break;
+      case I915G_GMCH_GMS_STOLEN_64M:
+	 if (IS_I9XX(pI830))
+	    memsize = MB(64) - KB(range);
+	 break;
+      }
+   } else {
+      switch (gmch_ctrl & I830_GMCH_GMS_MASK) {
+      case I830_GMCH_GMS_STOLEN_512:
+	 memsize = KB(512) - KB(range);
+	 break;
+      case I830_GMCH_GMS_STOLEN_1024:
+	 memsize = MB(1) - KB(range);
+	 break;
+      case I830_GMCH_GMS_STOLEN_8192:
+	 memsize = MB(8) - KB(range);
+	 break;
+      case I830_GMCH_GMS_LOCAL:
+	 memsize = 0;
+	 xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
+		    "Local memory found, but won't be used.\n");
+	 break;
+      }
+   }
+   if (memsize > 0)
+     fprintf(stderr, "detected %d kB stolen memory.\n", memsize / 1024);
+   else
+     fprintf(stderr, "no video memory detected.\n");
+   return memsize;
+}
+
+static int AgpInit(const DRIDriverContext *ctx, I830Rec *info)
+{
+  /* Acquire and enable AGP: returns 1 on success, 0 on failure.  The mode is
+   * fixed at 0x4 (ctx->agpmode is not consulted); 'info' is unused. */
+  unsigned long mode = 0x4;
+
+  if (drmAgpAcquire(ctx->drmFD) < 0) {
+    fprintf(stderr, "[gart] AGP not available\n");
+    return 0;
+  }
+  if (drmAgpEnable(ctx->drmFD, mode) < 0) {
+    fprintf(stderr, "[gart] AGP not enabled\n");
+    drmAgpRelease(ctx->drmFD);
+    return 0;
+  }
+  /* Report the mode that was actually passed to drmAgpEnable(). */
+  fprintf(stderr, "[gart] AGP enabled at %lux\n", mode);
+  return 1;
+}
+
+/*
+ * Carve 'size' bytes out of pool's free range into 'result'.  Returns the
+ * bytes consumed (alignment padding included), or 0 on failure.
+ */
+static unsigned long
+AllocFromPool(const DRIDriverContext *ctx, I830Rec *pI830, 
+	      I830MemRange *result, I830MemPool *pool,
+	      long size, unsigned long alignment, int flags)
+{
+   long needed, start, end;
+
+   if (!result || !pool || !size)
+      return 0;
+   /* ROUND_TO() divides by the alignment; default 0 to 4 like AllocFromAGP(). */
+   if (!alignment)
+      alignment = 4;
+   /* Calculate how much space is needed. */
+   if (alignment <= GTT_PAGE_SIZE)
+      needed = size;
+   else {
+	 start = ROUND_TO(pool->Free.Start, alignment);
+	 end = ROUND_TO(start + size, alignment);
+	 needed = end - pool->Free.Start;
+   }
+   if (needed > pool->Free.Size)
+     return 0;
+
+   result->Start = ROUND_TO(pool->Free.Start, alignment);
+   pool->Free.Start += needed;
+   result->End = pool->Free.Start;
+   pool->Free.Size = pool->Free.End - pool->Free.Start;
+   result->Size = result->End - result->Start;
+   result->Pool = pool;
+   result->Alignment = alignment;
+   return needed;
+}
+
+static unsigned long AllocFromAGP(const DRIDriverContext *ctx, I830Rec *pI830, long size, unsigned long alignment, I830MemRange  *result)
+{
+   /* Allocates 'size' bytes of AGP memory and places them at the aligned
+    * start of the remaining aperture.  Returns 'size', or 0 on failure. */
+   unsigned long base, next, top;
+   int err;
+
+   if (!result || !size)
+      return 0;
+
+   /* An alignment of 0 is treated as 4. */
+   if (alignment == 0)
+     alignment = 4;
+
+   base = ROUND_TO(pI830->MemoryAperture.Start, alignment);
+   next = ROUND_TO(base + size, alignment);
+   top = pI830->MemoryAperture.End;
+
+   err = drmAgpAlloc(ctx->drmFD, size, 0, &(result->Physical), (drm_handle_t *)&(result->Key));
+   if (err) {
+     fprintf(stderr,"drmAgpAlloc failed %d\n", err);
+     return 0;
+   }
+
+   /* The aperture now begins after the (end-aligned) new range. */
+   pI830->allocatedMemory += size;
+   pI830->MemoryAperture.Start = next;
+   pI830->MemoryAperture.End = top;
+   pI830->MemoryAperture.Size = top - next;
+   result->Start = base;
+   result->End = base + size;
+   result->Size = size;
+   result->Offset = base;
+   result->Alignment = alignment;
+   result->Pool = NULL;
+   return size;
+}
+
+unsigned long
+I830AllocVidMem(const DRIDriverContext *ctx, I830Rec *pI830, I830MemRange *result, I830MemPool *pool, long size, unsigned long alignment, int flags)
+{
+  /* Fill 'result' from 'pool' when it has room, otherwise from AGP.
+   * Returns the helper's non-zero result on success, or 0 on failure. */
+  unsigned long ret;	/* was int, which could truncate the helper's result */
+
+  if (!result)
+    return 0;
+
+   /* Make sure these are initialised. */
+   result->Size = 0;
+   result->Key = -1;
+
+   if (!size)
+      return 0;
+
+   /* No pool (NULL) or a pool that is too small: go straight to AGP. */
+   if (!pool || pool->Free.Size < size)
+     return AllocFromAGP(ctx, pI830, size, alignment, result);
+
+   /* The pool can still refuse once alignment padding is counted. */
+   ret = AllocFromPool(ctx, pI830, result, pool, size, alignment, flags);
+   if (ret == 0)
+     return AllocFromAGP(ctx, pI830, size, alignment, result);
+   return ret;
+}
+
+static Bool BindAgpRange(const DRIDriverContext *ctx, I830MemRange *mem)
+{
+  /* A missing range is an error. */
+  if (mem == NULL)
+    return FALSE;
+  /* A key of -1 means there is nothing to bind, which counts as success. */
+  if (mem->Key == -1)
+    return TRUE;
+  return (drmAgpBind(ctx->drmFD, mem->Key, mem->Offset) == 0) ? TRUE : FALSE;
+}
+
+/* Allocate, in order: the primary ring buffer, the front, back and depth
+ * buffers (identical size and tile alignment), the 32KB context area and
+ * 32MB of texture memory.  All but the texture memory go through
+ * I830AllocVidMem() against the stolen pool; textures come straight from
+ * AllocFromAGP().  Returns FALSE as soon as one allocation falls short.
+ * Original TODO notes: put the ring buffer in low memory; align buffers.
+ */
+static Bool
+I830AllocateMemory(const DRIDriverContext *ctx, I830Rec *pI830)
+{
+  unsigned long size, ret;
+  unsigned long lines, lineSize, align;
+
+  /* allocate ring buffer */
+  memset(pI830->LpRing, 0, sizeof(I830RingBuffer));
+  pI830->LpRing->mem.Key = -1;
+
+  size = PRIMARY_RINGBUFFER_SIZE;
+  
+  ret = I830AllocVidMem(ctx, pI830, &pI830->LpRing->mem, &pI830->StolenPool, size, 0x1000, 0);
+  
+  if (ret != size)
+  {
+    fprintf(stderr,"unable to allocate ring buffer %ld\n", ret);
+    return FALSE;
+  }
+
+  pI830->LpRing->tail_mask = pI830->LpRing->mem.Size - 1;
+
+  
+  /* allocate front buffer */
+  memset(&(pI830->FrontBuffer), 0, sizeof(pI830->FrontBuffer));
+  pI830->FrontBuffer.Key = -1;
+  pI830->FrontBuffer.Pitch = ctx->shared.virtualWidth;
+
+  align = KB(512);  /* overwritten by GetBestTileAlignment() below before any use */
+
+  lineSize = ctx->shared.virtualWidth * ctx->cpp;
+  lines = (ctx->shared.virtualHeight + 15) / 16 * 16; /* height rounded up to a multiple of 16 */
+  size = lineSize * lines;
+  size = ROUND_TO_PAGE(size);
+
+  align = GetBestTileAlignment(size);
+
+  ret = I830AllocVidMem(ctx, pI830, &pI830->FrontBuffer, &pI830->StolenPool, size, align, 0);
+  if (ret < size)
+  {
+    fprintf(stderr,"unable to allocate front buffer %ld\n", ret);
+    return FALSE;
+  }
+  /* back buffer: same size and alignment as the front buffer */
+  memset(&(pI830->BackBuffer), 0, sizeof(pI830->BackBuffer));
+  pI830->BackBuffer.Key = -1;
+  pI830->BackBuffer.Pitch = ctx->shared.virtualWidth;
+
+  ret = I830AllocVidMem(ctx, pI830, &pI830->BackBuffer, &pI830->StolenPool, size, align, 0);
+  if (ret < size)
+  {
+    fprintf(stderr,"unable to allocate back buffer %ld\n", ret);
+    return FALSE;
+  }
+  /* depth buffer: same size and alignment again */
+  memset(&(pI830->DepthBuffer), 0, sizeof(pI830->DepthBuffer));
+  pI830->DepthBuffer.Key = -1;
+  pI830->DepthBuffer.Pitch = ctx->shared.virtualWidth;
+
+  ret = I830AllocVidMem(ctx, pI830, &pI830->DepthBuffer, &pI830->StolenPool, size, align, 0);
+  if (ret < size)
+  {
+    fprintf(stderr,"unable to allocate depth buffer %ld\n", ret);
+    return FALSE;
+  }
+  /* context area: 32KB, still allocated with the frame-buffer alignment */
+  memset(&(pI830->ContextMem), 0, sizeof(pI830->ContextMem));
+  pI830->ContextMem.Key = -1;
+  size = KB(32);
+
+  ret = I830AllocVidMem(ctx, pI830, &pI830->ContextMem, &pI830->StolenPool, size, align, 0);
+  if (ret < size)
+  {
+    fprintf(stderr,"unable to allocate context buffer %ld\n", ret);
+    return FALSE;
+  }
+  /* texture memory: 32MB taken directly from AGP, bypassing the pool */
+  memset(&(pI830->TexMem), 0, sizeof(pI830->TexMem));
+  pI830->TexMem.Key = -1;
+
+  size = 32768 * 1024;
+  ret = AllocFromAGP(ctx, pI830, size, align, &pI830->TexMem);
+  if (ret < size)
+  {
+    fprintf(stderr,"unable to allocate texture memory %ld\n", ret);
+    return FALSE;
+  }
+
+  return TRUE;
+}
+
+static Bool
+I830BindMemory(const DRIDriverContext *ctx, I830Rec *pI830)
+{
+  /* Bind every allocated range in a fixed order.  The && chain stops at
+   * the first range that fails to bind, so later ranges are not attempted
+   * after a failure. */
+  Bool bound;
+
+  bound = BindAgpRange(ctx, &pI830->LpRing->mem) &&
+          BindAgpRange(ctx, &pI830->FrontBuffer) &&
+          BindAgpRange(ctx, &pI830->BackBuffer) &&
+          BindAgpRange(ctx, &pI830->DepthBuffer) &&
+          BindAgpRange(ctx, &pI830->ContextMem) &&
+          BindAgpRange(ctx, &pI830->TexMem);
+
+  /* TRUE only when all six ranges were bound. */
+  return bound;
+}
+
+static Bool
+I830CleanupDma(const DRIDriverContext *ctx)
+{
+   drmI830Init req;
+   int err;
+
+   /* Only the function code is set; every other field stays zero. */
+   memset(&req, 0, sizeof(req));
+   req.func = I830_CLEANUP_DMA;
+
+   err = drmCommandWrite(ctx->drmFD, DRM_I830_INIT, &req, sizeof(req));
+   if (err != 0)
+     fprintf(stderr, "I830 Dma Cleanup Failed\n");
+
+   return (err == 0) ? TRUE : FALSE;
+}
+
+static Bool
+I830InitDma(const DRIDriverContext *ctx, I830Rec *pI830)
+{
+   /* Describe the ring and the three buffers to the kernel driver through
+    * a DRM_I830_INIT request; TRUE means the request was accepted. */
+   const I830RingBuffer *lp = pI830->LpRing;
+   drmI830Init req;
+
+   memset(&req, 0, sizeof(req));
+   req.func = I830_INIT_DMA;
+
+   /* Ring placement: LinearAddr is added to the ring's Start and End. */
+   req.ring_start = lp->mem.Start + pI830->LinearAddr;
+   req.ring_end = lp->mem.End + pI830->LinearAddr;
+   req.ring_size = lp->mem.Size;
+
+   req.mmio_offset = (unsigned int)ctx->MMIOStart;
+   req.sarea_priv_offset = sizeof(drm_sarea_t);
+
+   /* Screen geometry and buffer layout. */
+   req.w = ctx->shared.virtualWidth;
+   req.h = ctx->shared.virtualHeight;
+   req.cpp = ctx->cpp;
+   req.pitch = ctx->shared.virtualWidth;
+   req.front_offset = pI830->FrontBuffer.Start;
+   req.back_offset = pI830->BackBuffer.Start;
+   req.back_pitch = pI830->BackBuffer.Pitch;
+   req.depth_offset = pI830->DepthBuffer.Start;
+   req.depth_pitch = pI830->DepthBuffer.Pitch;
+
+   if (drmCommandWrite(ctx->drmFD, DRM_I830_INIT, &req, sizeof(req)) == 0)
+      return TRUE;
+
+   fprintf(stderr, "I830 Dma Initialization Failed\n");
+   return FALSE;
+}
+
+static int I830CheckDRMVersion( const DRIDriverContext *ctx,
+				  I830Rec *pI830 )
+{
+   /* Oldest acceptable kernel module: 1.<min_minor>.<min_patch>. */
+   const int min_minor = 4;
+   const int min_patch = 0;
+   drmVersionPtr ver = drmGetVersion(ctx->drmFD);
+   int rejected;
+
+   /* Missing version information is not treated as a failure. */
+   if (!ver)
+      return 1;
+
+   rejected = ver->version_major != 1 ||
+	      ver->version_minor < min_minor ||
+	      (ver->version_minor == min_minor &&
+	       ver->version_patchlevel < min_patch);
+
+   if (rejected) {
+       /* Incompatible drm version */
+       fprintf(stderr,
+	       "[dri] I830DRIScreenInit failed because of a version "
+	       "mismatch.\n"
+	       "[dri] i915.o kernel module version is %d.%d.%d "
+	       "but version 1.%d.%d or newer is needed.\n"
+	       "[dri] Disabling DRI.\n",
+	       ver->version_major,
+	       ver->version_minor,
+	       ver->version_patchlevel,
+	       min_minor,
+	       min_patch);
+   } else {
+       /* Record the accepted minor version. */
+       pI830->drmMinor = ver->version_minor;
+   }
+   drmFreeVersion(ver);
+   return rejected ? 0 : 1;
+}
+
+static void
+I830SetRingRegs(const DRIDriverContext *ctx, I830Rec *pI830)
+{
+  unsigned int itemp;
+  unsigned char *MMIO = ctx->MMIOAddress; /* used implicitly by OUTREG()/INREG() */
+  /* Program the LP ring registers from pI830->LpRing, then fences and DSPABASE. */
+   OUTREG(LP_RING + RING_LEN, 0);
+   OUTREG(LP_RING + RING_TAIL, 0);
+   OUTREG(LP_RING + RING_HEAD, 0);
+   /* Warn (but carry on) if the start offset has bits outside the mask. */
+   if ((long)(pI830->LpRing->mem.Start & I830_RING_START_MASK) !=
+       pI830->LpRing->mem.Start) {
+      fprintf(stderr,
+		 "I830SetRingRegs: Ring buffer start (%lx) violates its "
+		 "mask (%x)\n", pI830->LpRing->mem.Start, I830_RING_START_MASK);
+   }
+   /* Don't care about the old value.  Reserved bits must be zero anyway. */
+   itemp = pI830->LpRing->mem.Start & I830_RING_START_MASK;
+   OUTREG(LP_RING + RING_START, itemp);
+   /* Same kind of check for the length field, which is written as Size - 4096. */
+   if (((pI830->LpRing->mem.Size - 4096) & I830_RING_NR_PAGES) !=
+       pI830->LpRing->mem.Size - 4096) {
+      fprintf(stderr,
+		 "I830SetRingRegs: Ring buffer size - 4096 (%lx) violates its "
+		 "mask (%x)\n", pI830->LpRing->mem.Size - 4096,
+		 I830_RING_NR_PAGES);
+   }
+   /* Don't care about the old value.  Reserved bits must be zero anyway. */
+   itemp = (pI830->LpRing->mem.Size - 4096) & I830_RING_NR_PAGES;
+   itemp |= (RING_NO_REPORT | RING_VALID);
+   OUTREG(LP_RING + RING_LEN, itemp);
+   /* Refresh the software copy of head/tail and the space between them. */
+   pI830->LpRing->head = INREG(LP_RING + RING_HEAD) & I830_HEAD_MASK;
+   pI830->LpRing->tail = INREG(LP_RING + RING_TAIL);
+   pI830->LpRing->space = pI830->LpRing->head - (pI830->LpRing->tail + 8);
+   if (pI830->LpRing->space < 0)
+      pI830->LpRing->space += pI830->LpRing->mem.Size;
+   /* Write the fence values cached in pI830->Fence[] to the hardware. */
+   SetFenceRegs(ctx, pI830);
+   
+   /* RESET THE DISPLAY PIPE TO POINT TO THE FRONTBUFFER - hacky
+      hacky hacky */
+   OUTREG(DSPABASE, pI830->FrontBuffer.Start + pI830->LinearAddr);
+
+}
+
+static Bool
+I830SetParam(const DRIDriverContext *ctx, int param, int value)
+{
+   /* Send one parameter/value pair with DRM_I830_SETPARAM. */
+   drmI830SetParam req;
+
+   memset(&req, 0, sizeof(req));
+   req.param = param;
+   req.value = value;
+
+   if (drmCommandWrite(ctx->drmFD, DRM_I830_SETPARAM, &req, sizeof(req)) == 0)
+      return TRUE;
+
+   fprintf(stderr, "I830 SetParam Failed\n");
+   return FALSE;
+}
+
+/*
+ * Add DRM maps for the front, back, depth and texture regions described by
+ * sarea, writing each returned handle back into sarea.  The front buffer is
+ * added as DRM_FRAME_BUFFER at front_offset + LinearAddr and its handle and
+ * size are also recorded in ctx->shared; the rest are added as DRM_AGP.
+ * Returns FALSE at the first drmAddMap failure, TRUE when all four succeed.
+ */
+static Bool
+I830DRIMapScreenRegions(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea)
+{
+   int rc;
+
+   fprintf(stderr, "[drm] Mapping front buffer\n");
+
+   /* Front buffer: the only region offset by the linear framebuffer address. */
+   rc = drmAddMap(ctx->drmFD,
+                  (drm_handle_t)(sarea->front_offset + pI830->LinearAddr),
+                  sarea->front_size, DRM_FRAME_BUFFER, 0,
+                  &sarea->front_handle);
+   if (rc < 0) {
+      fprintf(stderr, "[drm] drmAddMap(front_handle) failed. Disabling DRI\n");
+      return FALSE;
+   }
+   ctx->shared.hFrameBuffer = sarea->front_handle;
+   ctx->shared.fbSize = sarea->front_size;
+   fprintf(stderr, "[drm] Front Buffer = 0x%08x\n", sarea->front_handle);
+
+   /* Back buffer. */
+   rc = drmAddMap(ctx->drmFD, (drm_handle_t)(sarea->back_offset),
+                  sarea->back_size, DRM_AGP, 0, &sarea->back_handle);
+   if (rc < 0) {
+      fprintf(stderr, "[drm] drmAddMap(back_handle) failed. Disabling DRI\n");
+      return FALSE;
+   }
+   fprintf(stderr, "[drm] Back Buffer = 0x%08x\n", sarea->back_handle);
+
+   /* Depth buffer. */
+   rc = drmAddMap(ctx->drmFD, (drm_handle_t)sarea->depth_offset,
+                  sarea->depth_size, DRM_AGP, 0, &sarea->depth_handle);
+   if (rc < 0) {
+      fprintf(stderr, "[drm] drmAddMap(depth_handle) failed. Disabling DRI\n");
+      return FALSE;
+   }
+   fprintf(stderr, "[drm] Depth Buffer = 0x%08x\n", sarea->depth_handle);
+
+   /* Texture memory. */
+   rc = drmAddMap(ctx->drmFD, (drm_handle_t)sarea->tex_offset,
+                  sarea->tex_size, DRM_AGP, 0, &sarea->tex_handle);
+   if (rc < 0) {
+      fprintf(stderr, "[drm] drmAddMap(tex_handle) failed. Disabling DRI\n");
+      return FALSE;
+   }
+   fprintf(stderr, "[drm] textures = 0x%08x\n", sarea->tex_handle);
+
+   return TRUE;
+}
+
+
+/* Remove the DRM map behind each non-zero region handle in sarea and zero the
+ * handle afterwards, so regions already removed are skipped.  pI830 is unused. */
+static void
+I830DRIUnmapScreenRegions(const DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea)
+{
+   if (sarea->front_handle != 0) {
+      drmRmMap(ctx->drmFD, sarea->front_handle);
+      sarea->front_handle = 0;
+   }
+   if (sarea->back_handle != 0) {
+      drmRmMap(ctx->drmFD, sarea->back_handle);
+      sarea->back_handle = 0;
+   }
+   if (sarea->depth_handle != 0) {
+      drmRmMap(ctx->drmFD, sarea->depth_handle);
+      sarea->depth_handle = 0;
+   }
+   if (sarea->tex_handle != 0) {
+      drmRmMap(ctx->drmFD, sarea->tex_handle);
+      sarea->tex_handle = 0;
+   }
+}
+
+/* Ask the kernel to start its simple memory manager over the AGP texture
+ * region, then pass it the texture LRU granularity.  Failure is only logged. */
+static void
+I830InitTextureHeap(const DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea)
+{
+   drmI830MemInitHeap heap;
+
+   heap.region = I830_MEM_REGION_AGP;
+   heap.start  = 0;
+   heap.size   = sarea->tex_size;
+
+   if (drmCommandWrite(ctx->drmFD, DRM_I830_INIT_HEAP, &heap, sizeof(heap))) {
+      fprintf(stderr, "[drm] Failed to initialized agp heap manager\n");
+      return;
+   }
+
+   fprintf(stderr, "[drm] Initialized kernel agp heap manager, %d\n",
+           sarea->tex_size);
+   /* The result of this SETPARAM is not checked. */
+   I830SetParam(ctx, I830_SETPARAM_TEX_LRU_LOG_GRANULARITY,
+                sarea->log_tex_granularity);
+}
+
+/*
+ * Map the ring buffer, initialise DMA, map the screen regions, start the
+ * kernel texture heap and install the IRQ handler.
+ *
+ * Returns FALSE if the ring map, I830InitDma(), the screen-region maps or the
+ * IRQ handler installation fail; texture-heap and SETPARAM failures are not
+ * treated as fatal here.
+ */
+static Bool
+I830DRIDoMappings(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea)
+{
+   if (drmAddMap(ctx->drmFD,
+                 (drm_handle_t)pI830->LpRing->mem.Start,
+                 pI830->LpRing->mem.Size, DRM_AGP, 0,
+                 &pI830->ring_map) < 0) {
+      fprintf(stderr, "[drm] drmAddMap(ring_map) failed. Disabling DRI\n");
+      return FALSE;
+   }
+   fprintf(stderr, "[drm] ring buffer = 0x%08x\n", pI830->ring_map);
+
+   if (I830InitDma(ctx, pI830) == FALSE)
+      return FALSE;
+
+   /* Without the region maps the later buffer clear (which maps through
+    * these handles) cannot work, so a failure here is treated as fatal. */
+   if (I830DRIMapScreenRegions(ctx, pI830, sarea) == FALSE)
+      return FALSE;
+
+   I830InitTextureHeap(ctx, pI830, sarea);
+
+   /* Every chip except 845G and I830_M is told to use MI_BATCHBUFFER_START. */
+   if (ctx->pciDevice != PCI_CHIP_845_G &&
+       ctx->pciDevice != PCI_CHIP_I830_M) {
+      I830SetParam(ctx, I830_SETPARAM_USE_MI_BATCHBUFFER_START, 1);
+   }
+
+   /* Look up the device IRQ and install the DRM interrupt handler. */
+   pI830->irq = drmGetInterruptFromBusID(ctx->drmFD, ctx->pciBus,
+                                         ctx->pciDevice, ctx->pciFunc);
+   if (drmCtlInstHandler(ctx->drmFD, pI830->irq)) {
+      fprintf(stderr, "[drm] failure adding irq handler\n");
+      pI830->irq = 0;
+      return FALSE;
+   }
+   fprintf(stderr, "[drm] dma control initialized, using IRQ %d\n",
+           pI830->irq);
+
+   fprintf(stderr, "[dri] visual configs initialized\n");
+
+   return TRUE;
+}
+
+/*
+ * Zero the front and back buffers.  Each buffer is mapped with drmMap()
+ * through its SAREA handle, filled with zero bytes via drimemsetio() and
+ * unmapped again.
+ *
+ * Returns FALSE as soon as one drmMap() fails (a buffer cleared before that
+ * point stays cleared) and TRUE once both are done.  pI830 is not used.
+ */
+static Bool
+I830ClearScreen(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea)
+{
+   drmAddress mapped;
+   int rc;
+
+   /* Front buffer: map, zero, unmap. */
+   rc = drmMap(ctx->drmFD,
+               sarea->front_handle,
+               sarea->front_size,
+               &mapped);
+   if (rc != 0) {
+      fprintf(stderr, "Unable to map front buffer\n");
+      return FALSE;
+   }
+
+   drimemsetio((char *)mapped, 0, sarea->front_size);
+   drmUnmap(mapped, sarea->front_size);
+
+   /* Back buffer: same sequence. */
+   rc = drmMap(ctx->drmFD,
+               sarea->back_handle,
+               sarea->back_size,
+               &mapped);
+   if (rc != 0) {
+      fprintf(stderr, "Unable to map back buffer\n");
+      return FALSE;
+   }
+
+   drimemsetio((char *)mapped, 0, sarea->back_size);
+   drmUnmap(mapped, sarea->back_size);
+
+   return TRUE;
+}
+
+/*
+ * Bring up the DRM side of the screen: open the "i915" device, create and
+ * map the SAREA, add the MMIO map, check the DRM version, take the hardware
+ * lock under a new server context, set up and bind memory, publish the
+ * buffer layout in the private SAREA and in ctx->driverClientMsg, then map
+ * and clear the buffers and program the ring registers.
+ *
+ * Returns TRUE on success.  Every failing step returns FALSE immediately;
+ * nothing acquired earlier (fd, maps, lock, allocations) is released here.
+ */
+static Bool
+I830ScreenInit(DRIDriverContext *ctx, I830Rec *pI830)
+{
+   I830DRIPtr pI830DRI;
+   drmI830Sarea *pSAREAPriv;
+   int err;
+
+   drm_page_size = getpagesize();
+
+   pI830->registerSize = ctx->MMIOSize;
+   /* This is a hack for now.  We have to have more than a 4k page here
+    * because of the size of the state.  However, the state should be
+    * in a per-context mapping.  This will be added in the Mesa 3.5 port
+    * of the I830 driver.
+    */
+   ctx->shared.SAREASize = SAREA_MAX;
+
+   /* Note that drmOpen will try to load the kernel module, if needed. */
+   ctx->drmFD = drmOpen("i915", NULL);
+   if (ctx->drmFD < 0) {
+      fprintf(stderr, "[drm] drmOpen failed\n");
+      return FALSE;
+   }
+
+   err = drmSetBusid(ctx->drmFD, ctx->pciBusID);
+   if (err < 0) {
+      fprintf(stderr, "[drm] drmSetBusid failed (%d, %s), %s\n",
+              ctx->drmFD, ctx->pciBusID, strerror(-err));
+      return FALSE;
+   }
+
+   /* Create the SAREA (flagged DRM_CONTAINS_LOCK), then map and zero it. */
+   if (drmAddMap(ctx->drmFD, 0, ctx->shared.SAREASize, DRM_SHM,
+                 DRM_CONTAINS_LOCK, &ctx->shared.hSAREA) < 0) {
+      fprintf(stderr, "[drm] drmAddMap failed\n");
+      return FALSE;
+   }
+   fprintf(stderr, "[drm] added %d byte SAREA at 0x%08x\n",
+           ctx->shared.SAREASize, ctx->shared.hSAREA);
+
+   if (drmMap(ctx->drmFD,
+              ctx->shared.hSAREA,
+              ctx->shared.SAREASize,
+              (drmAddressPtr)(&ctx->pSAREA)) < 0) {
+      fprintf(stderr, "[drm] drmMap failed\n");
+      return FALSE;
+   }
+   memset(ctx->pSAREA, 0, ctx->shared.SAREASize);
+   fprintf(stderr, "[drm] mapped SAREA 0x%08x to %p, size %d\n",
+           ctx->shared.hSAREA, ctx->pSAREA, ctx->shared.SAREASize);
+
+   /* Add a map for the MMIO register range, flagged DRM_READ_ONLY. */
+   if (drmAddMap(ctx->drmFD, ctx->MMIOStart, ctx->MMIOSize, DRM_REGISTERS,
+                 DRM_READ_ONLY, &pI830->registerHandle) < 0) {
+      fprintf(stderr, "[drm] drmAddMap mmio failed\n");
+      return FALSE;
+   }
+   fprintf(stderr, "[drm] register handle = 0x%08x\n", pI830->registerHandle);
+
+   if (!I830CheckDRMVersion(ctx, pI830))
+      return FALSE;
+
+   /* Create a 'server' context so we can grab the lock for
+    * initialization ioctls.
+    */
+   err = drmCreateContext(ctx->drmFD, &ctx->serverContext);
+   if (err != 0) {
+      fprintf(stderr, "%s: drmCreateContext failed %d\n", __FUNCTION__, err);
+      return FALSE;
+   }
+
+   DRM_LOCK(ctx->drmFD, ctx->pSAREA, ctx->serverContext, 0);
+
+   /* The driver-private SAREA block sits right after drm_sarea_t. */
+   pSAREAPriv = (drmI830Sarea *)(((char *)ctx->pSAREA) +
+                                 sizeof(drm_sarea_t));
+   memset(pSAREAPriv, 0, sizeof(*pSAREAPriv));
+
+   /* Stolen memory occupies [0, Size); the aperture range follows it. */
+   pI830->StolenMemory.Size = I830DetectMemory(ctx, pI830);
+   pI830->StolenMemory.Start = 0;
+   pI830->StolenMemory.End = pI830->StolenMemory.Size;
+
+   pI830->MemoryAperture.Start = pI830->StolenMemory.End;
+   pI830->MemoryAperture.End = KB(40000);
+   pI830->MemoryAperture.Size =
+      pI830->MemoryAperture.End - pI830->MemoryAperture.Start;
+
+   pI830->StolenPool.Fixed = pI830->StolenMemory;
+   pI830->StolenPool.Total = pI830->StolenMemory;
+   pI830->StolenPool.Free = pI830->StolenPool.Total;
+   pI830->FreeMemory = pI830->StolenPool.Total.Size;
+
+   if (!AgpInit(ctx, pI830))
+      return FALSE;
+   if (I830AllocateMemory(ctx, pI830) == FALSE)
+      return FALSE;
+   if (I830BindMemory(ctx, pI830) == FALSE)
+      return FALSE;
+
+   /* Publish the buffer layout through the private SAREA. */
+   pSAREAPriv->front_offset = pI830->FrontBuffer.Start;
+   pSAREAPriv->front_size = pI830->FrontBuffer.Size;
+   pSAREAPriv->width = ctx->shared.virtualWidth;
+   pSAREAPriv->height = ctx->shared.virtualHeight;
+   pSAREAPriv->pitch = ctx->shared.virtualWidth;
+   pSAREAPriv->virtualX = ctx->shared.virtualWidth;
+   pSAREAPriv->virtualY = ctx->shared.virtualHeight;
+   pSAREAPriv->back_offset = pI830->BackBuffer.Start;
+   pSAREAPriv->back_size = pI830->BackBuffer.Size;
+   pSAREAPriv->depth_offset = pI830->DepthBuffer.Start;
+   pSAREAPriv->depth_size = pI830->DepthBuffer.Size;
+   pSAREAPriv->tex_offset = pI830->TexMem.Start;
+   pSAREAPriv->tex_size = pI830->TexMem.Size;
+   pSAREAPriv->log_tex_granularity = pI830->TexGranularity;
+
+   /* Build the client message.  The allocation must be checked because the
+    * fields below are written straight through the returned pointer. */
+   ctx->driverClientMsg = malloc(sizeof(I830DRIRec));
+   if (ctx->driverClientMsg == NULL) {
+      fprintf(stderr, "[dri] failed to allocate driver client message\n");
+      return FALSE;
+   }
+   ctx->driverClientMsgSize = sizeof(I830DRIRec);
+   pI830DRI = (I830DRIPtr)ctx->driverClientMsg;
+   pI830DRI->deviceID = pI830->Chipset;
+   pI830DRI->regsSize = I830_REG_SIZE;
+   pI830DRI->width = ctx->shared.virtualWidth;
+   pI830DRI->height = ctx->shared.virtualHeight;
+   /* NOTE(review): fbSize is read here before I830DRIMapScreenRegions()
+    * stores front_size into it — confirm the earlier value is intended. */
+   pI830DRI->mem = ctx->shared.fbSize;
+   pI830DRI->cpp = ctx->cpp;
+   pI830DRI->backOffset = pI830->BackBuffer.Start;
+   pI830DRI->backPitch = pI830->BackBuffer.Pitch;
+
+   pI830DRI->depthOffset = pI830->DepthBuffer.Start;
+   pI830DRI->depthPitch = pI830->DepthBuffer.Pitch;
+
+   pI830DRI->fbOffset = pI830->FrontBuffer.Start;
+   pI830DRI->fbStride = pI830->FrontBuffer.Pitch;
+
+   pI830DRI->bitsPerPixel = ctx->bpp;
+   pI830DRI->sarea_priv_offset = sizeof(drm_sarea_t);
+
+   if (I830DRIDoMappings(ctx, pI830, pSAREAPriv) == FALSE)
+      return FALSE;
+
+   I830SetupMemoryTiling(ctx, pI830);
+
+   /* Quick hack to clear the front & back buffers.  Could also use
+    * the clear ioctl to do this, but would need to setup hw state
+    * first.
+    */
+   I830ClearScreen(ctx, pI830, pSAREAPriv);
+
+   I830SetRingRegs(ctx, pI830);
+
+   return TRUE;
+}
+
+
+/**
+ * \brief Validate the fbdev mode.
+ *
+ * \param ctx display handle (not examined).
+ *
+ * \return always one.
+ *
+ * No checks are performed and no state is saved; every mode is accepted.
+ */
+static int i830ValidateMode( const DRIDriverContext *ctx )
+{
+   (void) ctx;
+
+   return 1;
+}
+
+/**
+ * \brief Examine mode returned by fbdev.
+ *
+ * \param ctx display handle; ctx->driverPrivate supplies the I830Rec.
+ *
+ * \return always one.
+ *
+ * Reprograms the ring registers through I830SetRingRegs(), presumably to
+ * undo whatever the fbdev mode set clobbered.
+ *
+ * \sa i830ValidateMode().
+ */
+static int i830PostValidateMode( const DRIDriverContext *ctx )
+{
+   I830SetRingRegs(ctx, (I830Rec *) ctx->driverPrivate);
+
+   return 1;
+}
+
+
+/**
+ * \brief Initialize the framebuffer device mode
+ *
+ * \param ctx display handle.
+ *
+ * \return one on success, or zero on failure.
+ *
+ * Rounds the virtual width up for the pixel size, bumps it to the first
+ * entry of \c pitches that is large enough, allocates the driver private
+ * record and ring buffer descriptor, and calls I830ScreenInit().
+ */
+static int i830InitFBDev( DRIDriverContext *ctx )
+{
+   I830Rec *pI830 = calloc(1, sizeof(I830Rec));
+   int width = ctx->shared.virtualWidth;
+   int i;
+
+   /* Both allocations are dereferenced below, so fail cleanly when they fail. */
+   if (pI830 == NULL)
+      return 0;
+   pI830->LpRing = calloc(1, sizeof(I830RingBuffer));
+   if (pI830->LpRing == NULL) {
+      free(pI830);
+      return 0;
+   }
+
+   /* Round the width up: 128 pixels at 1 byte/pixel, 32 at 2, 16 at 3 or 4. */
+   switch (ctx->bpp / 8) {
+   case 1: width = (width + 127) & ~127; break;
+   case 2: width = (width +  31) &  ~31; break;
+   case 3:
+   case 4: width = (width +  15) &  ~15; break;
+   default: break;   /* other depths keep the width unchanged */
+   }
+   ctx->shared.virtualWidth = width;
+   ctx->shared.Width = ctx->shared.virtualWidth;
+
+   /* pitches[] is zero-terminated; use the first entry >= the width. */
+   for (i = 0; pitches[i] != 0; i++) {
+      if (pitches[i] >= ctx->shared.virtualWidth) {
+         ctx->shared.virtualWidth = pitches[i];
+         break;
+      }
+   }
+
+   ctx->driverPrivate = (void *)pI830;
+   pI830->Chipset = ctx->chipset;
+   pI830->LinearAddr = ctx->FBStart;
+
+   return I830ScreenInit(ctx, pI830) ? 1 : 0;
+}
+
+
+/**
+ * \brief The screen is being closed, so clean up any state and free any
+ * resources used by the DRI.
+ *
+ * \param ctx display handle.
+ *
+ * Uninstalls the IRQ handler if one is installed, cleans up DMA, removes the
+ * region maps, unmaps the SAREA, closes the DRM device file descriptor and
+ * frees the driver private data together with its ring buffer record.
+ */
+static void i830HaltFBDev( DRIDriverContext *ctx )
+{
+   I830Rec *pI830 = ctx->driverPrivate;
+   drmI830Sarea *pSAREAPriv =
+      (drmI830Sarea *)(((char *)ctx->pSAREA) + sizeof(drm_sarea_t));
+
+   /* driverPrivate may be NULL (it is checked before the free below). */
+   if (pI830 != NULL && pI830->irq) {
+      drmCtlUninstHandler(ctx->drmFD);
+      pI830->irq = 0;
+   }
+   I830CleanupDma(ctx);
+
+   I830DRIUnmapScreenRegions(ctx, pI830, pSAREAPriv);
+   drmUnmap(ctx->pSAREA, ctx->shared.SAREASize);
+   drmClose(ctx->drmFD);
+
+   if (pI830 != NULL) {
+      free(pI830->LpRing);   /* allocated separately in i830InitFBDev() */
+      free(pI830);
+      ctx->driverPrivate = 0;
+   }
+}
+
+
+extern void i810NotifyFocus( int );
+
+/**
+ * \brief Exported driver interface for Mini GLX.
+ *
+ * \sa DRIDriverRec (entries are positional; see it for what each slot means).
+ */
+const struct DRIDriverRec __driDriver = {
+   i830ValidateMode,
+   i830PostValidateMode,
+   i830InitFBDev,
+   i830HaltFBDev,
+   NULL, /* I830EngineShutdown is not hooked up */
+   NULL, /* I830EngineRestore is not hooked up */
+#ifndef _EMBEDDED
+   0, /* slot left empty outside _EMBEDDED builds */
+#else
+   i810NotifyFocus, /* NOTE(review): i810-named hook in the i830 table — confirm */
+#endif
+};
diff --git a/shared/intel_tex_layout.c b/shared/intel_tex_layout.c
new file mode 100644
index 0000000..39a443c
--- /dev/null
+++ b/shared/intel_tex_layout.c
@@ -0,0 +1,102 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  *   Michel Dänzer <michel@tungstengraphics.com>
+  */
+
+#include "intel_mipmap_tree.h"
+#include "intel_tex_layout.h"
+#include "macros.h"
+/* Round value up to a multiple of alignment, which must be a power of two. */
+static int align(int value, int alignment)
+{
+   const int mask = alignment - 1;
+   return (value + mask) & ~mask;
+}
+
+/*
+ * Lay out mipmap levels first_level..last_level of mt: compute mt->pitch and
+ * mt->total_height and record each level via intel_miptree_set_level_info().
+ * The second level goes below the first; later ones sit to its right.
+ */
+void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt )
+{
+   const GLint align_w = 4;
+   const GLint align_h = 2;
+   GLuint lvl_w = mt->width0;
+   GLuint lvl_h = mt->height0;
+   GLuint x = 0;
+   GLuint y = 0;
+   GLuint level;
+
+   mt->pitch = mt->width0;
+
+   /* May need to adjust pitch to accommodate the placement of
+    * the 2nd mipmap.  This occurs when the alignment
+    * constraints of mipmap placement push the right edge of the
+    * 2nd mipmap out past the width of its parent.
+    */
+   if (mt->first_level != mt->last_level) {
+      GLuint second_row_w = align(minify(mt->width0), align_w)
+                          + minify(minify(mt->width0));
+
+      if (second_row_w > mt->width0)
+         mt->pitch = second_row_w;
+   }
+
+   /* Pitch must be a whole number of dwords, even though we
+    * express it in texels.
+    */
+   mt->pitch = intel_miptree_pitch_align (intel, mt, mt->pitch);
+   mt->total_height = 0;
+
+   level = mt->first_level;
+   while (level <= mt->last_level) {
+      GLuint rows;
+      intel_miptree_set_level_info(mt, level, 1, x, y, lvl_w, lvl_h, 1);
+      /* Compressed levels use height/4 rows (at least one); others are aligned. */
+      if (!mt->compressed)
+         rows = align(lvl_h, align_h);
+      else
+         rows = MAX2(1, lvl_h/4);
+
+      /* Because the images are packed better, the final offset
+       * might not be the maximal one: */
+      mt->total_height = MAX2(mt->total_height, y + rows);
+
+      /* Layout_below: step right after second mipmap. */
+      if (level != mt->first_level + 1)
+         y += rows;
+      else
+         x += align(lvl_w, align_w);
+      lvl_w = minify(lvl_w);
+      lvl_h = minify(lvl_h);
+      level++;
+   }
+}
diff --git a/shared/intel_tex_layout.h b/shared/intel_tex_layout.h
new file mode 100644
index 0000000..46151db
--- /dev/null
+++ b/shared/intel_tex_layout.h
@@ -0,0 +1,41 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  *   Michel Dänzer <michel@tungstengraphics.com>
+  */
+
+#include "macros.h"
+/* Halve a mipmap dimension for the next level, never going below 1. */
+static GLuint minify( GLuint d )
+{
+   const GLuint half = d >> 1;
+   return MAX2(1, half);
+}
+
+extern void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt );
diff --git a/src/Makefile.am b/src/Makefile.am
deleted file mode 100644
index aa854c5..0000000
--- a/src/Makefile.am
+++ /dev/null
@@ -1,7 +0,0 @@
-AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING
-
-xxx_dri_la_LTLIBRARIES = xxx_dri.la
-xxx_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) -Iserver
-xxx_dri_la_LDFLAGS = -module -noprefix -lm -ldl $(DRM_LIBS) $(DRI_LIBS)
-xxx_dri_ladir = @libdir@/dri
-xxx_dri_la_SOURCES = \