Import i915 and i965 dri drivers from mesa 7.1.0.

author: Luc Verhaegen <libv@skynet.be> 2010-03-13 02:36:00 +0100
committer: Luc Verhaegen <libv@skynet.be> 2010-03-13 02:36:00 +0100
commit: fedcb3219e8f9a587c693bbb2178ec3e83bf0320 (patch)
tree: b37f142039934c27eb13d9ff2344776d7f92bff6
parent: 6e23622cb869c14d82f8c901c4bbea80ded6220e (diff)
184 files changed, 26068 insertions, 27037 deletions
diff --git a/configure.ac b/configure.ac
index 70d46ac..8e2661f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,7 +1,7 @@
 # Process this file with autoconf to produce a configure script
 
 AC_PREREQ(2.57)
-AC_INIT([mesa-dri-i9xx], 7.0.3, [], mesa-dri-i9xx)
+AC_INIT([mesa-dri-i9xx], 7.1.0, [], mesa-dri-i9xx)
 
 AM_INIT_AUTOMAKE([dist-bzip2])
 
@@ -16,8 +16,8 @@ AC_PROG_CC
 AC_HEADER_STDC
 
 PKG_CHECK_MODULES([DRM], [libdrm >= 2.3.0])
-PKG_CHECK_MODULES([DRI], [libmesadri >= 7.0.3 libmesadri < 7.1.0
-			  libmesadricommon >= 7.0.3 libmesadricommon < 7.1.0])
+PKG_CHECK_MODULES([DRI], [libmesadri >= 7.1.0 libmesadri < 7.3.0
+			  libmesadricommon >= 7.1.0 libmesadricommon < 7.3.0])
 
 AC_OUTPUT([
 	Makefile
diff --git a/i915/Makefile.am b/i915/Makefile.am
index c921354..e483d0c 100644
--- a/i915/Makefile.am
+++ b/i915/Makefile.am
@@ -1,21 +1,13 @@
 AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING
 
+I915_CFLAGS = -I../shared -I../shared/server -DI915
+
 i915_dri_la_LTLIBRARIES = i915_dri.la
-i915_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) -Iserver
+i915_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(I915_CFLAGS)
 i915_dri_la_LDFLAGS = -module -noprefix -avoid-version -lm -ldl \
 			 $(DRM_LIBS) $(DRI_LIBS)
 i915_dri_ladir = @libdir@/dri
 i915_dri_la_SOURCES = \
-	i915_context.c \
-	i915_debug.c \
-	i915_fragprog.c \
-	i915_metaops.c \
-	i915_program.c \
-	i915_state.c \
-	i915_tex.c \
-	i915_texprog.c \
-	i915_texstate.c \
-	i915_vtbl.c \
 	i830_context.c \
 	i830_metaops.c \
 	i830_state.c \
@@ -23,15 +15,43 @@ i915_dri_la_SOURCES = \
 	i830_tex.c \
 	i830_texstate.c \
 	i830_vtbl.c \
-	intel_batchbuffer.c \
-	intel_context.c \
-	intel_ioctl.c \
-	intel_pixel.c \
 	intel_render.c \
-	intel_rotate.c \
-	intel_screen.c \
-	intel_span.c \
+	../shared/intel_regions.c \
+	../shared/intel_buffer_objects.c \
+	../shared/intel_batchbuffer.c \
+	../shared/intel_mipmap_tree.c \
+	i915_tex_layout.c \
+	../shared/intel_tex_layout.c \
+	../shared/intel_tex_image.c \
+	../shared/intel_tex_subimage.c \
+	../shared/intel_tex_copy.c \
+	../shared/intel_tex_validate.c \
+	../shared/intel_tex_format.c \
+	../shared/intel_tex.c \
+	../shared/intel_pixel.c \
+	../shared/intel_pixel_bitmap.c \
+	../shared/intel_pixel_copy.c \
+	intel_pixel_read.c \
+	../shared/intel_pixel_draw.c \
+	../shared/intel_buffers.c \
+	../shared/intel_blit.c \
+	i915_tex.c \
+	i915_texstate.c \
+	i915_context.c \
+	i915_debug.c \
+	i915_debug_fp.c \
+	i915_fragprog.c \
+	i915_metaops.c \
+	i915_program.c \
+	i915_state.c \
+	i915_vtbl.c \
+	../shared/intel_context.c \
+	../shared/intel_decode.c \
+	../shared/intel_ioctl.c \
+	../shared/intel_screen.c \
+	../shared/intel_span.c \
 	intel_state.c \
-	intel_tex.c \
-	intel_texmem.c \
-	intel_tris.c 
+	intel_tris.c \
+	../shared/intel_fbo.c \
+	../shared/intel_depthstencil.c \
+	../shared/intel_bufmgr_ttm.c
diff --git a/i915/i830_context.c b/i915/i830_context.c
index 7ca601e..16c8a8d 100644
--- a/i915/i830_context.c
+++ b/i915/i830_context.c
@@ -32,93 +32,78 @@
 #include "tnl/tnl.h"
 #include "tnl/t_vertex.h"
 #include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
 #include "utils.h"
+#include "intel_span.h"
+#include "intel_pixel.h"
+#include "intel_tris.h"
 
 /***************************************
  * Mesa's Driver Functions
  ***************************************/
 
-static const struct dri_extension i830_extensions[] =
+static void
+i830InitDriverFunctions(struct dd_function_table *functions)
 {
-    { "GL_ARB_texture_env_crossbar",       NULL },
-    { NULL,                                NULL }
-};
-
-
-static void i830InitDriverFunctions( struct dd_function_table *functions )
-{
-   intelInitDriverFunctions( functions );
-   i830InitStateFuncs( functions );
-   i830InitTextureFuncs( functions );
+   intelInitDriverFunctions(functions);
+   i830InitStateFuncs(functions);
+   i830InitTextureFuncs(functions);
 }
 
+extern const struct tnl_pipeline_stage *intel_pipeline[];
 
-GLboolean i830CreateContext( const __GLcontextModes *mesaVis,
-			    __DRIcontextPrivate *driContextPriv,
-			    void *sharedContextPrivate)
+GLboolean
+i830CreateContext(const __GLcontextModes * mesaVis,
+                  __DRIcontextPrivate * driContextPriv,
+                  void *sharedContextPrivate)
 {
    struct dd_function_table functions;
-   i830ContextPtr i830 = (i830ContextPtr) CALLOC_STRUCT(i830_context);
-   intelContextPtr intel = &i830->intel;
+   struct i830_context *i830 = CALLOC_STRUCT(i830_context);
+   struct intel_context *intel = &i830->intel;
    GLcontext *ctx = &intel->ctx;
-   GLuint i;
-   if (!i830) return GL_FALSE;
+   if (!i830)
+      return GL_FALSE;
 
-   i830InitVtbl( i830 );
-   i830InitDriverFunctions( &functions );
+   i830InitVtbl(i830);
+   i830InitDriverFunctions(&functions);
 
-   if (!intelInitContext( intel, mesaVis, driContextPriv,
-			  sharedContextPrivate, &functions )) {
+   if (!intelInitContext(intel, mesaVis, driContextPriv,
+                         sharedContextPrivate, &functions)) {
       FREE(i830);
       return GL_FALSE;
    }
 
+   /* Initialize swrast, tnl driver tables: */
+   intelInitSpanFuncs(ctx);
+   intelInitTriFuncs(ctx);
+
+   /* Install the customized pipeline: */
+   _tnl_destroy_pipeline(ctx);
+   _tnl_install_pipeline(ctx, intel_pipeline);
+
    intel->ctx.Const.MaxTextureUnits = I830_TEX_UNITS;
    intel->ctx.Const.MaxTextureImageUnits = I830_TEX_UNITS;
    intel->ctx.Const.MaxTextureCoordUnits = I830_TEX_UNITS;
 
-   intel->nr_heaps = 1;
-   intel->texture_heaps[0] = 
-      driCreateTextureHeap( 0, intel,
-			    intel->intelScreen->tex.size,
-			    12,
-			    I830_NR_TEX_REGIONS,
-			    intel->sarea->texList,
-			    (unsigned *) & intel->sarea->texAge,
-			    & intel->swapped,
-			    sizeof( struct i830_texture_object ),
-			    (destroy_texture_object_t *)intelDestroyTexObj );
-
-   /* FIXME: driCalculateMaxTextureLevels assumes that mipmaps are tightly
-    * FIXME: packed, but they're not in Intel graphics hardware.
+   /* Advertise the full hardware capabilities.  The new memory
+    * manager should cope much better with overload situations:
     */
-   intel->ctx.Const.MaxTextureUnits = I830_TEX_UNITS;
-   i = driQueryOptioni( &intel->optionCache, "allow_large_textures");
-   driCalculateMaxTextureLevels( intel->texture_heaps,
-				 intel->nr_heaps,
-				 &intel->ctx.Const,
-				 4,
-				 11, /* max 2D texture size is 2048x2048 */
-				 8,  /* max 3D texture size is 256^3 */
-				 10, /* max CUBE texture size is 1024x1024 */
-				 11, /* max RECT. supported */
-				 12,
-				 GL_FALSE,
-				 i );
-
-   _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 
-		       18 * sizeof(GLfloat) );
-
-   intel->verts = TNL_CONTEXT(ctx)->clipspace.vertex_buf;
+   ctx->Const.MaxTextureLevels = 12;
+   ctx->Const.Max3DTextureLevels = 9;
+   ctx->Const.MaxCubeTextureLevels = 11;
+   ctx->Const.MaxTextureRectSize = (1 << 11);
+   ctx->Const.MaxTextureUnits = I830_TEX_UNITS;
 
-   driInitExtensions( ctx, i830_extensions, GL_FALSE );
+   _tnl_init_vertices(ctx, ctx->Const.MaxArrayLockSize + 12,
+                      18 * sizeof(GLfloat));
 
-   i830InitState( i830 );
+   intel->verts = TNL_CONTEXT(ctx)->clipspace.vertex_buf;
 
+   i830InitState(i830);
+   i830InitMetaFuncs(i830);
 
-   _tnl_allow_vertex_fog( ctx, 1 ); 
-   _tnl_allow_pixel_fog( ctx, 0 ); 
+   _tnl_allow_vertex_fog(ctx, 1);
+   _tnl_allow_pixel_fog(ctx, 0);
 
    return GL_TRUE;
 }
-
diff --git a/i915/i830_context.h b/i915/i830_context.h
index bae777d..a298c14 100644
--- a/i915/i830_context.h
+++ b/i915/i830_context.h
@@ -49,17 +49,15 @@
  */
 #define I830_DESTREG_CBUFADDR0 0
 #define I830_DESTREG_CBUFADDR1 1
-#define I830_DESTREG_CBUFADDR2 2
-#define I830_DESTREG_DBUFADDR0 3
-#define I830_DESTREG_DBUFADDR1 4
-#define I830_DESTREG_DBUFADDR2 5
-#define I830_DESTREG_DV0 6
-#define I830_DESTREG_DV1 7
-#define I830_DESTREG_SENABLE 8
-#define I830_DESTREG_SR0 9
-#define I830_DESTREG_SR1 10
-#define I830_DESTREG_SR2 11
-#define I830_DEST_SETUP_SIZE 12
+#define I830_DESTREG_DBUFADDR0 2
+#define I830_DESTREG_DBUFADDR1 3
+#define I830_DESTREG_DV0 4
+#define I830_DESTREG_DV1 5
+#define I830_DESTREG_SENABLE 6
+#define I830_DESTREG_SR0 7
+#define I830_DESTREG_SR1 8
+#define I830_DESTREG_SR2 9
+#define I830_DEST_SETUP_SIZE 10
 
 #define I830_CTXREG_STATE1		0
 #define I830_CTXREG_STATE2		1
@@ -73,7 +71,7 @@
 #define I830_CTXREG_AA			9
 #define I830_CTXREG_FOGCOLOR		10
 #define I830_CTXREG_BLENDCOLOR0		11
-#define I830_CTXREG_BLENDCOLOR1		12 
+#define I830_CTXREG_BLENDCOLOR1		12
 #define I830_CTXREG_VF			13
 #define I830_CTXREG_VF2			14
 #define I830_CTXREG_MCSB0		15
@@ -84,17 +82,16 @@
 #define I830_STPREG_ST1        1
 #define I830_STP_SETUP_SIZE    2
 
-#define I830_TEXREG_TM0LI      0 /* load immediate 2 texture map n */
-#define I830_TEXREG_TM0S0      1
-#define I830_TEXREG_TM0S1      2
-#define I830_TEXREG_TM0S2      3
-#define I830_TEXREG_TM0S3      4
-#define I830_TEXREG_TM0S4      5
-#define I830_TEXREG_MCS	       6	/* _3DSTATE_MAP_COORD_SETS */
-#define I830_TEXREG_CUBE       7	/* _3DSTATE_MAP_SUBE */
-#define I830_TEX_SETUP_SIZE    8
+#define I830_TEXREG_TM0LI      0        /* load immediate 2 texture map n */
+#define I830_TEXREG_TM0S1      1
+#define I830_TEXREG_TM0S2      2
+#define I830_TEXREG_TM0S3      3
+#define I830_TEXREG_TM0S4      4
+#define I830_TEXREG_MCS	       5        /* _3DSTATE_MAP_COORD_SETS */
+#define I830_TEXREG_CUBE       6        /* _3DSTATE_MAP_SUBE */
+#define I830_TEX_SETUP_SIZE    7
 
-#define I830_TEXBLEND_SIZE	12	/* (4 args + op) * 2 + COLOR_FACTOR */
+#define I830_TEXBLEND_SIZE	12      /* (4 args + op) * 2 + COLOR_FACTOR */
 
 struct i830_texture_object
 {
@@ -104,30 +101,39 @@ struct i830_texture_object
 
 #define I830_TEX_UNITS 4
 
-struct i830_hw_state {
+struct i830_hw_state
+{
    GLuint Ctx[I830_CTX_SETUP_SIZE];
    GLuint Buffer[I830_DEST_SETUP_SIZE];
    GLuint Stipple[I830_STP_SETUP_SIZE];
    GLuint Tex[I830_TEX_UNITS][I830_TEX_SETUP_SIZE];
    GLuint TexBlend[I830_TEX_UNITS][I830_TEXBLEND_SIZE];
    GLuint TexBlendWordsUsed[I830_TEX_UNITS];
-   GLuint emitted;		/* I810_UPLOAD_* */
+
+   struct intel_region *draw_region;
+   struct intel_region *depth_region;
+
+   /* Regions aren't actually that appropriate here as the memory may
+    * be from a PBO or FBO.  Will have to do this for draw and depth for
+    * FBO's...
+    */
+   dri_bo *tex_buffer[I830_TEX_UNITS];
+   GLuint tex_offset[I830_TEX_UNITS];
+
+   GLuint emitted;              /* I810_UPLOAD_* */
    GLuint active;
 };
 
-struct i830_context 
+struct i830_context
 {
    struct intel_context intel;
-   
-   DECLARE_RENDERINPUTS(last_index_bitset);
+
+   GLuint lodbias_tm0s3[MAX_TEXTURE_UNITS];
+     DECLARE_RENDERINPUTS(last_index_bitset);
 
    struct i830_hw_state meta, initial, state, *current;
 };
 
-typedef struct i830_context *i830ContextPtr;
-typedef struct i830_texture_object *i830TextureObjectPtr;
-
-#define I830_CONTEXT(ctx)	((i830ContextPtr)(ctx))
 
 
 
@@ -148,71 +154,56 @@ do {						\
 
 /* i830_vtbl.c
  */
-extern void 
-i830InitVtbl( i830ContextPtr i830 );
+extern void i830InitVtbl(struct i830_context *i830);
 
+extern void
+i830_state_draw_region(struct intel_context *intel,
+                       struct i830_hw_state *state,
+                       struct intel_region *color_region,
+                       struct intel_region *depth_region);
 /* i830_context.c
  */
-extern GLboolean 
-i830CreateContext( const __GLcontextModes *mesaVis,
-		   __DRIcontextPrivate *driContextPriv,
-		   void *sharedContextPrivate);
+extern GLboolean
+i830CreateContext(const __GLcontextModes * mesaVis,
+                  __DRIcontextPrivate * driContextPriv,
+                  void *sharedContextPrivate);
 
 /* i830_tex.c, i830_texstate.c
  */
-extern void 
-i830UpdateTextureState( intelContextPtr intel );
-
-extern void 
-i830InitTextureFuncs( struct dd_function_table *functions );
+extern void i830UpdateTextureState(struct intel_context *intel);
 
-extern intelTextureObjectPtr
-i830AllocTexObj( struct gl_texture_object *tObj );
+extern void i830InitTextureFuncs(struct dd_function_table *functions);
 
 /* i830_texblend.c
  */
-extern GLuint i830SetTexEnvCombine(i830ContextPtr i830,
-    const struct gl_tex_env_combine_state * combine, GLint blendUnit,
-     GLuint texel_op, GLuint *state, const GLfloat *factor );
+extern GLuint i830SetTexEnvCombine(struct i830_context *i830,
+                                   const struct gl_tex_env_combine_state
+                                   *combine, GLint blendUnit, GLuint texel_op,
+                                   GLuint * state, const GLfloat * factor);
 
-extern void 
-i830EmitTextureBlend( i830ContextPtr i830 );
+extern void i830EmitTextureBlend(struct i830_context *i830);
 
 
 /* i830_state.c
  */
-extern void 
-i830InitStateFuncs( struct dd_function_table *functions );
+extern void i830InitStateFuncs(struct dd_function_table *functions);
 
-extern void 
-i830EmitState( i830ContextPtr i830 );
+extern void i830EmitState(struct i830_context *i830);
 
-extern void 
-i830InitState( i830ContextPtr i830 );
+extern void i830InitState(struct i830_context *i830);
 
 /* i830_metaops.c
  */
-extern GLboolean
-i830TryTextureReadPixels( GLcontext *ctx,
-			  GLint x, GLint y, GLsizei width, GLsizei height,
-			  GLenum format, GLenum type,
-			  const struct gl_pixelstore_attrib *pack,
-			  GLvoid *pixels );
-
-extern GLboolean
-i830TryTextureDrawPixels( GLcontext *ctx,
-			  GLint x, GLint y, GLsizei width, GLsizei height,
-			  GLenum format, GLenum type,
-			  const struct gl_pixelstore_attrib *unpack,
-			  const GLvoid *pixels );
-
-extern void 
-i830ClearWithTris( intelContextPtr intel, GLbitfield mask,
-		   GLboolean all, GLint cx, GLint cy, GLint cw, GLint ch);
+extern void i830InitMetaFuncs(struct i830_context *i830);
 
-extern void
-i830RotateWindow(intelContextPtr intel, __DRIdrawablePrivate *dPriv,
-                 GLuint srcBuf);
+/*======================================================================
+ * Inline conversion functions.  These are better-typed than the
+ * macros used previously:
+ */
+static INLINE struct i830_context *
+i830_context(GLcontext * ctx)
+{
+   return (struct i830_context *) ctx;
+}
 
 #endif
-
diff --git a/i915/i830_metaops.c b/i915/i830_metaops.c
index c1d7fe3..13e4ab3 100644
--- a/i915/i830_metaops.c
+++ b/i915/i830_metaops.c
@@ -34,6 +34,7 @@
 #include "intel_screen.h"
 #include "intel_batchbuffer.h"
 #include "intel_ioctl.h"
+#include "intel_regions.h"
 
 #include "i830_context.h"
 #include "i830_reg.h"
@@ -41,34 +42,26 @@
 /* A large amount of state doesn't need to be uploaded.
  */
 #define ACTIVE (I830_UPLOAD_INVARIENT |         \
-		I830_UPLOAD_TEXBLEND(0) |	\
-		I830_UPLOAD_STIPPLE |		\
 		I830_UPLOAD_CTX |		\
 		I830_UPLOAD_BUFFERS |		\
-		I830_UPLOAD_TEX(0))		
+		I830_UPLOAD_STIPPLE |		\
+		I830_UPLOAD_TEXBLEND(0) |	\
+		I830_UPLOAD_TEX(0))
 
 
 #define SET_STATE( i830, STATE )		\
 do {						\
-   i830->current->emitted = 0;			\
+   i830->current->emitted &= ~ACTIVE;			\
    i830->current = &i830->STATE;		\
-   i830->current->emitted = 0;			\
+   i830->current->emitted &= ~ACTIVE;			\
 } while (0)
 
-/* Operations where the 3D engine is decoupled temporarily from the
- * current GL state and used for other purposes than simply rendering
- * incoming triangles.
- */
-static void set_initial_state( i830ContextPtr i830 )
-{
-   memcpy(&i830->meta, &i830->initial, sizeof(i830->meta) );
-   i830->meta.active = ACTIVE;
-   i830->meta.emitted = 0;
-}
-
 
-static void set_no_depth_stencil_write( i830ContextPtr i830 )
+static void
+set_no_stencil_write(struct intel_context *intel)
 {
+   struct i830_context *i830 = i830_context(&intel->ctx);
+
    /* ctx->Driver.Enable( ctx, GL_STENCIL_TEST, GL_FALSE )
     */
    i830->meta.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_STENCIL_TEST;
@@ -76,6 +69,13 @@ static void set_no_depth_stencil_write( i830ContextPtr i830 )
    i830->meta.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_STENCIL_TEST;
    i830->meta.Ctx[I830_CTXREG_ENABLES_2] |= DISABLE_STENCIL_WRITE;
 
+   i830->meta.emitted &= ~I830_UPLOAD_CTX;
+}
+
+static void
+set_no_depth_write(struct intel_context *intel)
+{
+   struct i830_context *i830 = i830_context(&intel->ctx);
 
    /* ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_FALSE )
     */
@@ -87,35 +87,56 @@ static void set_no_depth_stencil_write( i830ContextPtr i830 )
    i830->meta.emitted &= ~I830_UPLOAD_CTX;
 }
 
-/* Set stencil unit to replace always with the reference value.
+/* Set depth unit to replace.
  */
-static void set_stencil_replace( i830ContextPtr i830,
-				 GLuint s_mask,
-				 GLuint s_clear)
+static void
+set_depth_replace(struct intel_context *intel)
 {
-   /* ctx->Driver.Enable( ctx, GL_STENCIL_TEST, GL_TRUE )
-    */
-   i830->meta.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_STENCIL_TEST;
-   i830->meta.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_STENCIL_WRITE;
-
+   struct i830_context *i830 = i830_context(&intel->ctx);
 
    /* ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_FALSE )
+    * ctx->Driver.DepthMask( ctx, GL_TRUE )
     */
    i830->meta.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_DIS_DEPTH_TEST_MASK;
    i830->meta.Ctx[I830_CTXREG_ENABLES_2] &= ~ENABLE_DIS_DEPTH_WRITE_MASK;
-   i830->meta.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_DEPTH_TEST;
-   i830->meta.Ctx[I830_CTXREG_ENABLES_2] |= DISABLE_DEPTH_WRITE;
+   i830->meta.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_DEPTH_TEST;
+   i830->meta.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_DEPTH_WRITE;
+
+   /* ctx->Driver.DepthFunc( ctx, GL_ALWAYS )
+    */
+   i830->meta.Ctx[I830_CTXREG_STATE3] &= ~DEPTH_TEST_FUNC_MASK;
+   i830->meta.Ctx[I830_CTXREG_STATE3] |= (ENABLE_DEPTH_TEST_FUNC |
+                                          DEPTH_TEST_FUNC
+                                          (COMPAREFUNC_ALWAYS));
+
+   i830->meta.emitted &= ~I830_UPLOAD_CTX;
+}
+
+
+/* Set stencil unit to replace always with the reference value.
+ */
+static void
+set_stencil_replace(struct intel_context *intel,
+                    GLuint s_mask, GLuint s_clear)
+{
+   struct i830_context *i830 = i830_context(&intel->ctx);
+
+   /* ctx->Driver.Enable( ctx, GL_STENCIL_TEST, GL_TRUE )
+    */
+   i830->meta.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_STENCIL_TEST;
+   i830->meta.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_STENCIL_WRITE;
 
    /* ctx->Driver.StencilMask( ctx, s_mask )
     */
    i830->meta.Ctx[I830_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_WRITE_MASK;
    i830->meta.Ctx[I830_CTXREG_STATE4] |= (ENABLE_STENCIL_WRITE_MASK |
-					   STENCIL_WRITE_MASK((s_mask&0xff)));
+                                          STENCIL_WRITE_MASK((s_mask &
+                                                              0xff)));
 
    /* ctx->Driver.StencilOp( ctx, GL_REPLACE, GL_REPLACE, GL_REPLACE )
     */
    i830->meta.Ctx[I830_CTXREG_STENCILTST] &= ~(STENCIL_OPS_MASK);
-   i830->meta.Ctx[I830_CTXREG_STENCILTST] |= 
+   i830->meta.Ctx[I830_CTXREG_STENCILTST] |=
       (ENABLE_STENCIL_PARMS |
        STENCIL_FAIL_OP(STENCILOP_REPLACE) |
        STENCIL_PASS_DEPTH_FAIL_OP(STENCILOP_REPLACE) |
@@ -125,14 +146,14 @@ static void set_stencil_replace( i830ContextPtr i830,
     */
    i830->meta.Ctx[I830_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_TEST_MASK;
    i830->meta.Ctx[I830_CTXREG_STATE4] |= (ENABLE_STENCIL_TEST_MASK |
-					   STENCIL_TEST_MASK(0xff));
+                                          STENCIL_TEST_MASK(0xff));
 
    i830->meta.Ctx[I830_CTXREG_STENCILTST] &= ~(STENCIL_REF_VALUE_MASK |
-						ENABLE_STENCIL_TEST_FUNC_MASK);
-   i830->meta.Ctx[I830_CTXREG_STENCILTST] |= 
+                                               ENABLE_STENCIL_TEST_FUNC_MASK);
+   i830->meta.Ctx[I830_CTXREG_STENCILTST] |=
       (ENABLE_STENCIL_REF_VALUE |
        ENABLE_STENCIL_TEST_FUNC |
-       STENCIL_REF_VALUE((s_clear&0xff)) |
+       STENCIL_REF_VALUE((s_clear & 0xff)) |
        STENCIL_TEST_FUNC(COMPAREFUNC_ALWAYS));
 
 
@@ -141,38 +162,43 @@ static void set_stencil_replace( i830ContextPtr i830,
 }
 
 
-static void set_color_mask( i830ContextPtr i830, GLboolean state )
+static void
+set_color_mask(struct intel_context *intel, GLboolean state)
 {
+   struct i830_context *i830 = i830_context(&intel->ctx);
+
    const GLuint mask = ((1 << WRITEMASK_RED_SHIFT) |
-			(1 << WRITEMASK_GREEN_SHIFT) |
-			(1 << WRITEMASK_BLUE_SHIFT) |
-			(1 << WRITEMASK_ALPHA_SHIFT));
+                        (1 << WRITEMASK_GREEN_SHIFT) |
+                        (1 << WRITEMASK_BLUE_SHIFT) |
+                        (1 << WRITEMASK_ALPHA_SHIFT));
 
    i830->meta.Ctx[I830_CTXREG_ENABLES_2] &= ~mask;
 
    if (state) {
-      i830->meta.Ctx[I830_CTXREG_ENABLES_2] |= 
-	 (i830->state.Ctx[I830_CTXREG_ENABLES_2] & mask);
+      i830->meta.Ctx[I830_CTXREG_ENABLES_2] |=
+         (i830->state.Ctx[I830_CTXREG_ENABLES_2] & mask);
    }
-      
+
    i830->meta.emitted &= ~I830_UPLOAD_CTX;
 }
 
 /* Installs a one-stage passthrough texture blend pipeline.  Is there
  * more that can be done to turn off texturing?
  */
-static void set_no_texture( i830ContextPtr i830 )
+static void
+set_no_texture(struct intel_context *intel)
 {
+   struct i830_context *i830 = i830_context(&intel->ctx);
    static const struct gl_tex_env_combine_state comb = {
       GL_NONE, GL_NONE,
-      { GL_TEXTURE, 0, 0, }, { GL_TEXTURE, 0, 0, },
-      { GL_SRC_COLOR, 0, 0 }, { GL_SRC_ALPHA, 0, 0 },
+      {GL_TEXTURE, 0, 0,}, {GL_TEXTURE, 0, 0,},
+      {GL_SRC_COLOR, 0, 0}, {GL_SRC_ALPHA, 0, 0},
       0, 0, 0, 0
    };
 
    i830->meta.TexBlendWordsUsed[0] =
-     i830SetTexEnvCombine( i830, & comb, 0, TEXBLENDARG_TEXEL0,
-			   i830->meta.TexBlend[0], NULL);
+      i830SetTexEnvCombine(i830, &comb, 0, TEXBLENDARG_TEXEL0,
+                           i830->meta.TexBlend[0], NULL);
 
    i830->meta.TexBlend[0][0] |= TEXOP_LAST_STAGE;
    i830->meta.emitted &= ~I830_UPLOAD_TEXBLEND(0);
@@ -181,18 +207,22 @@ static void set_no_texture( i830ContextPtr i830 )
 /* Set up a single element blend stage for 'replace' texturing with no
  * funny ops.
  */
-static void enable_texture_blend_replace( i830ContextPtr i830 )
+static void
+set_texture_blend_replace(struct intel_context *intel)
 {
+   struct i830_context *i830 = i830_context(&intel->ctx);
    static const struct gl_tex_env_combine_state comb = {
       GL_REPLACE, GL_REPLACE,
-      { GL_TEXTURE, GL_TEXTURE, GL_TEXTURE }, { GL_TEXTURE, GL_TEXTURE, GL_TEXTURE, },
-      { GL_SRC_COLOR, GL_SRC_COLOR, GL_SRC_COLOR }, { GL_SRC_ALPHA, GL_SRC_ALPHA, GL_SRC_ALPHA },
+      {GL_TEXTURE, GL_TEXTURE, GL_TEXTURE,}, {GL_TEXTURE, GL_TEXTURE,
+                                              GL_TEXTURE,},
+      {GL_SRC_COLOR, GL_SRC_COLOR, GL_SRC_COLOR}, {GL_SRC_ALPHA, GL_SRC_ALPHA,
+                                                   GL_SRC_ALPHA},
       0, 0, 1, 1
    };
 
    i830->meta.TexBlendWordsUsed[0] =
-     i830SetTexEnvCombine( i830, & comb, 0, TEXBLENDARG_TEXEL0,
-			   i830->meta.TexBlend[0], NULL);
+      i830SetTexEnvCombine(i830, &comb, 0, TEXBLENDARG_TEXEL0,
+                           i830->meta.TexBlend[0], NULL);
 
    i830->meta.TexBlend[0][0] |= TEXOP_LAST_STAGE;
    i830->meta.emitted &= ~I830_UPLOAD_TEXBLEND(0);
@@ -206,717 +236,222 @@ static void enable_texture_blend_replace( i830ContextPtr i830 )
 /* Set up an arbitary piece of memory as a rectangular texture
  * (including the front or back buffer).
  */
-static void set_tex_rect_source( i830ContextPtr i830,
-				 GLuint offset,
-				 GLuint width, 
-				 GLuint height,
-				 GLuint pitch, /* in bytes */
-				 GLuint textureFormat )
+static GLboolean
+set_tex_rect_source(struct intel_context *intel,
+                    dri_bo *buffer,
+                    GLuint offset,
+                    GLuint pitch, GLuint height, GLenum format, GLenum type)
 {
-   GLint numLevels = 1;
+   struct i830_context *i830 = i830_context(&intel->ctx);
    GLuint *setup = i830->meta.Tex[0];
+   GLint numLevels = 1;
+   GLuint textureFormat;
+   GLuint cpp;
 
-/*    fprintf(stderr, "%s: offset: %x w: %d h: %d pitch %d format %x\n", */
-/* 	   __FUNCTION__, offset, width, height, pitch, textureFormat ); */
+   /* A full implementation of this would do the upload through
+    * glTexImage2d, and get all the conversion operations at that
+    * point.  We are restricted, but still at least have access to the
+    * fragment program swizzle.
+    */
+   switch (format) {
+   case GL_BGRA:
+      switch (type) {
+      case GL_UNSIGNED_INT_8_8_8_8_REV:
+      case GL_UNSIGNED_BYTE:
+         textureFormat = (MAPSURF_32BIT | MT_32BIT_ARGB8888);
+         cpp = 4;
+         break;
+      default:
+         return GL_FALSE;
+      }
+      break;
+   case GL_RGBA:
+      switch (type) {
+      case GL_UNSIGNED_INT_8_8_8_8_REV:
+      case GL_UNSIGNED_BYTE:
+         textureFormat = (MAPSURF_32BIT | MT_32BIT_ABGR8888);
+         cpp = 4;
+         break;
+      default:
+         return GL_FALSE;
+      }
+      break;
+   case GL_BGR:
+      switch (type) {
+      case GL_UNSIGNED_SHORT_5_6_5_REV:
+         textureFormat = (MAPSURF_16BIT | MT_16BIT_RGB565);
+         cpp = 2;
+         break;
+      default:
+         return GL_FALSE;
+      }
+      break;
+   case GL_RGB:
+      switch (type) {
+      case GL_UNSIGNED_SHORT_5_6_5:
+         textureFormat = (MAPSURF_16BIT | MT_16BIT_RGB565);
+         cpp = 2;
+         break;
+      default:
+         return GL_FALSE;
+      }
+      break;
 
-   setup[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 | 
-			       (LOAD_TEXTURE_MAP0 << 0) | 4);
-   setup[I830_TEXREG_TM0S0] = (TM0S0_USE_FENCE | offset);
+   default:
+      return GL_FALSE;
+   }
+
+   i830->meta.tex_buffer[0] = buffer;
+   i830->meta.tex_offset[0] = offset;
+
+   setup[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
+                               (LOAD_TEXTURE_MAP0 << 0) | 4);
    setup[I830_TEXREG_TM0S1] = (((height - 1) << TM0S1_HEIGHT_SHIFT) |
-			       ((width - 1) << TM0S1_WIDTH_SHIFT) |
-			       textureFormat);
-   setup[I830_TEXREG_TM0S2] = ((((pitch / 4) - 1) << TM0S2_PITCH_SHIFT));   
-   setup[I830_TEXREG_TM0S3] &= ~TM0S3_MAX_MIP_MASK;
-   setup[I830_TEXREG_TM0S3] &= ~TM0S3_MIN_MIP_MASK;
-   setup[I830_TEXREG_TM0S3] |= ((numLevels - 1)*4) << TM0S3_MIN_MIP_SHIFT;
+                               ((pitch - 1) << TM0S1_WIDTH_SHIFT) |
+                               textureFormat);
+   setup[I830_TEXREG_TM0S2] =
+      (((((pitch * cpp) / 4) -
+         1) << TM0S2_PITCH_SHIFT) | TM0S2_CUBE_FACE_ENA_MASK);
+
+   setup[I830_TEXREG_TM0S3] =
+      ((((numLevels -
+          1) *
+         4) << TM0S3_MIN_MIP_SHIFT) | (FILTER_NEAREST <<
+                                       TM0S3_MIN_FILTER_SHIFT) |
+       (MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT) | (FILTER_NEAREST <<
+                                                     TM0S3_MAG_FILTER_SHIFT));
+
+   setup[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(0));
 
    setup[I830_TEXREG_MCS] = (_3DSTATE_MAP_COORD_SET_CMD |
-			     MAP_UNIT(0) |
-			     ENABLE_TEXCOORD_PARAMS |
-			     TEXCOORDS_ARE_IN_TEXELUNITS |
-			     TEXCOORDTYPE_CARTESIAN |
-			     ENABLE_ADDR_V_CNTL |
-			     TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_WRAP) |
-			     ENABLE_ADDR_U_CNTL |
-			     TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_WRAP));
+                             MAP_UNIT(0) |
+                             ENABLE_TEXCOORD_PARAMS |
+                             TEXCOORDS_ARE_IN_TEXELUNITS |
+                             TEXCOORDTYPE_CARTESIAN |
+                             ENABLE_ADDR_V_CNTL |
+                             TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_WRAP) |
+                             ENABLE_ADDR_U_CNTL |
+                             TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_WRAP));
 
    i830->meta.emitted &= ~I830_UPLOAD_TEX(0);
+   return GL_TRUE;
 }
 
 
-/* Select between front and back draw buffers.
- */
-static void set_draw_region( i830ContextPtr i830,
-			      const intelRegion *region )
-{
-   i830->meta.Buffer[I830_DESTREG_CBUFADDR1] =
-      (BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(region->pitch) | BUF_3D_USE_FENCE);
-   i830->meta.Buffer[I830_DESTREG_CBUFADDR2] = region->offset;
-   i830->meta.emitted &= ~I830_UPLOAD_BUFFERS;
-}
-
-/* Setup an arbitary draw format, useful for targeting
- * texture or agp memory.
- */
-#if 0
-static void set_draw_format( i830ContextPtr i830,
-			     GLuint format,
-			     GLuint depth_format)
-{
-   i830->meta.Buffer[I830_DESTREG_DV1] = (DSTORG_HORT_BIAS(0x8) | /* .5 */
-					  DSTORG_VERT_BIAS(0x8) | /* .5 */
-					  format |
-					  DEPTH_IS_Z |
-					  depth_format);
-}
-#endif
-
-
-static void set_vertex_format( i830ContextPtr i830 )
+static void
+set_vertex_format(struct intel_context *intel)
 {
-   i830->meta.Ctx[I830_CTXREG_VF] =  (_3DSTATE_VFT0_CMD |
-				      VFT0_TEX_COUNT(1) |
-				      VFT0_DIFFUSE |
-				      VFT0_SPEC |
-				      VFT0_XYZW);
+   struct i830_context *i830 = i830_context(&intel->ctx);
+   i830->meta.Ctx[I830_CTXREG_VF] = (_3DSTATE_VFT0_CMD |
+                                     VFT0_TEX_COUNT(1) |
+                                     VFT0_DIFFUSE | VFT0_XYZ);
    i830->meta.Ctx[I830_CTXREG_VF2] = (_3DSTATE_VFT1_CMD |
-				      VFT1_TEX0_FMT(TEXCOORDFMT_2D) |
-				      VFT1_TEX1_FMT(TEXCOORDFMT_2D) | 
-				      VFT1_TEX2_FMT(TEXCOORDFMT_2D) |
-				      VFT1_TEX3_FMT(TEXCOORDFMT_2D));
+                                      VFT1_TEX0_FMT(TEXCOORDFMT_2D) |
+                                      VFT1_TEX1_FMT(TEXCOORDFMT_2D) |
+                                      VFT1_TEX2_FMT(TEXCOORDFMT_2D) |
+                                      VFT1_TEX3_FMT(TEXCOORDFMT_2D));
    i830->meta.emitted &= ~I830_UPLOAD_CTX;
 }
 
 
-static void draw_quad(i830ContextPtr i830, 
-		      GLfloat x0, GLfloat x1,
-		      GLfloat y0, GLfloat y1, 
-		      GLubyte red, GLubyte green,
-		      GLubyte blue, GLubyte alpha,
-		      GLfloat s0, GLfloat s1,
-		      GLfloat t0, GLfloat t1 )
-{
-   GLuint vertex_size = 8;
-   GLuint *vb = intelEmitInlinePrimitiveLocked( &i830->intel, 
-						PRIM3D_TRIFAN, 
-						4*vertex_size,
-						vertex_size );
-   intelVertex tmp;
-   int i;
-
-   
-/*    fprintf(stderr, "%s: %f,%f-%f,%f 0x%x%x%x%x %f,%f-%f,%f\n", */
-/* 	   __FUNCTION__, */
-/* 	   x0,y0,x1,y1,red,green,blue,alpha,s0,t0,s1,t1); */
-
-
-   /* initial vertex, left bottom */
-   tmp.v.x = x0;
-   tmp.v.y = y0;
-   tmp.v.z = 1.0;
-   tmp.v.w = 1.0;
-   tmp.v.color.red = red;
-   tmp.v.color.green = green;
-   tmp.v.color.blue = blue;
-   tmp.v.color.alpha = alpha;
-   tmp.v.specular.red = 0;
-   tmp.v.specular.green = 0;
-   tmp.v.specular.blue = 0;
-   tmp.v.specular.alpha = 0;
-   tmp.v.u0 = s0;
-   tmp.v.v0 = t0;
-   for (i = 0 ; i < 8 ; i++)
-      vb[i] = tmp.ui[i];
-
-   /* right bottom */
-   vb += 8;
-   tmp.v.x = x1;
-   tmp.v.u0 = s1;
-   for (i = 0 ; i < 8 ; i++)
-      vb[i] = tmp.ui[i];
-
-   /* right top */
-   vb += 8;
-   tmp.v.y = y1;
-   tmp.v.v0 = t1;
-   for (i = 0 ; i < 8 ; i++)
-      vb[i] = tmp.ui[i];
-
-   /* left top */
-   vb += 8;
-   tmp.v.x = x0;
-   tmp.v.u0 = s0;
-   for (i = 0 ; i < 8 ; i++)
-      vb[i] = tmp.ui[i];
-
-/*    fprintf(stderr, "%s: DV1: %x\n",  */
-/* 	   __FUNCTION__, i830->meta.Buffer[I830_DESTREG_DV1]); */
-}
-
-static void draw_poly(i830ContextPtr i830, 
-		      GLubyte red, GLubyte green, GLubyte blue, GLubyte alpha,
-                      GLuint numVerts,
-                      GLfloat verts[][2],
-                      GLfloat texcoords[][2])
+static void
+meta_import_pixel_state(struct intel_context *intel)
 {
-   GLuint vertex_size = 8;
-   GLuint *vb = intelEmitInlinePrimitiveLocked( &i830->intel, 
-						PRIM3D_TRIFAN, 
-						numVerts * vertex_size,
-						vertex_size );
-   intelVertex tmp;
-   int i, k;
-
-   /* initial constant vertex fields */
-   tmp.v.z = 1.0;
-   tmp.v.w = 1.0; 
-   tmp.v.color.red = red;
-   tmp.v.color.green = green;
-   tmp.v.color.blue = blue;
-   tmp.v.color.alpha = alpha;
-   tmp.v.specular.red = 0;
-   tmp.v.specular.green = 0;
-   tmp.v.specular.blue = 0;
-   tmp.v.specular.alpha = 0;
-
-   for (k = 0; k < numVerts; k++) {
-      tmp.v.x = verts[k][0];
-      tmp.v.y = verts[k][1];
-      tmp.v.u0 = texcoords[k][0];
-      tmp.v.v0 = texcoords[k][1];
-
-      for (i = 0 ; i < vertex_size ; i++)
-         vb[i] = tmp.ui[i];
-
-      vb += vertex_size;
-   }
-}
-
-void 
-i830ClearWithTris(intelContextPtr intel, GLbitfield mask,
-		  GLboolean allFoo,
-		  GLint cxFoo, GLint cyFoo, GLint cwFoo, GLint chFoo)
-{
-   i830ContextPtr i830 = I830_CONTEXT( intel );
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;
-   intelScreenPrivate *screen = intel->intelScreen;
-   int x0, y0, x1, y1;
-   GLint cx, cy, cw, ch;
-   GLboolean all;
-
-   INTEL_FIREVERTICES(intel);
-   SET_STATE( i830, meta );
-   set_initial_state( i830 );
-/*    set_no_texture( i830 ); */
-   set_vertex_format( i830 ); 
-
-   LOCK_HARDWARE(intel);
-
-   /* get clear bounds after locking */
-   cx = intel->ctx.DrawBuffer->_Xmin;
-   cy = intel->ctx.DrawBuffer->_Ymin;
-   cw = intel->ctx.DrawBuffer->_Xmax - cx;
-   ch = intel->ctx.DrawBuffer->_Ymax - cy;
-   all = (cw == intel->ctx.DrawBuffer->Width &&
-          ch == intel->ctx.DrawBuffer->Height);
-
-   if(!all) {
-      x0 = cx;
-      y0 = cy;
-      x1 = x0 + cw;
-      y1 = y0 + ch;
-   } else {
-      x0 = 0;
-      y0 = 0;
-      x1 = x0 + dPriv->w;
-      y1 = y0 + dPriv->h;
-   }
-
-   /* Don't do any clipping to screen - these are window coordinates.
-    * The active cliprects will be applied as for any other geometry.
-    */
-
-   if(mask & BUFFER_BIT_FRONT_LEFT) {
-      set_no_depth_stencil_write( i830 );
-      set_color_mask( i830, GL_TRUE );
-      set_draw_region( i830, &screen->front );
-      draw_quad(i830, x0, x1, y0, y1,
-		intel->clear_red, intel->clear_green,
-		intel->clear_blue, intel->clear_alpha,
-		0, 0, 0, 0);
-   }
-
-   if(mask & BUFFER_BIT_BACK_LEFT) {
-      set_no_depth_stencil_write( i830 );
-      set_color_mask( i830, GL_TRUE );
-      set_draw_region( i830, &screen->back );
-
-      draw_quad(i830, x0, x1, y0, y1,
-		intel->clear_red, intel->clear_green,
-		intel->clear_blue, intel->clear_alpha,
-		0, 0, 0, 0);
-   }
-
-   if(mask & BUFFER_BIT_STENCIL) {
-      set_stencil_replace( i830, 
-			   intel->ctx.Stencil.WriteMask[0], 
-			   intel->ctx.Stencil.Clear);
-
-      set_color_mask( i830, GL_FALSE );
-      set_draw_region( i830, &screen->front );
-      draw_quad( i830, x0, x1, y0, y1, 0, 0, 0, 0, 0, 0, 0, 0 );
-   }
+   struct i830_context *i830 = i830_context(&intel->ctx);
+
+   i830->meta.Ctx[I830_CTXREG_STATE1] = i830->state.Ctx[I830_CTXREG_STATE1];
+   i830->meta.Ctx[I830_CTXREG_STATE2] = i830->state.Ctx[I830_CTXREG_STATE2];
+   i830->meta.Ctx[I830_CTXREG_STATE3] = i830->state.Ctx[I830_CTXREG_STATE3];
+   i830->meta.Ctx[I830_CTXREG_STATE4] = i830->state.Ctx[I830_CTXREG_STATE4];
+   i830->meta.Ctx[I830_CTXREG_STATE5] = i830->state.Ctx[I830_CTXREG_STATE5];
+   i830->meta.Ctx[I830_CTXREG_IALPHAB] = i830->state.Ctx[I830_CTXREG_IALPHAB];
+   i830->meta.Ctx[I830_CTXREG_STENCILTST] =
+      i830->state.Ctx[I830_CTXREG_STENCILTST];
+   i830->meta.Ctx[I830_CTXREG_ENABLES_1] =
+      i830->state.Ctx[I830_CTXREG_ENABLES_1];
+   i830->meta.Ctx[I830_CTXREG_ENABLES_2] =
+      i830->state.Ctx[I830_CTXREG_ENABLES_2];
+   i830->meta.Ctx[I830_CTXREG_AA] = i830->state.Ctx[I830_CTXREG_AA];
+   i830->meta.Ctx[I830_CTXREG_FOGCOLOR] =
+      i830->state.Ctx[I830_CTXREG_FOGCOLOR];
+   i830->meta.Ctx[I830_CTXREG_BLENDCOLOR0] =
+      i830->state.Ctx[I830_CTXREG_BLENDCOLOR0];
+   i830->meta.Ctx[I830_CTXREG_BLENDCOLOR1] =
+      i830->state.Ctx[I830_CTXREG_BLENDCOLOR1];
+   i830->meta.Ctx[I830_CTXREG_MCSB0] = i830->state.Ctx[I830_CTXREG_MCSB0];
+   i830->meta.Ctx[I830_CTXREG_MCSB1] = i830->state.Ctx[I830_CTXREG_MCSB1];
+
+
+   i830->meta.Ctx[I830_CTXREG_STATE3] &= ~CULLMODE_MASK;
+   i830->meta.Stipple[I830_STPREG_ST1] &= ~ST1_ENABLE;
+   i830->meta.emitted &= ~I830_UPLOAD_CTX;
 
-   UNLOCK_HARDWARE(intel);
 
-   INTEL_FIREVERTICES(intel);
-   SET_STATE( i830, state );
+   i830->meta.Buffer[I830_DESTREG_SENABLE] =
+      i830->state.Buffer[I830_DESTREG_SENABLE];
+   i830->meta.Buffer[I830_DESTREG_SR1] = i830->state.Buffer[I830_DESTREG_SR1];
+   i830->meta.Buffer[I830_DESTREG_SR2] = i830->state.Buffer[I830_DESTREG_SR2];
+   i830->meta.emitted &= ~I830_UPLOAD_BUFFERS;
 }
 
 
-#if 0
 
-GLboolean
-i830TryTextureReadPixels( GLcontext *ctx,
-			  GLint x, GLint y, GLsizei width, GLsizei height,
-			  GLenum format, GLenum type,
-			  const struct gl_pixelstore_attrib *pack,
-			  GLvoid *pixels )
+/* Set the meta state's draw region from the given color and depth regions.
+ */
+static void
+meta_draw_region(struct intel_context *intel,
+                 struct intel_region *color_region,
+                 struct intel_region *depth_region)
 {
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
-   intelScreenPrivate *screen = i830->intel.intelScreen;
-   GLint pitch = pack->RowLength ? pack->RowLength : width;
-   __DRIdrawablePrivate *dPriv = i830->intel.driDrawable;
-   int textureFormat;
-   GLenum glTextureFormat;
-   int src_offset = i830->meta.Buffer[I830_DESTREG_CBUFADDR2];
-   int destOffset = intelAgpOffsetFromVirtual( &i830->intel, pixels);
-   int destFormat, depthFormat, destPitch;
-   drm_clip_rect_t tmp;
-
-   if (INTEL_DEBUG & DEBUG_PIXEL)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-
-   if (	ctx->_ImageTransferState ||
-	pack->SwapBytes ||
-	pack->LsbFirst ||
-	!pack->Invert) {
-      fprintf(stderr, "%s: check_color failed\n", __FUNCTION__);
-      return GL_FALSE;
-   }
-
-   switch (screen->fbFormat) {
-   case DV_PF_565:
-      textureFormat = MAPSURF_16BIT | MT_16BIT_RGB565;
-      glTextureFormat = GL_RGB;
-      break;
-   case DV_PF_555:
-      textureFormat = MAPSURF_16BIT | MT_16BIT_ARGB1555;
-      glTextureFormat = GL_RGBA;
-      break;
-   case DV_PF_8888:
-      textureFormat = MAPSURF_32BIT | MT_32BIT_ARGB8888;
-      glTextureFormat = GL_RGBA;
-      break;
-   default:
-      fprintf(stderr, "%s: textureFormat failed %x\n", __FUNCTION__,
-	      screen->fbFormat);
-      return GL_FALSE;
-   }
-
-
-   switch (type) {
-   case GL_UNSIGNED_SHORT_5_6_5: 
-      if (format != GL_RGB) return GL_FALSE;
-      destFormat = COLR_BUF_RGB565; 
-      depthFormat = DEPTH_FRMT_16_FIXED;
-      destPitch = pitch * 2;
-      break;
-   case GL_UNSIGNED_INT_8_8_8_8_REV: 
-      if (format != GL_BGRA) return GL_FALSE;
-      destFormat = COLR_BUF_ARGB8888; 
-      depthFormat = DEPTH_FRMT_24_FIXED_8_OTHER;
-      destPitch = pitch * 4;
-      break;
-   default:
-      fprintf(stderr, "%s: destFormat failed %s\n", __FUNCTION__,
-	      _mesa_lookup_enum_by_nr(type));
-      return GL_FALSE;
-   }
-
-   destFormat |= (0x02<<24);
-
-/*    fprintf(stderr, "type: %s destFormat: %x\n", */
-/* 	   _mesa_lookup_enum_by_nr(type), */
-/* 	   destFormat); */
-
-   intelFlush( ctx );
-
-   SET_STATE( i830, meta );
-   set_initial_state( i830 );
-   set_no_depth_stencil_write( i830 );
-
-   LOCK_HARDWARE( intel );
-   {
-      intelWaitForIdle( intel ); /* required by GL */
-
-      if (!driClipRectToFramebuffer(ctx->ReadBuffer, &x, &y, &width, &height)) {
-	 UNLOCK_HARDWARE( intel );
-	 SET_STATE(i830, state);
-	 fprintf(stderr, "%s: cliprect failed\n", __FUNCTION__);
-	 return GL_TRUE;
-      }
-
-#if 0
-      /* FIXME -- Just emit the correct state
-       */
-      if (i830SetParam(i830->driFd, I830_SETPARAM_CBUFFER_PITCH, 
-		      destPitch) != 0) {
-	 UNLOCK_HARDWARE( intel );
-	 SET_STATE(i830, state);
-	 fprintf(stderr, "%s: setparam failed\n", __FUNCTION__);
-	 return GL_FALSE;
-      }
-#endif
-
-
-      y = dPriv->h - y - height;
-      x += dPriv->x;
-      y += dPriv->y;
-
-
-      /* Set the frontbuffer up as a large rectangular texture.
-       */
-      set_tex_rect_source( i830, 
-			   src_offset, 
-			   screen->width, 
-			   screen->height, 
-			   screen->front.pitch, 
-			   textureFormat ); 
-   
-   
-      enable_texture_blend_replace( i830 ); 
-
-
-      /* Set the 3d engine to draw into the agp memory
-       */
+   struct i830_context *i830 = i830_context(&intel->ctx);
 
-      set_draw_region( i830, destOffset ); 
-      set_draw_format( i830, destFormat, depthFormat );  
-
-
-      /* Draw a single quad, no cliprects:
-       */
-      i830->intel.numClipRects = 1;
-      i830->intel.pClipRects = &tmp;
-      i830->intel.pClipRects[0].x1 = 0;
-      i830->intel.pClipRects[0].y1 = 0;
-      i830->intel.pClipRects[0].x2 = width;
-      i830->intel.pClipRects[0].y2 = height;
-
-      draw_quad( i830, 
-		 0, width, 0, height, 
-		 0, 255, 0, 0, 
-		 x, x+width, y, y+height );
-
-      intelWindowMoved( intel );
-   }
-   UNLOCK_HARDWARE( intel );
-   intelFinish( ctx ); /* required by GL */
-
-   SET_STATE( i830, state );
-   return GL_TRUE;
+   i830_state_draw_region(intel, &i830->meta, color_region, depth_region);
 }
 
 
-GLboolean
-i830TryTextureDrawPixels( GLcontext *ctx,
-			  GLint x, GLint y, GLsizei width, GLsizei height,
-			  GLenum format, GLenum type,
-			  const struct gl_pixelstore_attrib *unpack,
-			  const GLvoid *pixels )
+/* Operations where the 3D engine is decoupled temporarily from the
+ * current GL state and used for other purposes than simply rendering
+ * incoming triangles.
+ */
+static void
+install_meta_state(struct intel_context *intel)
 {
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
-   GLint pitch = unpack->RowLength ? unpack->RowLength : width;
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;
-   int textureFormat;
-   GLenum glTextureFormat;
-   int dst_offset = i830->meta.Buffer[I830_DESTREG_CBUFADDR2];
-   int src_offset = intelAgpOffsetFromVirtual( intel, pixels );
-
-   if (INTEL_DEBUG & DEBUG_PIXEL)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   /* Todo -- upload images that aren't in agp space, then texture
-    * from them.  
-    */
+   struct i830_context *i830 = i830_context(&intel->ctx);
+   memcpy(&i830->meta, &i830->initial, sizeof(i830->meta));
 
-   if ( !intelIsAgpMemory( intel, pixels, pitch*height ) ) {
-      fprintf(stderr, "%s: intelIsAgpMemory failed\n", __FUNCTION__);
-      return GL_FALSE;
-   }
-
-   /* Todo -- don't want to clobber all the drawing state like we do
-    * for readpixels -- most of this state can be handled just fine.
-    */
-   if (	ctx->_ImageTransferState ||
-	unpack->SwapBytes ||
-	unpack->LsbFirst ||
-	ctx->Color.AlphaEnabled || 
-	ctx->Depth.Test ||
-	ctx->Fog.Enabled ||
-	ctx->Scissor.Enabled ||
-	ctx->Stencil.Enabled ||
-	!ctx->Color.ColorMask[0] ||
-	!ctx->Color.ColorMask[1] ||
-	!ctx->Color.ColorMask[2] ||
-	!ctx->Color.ColorMask[3] ||
-	ctx->Color.ColorLogicOpEnabled ||
-	ctx->Texture._EnabledUnits ||
-	ctx->Depth.OcclusionTest) {
-      fprintf(stderr, "%s: other tests failed\n", __FUNCTION__);
-      return GL_FALSE;
-   }
-
-   /* Todo -- remove these restrictions:
-    */
-   if (ctx->Pixel.ZoomX != 1.0F ||
-       ctx->Pixel.ZoomY != -1.0F)
-      return GL_FALSE;
-
-
-
-   switch (type) {
-   case GL_UNSIGNED_SHORT_1_5_5_5_REV:
-      if (format != GL_BGRA) return GL_FALSE;
-      textureFormat = MAPSURF_16BIT | MT_16BIT_ARGB1555;
-      glTextureFormat = GL_RGBA;
-      break;
-   case GL_UNSIGNED_SHORT_5_6_5: 
-      if (format != GL_RGB) return GL_FALSE;
-      textureFormat = MAPSURF_16BIT | MT_16BIT_RGB565;
-      glTextureFormat = GL_RGB;
-      break;
-   case GL_UNSIGNED_SHORT_8_8_MESA: 
-      if (format != GL_YCBCR_MESA) return GL_FALSE;
-      textureFormat = (MAPSURF_422 | MT_422_YCRCB_SWAPY 
-/*  		       | TM0S1_COLORSPACE_CONVERSION */
-	 );
-      glTextureFormat = GL_YCBCR_MESA;
-      break;
-   case GL_UNSIGNED_SHORT_8_8_REV_MESA: 
-      if (format != GL_YCBCR_MESA) return GL_FALSE;
-      textureFormat = (MAPSURF_422 | MT_422_YCRCB_NORMAL 
-/* 		       | TM0S1_COLORSPACE_CONVERSION */
-	 );
-      glTextureFormat = GL_YCBCR_MESA;
-      break;
-   case GL_UNSIGNED_INT_8_8_8_8_REV: 
-      if (format != GL_BGRA) return GL_FALSE;
-      textureFormat = MAPSURF_32BIT | MT_32BIT_ARGB8888;
-      glTextureFormat = GL_RGBA;
-      break;
-   default:
-      fprintf(stderr, "%s: destFormat failed\n", __FUNCTION__);
-      return GL_FALSE;
-   }
-
-   intelFlush( ctx );
-
-   SET_STATE( i830, meta );
-
-   LOCK_HARDWARE( intel );
-   {
-      intelWaitForIdle( intel ); /* required by GL */
-
-      y -= height;			/* cope with pixel zoom */
-
-      if (!driClipRectToFramebuffer(ctx->ReadBuffer, &x, &y, &width, &height)) {
-	 UNLOCK_HARDWARE( intel );
-	 SET_STATE(i830, state);
-	 fprintf(stderr, "%s: cliprect failed\n", __FUNCTION__);
-	 return GL_TRUE;
-      }
-
-
-      y = dPriv->h - y - height;
-
-      set_initial_state( i830 );
-
-      /* Set the pixel image up as a rectangular texture.
-       */
-      set_tex_rect_source( i830, 
-			   src_offset, 
-			   width, 
-			   height, 
-			   pitch, /* XXXX!!!! -- /2 sometimes */
-			   textureFormat ); 
-   
-   
-      enable_texture_blend_replace( i830 ); 
-
-   
-      /* Draw to the current draw buffer:
-       */
-      set_draw_offset( i830, dst_offset );
-
-      /* Draw a quad, use regular cliprects
-       */
-/*       fprintf(stderr, "x: %d y: %d width %d height %d\n", x, y, width, height); */
+   i830->meta.active = ACTIVE;
+   i830->meta.emitted = 0;
 
-      draw_quad( i830, 
-		 x, x+width, y, y+height,
-		 0, 255, 0, 0, 
-		 0, width, 0, height );
+   SET_STATE(i830, meta);
+   set_vertex_format(intel);
+   set_no_texture(intel);
+}
 
-      intelWindowMoved( intel );
-   }
-   UNLOCK_HARDWARE( intel );
-   intelFinish( ctx ); /* required by GL */
-   
+static void
+leave_meta_state(struct intel_context *intel)
+{
+   struct i830_context *i830 = i830_context(&intel->ctx);
+   intel_region_release(&i830->meta.draw_region);
+   intel_region_release(&i830->meta.depth_region);
+/*    intel_region_release(intel, &i830->meta.tex_region[0]); */
    SET_STATE(i830, state);
-
-   return GL_TRUE;
 }
 
-#endif
 
-/**
- * Copy the window contents named by dPriv to the rotated (or reflected)
- * color buffer.
- * srcBuf is BUFFER_BIT_FRONT_LEFT or BUFFER_BIT_BACK_LEFT to indicate the source.
- */
+
 void
-i830RotateWindow(intelContextPtr intel, __DRIdrawablePrivate *dPriv,
-                 GLuint srcBuf)
+i830InitMetaFuncs(struct i830_context *i830)
 {
-   i830ContextPtr i830 = I830_CONTEXT( intel );
-   intelScreenPrivate *screen = intel->intelScreen;
-   const GLuint cpp = screen->cpp;
-   drm_clip_rect_t fullRect;
-   GLuint textureFormat, srcOffset, srcPitch;
-   const drm_clip_rect_t *clipRects;
-   int numClipRects;
-   int i;
-
-   int xOrig, yOrig;
-   int origNumClipRects;
-   drm_clip_rect_t *origRects;
-
-   /*
-    * set up hardware state
-    */
-   intelFlush( &intel->ctx );
-
-   SET_STATE( i830, meta ); 
-   set_initial_state( i830 ); 
-   set_no_texture( i830 ); 
-   set_vertex_format( i830 ); 
-   set_no_depth_stencil_write( i830 );
-   set_color_mask( i830, GL_FALSE );
-
-   LOCK_HARDWARE(intel);
-
-   /* save current drawing origin and cliprects (restored at end) */
-   xOrig = intel->drawX;
-   yOrig = intel->drawY;
-   origNumClipRects = intel->numClipRects;
-   origRects = intel->pClipRects;
-
-   if (!intel->numClipRects)
-      goto done;
-
-   /*
-    * set drawing origin, cliprects for full-screen access to rotated screen
-    */
-   fullRect.x1 = 0;
-   fullRect.y1 = 0;
-   fullRect.x2 = screen->rotatedWidth;
-   fullRect.y2 = screen->rotatedHeight;
-   intel->drawX = 0;
-   intel->drawY = 0;
-   intel->numClipRects = 1;
-   intel->pClipRects = &fullRect;
-
-   set_draw_region( i830, &screen->rotated );
-
-   if (cpp == 4)
-      textureFormat = MAPSURF_32BIT | MT_32BIT_ARGB8888;
-   else
-      textureFormat = MAPSURF_16BIT | MT_16BIT_RGB565;
-
-   if (srcBuf == BUFFER_BIT_FRONT_LEFT) {
-      srcPitch = screen->front.pitch;   /* in bytes */
-      srcOffset = screen->front.offset; /* bytes */
-      clipRects = dPriv->pClipRects;
-      numClipRects = dPriv->numClipRects;
-   }
-   else {
-      srcPitch = screen->back.pitch;   /* in bytes */
-      srcOffset = screen->back.offset; /* bytes */
-      clipRects = dPriv->pBackClipRects;
-      numClipRects = dPriv->numBackClipRects;
-   }
-
-   /* set the whole screen up as a texture to avoid alignment issues */
-   set_tex_rect_source(i830,
-                       srcOffset,
-                       screen->width,
-                       screen->height,
-                       srcPitch,
-                       textureFormat);
-
-   enable_texture_blend_replace(i830);
-
-   /*
-    * loop over the source window's cliprects
-    */
-   for (i = 0; i < numClipRects; i++) {
-      int srcX0 = clipRects[i].x1;
-      int srcY0 = clipRects[i].y1;
-      int srcX1 = clipRects[i].x2;
-      int srcY1 = clipRects[i].y2;
-      GLfloat verts[4][2], tex[4][2];
-      int j;
-
-      /* build vertices for four corners of clip rect */
-      verts[0][0] = srcX0;  verts[0][1] = srcY0;
-      verts[1][0] = srcX1;  verts[1][1] = srcY0;
-      verts[2][0] = srcX1;  verts[2][1] = srcY1;
-      verts[3][0] = srcX0;  verts[3][1] = srcY1;
-
-      /* .. and texcoords */
-      tex[0][0] = srcX0;  tex[0][1] = srcY0;
-      tex[1][0] = srcX1;  tex[1][1] = srcY0;
-      tex[2][0] = srcX1;  tex[2][1] = srcY1;
-      tex[3][0] = srcX0;  tex[3][1] = srcY1;
-
-      /* transform coords to rotated screen coords */
-
-      for (j = 0; j < 4; j++) {
-         matrix23TransformCoordf(&screen->rotMatrix,
-                                 &verts[j][0], &verts[j][1]);
-      }
-
-      /* draw polygon to map source image to dest region */
-      draw_poly(i830, 255, 255, 255, 255, 4, verts, tex);
-
-   } /* cliprect loop */
-
-   intelFlushBatchLocked( intel, GL_FALSE, GL_FALSE, GL_FALSE );
-
- done:
-   /* restore original drawing origin and cliprects */
-   intel->drawX = xOrig;
-   intel->drawY = yOrig;
-   intel->numClipRects = origNumClipRects;
-   intel->pClipRects = origRects;
-
-   UNLOCK_HARDWARE(intel);
-
-   SET_STATE( i830, state );
+   i830->intel.vtbl.install_meta_state = install_meta_state;
+   i830->intel.vtbl.leave_meta_state = leave_meta_state;
+   i830->intel.vtbl.meta_no_depth_write = set_no_depth_write;
+   i830->intel.vtbl.meta_no_stencil_write = set_no_stencil_write;
+   i830->intel.vtbl.meta_stencil_replace = set_stencil_replace;
+   i830->intel.vtbl.meta_depth_replace = set_depth_replace;
+   i830->intel.vtbl.meta_color_mask = set_color_mask;
+   i830->intel.vtbl.meta_no_texture = set_no_texture;
+   i830->intel.vtbl.meta_texture_blend_replace = set_texture_blend_replace;
+   i830->intel.vtbl.meta_tex_rect_source = set_tex_rect_source;
+   i830->intel.vtbl.meta_draw_region = meta_draw_region;
+   i830->intel.vtbl.meta_import_pixel_state = meta_import_pixel_state;
 }
-
diff --git a/i915/i830_reg.h b/i915/i830_reg.h
index 98cee2f..41280bc 100644
--- a/i915/i830_reg.h
+++ b/i915/i830_reg.h
@@ -407,7 +407,7 @@
 #define LOGICOP_SET			0xf
 #define MODE4_ENABLE_STENCIL_TEST_MASK	((1<<17)|(0xff00))
 #define ENABLE_STENCIL_TEST_MASK	(1<<17)
-#define STENCIL_TEST_MASK(x)		((x)<<8)
+#define STENCIL_TEST_MASK(x)		(((x)&0xff)<<8)
 #define MODE4_ENABLE_STENCIL_WRITE_MASK	((1<<16)|(0x00ff))
 #define ENABLE_STENCIL_WRITE_MASK	(1<<16)
 #define STENCIL_WRITE_MASK(x)		((x)&0xff)
@@ -554,8 +554,8 @@
 #define    MAPSURF_4BIT_INDEXED		   (7<<6)
 #define TM0S1_MT_FORMAT_MASK         (0x7 << 3)
 #define TM0S1_MT_FORMAT_SHIFT        3
-#define    MT_4BIT_IDX_ARGB8888	           (7<<3) /* SURFACE_4BIT_INDEXED */
-#define    MT_8BIT_IDX_RGB565	           (0<<3) /* SURFACE_8BIT_INDEXED */
+#define    MT_4BIT_IDX_ARGB8888	           (7<<3)       /* SURFACE_4BIT_INDEXED */
+#define    MT_8BIT_IDX_RGB565	           (0<<3)       /* SURFACE_8BIT_INDEXED */
 #define    MT_8BIT_IDX_ARGB1555	           (1<<3)
 #define    MT_8BIT_IDX_ARGB4444	           (2<<3)
 #define    MT_8BIT_IDX_AY88		   (3<<3)
@@ -563,9 +563,9 @@
 #define    MT_8BIT_IDX_BUMP_88DVDU 	   (5<<3)
 #define    MT_8BIT_IDX_BUMP_655LDVDU	   (6<<3)
 #define    MT_8BIT_IDX_ARGB8888	           (7<<3)
-#define    MT_8BIT_I8		           (0<<3) /* SURFACE_8BIT */
+#define    MT_8BIT_I8		           (0<<3)       /* SURFACE_8BIT */
 #define    MT_8BIT_L8		           (1<<3)
-#define    MT_16BIT_RGB565 		   (0<<3) /* SURFACE_16BIT */
+#define    MT_16BIT_RGB565 		   (0<<3)       /* SURFACE_16BIT */
 #define    MT_16BIT_ARGB1555		   (1<<3)
 #define    MT_16BIT_ARGB4444		   (2<<3)
 #define    MT_16BIT_AY88		   (3<<3)
@@ -573,16 +573,17 @@
 #define    MT_16BIT_BUMP_88DVDU	           (5<<3)
 #define    MT_16BIT_BUMP_655LDVDU	   (6<<3)
 #define    MT_16BIT_DIB_RGB565_8888	   (7<<3)
-#define    MT_32BIT_ARGB8888		   (0<<3) /* SURFACE_32BIT */
+#define    MT_32BIT_ARGB8888		   (0<<3)       /* SURFACE_32BIT */
 #define    MT_32BIT_ABGR8888		   (1<<3)
+#define    MT_32BIT_XRGB8888		   (2<<3)       /* XXX: Guess from i915_reg.h */
 #define    MT_32BIT_BUMP_XLDVDU_8888	   (6<<3)
 #define    MT_32BIT_DIB_8888		   (7<<3)
-#define    MT_411_YUV411		   (0<<3) /* SURFACE_411 */
-#define    MT_422_YCRCB_SWAPY	           (0<<3) /* SURFACE_422 */
+#define    MT_411_YUV411		   (0<<3)       /* SURFACE_411 */
+#define    MT_422_YCRCB_SWAPY	           (0<<3)       /* SURFACE_422 */
 #define    MT_422_YCRCB_NORMAL	           (1<<3)
 #define    MT_422_YCRCB_SWAPUV	           (2<<3)
 #define    MT_422_YCRCB_SWAPUVY	           (3<<3)
-#define    MT_COMPRESS_DXT1		   (0<<3) /* SURFACE_COMPRESSED */
+#define    MT_COMPRESS_DXT1		   (0<<3)       /* SURFACE_COMPRESSED */
 #define    MT_COMPRESS_DXT2_3	           (1<<3)
 #define    MT_COMPRESS_DXT4_5	           (2<<3)
 #define    MT_COMPRESS_FXT1		   (3<<3)
diff --git a/i915/i830_state.c b/i915/i830_state.c
index 9512519..e44a7df 100644
--- a/i915/i830_state.c
+++ b/i915/i830_state.c
@@ -34,151 +34,155 @@
 
 #include "texmem.h"
 
+#include "drivers/common/driverfuncs.h"
+
 #include "intel_screen.h"
 #include "intel_batchbuffer.h"
+#include "intel_fbo.h"
 
 #include "i830_context.h"
 #include "i830_reg.h"
 
+#define FILE_DEBUG_FLAG DEBUG_STATE
+
 static void
-i830StencilFuncSeparate(GLcontext *ctx, GLenum face, GLenum func, GLint ref,
+i830StencilFuncSeparate(GLcontext * ctx, GLenum face, GLenum func, GLint ref,
                         GLuint mask)
 {
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   struct i830_context *i830 = i830_context(ctx);
    int test = intel_translate_compare_func(func);
 
    mask = mask & 0xff;
 
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s : func: %s, ref : 0x%x, mask: 0x%x\n", __FUNCTION__,
-	      _mesa_lookup_enum_by_nr(func), ref, mask);
+   DBG("%s : func: %s, ref : 0x%x, mask: 0x%x\n", __FUNCTION__,
+       _mesa_lookup_enum_by_nr(func), ref, mask);
 
 
    I830_STATECHANGE(i830, I830_UPLOAD_CTX);
    i830->state.Ctx[I830_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_TEST_MASK;
    i830->state.Ctx[I830_CTXREG_STATE4] |= (ENABLE_STENCIL_TEST_MASK |
-					STENCIL_TEST_MASK(mask));
+                                           STENCIL_TEST_MASK(mask));
    i830->state.Ctx[I830_CTXREG_STENCILTST] &= ~(STENCIL_REF_VALUE_MASK |
-					     ENABLE_STENCIL_TEST_FUNC_MASK);
+                                                ENABLE_STENCIL_TEST_FUNC_MASK);
    i830->state.Ctx[I830_CTXREG_STENCILTST] |= (ENABLE_STENCIL_REF_VALUE |
-					    ENABLE_STENCIL_TEST_FUNC |
-					    STENCIL_REF_VALUE(ref) |
-					    STENCIL_TEST_FUNC(test));
+                                               ENABLE_STENCIL_TEST_FUNC |
+                                               STENCIL_REF_VALUE(ref) |
+                                               STENCIL_TEST_FUNC(test));
 }
 
 static void
-i830StencilMaskSeparate(GLcontext *ctx, GLenum face, GLuint mask)
+i830StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask)
 {
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
-
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s : mask 0x%x\n", __FUNCTION__, mask);
+   struct i830_context *i830 = i830_context(ctx);
 
+   DBG("%s : mask 0x%x\n", __FUNCTION__, mask);
+   
    mask = mask & 0xff;
 
    I830_STATECHANGE(i830, I830_UPLOAD_CTX);
    i830->state.Ctx[I830_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_WRITE_MASK;
    i830->state.Ctx[I830_CTXREG_STATE4] |= (ENABLE_STENCIL_WRITE_MASK |
-					STENCIL_WRITE_MASK(mask));
+                                           STENCIL_WRITE_MASK(mask));
 }
 
 static void
-i830StencilOpSeparate(GLcontext *ctx, GLenum face, GLenum fail, GLenum zfail,
+i830StencilOpSeparate(GLcontext * ctx, GLenum face, GLenum fail, GLenum zfail,
                       GLenum zpass)
 {
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   struct i830_context *i830 = i830_context(ctx);
    int fop, dfop, dpop;
 
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s: fail : %s, zfail: %s, zpass : %s\n", __FUNCTION__,
-	      _mesa_lookup_enum_by_nr(fail),
-	      _mesa_lookup_enum_by_nr(zfail),
-	      _mesa_lookup_enum_by_nr(zpass));
+   DBG("%s: fail : %s, zfail: %s, zpass : %s\n", __FUNCTION__,
+       _mesa_lookup_enum_by_nr(fail),
+       _mesa_lookup_enum_by_nr(zfail), 
+       _mesa_lookup_enum_by_nr(zpass));
 
-   fop = 0; dfop = 0; dpop = 0;
+   fop = 0;
+   dfop = 0;
+   dpop = 0;
 
-   switch(fail) {
-   case GL_KEEP: 
-      fop = STENCILOP_KEEP; 
+   switch (fail) {
+   case GL_KEEP:
+      fop = STENCILOP_KEEP;
       break;
-   case GL_ZERO: 
-      fop = STENCILOP_ZERO; 
+   case GL_ZERO:
+      fop = STENCILOP_ZERO;
       break;
-   case GL_REPLACE: 
-      fop = STENCILOP_REPLACE; 
+   case GL_REPLACE:
+      fop = STENCILOP_REPLACE;
       break;
-   case GL_INCR: 
+   case GL_INCR:
       fop = STENCILOP_INCRSAT;
       break;
-   case GL_DECR: 
+   case GL_DECR:
       fop = STENCILOP_DECRSAT;
       break;
    case GL_INCR_WRAP:
-      fop = STENCILOP_INCR; 
+      fop = STENCILOP_INCR;
       break;
    case GL_DECR_WRAP:
-      fop = STENCILOP_DECR; 
+      fop = STENCILOP_DECR;
       break;
-   case GL_INVERT: 
-      fop = STENCILOP_INVERT; 
+   case GL_INVERT:
+      fop = STENCILOP_INVERT;
       break;
-   default: 
+   default:
       break;
    }
-   switch(zfail) {
-   case GL_KEEP: 
-      dfop = STENCILOP_KEEP; 
+   switch (zfail) {
+   case GL_KEEP:
+      dfop = STENCILOP_KEEP;
       break;
-   case GL_ZERO: 
-      dfop = STENCILOP_ZERO; 
+   case GL_ZERO:
+      dfop = STENCILOP_ZERO;
       break;
-   case GL_REPLACE: 
-      dfop = STENCILOP_REPLACE; 
+   case GL_REPLACE:
+      dfop = STENCILOP_REPLACE;
       break;
-   case GL_INCR: 
+   case GL_INCR:
       dfop = STENCILOP_INCRSAT;
       break;
-   case GL_DECR: 
+   case GL_DECR:
       dfop = STENCILOP_DECRSAT;
       break;
    case GL_INCR_WRAP:
-      dfop = STENCILOP_INCR; 
+      dfop = STENCILOP_INCR;
       break;
    case GL_DECR_WRAP:
-      dfop = STENCILOP_DECR; 
+      dfop = STENCILOP_DECR;
       break;
-   case GL_INVERT: 
-      dfop = STENCILOP_INVERT; 
+   case GL_INVERT:
+      dfop = STENCILOP_INVERT;
       break;
-   default: 
+   default:
       break;
    }
-   switch(zpass) {
-   case GL_KEEP: 
-      dpop = STENCILOP_KEEP; 
+   switch (zpass) {
+   case GL_KEEP:
+      dpop = STENCILOP_KEEP;
       break;
-   case GL_ZERO: 
-      dpop = STENCILOP_ZERO; 
+   case GL_ZERO:
+      dpop = STENCILOP_ZERO;
       break;
-   case GL_REPLACE: 
-      dpop = STENCILOP_REPLACE; 
+   case GL_REPLACE:
+      dpop = STENCILOP_REPLACE;
       break;
-   case GL_INCR: 
+   case GL_INCR:
       dpop = STENCILOP_INCRSAT;
       break;
-   case GL_DECR: 
+   case GL_DECR:
       dpop = STENCILOP_DECRSAT;
       break;
    case GL_INCR_WRAP:
-      dpop = STENCILOP_INCR; 
+      dpop = STENCILOP_INCR;
       break;
    case GL_DECR_WRAP:
-      dpop = STENCILOP_DECR; 
+      dpop = STENCILOP_DECR;
       break;
-   case GL_INVERT: 
-      dpop = STENCILOP_INVERT; 
+   case GL_INVERT:
+      dpop = STENCILOP_INVERT;
       break;
-   default: 
+   default:
       break;
    }
 
@@ -186,27 +190,30 @@ i830StencilOpSeparate(GLcontext *ctx, GLenum face, GLenum fail, GLenum zfail,
    I830_STATECHANGE(i830, I830_UPLOAD_CTX);
    i830->state.Ctx[I830_CTXREG_STENCILTST] &= ~(STENCIL_OPS_MASK);
    i830->state.Ctx[I830_CTXREG_STENCILTST] |= (ENABLE_STENCIL_PARMS |
-					    STENCIL_FAIL_OP(fop) |
-					    STENCIL_PASS_DEPTH_FAIL_OP(dfop) |
-					    STENCIL_PASS_DEPTH_PASS_OP(dpop));
+                                               STENCIL_FAIL_OP(fop) |
+                                               STENCIL_PASS_DEPTH_FAIL_OP
+                                               (dfop) |
+                                               STENCIL_PASS_DEPTH_PASS_OP
+                                               (dpop));
 }
 
-static void i830AlphaFunc(GLcontext *ctx, GLenum func, GLfloat ref)
+static void
+i830AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref)
 {
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   struct i830_context *i830 = i830_context(ctx);
    int test = intel_translate_compare_func(func);
    GLubyte refByte;
    GLuint refInt;
 
    UNCLAMPED_FLOAT_TO_UBYTE(refByte, ref);
-   refInt = (GLuint)refByte;
+   refInt = (GLuint) refByte;
 
    I830_STATECHANGE(i830, I830_UPLOAD_CTX);
    i830->state.Ctx[I830_CTXREG_STATE2] &= ~ALPHA_TEST_REF_MASK;
    i830->state.Ctx[I830_CTXREG_STATE2] |= (ENABLE_ALPHA_TEST_FUNC |
-					ENABLE_ALPHA_REF_VALUE |
-					ALPHA_TEST_FUNC(test) |
-					ALPHA_REF_VALUE(refInt));
+                                           ENABLE_ALPHA_REF_VALUE |
+                                           ALPHA_TEST_FUNC(test) |
+                                           ALPHA_REF_VALUE(refInt));
 }
 
 /**
@@ -219,45 +226,49 @@ static void i830AlphaFunc(GLcontext *ctx, GLenum func, GLfloat ref)
  * This function is substantially different from the old i830-specific driver.
  * I'm not sure which is correct.
  */
-static void i830EvalLogicOpBlendState(GLcontext *ctx)
+static void
+i830EvalLogicOpBlendState(GLcontext * ctx)
 {
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   struct i830_context *i830 = i830_context(ctx);
 
    I830_STATECHANGE(i830, I830_UPLOAD_CTX);
 
    if (RGBA_LOGICOP_ENABLED(ctx)) {
       i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~(ENABLE_COLOR_BLEND |
-					       ENABLE_LOGIC_OP_MASK);
+                                                  ENABLE_LOGIC_OP_MASK);
       i830->state.Ctx[I830_CTXREG_ENABLES_1] |= (DISABLE_COLOR_BLEND |
-					      ENABLE_LOGIC_OP);
-   } else if (ctx->Color.BlendEnabled) {
+                                                 ENABLE_LOGIC_OP);
+   }
+   else if (ctx->Color.BlendEnabled) {
       i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~(ENABLE_COLOR_BLEND |
-					       ENABLE_LOGIC_OP_MASK);
+                                                  ENABLE_LOGIC_OP_MASK);
       i830->state.Ctx[I830_CTXREG_ENABLES_1] |= (ENABLE_COLOR_BLEND |
-					      DISABLE_LOGIC_OP);
-   } else {
+                                                 DISABLE_LOGIC_OP);
+   }
+   else {
       i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~(ENABLE_COLOR_BLEND |
-					       ENABLE_LOGIC_OP_MASK);
+                                                  ENABLE_LOGIC_OP_MASK);
       i830->state.Ctx[I830_CTXREG_ENABLES_1] |= (DISABLE_COLOR_BLEND |
-					      DISABLE_LOGIC_OP);
+                                                 DISABLE_LOGIC_OP);
    }
 }
 
-static void i830BlendColor(GLcontext *ctx, const GLfloat color[4])
+static void
+i830BlendColor(GLcontext * ctx, const GLfloat color[4])
 {
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   struct i830_context *i830 = i830_context(ctx);
    GLubyte r, g, b, a;
 
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
+   DBG("%s\n", __FUNCTION__);
+   /* Reduce each float channel to a GLubyte, then pack as A:R:G:B below. */
    UNCLAMPED_FLOAT_TO_UBYTE(r, color[RCOMP]);
    UNCLAMPED_FLOAT_TO_UBYTE(g, color[GCOMP]);
    UNCLAMPED_FLOAT_TO_UBYTE(b, color[BCOMP]);
    UNCLAMPED_FLOAT_TO_UBYTE(a, color[ACOMP]);
 
    I830_STATECHANGE(i830, I830_UPLOAD_CTX);
-   i830->state.Ctx[I830_CTXREG_BLENDCOLOR1] = (a<<24) | (r<<16) | (g<<8) | b;
+   i830->state.Ctx[I830_CTXREG_BLENDCOLOR1] =  /* cast: a << 24 as int is UB */
+      ((GLuint) a << 24) | (r << 16) | (g << 8) | b;
 }
 
 /**
@@ -266,9 +277,10 @@ static void i830BlendColor(GLcontext *ctx, const GLfloat color[4])
  * function because some blend equations (i.e., \c GL_MIN and \c GL_MAX)
  * change the interpretation of the blend function.
  */
-static void i830_set_blend_state( GLcontext * ctx )
+static void
+i830_set_blend_state(GLcontext * ctx)
 {
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   struct i830_context *i830 = i830_context(ctx);
    int funcA;
    int funcRGB;
    int eqnA;
@@ -277,71 +289,72 @@ static void i830_set_blend_state( GLcontext * ctx )
    int s1;
 
 
-   funcRGB = SRC_BLND_FACT( intel_translate_blend_factor( ctx->Color.BlendSrcRGB ) )
-       | DST_BLND_FACT( intel_translate_blend_factor( ctx->Color.BlendDstRGB ) );
+   funcRGB =
+      SRC_BLND_FACT(intel_translate_blend_factor(ctx->Color.BlendSrcRGB))
+      | DST_BLND_FACT(intel_translate_blend_factor(ctx->Color.BlendDstRGB));
 
-   switch(ctx->Color.BlendEquationRGB) {
+   switch (ctx->Color.BlendEquationRGB) {
    case GL_FUNC_ADD:
-      eqnRGB = BLENDFUNC_ADD; 
+      eqnRGB = BLENDFUNC_ADD;
       break;
    case GL_MIN:
       eqnRGB = BLENDFUNC_MIN;
       funcRGB = SRC_BLND_FACT(BLENDFACT_ONE) | DST_BLND_FACT(BLENDFACT_ONE);
       break;
-   case GL_MAX: 
+   case GL_MAX:
       eqnRGB = BLENDFUNC_MAX;
       funcRGB = SRC_BLND_FACT(BLENDFACT_ONE) | DST_BLND_FACT(BLENDFACT_ONE);
       break;
-   case GL_FUNC_SUBTRACT: 
-      eqnRGB = BLENDFUNC_SUB; 
+   case GL_FUNC_SUBTRACT:
+      eqnRGB = BLENDFUNC_SUB;
       break;
    case GL_FUNC_REVERSE_SUBTRACT:
-      eqnRGB = BLENDFUNC_RVRSE_SUB; 
+      eqnRGB = BLENDFUNC_RVRSE_SUB;
       break;
    default:
-      fprintf( stderr, "[%s:%u] Invalid RGB blend equation (0x%04x).\n",
-	       __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB );
+      fprintf(stderr, "[%s:%u] Invalid RGB blend equation (0x%04x).\n",
+              __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB);
       return;
    }
 
 
-   funcA = SRC_ABLEND_FACT( intel_translate_blend_factor( ctx->Color.BlendSrcA ) )
-       | DST_ABLEND_FACT( intel_translate_blend_factor( ctx->Color.BlendDstA ) );
+   funcA = SRC_ABLEND_FACT(intel_translate_blend_factor(ctx->Color.BlendSrcA))
+      | DST_ABLEND_FACT(intel_translate_blend_factor(ctx->Color.BlendDstA));
 
-   switch(ctx->Color.BlendEquationA) {
+   switch (ctx->Color.BlendEquationA) {
    case GL_FUNC_ADD:
-      eqnA = BLENDFUNC_ADD; 
+      eqnA = BLENDFUNC_ADD;
       break;
-   case GL_MIN: 
+   case GL_MIN:
       eqnA = BLENDFUNC_MIN;
       funcA = SRC_BLND_FACT(BLENDFACT_ONE) | DST_BLND_FACT(BLENDFACT_ONE);
       break;
-   case GL_MAX: 
+   case GL_MAX:
       eqnA = BLENDFUNC_MAX;
       funcA = SRC_BLND_FACT(BLENDFACT_ONE) | DST_BLND_FACT(BLENDFACT_ONE);
       break;
-   case GL_FUNC_SUBTRACT: 
-      eqnA = BLENDFUNC_SUB; 
+   case GL_FUNC_SUBTRACT:
+      eqnA = BLENDFUNC_SUB;
       break;
    case GL_FUNC_REVERSE_SUBTRACT:
-      eqnA = BLENDFUNC_RVRSE_SUB; 
+      eqnA = BLENDFUNC_RVRSE_SUB;
       break;
    default:
-      fprintf( stderr, "[%s:%u] Invalid alpha blend equation (0x%04x).\n",
-	       __FUNCTION__, __LINE__, ctx->Color.BlendEquationA );
+      fprintf(stderr, "[%s:%u] Invalid alpha blend equation (0x%04x).\n",
+              __FUNCTION__, __LINE__, ctx->Color.BlendEquationA);
       return;
    }
 
    iab = eqnA | funcA
-       | _3DSTATE_INDPT_ALPHA_BLEND_CMD
-       | ENABLE_SRC_ABLEND_FACTOR | ENABLE_DST_ABLEND_FACTOR
-       | ENABLE_ALPHA_BLENDFUNC;
+      | _3DSTATE_INDPT_ALPHA_BLEND_CMD
+      | ENABLE_SRC_ABLEND_FACTOR | ENABLE_DST_ABLEND_FACTOR
+      | ENABLE_ALPHA_BLENDFUNC;
    s1 = eqnRGB | funcRGB
-       | _3DSTATE_MODES_1_CMD
-       | ENABLE_SRC_BLND_FACTOR | ENABLE_DST_BLND_FACTOR
-       | ENABLE_COLR_BLND_FUNC;
+      | _3DSTATE_MODES_1_CMD
+      | ENABLE_SRC_BLND_FACTOR | ENABLE_DST_BLND_FACTOR
+      | ENABLE_COLR_BLND_FUNC;
 
-   if ( (eqnA | funcA) != (eqnRGB | funcRGB) )
+   if ((eqnA | funcA) != (eqnRGB | funcRGB))
       iab |= ENABLE_INDPT_ALPHA_BLEND;
    else
       iab |= DISABLE_INDPT_ALPHA_BLEND;
@@ -361,70 +374,68 @@ static void i830_set_blend_state( GLcontext * ctx )
    i830EvalLogicOpBlendState(ctx);
 
    if (0) {
-      fprintf(stderr, "[%s:%u] STATE1: 0x%08x IALPHAB: 0x%08x blend is %sabled\n",
-	      __FUNCTION__, __LINE__,
-	      i830->state.Ctx[I830_CTXREG_STATE1],
-	      i830->state.Ctx[I830_CTXREG_IALPHAB],
-	      (ctx->Color.BlendEnabled) ? "en" : "dis");
+      fprintf(stderr,
+              "[%s:%u] STATE1: 0x%08x IALPHAB: 0x%08x blend is %sabled\n",
+              __FUNCTION__, __LINE__, i830->state.Ctx[I830_CTXREG_STATE1],
+              i830->state.Ctx[I830_CTXREG_IALPHAB],
+              (ctx->Color.BlendEnabled) ? "en" : "dis");
    }
 }
 
 
-static void i830BlendEquationSeparate(GLcontext *ctx, GLenum modeRGB,
-				      GLenum modeA) 
+static void
+i830BlendEquationSeparate(GLcontext * ctx, GLenum modeRGB, GLenum modeA)
 {
-   if (INTEL_DEBUG&DEBUG_DRI)
-     fprintf(stderr, "%s -> %s, %s\n", __FUNCTION__,
-	     _mesa_lookup_enum_by_nr(modeRGB),
-	     _mesa_lookup_enum_by_nr(modeA));
+   DBG("%s -> %s, %s\n", __FUNCTION__,
+       _mesa_lookup_enum_by_nr(modeRGB),
+       _mesa_lookup_enum_by_nr(modeA));
 
    (void) modeRGB;
    (void) modeA;
-   i830_set_blend_state( ctx );
+   i830_set_blend_state(ctx);
 }
 
 
-static void i830BlendFuncSeparate(GLcontext *ctx, GLenum sfactorRGB, 
-				  GLenum dfactorRGB, GLenum sfactorA,
-				  GLenum dfactorA )
+static void
+i830BlendFuncSeparate(GLcontext * ctx, GLenum sfactorRGB,
+                      GLenum dfactorRGB, GLenum sfactorA, GLenum dfactorA)
 {
-   if (INTEL_DEBUG&DEBUG_DRI)
-     fprintf(stderr, "%s -> RGB(%s, %s) A(%s, %s)\n", __FUNCTION__,
-	     _mesa_lookup_enum_by_nr(sfactorRGB),
-	     _mesa_lookup_enum_by_nr(dfactorRGB),
-	     _mesa_lookup_enum_by_nr(sfactorA),
-	     _mesa_lookup_enum_by_nr(dfactorA));
+   DBG("%s -> RGB(%s, %s) A(%s, %s)\n", __FUNCTION__,
+       _mesa_lookup_enum_by_nr(sfactorRGB),
+       _mesa_lookup_enum_by_nr(dfactorRGB),
+       _mesa_lookup_enum_by_nr(sfactorA),
+       _mesa_lookup_enum_by_nr(dfactorA));
 
    (void) sfactorRGB;
    (void) dfactorRGB;
    (void) sfactorA;
    (void) dfactorA;
-   i830_set_blend_state( ctx );
+   i830_set_blend_state(ctx);
 }
 
 
 
-static void i830DepthFunc(GLcontext *ctx, GLenum func)
+static void
+i830DepthFunc(GLcontext * ctx, GLenum func)
 {
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   struct i830_context *i830 = i830_context(ctx);
    int test = intel_translate_compare_func(func);
 
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
+   DBG("%s\n", __FUNCTION__);
+   
    I830_STATECHANGE(i830, I830_UPLOAD_CTX);
    i830->state.Ctx[I830_CTXREG_STATE3] &= ~DEPTH_TEST_FUNC_MASK;
    i830->state.Ctx[I830_CTXREG_STATE3] |= (ENABLE_DEPTH_TEST_FUNC |
-				       DEPTH_TEST_FUNC(test));
+                                           DEPTH_TEST_FUNC(test));
 }
 
-static void i830DepthMask(GLcontext *ctx, GLboolean flag)
+static void
+i830DepthMask(GLcontext * ctx, GLboolean flag)
 {
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
-
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s flag (%d)\n", __FUNCTION__, flag);
+   struct i830_context *i830 = i830_context(ctx);
 
+   DBG("%s flag (%d)\n", __FUNCTION__, flag);
+   
    I830_STATECHANGE(i830, I830_UPLOAD_CTX);
 
    i830->state.Ctx[I830_CTXREG_ENABLES_2] &= ~ENABLE_DIS_DEPTH_WRITE_MASK;
@@ -441,14 +452,15 @@ static void i830DepthMask(GLcontext *ctx, GLboolean flag)
  * The i830 supports a 4x4 stipple natively, GL wants 32x32.
  * Fortunately stipple is usually a repeating pattern.
  */
-static void i830PolygonStipple( GLcontext *ctx, const GLubyte *mask )
+static void
+i830PolygonStipple(GLcontext * ctx, const GLubyte * mask)
 {
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   struct i830_context *i830 = i830_context(ctx);
    const GLubyte *m = mask;
    GLubyte p[4];
-   int i,j,k;
+   int i, j, k;
    int active = (ctx->Polygon.StippleFlag &&
-		 i830->intel.reduced_primitive == GL_TRIANGLES);
+                 i830->intel.reduced_primitive == GL_TRIANGLES);
    GLuint newMask;
 
    if (active) {
@@ -456,23 +468,26 @@ static void i830PolygonStipple( GLcontext *ctx, const GLubyte *mask )
       i830->state.Stipple[I830_STPREG_ST1] &= ~ST1_ENABLE;
    }
 
-   p[0] = mask[12] & 0xf; p[0] |= p[0] << 4;
-   p[1] = mask[8] & 0xf; p[1] |= p[1] << 4;
-   p[2] = mask[4] & 0xf; p[2] |= p[2] << 4;
-   p[3] = mask[0] & 0xf; p[3] |= p[3] << 4;
-
-   for (k = 0 ; k < 8 ; k++)
-      for (j = 3 ; j >= 0; j--)
-	 for (i = 0 ; i < 4 ; i++, m++)
-	    if (*m != p[j]) {
-	       i830->intel.hw_stipple = 0;
-	       return;
-	    }
+   p[0] = mask[12] & 0xf;
+   p[0] |= p[0] << 4;
+   p[1] = mask[8] & 0xf;
+   p[1] |= p[1] << 4;
+   p[2] = mask[4] & 0xf;
+   p[2] |= p[2] << 4;
+   p[3] = mask[0] & 0xf;
+   p[3] |= p[3] << 4;
+
+   for (k = 0; k < 8; k++)
+      for (j = 3; j >= 0; j--)
+         for (i = 0; i < 4; i++, m++)
+            if (*m != p[j]) {
+               i830->intel.hw_stipple = 0;
+               return;
+            }
 
    newMask = (((p[0] & 0xf) << 0) |
-	      ((p[1] & 0xf) << 4) |
-	      ((p[2] & 0xf) << 8) |
-	      ((p[3] & 0xf) << 12));
+              ((p[1] & 0xf) << 4) |
+              ((p[2] & 0xf) << 8) | ((p[3] & 0xf) << 12));
 
 
    if (newMask == 0xffff || newMask == 0x0) {
@@ -493,49 +508,54 @@ static void i830PolygonStipple( GLcontext *ctx, const GLubyte *mask )
 /* =============================================================
  * Hardware clipping
  */
-static void i830Scissor(GLcontext *ctx, GLint x, GLint y, 
-			GLsizei w, GLsizei h)
+static void
+i830Scissor(GLcontext * ctx, GLint x, GLint y, GLsizei w, GLsizei h)
 {
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
-   intelScreenPrivate *screen = i830->intel.intelScreen;
+   struct i830_context *i830 = i830_context(ctx);
    int x1, y1, x2, y2;
 
-   if (!i830->intel.driDrawable)
+   if (!ctx->DrawBuffer)
       return;
 
-   x1 = x;
-   y1 = i830->intel.driDrawable->h - (y + h);
-   x2 = x + w - 1;
-   y2 = y1 + h - 1;
+   DBG("%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h);
 
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "[%s] x(%d) y(%d) w(%d) h(%d)\n", __FUNCTION__,
-	      x, y, w, h);
-
-   if (x1 < 0) x1 = 0;
-   if (y1 < 0) y1 = 0;
-   if (x2 < 0) x2 = 0;
-   if (y2 < 0) y2 = 0;
-
-   if (x2 >= screen->width) x2 = screen->width-1;
-   if (y2 >= screen->height) y2 = screen->height-1;
-   if (x1 >= screen->width) x1 = screen->width-1;
-   if (y1 >= screen->height) y1 = screen->height-1;
+   if (ctx->DrawBuffer->Name == 0) {
+      x1 = x;
+      y1 = ctx->DrawBuffer->Height - (y + h);
+      x2 = x + w - 1;
+      y2 = y1 + h - 1;
+      DBG("%s %d..%d,%d..%d (inverted)\n", __FUNCTION__, x1, x2, y1, y2);
+   }
+   else {
+      /* FBO - not inverted
+       */
+      x1 = x;
+      y1 = y;
+      x2 = x + w - 1;
+      y2 = y + h - 1;
+      DBG("%s %d..%d,%d..%d (not inverted)\n", __FUNCTION__, x1, x2, y1, y2);
+   }
 
+   x1 = CLAMP(x1, 0, ctx->DrawBuffer->Width - 1);
+   y1 = CLAMP(y1, 0, ctx->DrawBuffer->Height - 1);
+   x2 = CLAMP(x2, 0, ctx->DrawBuffer->Width - 1);
+   y2 = CLAMP(y2, 0, ctx->DrawBuffer->Height - 1);
+   
+   DBG("%s %d..%d,%d..%d (clamped)\n", __FUNCTION__, x1, x2, y1, y2);
 
    I830_STATECHANGE(i830, I830_UPLOAD_BUFFERS);
    i830->state.Buffer[I830_DESTREG_SR1] = (y1 << 16) | (x1 & 0xffff);
    i830->state.Buffer[I830_DESTREG_SR2] = (y2 << 16) | (x2 & 0xffff);
 }
 
-static void i830LogicOp(GLcontext *ctx, GLenum opcode)
+static void
+i830LogicOp(GLcontext * ctx, GLenum opcode)
 {
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
-   int tmp = intel_translate_logic_op( opcode );
-
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s\n", __FUNCTION__);
+   struct i830_context *i830 = i830_context(ctx);
+   int tmp = intel_translate_logic_op(opcode);
 
+   DBG("%s\n", __FUNCTION__);
+   
    I830_STATECHANGE(i830, I830_UPLOAD_CTX);
    i830->state.Ctx[I830_CTXREG_STATE4] &= ~LOGICOP_MASK;
    i830->state.Ctx[I830_CTXREG_STATE4] |= LOGIC_OP_FUNC(tmp);
@@ -543,14 +563,14 @@ static void i830LogicOp(GLcontext *ctx, GLenum opcode)
 
 
 
-static void i830CullFaceFrontFace(GLcontext *ctx, GLenum unused)
+static void
+i830CullFaceFrontFace(GLcontext * ctx, GLenum unused)
 {
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   struct i830_context *i830 = i830_context(ctx);
    GLuint mode;
 
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
+   DBG("%s\n", __FUNCTION__);
+   
    if (!ctx->Polygon.CullFlag) {
       mode = CULLMODE_NONE;
    }
@@ -558,9 +578,9 @@ static void i830CullFaceFrontFace(GLcontext *ctx, GLenum unused)
       mode = CULLMODE_CW;
 
       if (ctx->Polygon.CullFaceMode == GL_FRONT)
-	 mode ^= (CULLMODE_CW ^ CULLMODE_CCW);
+         mode ^= (CULLMODE_CW ^ CULLMODE_CCW);
       if (ctx->Polygon.FrontFace != GL_CCW)
-	 mode ^= (CULLMODE_CW ^ CULLMODE_CCW);
+         mode ^= (CULLMODE_CW ^ CULLMODE_CCW);
    }
    else {
       mode = CULLMODE_BOTH;
@@ -571,18 +591,18 @@ static void i830CullFaceFrontFace(GLcontext *ctx, GLenum unused)
    i830->state.Ctx[I830_CTXREG_STATE3] |= ENABLE_CULL_MODE | mode;
 }
 
-static void i830LineWidth( GLcontext *ctx, GLfloat widthf )
+static void
+i830LineWidth(GLcontext * ctx, GLfloat widthf)
 {
-   i830ContextPtr i830 = I830_CONTEXT( ctx );
+   struct i830_context *i830 = i830_context(ctx);
    int width;
    int state5;
 
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   width = (int)(widthf * 2);
-   CLAMP_SELF(width, 1, 15);
+   DBG("%s\n", __FUNCTION__);
    
+   width = (int) (widthf * 2);
+   CLAMP_SELF(width, 1, 15);
+
    state5 = i830->state.Ctx[I830_CTXREG_STATE5] & ~FIXED_LINE_WIDTH_MASK;
    state5 |= (ENABLE_FIXED_LINE_WIDTH | FIXED_LINE_WIDTH(width));
 
@@ -592,19 +612,19 @@ static void i830LineWidth( GLcontext *ctx, GLfloat widthf )
    }
 }
 
-static void i830PointSize(GLcontext *ctx, GLfloat size)
+static void
+i830PointSize(GLcontext * ctx, GLfloat size)
 {
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
-   GLint point_size = (int)size;
-
-   if (INTEL_DEBUG&DEBUG_DRI)
-     fprintf(stderr, "%s\n", __FUNCTION__);
+   struct i830_context *i830 = i830_context(ctx);
+   GLint point_size = (int) size;
 
+   DBG("%s\n", __FUNCTION__);
+   
    CLAMP_SELF(point_size, 1, 256);
    I830_STATECHANGE(i830, I830_UPLOAD_CTX);
    i830->state.Ctx[I830_CTXREG_STATE5] &= ~FIXED_POINT_WIDTH_MASK;
    i830->state.Ctx[I830_CTXREG_STATE5] |= (ENABLE_FIXED_POINT_WIDTH |
-				       FIXED_POINT_WIDTH(point_size));
+                                           FIXED_POINT_WIDTH(point_size));
 }
 
 
@@ -612,23 +632,21 @@ static void i830PointSize(GLcontext *ctx, GLfloat size)
  * Color masks
  */
 
-static void i830ColorMask(GLcontext *ctx,
-			  GLboolean r, GLboolean g,
-			  GLboolean b, GLboolean a)
+static void
+i830ColorMask(GLcontext * ctx,
+              GLboolean r, GLboolean g, GLboolean b, GLboolean a)
 {
-   i830ContextPtr i830 = I830_CONTEXT( ctx );
+   struct i830_context *i830 = i830_context(ctx);
    GLuint tmp = 0;
 
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s r(%d) g(%d) b(%d) a(%d)\n", __FUNCTION__, r, g, b, a);
+   DBG("%s r(%d) g(%d) b(%d) a(%d)\n", __FUNCTION__, r, g, b, a);
 
    tmp = ((i830->state.Ctx[I830_CTXREG_ENABLES_2] & ~WRITEMASK_MASK) |
-	  ENABLE_COLOR_MASK |
-	  ENABLE_COLOR_WRITE |
-	  ((!r) << WRITEMASK_RED_SHIFT) |
-	  ((!g) << WRITEMASK_GREEN_SHIFT) |
-	  ((!b) << WRITEMASK_BLUE_SHIFT) |
-	  ((!a) << WRITEMASK_ALPHA_SHIFT));
+          ENABLE_COLOR_MASK |
+          ENABLE_COLOR_WRITE |
+          ((!r) << WRITEMASK_RED_SHIFT) |
+          ((!g) << WRITEMASK_GREEN_SHIFT) |
+          ((!b) << WRITEMASK_BLUE_SHIFT) | ((!a) << WRITEMASK_ALPHA_SHIFT));
 
    if (tmp != i830->state.Ctx[I830_CTXREG_ENABLES_2]) {
       I830_STATECHANGE(i830, I830_UPLOAD_CTX);
@@ -636,9 +654,10 @@ static void i830ColorMask(GLcontext *ctx,
    }
 }
 
-static void update_specular( GLcontext *ctx )
+static void
+update_specular(GLcontext * ctx)
 {
-   i830ContextPtr i830 = I830_CONTEXT( ctx );
+   struct i830_context *i830 = i830_context(ctx);
 
    I830_STATECHANGE(i830, I830_UPLOAD_CTX);
    i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_SPEC_ADD_MASK;
@@ -649,22 +668,22 @@ static void update_specular( GLcontext *ctx )
       i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_SPEC_ADD;
 }
 
-static void i830LightModelfv(GLcontext *ctx, GLenum pname, 
-			     const GLfloat *param)
+static void
+i830LightModelfv(GLcontext * ctx, GLenum pname, const GLfloat * param)
 {
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
+   DBG("%s\n", __FUNCTION__);
+   
    if (pname == GL_LIGHT_MODEL_COLOR_CONTROL) {
-      update_specular( ctx );
+      update_specular(ctx);
    }
 }
 
 /* In Mesa 3.5 we can reliably do native flatshading.
  */
-static void i830ShadeModel(GLcontext *ctx, GLenum mode)
+static void
+i830ShadeModel(GLcontext * ctx, GLenum mode)
 {
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   struct i830_context *i830 = i830_context(ctx);
    I830_STATECHANGE(i830, I830_UPLOAD_CTX);
 
 
@@ -673,58 +692,62 @@ static void i830ShadeModel(GLcontext *ctx, GLenum mode)
    i830->state.Ctx[I830_CTXREG_STATE3] &= ~SHADE_MODE_MASK;
 
    if (mode == GL_FLAT) {
-     i830->state.Ctx[I830_CTXREG_STATE3] |= (ALPHA_SHADE_MODE(SHADE_MODE_FLAT) |
-					  FOG_SHADE_MODE(SHADE_MODE_FLAT) |
-					  SPEC_SHADE_MODE(SHADE_MODE_FLAT) |
-					  COLOR_SHADE_MODE(SHADE_MODE_FLAT));
-   } else {
-     i830->state.Ctx[I830_CTXREG_STATE3] |= (ALPHA_SHADE_MODE(SHADE_MODE_LINEAR) |
-					  FOG_SHADE_MODE(SHADE_MODE_LINEAR) |
-					  SPEC_SHADE_MODE(SHADE_MODE_LINEAR) |
-					  COLOR_SHADE_MODE(SHADE_MODE_LINEAR));
+      i830->state.Ctx[I830_CTXREG_STATE3] |=
+         (ALPHA_SHADE_MODE(SHADE_MODE_FLAT) | FOG_SHADE_MODE(SHADE_MODE_FLAT)
+          | SPEC_SHADE_MODE(SHADE_MODE_FLAT) |
+          COLOR_SHADE_MODE(SHADE_MODE_FLAT));
+   }
+   else {
+      i830->state.Ctx[I830_CTXREG_STATE3] |=
+         (ALPHA_SHADE_MODE(SHADE_MODE_LINEAR) |
+          FOG_SHADE_MODE(SHADE_MODE_LINEAR) |
+          SPEC_SHADE_MODE(SHADE_MODE_LINEAR) |
+          COLOR_SHADE_MODE(SHADE_MODE_LINEAR));
    }
 }
 
 /* =============================================================
  * Fog
  */
-static void i830Fogfv(GLcontext *ctx, GLenum pname, const GLfloat *param)
+static void
+i830Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param)
 {
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
-
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s\n", __FUNCTION__);
+   struct i830_context *i830 = i830_context(ctx);
 
-   if (pname == GL_FOG_COLOR) {      
-      GLuint color = (((GLubyte)(ctx->Fog.Color[0]*255.0F) << 16) |
-		      ((GLubyte)(ctx->Fog.Color[1]*255.0F) << 8) |
-		      ((GLubyte)(ctx->Fog.Color[2]*255.0F) << 0));
+   DBG("%s\n", __FUNCTION__);
+   
+   if (pname == GL_FOG_COLOR) {
+      GLuint color = (((GLubyte) (ctx->Fog.Color[0] * 255.0F) << 16) |
+                      ((GLubyte) (ctx->Fog.Color[1] * 255.0F) << 8) |
+                      ((GLubyte) (ctx->Fog.Color[2] * 255.0F) << 0));
 
       I830_STATECHANGE(i830, I830_UPLOAD_CTX);
-      i830->state.Ctx[I830_CTXREG_FOGCOLOR] = (_3DSTATE_FOG_COLOR_CMD | color);
+      i830->state.Ctx[I830_CTXREG_FOGCOLOR] =
+         (_3DSTATE_FOG_COLOR_CMD | color);
    }
 }
 
 /* =============================================================
  */
 
-static void i830Enable(GLcontext *ctx, GLenum cap, GLboolean state)
+static void
+i830Enable(GLcontext * ctx, GLenum cap, GLboolean state)
 {
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   struct i830_context *i830 = i830_context(ctx);
 
-   switch(cap) {
+   switch (cap) {
    case GL_LIGHTING:
    case GL_COLOR_SUM:
-      update_specular( ctx );
+      update_specular(ctx);
       break;
 
    case GL_ALPHA_TEST:
       I830_STATECHANGE(i830, I830_UPLOAD_CTX);
       i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_DIS_ALPHA_TEST_MASK;
       if (state)
-	 i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_ALPHA_TEST;
+         i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_ALPHA_TEST;
       else
-	 i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_ALPHA_TEST;
+         i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_ALPHA_TEST;
 
       break;
 
@@ -737,18 +760,18 @@ static void i830Enable(GLcontext *ctx, GLenum cap, GLboolean state)
 
       /* Logicop doesn't seem to work at 16bpp:
        */
-      if (i830->intel.intelScreen->cpp == 2)
-	 FALLBACK( &i830->intel, I830_FALLBACK_LOGICOP, state );
+      if (i830->intel.ctx.Visual.rgbBits == 16)
+         FALLBACK(&i830->intel, I830_FALLBACK_LOGICOP, state);
       break;
- 
+
    case GL_DITHER:
       I830_STATECHANGE(i830, I830_UPLOAD_CTX);
       i830->state.Ctx[I830_CTXREG_ENABLES_2] &= ~ENABLE_DITHER;
 
       if (state)
-	 i830->state.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_DITHER;
+         i830->state.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_DITHER;
       else
-	 i830->state.Ctx[I830_CTXREG_ENABLES_2] |= DISABLE_DITHER;
+         i830->state.Ctx[I830_CTXREG_ENABLES_2] |= DISABLE_DITHER;
       break;
 
    case GL_DEPTH_TEST:
@@ -756,46 +779,44 @@ static void i830Enable(GLcontext *ctx, GLenum cap, GLboolean state)
       i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_DIS_DEPTH_TEST_MASK;
 
       if (state)
-	 i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_DEPTH_TEST;
+         i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_DEPTH_TEST;
       else
-	 i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_DEPTH_TEST;
+         i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_DEPTH_TEST;
 
       /* Also turn off depth writes when GL_DEPTH_TEST is disabled:
        */
-      i830DepthMask( ctx, ctx->Depth.Mask );
+      i830DepthMask(ctx, ctx->Depth.Mask);
       break;
 
    case GL_SCISSOR_TEST:
       I830_STATECHANGE(i830, I830_UPLOAD_BUFFERS);
-      
+
       if (state)
-	 i830->state.Buffer[I830_DESTREG_SENABLE] = 
-	    (_3DSTATE_SCISSOR_ENABLE_CMD |
-	     ENABLE_SCISSOR_RECT);
+         i830->state.Buffer[I830_DESTREG_SENABLE] =
+            (_3DSTATE_SCISSOR_ENABLE_CMD | ENABLE_SCISSOR_RECT);
       else
-	 i830->state.Buffer[I830_DESTREG_SENABLE] = 
-	    (_3DSTATE_SCISSOR_ENABLE_CMD |
-	     DISABLE_SCISSOR_RECT);
+         i830->state.Buffer[I830_DESTREG_SENABLE] =
+            (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
 
       break;
 
    case GL_LINE_SMOOTH:
       I830_STATECHANGE(i830, I830_UPLOAD_CTX);
-      
+
       i830->state.Ctx[I830_CTXREG_AA] &= ~AA_LINE_ENABLE;
       if (state)
-	 i830->state.Ctx[I830_CTXREG_AA] |= AA_LINE_ENABLE;
+         i830->state.Ctx[I830_CTXREG_AA] |= AA_LINE_ENABLE;
       else
-	 i830->state.Ctx[I830_CTXREG_AA] |= AA_LINE_DISABLE;
+         i830->state.Ctx[I830_CTXREG_AA] |= AA_LINE_DISABLE;
       break;
 
    case GL_FOG:
       I830_STATECHANGE(i830, I830_UPLOAD_CTX);
       i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_DIS_FOG_MASK;
       if (state)
-	 i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_FOG;
+         i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_FOG;
       else
-	 i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_FOG;
+         i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_FOG;
       break;
 
    case GL_CULL_FACE:
@@ -806,20 +827,32 @@ static void i830Enable(GLcontext *ctx, GLenum cap, GLboolean state)
       break;
 
    case GL_STENCIL_TEST:
-      if (i830->intel.hw_stencil) {
-	 I830_STATECHANGE(i830, I830_UPLOAD_CTX);
-
-	 if (state) {
-	    i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_STENCIL_TEST;
-	    i830->state.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_STENCIL_WRITE;
-	 } else {
-	    i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_STENCIL_TEST;
-	    i830->state.Ctx[I830_CTXREG_ENABLES_2] &= ~ENABLE_STENCIL_WRITE;
-	    i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_STENCIL_TEST;
-	    i830->state.Ctx[I830_CTXREG_ENABLES_2] |= DISABLE_STENCIL_WRITE;
-	 }
-      } else {
-	 FALLBACK( &i830->intel, I830_FALLBACK_STENCIL, state );
+      {
+         GLboolean hw_stencil = GL_FALSE;
+         if (ctx->DrawBuffer) {
+            struct intel_renderbuffer *irbStencil
+               = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
+            hw_stencil = (irbStencil && irbStencil->region);
+         }
+         if (hw_stencil) {
+            I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+
+            if (state) {
+               i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_STENCIL_TEST;
+               i830->state.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_STENCIL_WRITE;
+            }
+            else {
+               i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_STENCIL_TEST;
+               i830->state.Ctx[I830_CTXREG_ENABLES_2] &=
+                  ~ENABLE_STENCIL_WRITE;
+               i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_STENCIL_TEST;
+               i830->state.Ctx[I830_CTXREG_ENABLES_2] |=
+                  DISABLE_STENCIL_WRITE;
+            }
+         }
+         else {
+            FALLBACK(&i830->intel, I830_FALLBACK_STENCIL, state);
+         }
       }
       break;
 
@@ -828,13 +861,12 @@ static void i830Enable(GLcontext *ctx, GLenum cap, GLboolean state)
        * I'll do more testing later to find out exactly which hardware
        * supports it.  Disabled for now.
        */
-      if (i830->intel.hw_stipple && 
-	  i830->intel.reduced_primitive == GL_TRIANGLES)
-      {
-	 I830_STATECHANGE(i830, I830_UPLOAD_STIPPLE);
-	 i830->state.Stipple[I830_STPREG_ST1] &= ~ST1_ENABLE;
-	 if (state)
-	    i830->state.Stipple[I830_STPREG_ST1] |= ST1_ENABLE;
+      if (i830->intel.hw_stipple &&
+          i830->intel.reduced_primitive == GL_TRIANGLES) {
+         I830_STATECHANGE(i830, I830_UPLOAD_STIPPLE);
+         i830->state.Stipple[I830_STPREG_ST1] &= ~ST1_ENABLE;
+         if (state)
+            i830->state.Stipple[I830_STPREG_ST1] |= ST1_ENABLE;
       }
       break;
 
@@ -844,206 +876,172 @@ static void i830Enable(GLcontext *ctx, GLenum cap, GLboolean state)
 }
 
 
-static void i830_init_packets( i830ContextPtr i830 )
+static void
+i830_init_packets(struct i830_context *i830)
 {
-   intelScreenPrivate *screen = i830->intel.intelScreen;
-
    /* Zero all state */
    memset(&i830->state, 0, sizeof(i830->state));
 
    /* Set default blend state */
    i830->state.TexBlend[0][0] = (_3DSTATE_MAP_BLEND_OP_CMD(0) |
-				  TEXPIPE_COLOR |
-				  ENABLE_TEXOUTPUT_WRT_SEL |
-				  TEXOP_OUTPUT_CURRENT |
-				  DISABLE_TEX_CNTRL_STAGE |
-				  TEXOP_SCALE_1X |
-				  TEXOP_MODIFY_PARMS |
-				  TEXOP_LAST_STAGE |
-				  TEXBLENDOP_ARG1);
+                                 TEXPIPE_COLOR |
+                                 ENABLE_TEXOUTPUT_WRT_SEL |
+                                 TEXOP_OUTPUT_CURRENT |
+                                 DISABLE_TEX_CNTRL_STAGE |
+                                 TEXOP_SCALE_1X |
+                                 TEXOP_MODIFY_PARMS |
+                                 TEXOP_LAST_STAGE | TEXBLENDOP_ARG1);
    i830->state.TexBlend[0][1] = (_3DSTATE_MAP_BLEND_OP_CMD(0) |
-				  TEXPIPE_ALPHA |
-				  ENABLE_TEXOUTPUT_WRT_SEL |
-				  TEXOP_OUTPUT_CURRENT |
-				  TEXOP_SCALE_1X |
-				  TEXOP_MODIFY_PARMS |
-				  TEXBLENDOP_ARG1);
+                                 TEXPIPE_ALPHA |
+                                 ENABLE_TEXOUTPUT_WRT_SEL |
+                                 TEXOP_OUTPUT_CURRENT |
+                                 TEXOP_SCALE_1X |
+                                 TEXOP_MODIFY_PARMS | TEXBLENDOP_ARG1);
    i830->state.TexBlend[0][2] = (_3DSTATE_MAP_BLEND_ARG_CMD(0) |
-				  TEXPIPE_COLOR |
-				  TEXBLEND_ARG1 |
-				  TEXBLENDARG_MODIFY_PARMS |
-				  TEXBLENDARG_DIFFUSE);
+                                 TEXPIPE_COLOR |
+                                 TEXBLEND_ARG1 |
+                                 TEXBLENDARG_MODIFY_PARMS |
+                                 TEXBLENDARG_DIFFUSE);
    i830->state.TexBlend[0][3] = (_3DSTATE_MAP_BLEND_ARG_CMD(0) |
-				  TEXPIPE_ALPHA |
-				  TEXBLEND_ARG1 |
-				  TEXBLENDARG_MODIFY_PARMS |
-				  TEXBLENDARG_DIFFUSE);
+                                 TEXPIPE_ALPHA |
+                                 TEXBLEND_ARG1 |
+                                 TEXBLENDARG_MODIFY_PARMS |
+                                 TEXBLENDARG_DIFFUSE);
 
    i830->state.TexBlendWordsUsed[0] = 4;
 
 
-   i830->state.Ctx[I830_CTXREG_VF] =  0;
+   i830->state.Ctx[I830_CTXREG_VF] = 0;
    i830->state.Ctx[I830_CTXREG_VF2] = 0;
 
    i830->state.Ctx[I830_CTXREG_AA] = (_3DSTATE_AA_CMD |
-				      AA_LINE_ECAAR_WIDTH_ENABLE |
-				      AA_LINE_ECAAR_WIDTH_1_0 |
-				      AA_LINE_REGION_WIDTH_ENABLE |
-				      AA_LINE_REGION_WIDTH_1_0 | 
-				      AA_LINE_DISABLE);
+                                      AA_LINE_ECAAR_WIDTH_ENABLE |
+                                      AA_LINE_ECAAR_WIDTH_1_0 |
+                                      AA_LINE_REGION_WIDTH_ENABLE |
+                                      AA_LINE_REGION_WIDTH_1_0 |
+                                      AA_LINE_DISABLE);
 
    i830->state.Ctx[I830_CTXREG_ENABLES_1] = (_3DSTATE_ENABLES_1_CMD |
-					     DISABLE_LOGIC_OP |
-					     DISABLE_STENCIL_TEST |
-					     DISABLE_DEPTH_BIAS |
-					     DISABLE_SPEC_ADD |
-					     DISABLE_FOG |
-					     DISABLE_ALPHA_TEST |
-					     DISABLE_COLOR_BLEND |
-					     DISABLE_DEPTH_TEST);
-
+                                             DISABLE_LOGIC_OP |
+                                             DISABLE_STENCIL_TEST |
+                                             DISABLE_DEPTH_BIAS |
+                                             DISABLE_SPEC_ADD |
+                                             DISABLE_FOG |
+                                             DISABLE_ALPHA_TEST |
+                                             DISABLE_COLOR_BLEND |
+                                             DISABLE_DEPTH_TEST);
+
+#if 000                         /* XXX all the stencil enable state is set in i830Enable(), right? */
    if (i830->intel.hw_stencil) {
       i830->state.Ctx[I830_CTXREG_ENABLES_2] = (_3DSTATE_ENABLES_2_CMD |
-						ENABLE_STENCIL_WRITE |
-						ENABLE_TEX_CACHE |
-						ENABLE_DITHER |
-						ENABLE_COLOR_MASK |
-						/* set no color comps disabled */
-						ENABLE_COLOR_WRITE |
-						ENABLE_DEPTH_WRITE);
-   } else {
+                                                ENABLE_STENCIL_WRITE |
+                                                ENABLE_TEX_CACHE |
+                                                ENABLE_DITHER |
+                                                ENABLE_COLOR_MASK |
+                                                /* set no color comps disabled */
+                                                ENABLE_COLOR_WRITE |
+                                                ENABLE_DEPTH_WRITE);
+   }
+   else
+#endif
+   {
       i830->state.Ctx[I830_CTXREG_ENABLES_2] = (_3DSTATE_ENABLES_2_CMD |
-						DISABLE_STENCIL_WRITE |
-						ENABLE_TEX_CACHE |
-						ENABLE_DITHER |
-						ENABLE_COLOR_MASK |
-						/* set no color comps disabled */
-						ENABLE_COLOR_WRITE |
-						ENABLE_DEPTH_WRITE);
+                                                DISABLE_STENCIL_WRITE |
+                                                ENABLE_TEX_CACHE |
+                                                ENABLE_DITHER |
+                                                ENABLE_COLOR_MASK |
+                                                /* set no color comps disabled */
+                                                ENABLE_COLOR_WRITE |
+                                                ENABLE_DEPTH_WRITE);
    }
 
    i830->state.Ctx[I830_CTXREG_STATE1] = (_3DSTATE_MODES_1_CMD |
-					  ENABLE_COLR_BLND_FUNC |
-					  BLENDFUNC_ADD |
-					  ENABLE_SRC_BLND_FACTOR |
-					  SRC_BLND_FACT(BLENDFACT_ONE) | 
-					  ENABLE_DST_BLND_FACTOR |
-					  DST_BLND_FACT(BLENDFACT_ZERO) );
+                                          ENABLE_COLR_BLND_FUNC |
+                                          BLENDFUNC_ADD |
+                                          ENABLE_SRC_BLND_FACTOR |
+                                          SRC_BLND_FACT(BLENDFACT_ONE) |
+                                          ENABLE_DST_BLND_FACTOR |
+                                          DST_BLND_FACT(BLENDFACT_ZERO));
 
    i830->state.Ctx[I830_CTXREG_STATE2] = (_3DSTATE_MODES_2_CMD |
-					  ENABLE_GLOBAL_DEPTH_BIAS | 
-					  GLOBAL_DEPTH_BIAS(0) |
-					  ENABLE_ALPHA_TEST_FUNC | 
-					  ALPHA_TEST_FUNC(COMPAREFUNC_ALWAYS) |
-					  ALPHA_REF_VALUE(0) );
+                                          ENABLE_GLOBAL_DEPTH_BIAS |
+                                          GLOBAL_DEPTH_BIAS(0) |
+                                          ENABLE_ALPHA_TEST_FUNC |
+                                          ALPHA_TEST_FUNC(COMPAREFUNC_ALWAYS)
+                                          | ALPHA_REF_VALUE(0));
 
    i830->state.Ctx[I830_CTXREG_STATE3] = (_3DSTATE_MODES_3_CMD |
-					  ENABLE_DEPTH_TEST_FUNC |
-					  DEPTH_TEST_FUNC(COMPAREFUNC_LESS) |
-					  ENABLE_ALPHA_SHADE_MODE |
-					  ALPHA_SHADE_MODE(SHADE_MODE_LINEAR) |
-					  ENABLE_FOG_SHADE_MODE |
-					  FOG_SHADE_MODE(SHADE_MODE_LINEAR) |
-					  ENABLE_SPEC_SHADE_MODE |
-					  SPEC_SHADE_MODE(SHADE_MODE_LINEAR) |
-					  ENABLE_COLOR_SHADE_MODE |
-					  COLOR_SHADE_MODE(SHADE_MODE_LINEAR) |
-					  ENABLE_CULL_MODE |
-					  CULLMODE_NONE);
+                                          ENABLE_DEPTH_TEST_FUNC |
+                                          DEPTH_TEST_FUNC(COMPAREFUNC_LESS) |
+                                          ENABLE_ALPHA_SHADE_MODE |
+                                          ALPHA_SHADE_MODE(SHADE_MODE_LINEAR)
+                                          | ENABLE_FOG_SHADE_MODE |
+                                          FOG_SHADE_MODE(SHADE_MODE_LINEAR) |
+                                          ENABLE_SPEC_SHADE_MODE |
+                                          SPEC_SHADE_MODE(SHADE_MODE_LINEAR) |
+                                          ENABLE_COLOR_SHADE_MODE |
+                                          COLOR_SHADE_MODE(SHADE_MODE_LINEAR)
+                                          | ENABLE_CULL_MODE | CULLMODE_NONE);
 
    i830->state.Ctx[I830_CTXREG_STATE4] = (_3DSTATE_MODES_4_CMD |
-					  ENABLE_LOGIC_OP_FUNC |
-					  LOGIC_OP_FUNC(LOGICOP_COPY) |
-					  ENABLE_STENCIL_TEST_MASK |
-					  STENCIL_TEST_MASK(0xff) |
-					  ENABLE_STENCIL_WRITE_MASK |
-					  STENCIL_WRITE_MASK(0xff));
+                                          ENABLE_LOGIC_OP_FUNC |
+                                          LOGIC_OP_FUNC(LOGICOP_COPY) |
+                                          ENABLE_STENCIL_TEST_MASK |
+                                          STENCIL_TEST_MASK(0xff) |
+                                          ENABLE_STENCIL_WRITE_MASK |
+                                          STENCIL_WRITE_MASK(0xff));
 
    i830->state.Ctx[I830_CTXREG_STENCILTST] = (_3DSTATE_STENCIL_TEST_CMD |
-					      ENABLE_STENCIL_PARMS |
-					      STENCIL_FAIL_OP(STENCILOP_KEEP) |
-					      STENCIL_PASS_DEPTH_FAIL_OP(STENCILOP_KEEP) |
-					      STENCIL_PASS_DEPTH_PASS_OP(STENCILOP_KEEP) |
-					      ENABLE_STENCIL_TEST_FUNC |
-					      STENCIL_TEST_FUNC(COMPAREFUNC_ALWAYS) |
-					      ENABLE_STENCIL_REF_VALUE |
-					      STENCIL_REF_VALUE(0) );
-
-   i830->state.Ctx[I830_CTXREG_STATE5] = (_3DSTATE_MODES_5_CMD |
-					  FLUSH_TEXTURE_CACHE |
-					  ENABLE_SPRITE_POINT_TEX |
-					  SPRITE_POINT_TEX_OFF |
-					  ENABLE_FIXED_LINE_WIDTH |
-					  FIXED_LINE_WIDTH(0x2) | /* 1.0 */
-					  ENABLE_FIXED_POINT_WIDTH |
-					  FIXED_POINT_WIDTH(1) );
+                                              ENABLE_STENCIL_PARMS |
+                                              STENCIL_FAIL_OP(STENCILOP_KEEP)
+                                              |
+                                              STENCIL_PASS_DEPTH_FAIL_OP
+                                              (STENCILOP_KEEP) |
+                                              STENCIL_PASS_DEPTH_PASS_OP
+                                              (STENCILOP_KEEP) |
+                                              ENABLE_STENCIL_TEST_FUNC |
+                                              STENCIL_TEST_FUNC
+                                              (COMPAREFUNC_ALWAYS) |
+                                              ENABLE_STENCIL_REF_VALUE |
+                                              STENCIL_REF_VALUE(0));
+
+   i830->state.Ctx[I830_CTXREG_STATE5] = (_3DSTATE_MODES_5_CMD | FLUSH_TEXTURE_CACHE | ENABLE_SPRITE_POINT_TEX | SPRITE_POINT_TEX_OFF | ENABLE_FIXED_LINE_WIDTH | FIXED_LINE_WIDTH(0x2) |       /* 1.0 */
+                                          ENABLE_FIXED_POINT_WIDTH |
+                                          FIXED_POINT_WIDTH(1));
 
    i830->state.Ctx[I830_CTXREG_IALPHAB] = (_3DSTATE_INDPT_ALPHA_BLEND_CMD |
-					   DISABLE_INDPT_ALPHA_BLEND |
-					   ENABLE_ALPHA_BLENDFUNC |
-					   ABLENDFUNC_ADD);
+                                           DISABLE_INDPT_ALPHA_BLEND |
+                                           ENABLE_ALPHA_BLENDFUNC |
+                                           ABLENDFUNC_ADD);
 
    i830->state.Ctx[I830_CTXREG_FOGCOLOR] = (_3DSTATE_FOG_COLOR_CMD |
-					    FOG_COLOR_RED(0) |
-					    FOG_COLOR_GREEN(0) |
-					    FOG_COLOR_BLUE(0));
+                                            FOG_COLOR_RED(0) |
+                                            FOG_COLOR_GREEN(0) |
+                                            FOG_COLOR_BLUE(0));
 
    i830->state.Ctx[I830_CTXREG_BLENDCOLOR0] = _3DSTATE_CONST_BLEND_COLOR_CMD;
    i830->state.Ctx[I830_CTXREG_BLENDCOLOR1] = 0;
 
    i830->state.Ctx[I830_CTXREG_MCSB0] = _3DSTATE_MAP_COORD_SETBIND_CMD;
    i830->state.Ctx[I830_CTXREG_MCSB1] = (TEXBIND_SET3(TEXCOORDSRC_VTXSET_3) |
-					 TEXBIND_SET2(TEXCOORDSRC_VTXSET_2) |
-					 TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) |
-					 TEXBIND_SET0(TEXCOORDSRC_VTXSET_0));
-					 
-
-   i830->state.Stipple[I830_STPREG_ST0] = _3DSTATE_STIPPLE;
-
-   i830->state.Buffer[I830_DESTREG_CBUFADDR0] = _3DSTATE_BUF_INFO_CMD;
-   i830->state.Buffer[I830_DESTREG_CBUFADDR1] = 
-      (BUF_3D_ID_COLOR_BACK | 
-       BUF_3D_PITCH(screen->front.pitch) |  /* pitch in bytes */
-       BUF_3D_USE_FENCE);
+                                         TEXBIND_SET2(TEXCOORDSRC_VTXSET_2) |
+                                         TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) |
+                                         TEXBIND_SET0(TEXCOORDSRC_VTXSET_0));
 
 
-   i830->state.Buffer[I830_DESTREG_DBUFADDR0] = _3DSTATE_BUF_INFO_CMD;
-   i830->state.Buffer[I830_DESTREG_DBUFADDR1] = 
-      (BUF_3D_ID_DEPTH |
-       BUF_3D_PITCH(screen->depth.pitch) |  /* pitch in bytes */
-       BUF_3D_USE_FENCE);
-   i830->state.Buffer[I830_DESTREG_DBUFADDR2] = screen->depth.offset;
-
+   i830->state.Stipple[I830_STPREG_ST0] = _3DSTATE_STIPPLE;
 
    i830->state.Buffer[I830_DESTREG_DV0] = _3DSTATE_DST_BUF_VARS_CMD;
-
-   switch (screen->fbFormat) {
-   case DV_PF_555:
-   case DV_PF_565:
-      i830->state.Buffer[I830_DESTREG_DV1] = (DSTORG_HORT_BIAS(0x8) | /* .5 */
-					      DSTORG_VERT_BIAS(0x8) | /* .5 */
-					      screen->fbFormat |
-					      DEPTH_IS_Z |
-					      DEPTH_FRMT_16_FIXED);
-      break;
-   case DV_PF_8888:
-      i830->state.Buffer[I830_DESTREG_DV1] = (DSTORG_HORT_BIAS(0x8) | /* .5 */
-					      DSTORG_VERT_BIAS(0x8) | /* .5 */
-					      screen->fbFormat |
-					      DEPTH_IS_Z |
-					      DEPTH_FRMT_24_FIXED_8_OTHER);
-      break;
-   }
-
    i830->state.Buffer[I830_DESTREG_SENABLE] = (_3DSTATE_SCISSOR_ENABLE_CMD |
-					       DISABLE_SCISSOR_RECT);
+                                               DISABLE_SCISSOR_RECT);
    i830->state.Buffer[I830_DESTREG_SR0] = _3DSTATE_SCISSOR_RECT_0_CMD;
    i830->state.Buffer[I830_DESTREG_SR1] = 0;
    i830->state.Buffer[I830_DESTREG_SR2] = 0;
 }
 
 
-void i830InitStateFuncs( struct dd_function_table *functions )
+void
+i830InitStateFuncs(struct dd_function_table *functions)
 {
    functions->AlphaFunc = i830AlphaFunc;
    functions->BlendColor = i830BlendColor;
@@ -1068,25 +1066,21 @@ void i830InitStateFuncs( struct dd_function_table *functions )
    functions->StencilOpSeparate = i830StencilOpSeparate;
 }
 
-void i830InitState( i830ContextPtr i830 )
+void
+i830InitState(struct i830_context *i830)
 {
    GLcontext *ctx = &i830->intel.ctx;
 
-   i830_init_packets( i830 );
+   i830_init_packets(i830);
 
-   intelInitState( ctx );
+   _mesa_init_driver_state(ctx);
 
-   memcpy( &i830->initial, &i830->state, sizeof(i830->state) );
+   memcpy(&i830->initial, &i830->state, sizeof(i830->state));
 
    i830->current = &i830->state;
    i830->state.emitted = 0;
-   i830->state.active = (I830_UPLOAD_TEXBLEND(0) |
-			 I830_UPLOAD_STIPPLE |
-			 I830_UPLOAD_CTX |
-			 I830_UPLOAD_BUFFERS);
+   i830->state.active = (I830_UPLOAD_INVARIENT |
+                         I830_UPLOAD_TEXBLEND(0) |
+                         I830_UPLOAD_STIPPLE |
+                         I830_UPLOAD_CTX | I830_UPLOAD_BUFFERS);
 }
-
-
-
-
-
diff --git a/i915/i830_tex.c b/i915/i830_tex.c
index 3c4aedb..79b0fcf 100644
--- a/i915/i830_tex.c
+++ b/i915/i830_tex.c
@@ -45,261 +45,13 @@
 
 
 
-
-/**
- * Set the texture wrap modes.
- * 
- * The i830M (and related graphics cores) do not support GL_CLAMP.  The Intel
- * drivers for "other operating systems" implement GL_CLAMP as
- * GL_CLAMP_TO_EDGE, so the same is done here.
- * 
- * \param t Texture object whose wrap modes are to be set
- * \param swrap Wrap mode for the \a s texture coordinate
- * \param twrap Wrap mode for the \a t texture coordinate
- */
-static void i830SetTexWrapping(i830TextureObjectPtr tex,
-			       GLenum swrap, 
-			       GLenum twrap)
-{
-   tex->Setup[I830_TEXREG_MCS] &= ~(TEXCOORD_ADDR_U_MASK|TEXCOORD_ADDR_V_MASK);
-
-   switch( swrap ) {
-   case GL_REPEAT:
-      tex->Setup[I830_TEXREG_MCS] |= TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_WRAP);
-      break;
-   case GL_CLAMP:
-   case GL_CLAMP_TO_EDGE:
-      tex->Setup[I830_TEXREG_MCS] |= TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_CLAMP);
-      break;
-   case GL_CLAMP_TO_BORDER:
-      tex->Setup[I830_TEXREG_MCS] |= 
-			TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_CLAMP_BORDER);
-      break;
-   case GL_MIRRORED_REPEAT:
-      tex->Setup[I830_TEXREG_MCS] |= 
-			TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_MIRROR);
-      break;
-   default:
-      break;
-   }
-
-   switch( twrap ) {
-   case GL_REPEAT:
-      tex->Setup[I830_TEXREG_MCS] |= TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_WRAP);
-      break;
-   case GL_CLAMP:
-   case GL_CLAMP_TO_EDGE:
-      tex->Setup[I830_TEXREG_MCS] |= TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_CLAMP);
-      break;
-   case GL_CLAMP_TO_BORDER:
-      tex->Setup[I830_TEXREG_MCS] |= 
-			TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_CLAMP_BORDER);
-      break;
-   case GL_MIRRORED_REPEAT:
-      tex->Setup[I830_TEXREG_MCS] |=
-			TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_MIRROR);
-      break;
-   default:
-      break;
-   }
-}
-
-
-/**
- * Set the texture magnification and minification modes.
- * 
- * \param t Texture whose filter modes are to be set
- * \param minf Texture minification mode
- * \param magf Texture magnification mode
- * \param bias LOD bias for this texture unit.
- */
-
-static void i830SetTexFilter( i830TextureObjectPtr t, GLenum minf, GLenum magf,
-			      GLfloat maxanisotropy )
-{
-   int minFilt = 0, mipFilt = 0, magFilt = 0;
-
-   if(INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   if ( maxanisotropy > 1.0 ) {
-      minFilt = FILTER_ANISOTROPIC;
-      magFilt = FILTER_ANISOTROPIC;
-   }
-   else {
-      switch (minf) {
-      case GL_NEAREST:
-	 minFilt = FILTER_NEAREST;
-	 mipFilt = MIPFILTER_NONE;
-	 break;
-      case GL_LINEAR:
-	 minFilt = FILTER_LINEAR;
-	 mipFilt = MIPFILTER_NONE;
-	 break;
-      case GL_NEAREST_MIPMAP_NEAREST:
-	 minFilt = FILTER_NEAREST;
-	 mipFilt = MIPFILTER_NEAREST;
-	 break;
-      case GL_LINEAR_MIPMAP_NEAREST:
-	 minFilt = FILTER_LINEAR;
-	 mipFilt = MIPFILTER_NEAREST;
-	 break;
-      case GL_NEAREST_MIPMAP_LINEAR:
-	 minFilt = FILTER_NEAREST;
-	 mipFilt = MIPFILTER_LINEAR;
-	 break;
-      case GL_LINEAR_MIPMAP_LINEAR:
-	 minFilt = FILTER_LINEAR;
-	 mipFilt = MIPFILTER_LINEAR;
-	 break;
-      default:
-	 break;
-      }
-
-      switch (magf) {
-      case GL_NEAREST:
-	 magFilt = FILTER_NEAREST;
-	 break;
-      case GL_LINEAR:
-	 magFilt = FILTER_LINEAR;
-	 break;
-      default:
-	 break;
-      }  
-   }
-
-   t->Setup[I830_TEXREG_TM0S3] &= ~TM0S3_MIN_FILTER_MASK;
-   t->Setup[I830_TEXREG_TM0S3] &= ~TM0S3_MIP_FILTER_MASK;
-   t->Setup[I830_TEXREG_TM0S3] &= ~TM0S3_MAG_FILTER_MASK;
-   t->Setup[I830_TEXREG_TM0S3] |= ((minFilt << TM0S3_MIN_FILTER_SHIFT) |
-				   (mipFilt << TM0S3_MIP_FILTER_SHIFT) |
-				   (magFilt << TM0S3_MAG_FILTER_SHIFT));
-}
-
-static void i830SetTexBorderColor(i830TextureObjectPtr t, GLubyte color[4])
-{
-   if(INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-    t->Setup[I830_TEXREG_TM0S4] = 
-        INTEL_PACKCOLOR8888(color[0],color[1],color[2],color[3]);
-}
-
-
-/**
- * Allocate space for and load the mesa images into the texture memory block.
- * This will happen before drawing with a new texture, or drawing with a
- * texture after it was swapped out or teximaged again.
- */
-
-intelTextureObjectPtr i830AllocTexObj( struct gl_texture_object *texObj )
-{
-   i830TextureObjectPtr t = CALLOC_STRUCT( i830_texture_object );
-   if ( !t ) 
-      return NULL;
-
-   texObj->DriverData = t;
-   t->intel.base.tObj = texObj;
-   t->intel.dirty = I830_UPLOAD_TEX_ALL;
-   make_empty_list( &t->intel.base );
-
-   t->Setup[I830_TEXREG_TM0LI] = 0; /* not used */
-   t->Setup[I830_TEXREG_TM0S0] = 0;
-   t->Setup[I830_TEXREG_TM0S1] = 0;
-   t->Setup[I830_TEXREG_TM0S2] = 0;
-   t->Setup[I830_TEXREG_TM0S3] = 0;
-   t->Setup[I830_TEXREG_MCS] = (_3DSTATE_MAP_COORD_SET_CMD |
-				MAP_UNIT(0) |
-				ENABLE_TEXCOORD_PARAMS |
-				TEXCOORDS_ARE_NORMAL |
-				TEXCOORDTYPE_CARTESIAN |
-				ENABLE_ADDR_V_CNTL |
-				TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_WRAP) |
-				ENABLE_ADDR_U_CNTL |
-				TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_WRAP));
-
-   
-   i830SetTexWrapping( t, texObj->WrapS, texObj->WrapT );
-   i830SetTexFilter( t, texObj->MinFilter, texObj->MagFilter, 
-		     texObj->MaxAnisotropy );
-   i830SetTexBorderColor( t, texObj->_BorderChan );
-
-   return &t->intel;
-}
-
-
-static void i830TexParameter( GLcontext *ctx, GLenum target,
-			      struct gl_texture_object *tObj,
-			      GLenum pname, const GLfloat *params )
-{
-   i830TextureObjectPtr t = (i830TextureObjectPtr) tObj->DriverData;
-   if (!t)
-      return;
-
-   switch (pname) {
-   case GL_TEXTURE_MIN_FILTER:
-   case GL_TEXTURE_MAG_FILTER:
-   case GL_TEXTURE_MAX_ANISOTROPY_EXT:
-      i830SetTexFilter( t, tObj->MinFilter, tObj->MagFilter,
-			tObj->MaxAnisotropy);
-      break;
-
-   case GL_TEXTURE_WRAP_S:
-   case GL_TEXTURE_WRAP_T:
-      i830SetTexWrapping( t, tObj->WrapS, tObj->WrapT );
-      break;
-  
-   case GL_TEXTURE_BORDER_COLOR:
-      i830SetTexBorderColor( t, tObj->_BorderChan );
-      break;
-
-   case GL_TEXTURE_BASE_LEVEL:
-   case GL_TEXTURE_MAX_LEVEL:
-   case GL_TEXTURE_MIN_LOD:
-   case GL_TEXTURE_MAX_LOD:
-      /* The i830 and its successors can do a lot of this without
-       * reloading the textures.  A project for someone?
-       */
-      intelFlush( ctx );
-      driSwapOutTextureObject( (driTextureObject *) t );
-      break;
-
-   default:
-      return;
-   }
-
-   t->intel.dirty = I830_UPLOAD_TEX_ALL;
-}
-
-
-static void i830TexEnv( GLcontext *ctx, GLenum target, 
-			GLenum pname, const GLfloat *param )
+static void
+i830TexEnv(GLcontext * ctx, GLenum target,
+           GLenum pname, const GLfloat * param)
 {
-   i830ContextPtr i830 = I830_CONTEXT( ctx );
-   GLuint unit = ctx->Texture.CurrentUnit;
 
    switch (pname) {
-   case GL_TEXTURE_ENV_COLOR: 
-#if 0
-   {
-      GLubyte r, g, b, a;
-      GLuint col;
-      
-      UNCLAMPED_FLOAT_TO_UBYTE(r, param[RCOMP]);
-      UNCLAMPED_FLOAT_TO_UBYTE(g, param[GCOMP]);
-      UNCLAMPED_FLOAT_TO_UBYTE(b, param[BCOMP]);
-      UNCLAMPED_FLOAT_TO_UBYTE(a, param[ACOMP]);
-
-      col = ((a << 24) | (r << 16) | (g << 8) | b);
-
-      if (col != i830->state.TexEnv[unit][I830_TEXENVREG_COL1]) {
-	 I830_STATECHANGE(i830, I830_UPLOAD_TEXENV);
-	 i830->state.TexEnv[unit][I830_TEXENVREG_COL1] = col;
-      }
-
-      break;
-   }
-#endif
+   case GL_TEXTURE_ENV_COLOR:
    case GL_TEXTURE_ENV_MODE:
    case GL_COMBINE_RGB:
    case GL_COMBINE_ALPHA:
@@ -319,38 +71,32 @@ static void i830TexEnv( GLcontext *ctx, GLenum target,
    case GL_ALPHA_SCALE:
       break;
 
-   case GL_TEXTURE_LOD_BIAS: {
-      int b = (int) ((*param) * 16.0);
-      if (b > 63) b = 63;
-      if (b < -64) b = -64;
-      I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit));
-      i830->state.Tex[unit][I830_TEXREG_TM0S3] &= ~TM0S3_LOD_BIAS_MASK;
-      i830->state.Tex[unit][I830_TEXREG_TM0S3] |= 
-	 ((b << TM0S3_LOD_BIAS_SHIFT) & TM0S3_LOD_BIAS_MASK);
-      break;
-   }
+   case GL_TEXTURE_LOD_BIAS:{
+         struct i830_context *i830 = i830_context(ctx);
+         GLuint unit = ctx->Texture.CurrentUnit;
+         int b = (int) ((*param) * 16.0);
+         if (b > 63)
+            b = 63;
+         if (b < -64)
+            b = -64;
+         I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit));
+         i830->lodbias_tm0s3[unit] =
+            ((b << TM0S3_LOD_BIAS_SHIFT) & TM0S3_LOD_BIAS_MASK);
+         break;
+      }
 
    default:
       break;
    }
 }
 
-static void i830BindTexture( GLcontext *ctx, GLenum target,
-			    struct gl_texture_object *texObj )
-{
-   i830TextureObjectPtr tex;
-   
-   if (!texObj->DriverData)
-      i830AllocTexObj( texObj );
-   
-   tex = (i830TextureObjectPtr)texObj->DriverData;
-}
 
 
 
-void i830InitTextureFuncs( struct dd_function_table *functions )
+void
+i830InitTextureFuncs(struct dd_function_table *functions)
 {
-   functions->BindTexture 		= i830BindTexture;
-   functions->TexEnv                    = i830TexEnv;
-   functions->TexParameter              = i830TexParameter;
+/*
+   functions->TexEnv = i830TexEnv;
+*/
 }
diff --git a/i915/i830_texblend.c b/i915/i830_texblend.c
index 49e0347..58f220e 100644
--- a/i915/i830_texblend.c
+++ b/i915/i830_texblend.c
@@ -46,46 +46,42 @@
 /* ================================================================
  * Texture combine functions
  */
-static GLuint pass_through( GLuint *state, GLuint blendUnit )
+static GLuint
+pass_through(GLuint * state, GLuint blendUnit)
 {
    state[0] = (_3DSTATE_MAP_BLEND_OP_CMD(blendUnit) |
-	       TEXPIPE_COLOR |
-	       ENABLE_TEXOUTPUT_WRT_SEL |
-	       TEXOP_OUTPUT_CURRENT |
-	       DISABLE_TEX_CNTRL_STAGE |
-	       TEXOP_SCALE_1X |
-	       TEXOP_MODIFY_PARMS |
-	       TEXBLENDOP_ARG1);
+               TEXPIPE_COLOR |
+               ENABLE_TEXOUTPUT_WRT_SEL |
+               TEXOP_OUTPUT_CURRENT |
+               DISABLE_TEX_CNTRL_STAGE |
+               TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS | TEXBLENDOP_ARG1);
    state[1] = (_3DSTATE_MAP_BLEND_OP_CMD(blendUnit) |
-	       TEXPIPE_ALPHA |
-	       ENABLE_TEXOUTPUT_WRT_SEL |
-	       TEXOP_OUTPUT_CURRENT |
-	       TEXOP_SCALE_1X |
-	       TEXOP_MODIFY_PARMS |
-	       TEXBLENDOP_ARG1);
+               TEXPIPE_ALPHA |
+               ENABLE_TEXOUTPUT_WRT_SEL |
+               TEXOP_OUTPUT_CURRENT |
+               TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS | TEXBLENDOP_ARG1);
    state[2] = (_3DSTATE_MAP_BLEND_ARG_CMD(blendUnit) |
-	       TEXPIPE_COLOR |
-	       TEXBLEND_ARG1 |
-	       TEXBLENDARG_MODIFY_PARMS |
-	       TEXBLENDARG_CURRENT);
+               TEXPIPE_COLOR |
+               TEXBLEND_ARG1 |
+               TEXBLENDARG_MODIFY_PARMS | TEXBLENDARG_CURRENT);
    state[3] = (_3DSTATE_MAP_BLEND_ARG_CMD(blendUnit) |
-	       TEXPIPE_ALPHA |
-	       TEXBLEND_ARG1 |
-	       TEXBLENDARG_MODIFY_PARMS |
-	       TEXBLENDARG_CURRENT);
+               TEXPIPE_ALPHA |
+               TEXBLEND_ARG1 |
+               TEXBLENDARG_MODIFY_PARMS | TEXBLENDARG_CURRENT);
 
    return 4;
 }
 
-static GLuint emit_factor( GLuint blendUnit, GLuint *state, GLuint count, 
-			   const GLfloat *factor )
+static GLuint
+emit_factor(GLuint blendUnit, GLuint * state, GLuint count,
+            const GLfloat * factor)
 {
    GLubyte r, g, b, a;
    GLuint col;
-      
+
    if (0)
       fprintf(stderr, "emit constant %d: %.2f %.2f %.2f %.2f\n",
-	  blendUnit, factor[0], factor[1], factor[2], factor[3]);
+              blendUnit, factor[0], factor[1], factor[2], factor[3]);
 
    UNCLAMPED_FLOAT_TO_UBYTE(r, factor[0]);
    UNCLAMPED_FLOAT_TO_UBYTE(g, factor[1]);
@@ -94,21 +90,27 @@ static GLuint emit_factor( GLuint blendUnit, GLuint *state, GLuint count,
 
    col = ((a << 24) | (r << 16) | (g << 8) | b);
 
-   state[count++] = _3DSTATE_COLOR_FACTOR_N_CMD(blendUnit); 
+   state[count++] = _3DSTATE_COLOR_FACTOR_N_CMD(blendUnit);
    state[count++] = col;
 
    return count;
 }
 
 
-static __inline__ GLuint GetTexelOp(GLint unit)
+static INLINE GLuint
+GetTexelOp(GLint unit)
 {
-   switch(unit) {
-   case 0: return TEXBLENDARG_TEXEL0;
-   case 1: return TEXBLENDARG_TEXEL1;
-   case 2: return TEXBLENDARG_TEXEL2;
-   case 3: return TEXBLENDARG_TEXEL3;
-   default: return TEXBLENDARG_TEXEL0;
+   switch (unit) {
+   case 0:
+      return TEXBLENDARG_TEXEL0;
+   case 1:
+      return TEXBLENDARG_TEXEL1;
+   case 2:
+      return TEXBLENDARG_TEXEL2;
+   case 3:
+      return TEXBLENDARG_TEXEL3;
+   default:
+      return TEXBLENDARG_TEXEL0;
    }
 }
 
@@ -132,12 +134,10 @@ static __inline__ GLuint GetTexelOp(GLint unit)
  * partial support for the extension?
  */
 GLuint
-i830SetTexEnvCombine(i830ContextPtr i830,
-		     const struct gl_tex_env_combine_state * combine,
-		     GLint blendUnit,
-		     GLuint texel_op,
-		     GLuint *state,
-		     const GLfloat *factor )
+i830SetTexEnvCombine(struct i830_context * i830,
+                     const struct gl_tex_env_combine_state * combine,
+                     GLint blendUnit,
+                     GLuint texel_op, GLuint * state, const GLfloat * factor)
 {
    const GLuint numColorArgs = combine->_NumArgsRGB;
    const GLuint numAlphaArgs = combine->_NumArgsA;
@@ -162,7 +162,7 @@ i830SetTexEnvCombine(i830ContextPtr i830,
       TEXPIPE_ALPHA | TEXBLEND_ARG0 | TEXBLENDARG_MODIFY_PARMS,
    };
 
-   if(INTEL_DEBUG&DEBUG_TEXTURE)
+   if (INTEL_DEBUG & DEBUG_TEXTURE)
       fprintf(stderr, "%s\n", __FUNCTION__);
 
 
@@ -188,23 +188,23 @@ i830SetTexEnvCombine(i830ContextPtr i830,
    }
 
 
-   switch(combine->ModeRGB) {
-   case GL_REPLACE: 
+   switch (combine->ModeRGB) {
+   case GL_REPLACE:
       blendop = TEXBLENDOP_ARG1;
       break;
-   case GL_MODULATE: 
+   case GL_MODULATE:
       blendop = TEXBLENDOP_MODULATE;
       break;
-   case GL_ADD: 
+   case GL_ADD:
       blendop = TEXBLENDOP_ADD;
       break;
    case GL_ADD_SIGNED:
-      blendop = TEXBLENDOP_ADDSIGNED; 
+      blendop = TEXBLENDOP_ADDSIGNED;
       break;
    case GL_INTERPOLATE:
-      blendop = TEXBLENDOP_BLEND; 
+      blendop = TEXBLENDOP_BLEND;
       break;
-   case GL_SUBTRACT: 
+   case GL_SUBTRACT:
       blendop = TEXBLENDOP_SUBTRACT;
       break;
    case GL_DOT3_RGB_EXT:
@@ -215,55 +215,54 @@ i830SetTexEnvCombine(i830ContextPtr i830,
    case GL_DOT3_RGBA:
       blendop = TEXBLENDOP_DOT3;
       break;
-   default: 
-      return pass_through( state, blendUnit );
+   default:
+      return pass_through(state, blendUnit);
    }
 
    blendop |= (rgb_shift << TEXOP_SCALE_SHIFT);
 
 
    /* Handle RGB args */
-   for(i = 0; i < 3; i++) {
-      switch(combine->SourceRGB[i]) {
-      case GL_TEXTURE: 
-	 args_RGB[i] = texel_op;
-	 break;
+   for (i = 0; i < 3; i++) {
+      switch (combine->SourceRGB[i]) {
+      case GL_TEXTURE:
+         args_RGB[i] = texel_op;
+         break;
       case GL_TEXTURE0:
       case GL_TEXTURE1:
       case GL_TEXTURE2:
       case GL_TEXTURE3:
-	 args_RGB[i] = GetTexelOp( combine->SourceRGB[i] - GL_TEXTURE0 );
-	 break;
+         args_RGB[i] = GetTexelOp(combine->SourceRGB[i] - GL_TEXTURE0);
+         break;
       case GL_CONSTANT:
-	 args_RGB[i] = TEXBLENDARG_FACTOR_N; 
-	 need_factor = 1;
-	 break;
+         args_RGB[i] = TEXBLENDARG_FACTOR_N;
+         need_factor = 1;
+         break;
       case GL_PRIMARY_COLOR:
-	 args_RGB[i] = TEXBLENDARG_DIFFUSE;
-	 break;
+         args_RGB[i] = TEXBLENDARG_DIFFUSE;
+         break;
       case GL_PREVIOUS:
-	 args_RGB[i] = TEXBLENDARG_CURRENT; 
-	 break;
-      default: 
-	 return pass_through( state, blendUnit );
+         args_RGB[i] = TEXBLENDARG_CURRENT;
+         break;
+      default:
+         return pass_through(state, blendUnit);
       }
 
-      switch(combine->OperandRGB[i]) {
-      case GL_SRC_COLOR: 
-	 args_RGB[i] |= 0;
-	 break;
-      case GL_ONE_MINUS_SRC_COLOR: 
-	 args_RGB[i] |= TEXBLENDARG_INV_ARG;
-	 break;
-      case GL_SRC_ALPHA: 
-	 args_RGB[i] |= TEXBLENDARG_REPLICATE_ALPHA;
-	 break;
-      case GL_ONE_MINUS_SRC_ALPHA: 
-	 args_RGB[i] |= (TEXBLENDARG_REPLICATE_ALPHA | 
-			 TEXBLENDARG_INV_ARG);
-	 break;
-      default: 
-	 return pass_through( state, blendUnit );
+      switch (combine->OperandRGB[i]) {
+      case GL_SRC_COLOR:
+         args_RGB[i] |= 0;
+         break;
+      case GL_ONE_MINUS_SRC_COLOR:
+         args_RGB[i] |= TEXBLENDARG_INV_ARG;
+         break;
+      case GL_SRC_ALPHA:
+         args_RGB[i] |= TEXBLENDARG_REPLICATE_ALPHA;
+         break;
+      case GL_ONE_MINUS_SRC_ALPHA:
+         args_RGB[i] |= (TEXBLENDARG_REPLICATE_ALPHA | TEXBLENDARG_INV_ARG);
+         break;
+      default:
+         return pass_through(state, blendUnit);
       }
    }
 
@@ -275,76 +274,76 @@ i830SetTexEnvCombine(i830ContextPtr i830,
     * Note - the global factor is set up with alpha == .5, so 
     * the alpha part of the DOT4 calculation should be zero.
     */
-   if ( combine->ModeRGB == GL_DOT3_RGBA_EXT || 
-	combine->ModeRGB == GL_DOT3_RGBA ) {
+   if (combine->ModeRGB == GL_DOT3_RGBA_EXT ||
+       combine->ModeRGB == GL_DOT3_RGBA) {
       ablendop = TEXBLENDOP_DOT4;
-      args_A[0] = TEXBLENDARG_FACTOR; /* the global factor */
+      args_A[0] = TEXBLENDARG_FACTOR;   /* the global factor */
       args_A[1] = TEXBLENDARG_FACTOR;
       args_A[2] = TEXBLENDARG_FACTOR;
    }
    else {
-      switch(combine->ModeA) {
-      case GL_REPLACE: 
-	 ablendop = TEXBLENDOP_ARG1;
-	 break;
-      case GL_MODULATE: 
-	 ablendop = TEXBLENDOP_MODULATE;
-	 break;
-      case GL_ADD: 
-	 ablendop = TEXBLENDOP_ADD;
-	 break;
+      switch (combine->ModeA) {
+      case GL_REPLACE:
+         ablendop = TEXBLENDOP_ARG1;
+         break;
+      case GL_MODULATE:
+         ablendop = TEXBLENDOP_MODULATE;
+         break;
+      case GL_ADD:
+         ablendop = TEXBLENDOP_ADD;
+         break;
       case GL_ADD_SIGNED:
-	 ablendop = TEXBLENDOP_ADDSIGNED; 
-	 break;
+         ablendop = TEXBLENDOP_ADDSIGNED;
+         break;
       case GL_INTERPOLATE:
-	 ablendop = TEXBLENDOP_BLEND; 
-	 break;
-      case GL_SUBTRACT: 
-	 ablendop = TEXBLENDOP_SUBTRACT;
-	 break;
+         ablendop = TEXBLENDOP_BLEND;
+         break;
+      case GL_SUBTRACT:
+         ablendop = TEXBLENDOP_SUBTRACT;
+         break;
       default:
-	 return pass_through( state, blendUnit );
+         return pass_through(state, blendUnit);
       }
 
 
       ablendop |= (alpha_shift << TEXOP_SCALE_SHIFT);
 
       /* Handle A args */
-      for(i = 0; i < 3; i++) {
-	 switch(combine->SourceA[i]) {
-	 case GL_TEXTURE: 
-	    args_A[i] = texel_op;
-	    break;
-	 case GL_TEXTURE0:
-	 case GL_TEXTURE1:
-	 case GL_TEXTURE2:
-	 case GL_TEXTURE3:
-	    args_A[i] = GetTexelOp( combine->SourceA[i] - GL_TEXTURE0 );
-	    break;
-	 case GL_CONSTANT:
-	    args_A[i] = TEXBLENDARG_FACTOR_N; 
-	    need_factor = 1;
-	    break;
-	 case GL_PRIMARY_COLOR:
-	    args_A[i] = TEXBLENDARG_DIFFUSE; 
-	    break;
-	 case GL_PREVIOUS:
-	    args_A[i] = TEXBLENDARG_CURRENT; 
-	    break;
-	 default: 
-	    return pass_through( state, blendUnit );
-	 }
-
-	 switch(combine->OperandA[i]) {
-	 case GL_SRC_ALPHA: 
-	    args_A[i] |= 0;
-	    break;
-	 case GL_ONE_MINUS_SRC_ALPHA: 
-	    args_A[i] |= TEXBLENDARG_INV_ARG;
-	    break;
-	 default: 
-	    return pass_through( state, blendUnit );
-	 }
+      for (i = 0; i < 3; i++) {
+         switch (combine->SourceA[i]) {
+         case GL_TEXTURE:
+            args_A[i] = texel_op;
+            break;
+         case GL_TEXTURE0:
+         case GL_TEXTURE1:
+         case GL_TEXTURE2:
+         case GL_TEXTURE3:
+            args_A[i] = GetTexelOp(combine->SourceA[i] - GL_TEXTURE0);
+            break;
+         case GL_CONSTANT:
+            args_A[i] = TEXBLENDARG_FACTOR_N;
+            need_factor = 1;
+            break;
+         case GL_PRIMARY_COLOR:
+            args_A[i] = TEXBLENDARG_DIFFUSE;
+            break;
+         case GL_PREVIOUS:
+            args_A[i] = TEXBLENDARG_CURRENT;
+            break;
+         default:
+            return pass_through(state, blendUnit);
+         }
+
+         switch (combine->OperandA[i]) {
+         case GL_SRC_ALPHA:
+            args_A[i] |= 0;
+            break;
+         case GL_ONE_MINUS_SRC_ALPHA:
+            args_A[i] |= TEXBLENDARG_INV_ARG;
+            break;
+         default:
+            return pass_through(state, blendUnit);
+         }
       }
    }
 
@@ -363,86 +362,86 @@ i830SetTexEnvCombine(i830ContextPtr i830,
 
    used = 0;
    state[used++] = (_3DSTATE_MAP_BLEND_OP_CMD(blendUnit) |
-		    TEXPIPE_COLOR |
-		    ENABLE_TEXOUTPUT_WRT_SEL |
-		    TEXOP_OUTPUT_CURRENT |
-		    DISABLE_TEX_CNTRL_STAGE |
-		    TEXOP_MODIFY_PARMS |
-		    blendop);
+                    TEXPIPE_COLOR |
+                    ENABLE_TEXOUTPUT_WRT_SEL |
+                    TEXOP_OUTPUT_CURRENT |
+                    DISABLE_TEX_CNTRL_STAGE | TEXOP_MODIFY_PARMS | blendop);
    state[used++] = (_3DSTATE_MAP_BLEND_OP_CMD(blendUnit) |
-		    TEXPIPE_ALPHA |
-		    ENABLE_TEXOUTPUT_WRT_SEL |
-		    TEXOP_OUTPUT_CURRENT |
-		    TEXOP_MODIFY_PARMS |
-		    ablendop);
+                    TEXPIPE_ALPHA |
+                    ENABLE_TEXOUTPUT_WRT_SEL |
+                    TEXOP_OUTPUT_CURRENT | TEXOP_MODIFY_PARMS | ablendop);
 
-   for ( i = 0 ; i < numColorArgs ; i++ ) {
+   for (i = 0; i < numColorArgs; i++) {
       state[used++] = (_3DSTATE_MAP_BLEND_ARG_CMD(blendUnit) |
-		       tex_blend_rgb[i] | args_RGB[i]);
+                       tex_blend_rgb[i] | args_RGB[i]);
    }
 
-   for ( i = 0 ; i < numAlphaArgs ; i++ ) {
+   for (i = 0; i < numAlphaArgs; i++) {
       state[used++] = (_3DSTATE_MAP_BLEND_ARG_CMD(blendUnit) |
-		       tex_blend_a[i] | args_A[i]);
+                       tex_blend_a[i] | args_A[i]);
    }
 
 
-   if (need_factor) 
-      return emit_factor( blendUnit, state, used, factor );
-   else 
+   if (need_factor)
+      return emit_factor(blendUnit, state, used, factor);
+   else
       return used;
 }
 
 
-static void emit_texblend( i830ContextPtr i830, GLuint unit, GLuint blendUnit,
-			   GLboolean last_stage )
+static void
+emit_texblend(struct i830_context *i830, GLuint unit, GLuint blendUnit,
+              GLboolean last_stage)
 {
    struct gl_texture_unit *texUnit = &i830->intel.ctx.Texture.Unit[unit];
    GLuint tmp[I830_TEXBLEND_SIZE], tmp_sz;
 
 
-   if (0) fprintf(stderr, "%s unit %d\n", __FUNCTION__, unit);
+   if (0)
+      fprintf(stderr, "%s unit %d\n", __FUNCTION__, unit);
 
    /* Update i830->state.TexBlend
-    */ 
-   tmp_sz = i830SetTexEnvCombine(i830, texUnit->_CurrentCombine, blendUnit, 
-				 GetTexelOp(unit), tmp,
-				 texUnit->EnvColor );
+    */
+   tmp_sz = i830SetTexEnvCombine(i830, texUnit->_CurrentCombine, blendUnit,
+                                 GetTexelOp(unit), tmp, texUnit->EnvColor);
 
-   if (last_stage) 
+   if (last_stage)
       tmp[0] |= TEXOP_LAST_STAGE;
 
    if (tmp_sz != i830->state.TexBlendWordsUsed[blendUnit] ||
-       memcmp( tmp, i830->state.TexBlend[blendUnit], tmp_sz * sizeof(GLuint))) {
-      
-      I830_STATECHANGE( i830, I830_UPLOAD_TEXBLEND(blendUnit) );
-      memcpy( i830->state.TexBlend[blendUnit], tmp, tmp_sz * sizeof(GLuint));
+       memcmp(tmp, i830->state.TexBlend[blendUnit],
+              tmp_sz * sizeof(GLuint))) {
+
+      I830_STATECHANGE(i830, I830_UPLOAD_TEXBLEND(blendUnit));
+      memcpy(i830->state.TexBlend[blendUnit], tmp, tmp_sz * sizeof(GLuint));
       i830->state.TexBlendWordsUsed[blendUnit] = tmp_sz;
    }
 
    I830_ACTIVESTATE(i830, I830_UPLOAD_TEXBLEND(blendUnit), GL_TRUE);
 }
 
-static void emit_passthrough( i830ContextPtr i830 )
+static void
+emit_passthrough(struct i830_context *i830)
 {
    GLuint tmp[I830_TEXBLEND_SIZE], tmp_sz;
    GLuint unit = 0;
 
-   tmp_sz = pass_through( tmp, unit );
+   tmp_sz = pass_through(tmp, unit);
    tmp[0] |= TEXOP_LAST_STAGE;
 
    if (tmp_sz != i830->state.TexBlendWordsUsed[unit] ||
-       memcmp( tmp, i830->state.TexBlend[unit], tmp_sz * sizeof(GLuint))) {
-      
-      I830_STATECHANGE( i830, I830_UPLOAD_TEXBLEND(unit) );
-      memcpy( i830->state.TexBlend[unit], tmp, tmp_sz * sizeof(GLuint));
+       memcmp(tmp, i830->state.TexBlend[unit], tmp_sz * sizeof(GLuint))) {
+
+      I830_STATECHANGE(i830, I830_UPLOAD_TEXBLEND(unit));
+      memcpy(i830->state.TexBlend[unit], tmp, tmp_sz * sizeof(GLuint));
       i830->state.TexBlendWordsUsed[unit] = tmp_sz;
    }
 
    I830_ACTIVESTATE(i830, I830_UPLOAD_TEXBLEND(unit), GL_TRUE);
 }
 
-void i830EmitTextureBlend( i830ContextPtr i830 )
+void
+i830EmitTextureBlend(struct i830_context *i830)
 {
    GLcontext *ctx = &i830->intel.ctx;
    GLuint unit, last_stage = 0, blendunit = 0;
@@ -450,16 +449,15 @@ void i830EmitTextureBlend( i830ContextPtr i830 )
    I830_ACTIVESTATE(i830, I830_UPLOAD_TEXBLEND_ALL, GL_FALSE);
 
    if (ctx->Texture._EnabledUnits) {
-      for (unit = 0 ; unit < ctx->Const.MaxTextureUnits ; unit++)
-	 if (ctx->Texture.Unit[unit]._ReallyEnabled) 
-	    last_stage = unit;
+      for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++)
+         if (ctx->Texture.Unit[unit]._ReallyEnabled)
+            last_stage = unit;
 
-      for (unit = 0 ; unit < ctx->Const.MaxTextureUnits ; unit++)
-	 if (ctx->Texture.Unit[unit]._ReallyEnabled) 
-	    emit_texblend( i830, unit, blendunit++, last_stage == unit );
+      for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++)
+         if (ctx->Texture.Unit[unit]._ReallyEnabled)
+            emit_texblend(i830, unit, blendunit++, last_stage == unit);
    }
    else {
-      emit_passthrough( i830 );
+      emit_passthrough(i830);
    }
 }
-
diff --git a/i915/i830_texstate.c b/i915/i830_texstate.c
index ba972da..4e9b022 100644
--- a/i915/i830_texstate.c
+++ b/i915/i830_texstate.c
@@ -25,459 +25,327 @@
  * 
  **************************************************************************/
 
-#include "glheader.h"
-#include "macros.h"
 #include "mtypes.h"
-#include "simple_list.h"
 #include "enums.h"
 #include "texformat.h"
-#include "texstore.h"
+#include "dri_bufmgr.h"
 
-#include "mm.h"
-
-#include "intel_screen.h"
-#include "intel_ioctl.h"
+#include "intel_mipmap_tree.h"
 #include "intel_tex.h"
 
 #include "i830_context.h"
 #include "i830_reg.h"
 
-static const GLint initial_offsets[6][2] = { {0,0},
-				       {0,2},
-				       {1,0},
-				       {1,2},
-				       {1,1},
-				       {1,3} };
-
-static const GLint step_offsets[6][2] = { {0,2},
-				    {0,2},
-				    {-1,2},
-				    {-1,2},
-				    {-1,1},
-				    {-1,1} };
 
-#define I830_TEX_UNIT_ENABLED(unit)		(1<<unit)
 
-static GLboolean i830SetTexImages( i830ContextPtr i830, 
-				  struct gl_texture_object *tObj )
+static GLuint
+translate_texture_format(GLuint mesa_format)
 {
-   GLuint total_height, pitch, i, textureFormat;
-   i830TextureObjectPtr t = (i830TextureObjectPtr) tObj->DriverData;
-   const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
-   GLint firstLevel, lastLevel, numLevels;
-
-   switch( baseImage->TexFormat->MesaFormat ) {
+   switch (mesa_format) {
    case MESA_FORMAT_L8:
-      t->intel.texelBytes = 1;
-      textureFormat = MAPSURF_8BIT | MT_8BIT_L8;
-      break;
-
+      return MAPSURF_8BIT | MT_8BIT_L8;
    case MESA_FORMAT_I8:
-      t->intel.texelBytes = 1;
-      textureFormat = MAPSURF_8BIT | MT_8BIT_I8;
-      break;
-
+      return MAPSURF_8BIT | MT_8BIT_I8;
    case MESA_FORMAT_A8:
-      t->intel.texelBytes = 1;
-      textureFormat = MAPSURF_8BIT | MT_8BIT_I8; /* Kludge -- check with conform, glean */
-      break;
-
+      return MAPSURF_8BIT | MT_8BIT_I8; /* Kludge! */
    case MESA_FORMAT_AL88:
-      t->intel.texelBytes = 2;
-      textureFormat = MAPSURF_16BIT | MT_16BIT_AY88;
-      break;
-
+      return MAPSURF_16BIT | MT_16BIT_AY88;
    case MESA_FORMAT_RGB565:
-      t->intel.texelBytes = 2;
-      textureFormat = MAPSURF_16BIT | MT_16BIT_RGB565;
-      break;
-
+      return MAPSURF_16BIT | MT_16BIT_RGB565;
    case MESA_FORMAT_ARGB1555:
-      t->intel.texelBytes = 2;
-      textureFormat = MAPSURF_16BIT | MT_16BIT_ARGB1555;
-      break;
-
+      return MAPSURF_16BIT | MT_16BIT_ARGB1555;
    case MESA_FORMAT_ARGB4444:
-      t->intel.texelBytes = 2;
-      textureFormat = MAPSURF_16BIT | MT_16BIT_ARGB4444;
-      break;
-
+      return MAPSURF_16BIT | MT_16BIT_ARGB4444;
    case MESA_FORMAT_ARGB8888:
-      t->intel.texelBytes = 4;
-      textureFormat = MAPSURF_32BIT | MT_32BIT_ARGB8888;
-      break;
-
+      return MAPSURF_32BIT | MT_32BIT_ARGB8888;
    case MESA_FORMAT_YCBCR_REV:
-      t->intel.texelBytes = 2;
-      textureFormat = (MAPSURF_422 | MT_422_YCRCB_NORMAL | 
-		       TM0S1_COLORSPACE_CONVERSION);
-      break;
-
+      return (MAPSURF_422 | MT_422_YCRCB_NORMAL);
    case MESA_FORMAT_YCBCR:
-      t->intel.texelBytes = 2;
-      textureFormat = (MAPSURF_422 | MT_422_YCRCB_SWAPY | /* ??? */
-		       TM0S1_COLORSPACE_CONVERSION);
-      break;
-
+      return (MAPSURF_422 | MT_422_YCRCB_SWAPY);
    case MESA_FORMAT_RGB_FXT1:
    case MESA_FORMAT_RGBA_FXT1:
-     t->intel.texelBytes = 2;
-     textureFormat = MAPSURF_COMPRESSED | MT_COMPRESS_FXT1;
-     break;
-
+      return (MAPSURF_COMPRESSED | MT_COMPRESS_FXT1);
    case MESA_FORMAT_RGBA_DXT1:
    case MESA_FORMAT_RGB_DXT1:
-     /* 
-      * DXTn pitches are Width/4 * blocksize in bytes 
-      * for DXT1: blocksize=8 so Width/4*8 = Width * 2 
-      * for DXT3/5: blocksize=16 so Width/4*16 = Width * 4
-      */
-     t->intel.texelBytes = 2;
-     textureFormat = (MAPSURF_COMPRESSED | MT_COMPRESS_DXT1);
-     break;
+      return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT1);
    case MESA_FORMAT_RGBA_DXT3:
-     t->intel.texelBytes = 4;
-     textureFormat = (MAPSURF_COMPRESSED | MT_COMPRESS_DXT2_3);
-     break;
+      return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT2_3);
    case MESA_FORMAT_RGBA_DXT5:
-     t->intel.texelBytes = 4;
-     textureFormat = (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5);
-     break;
-
+      return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5);
    default:
-      fprintf(stderr, "%s: bad image format\n", __FUNCTION__);
+      fprintf(stderr, "%s: bad image format %x\n", __FUNCTION__, mesa_format);
       abort();
+      return 0;
    }
-
-   /* Compute which mipmap levels we really want to send to the hardware.
-    * This depends on the base image size, GL_TEXTURE_MIN_LOD,
-    * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, and GL_TEXTURE_MAX_LEVEL.
-    * Yes, this looks overly complicated, but it's all needed.
-    */
-   driCalculateTextureFirstLastLevel( (driTextureObject *) t );
+}
 
 
-   /* Figure out the amount of memory required to hold all the mipmap
-    * levels.  Choose the smallest pitch to accomodate the largest
-    * mipmap:
-    */
-   firstLevel = t->intel.base.firstLevel;
-   lastLevel = t->intel.base.lastLevel;
-   numLevels = lastLevel - firstLevel + 1;
 
 
-   /* All images must be loaded at this pitch.  Count the number of
-    * lines required:
-    */
-   switch (tObj->Target) {
-   case GL_TEXTURE_CUBE_MAP: {
-      const GLuint dim = tObj->Image[0][firstLevel]->Width;
-      GLuint face;
-
-      pitch = dim * t->intel.texelBytes;
-      pitch *= 2;		/* double pitch for cube layouts */
-      pitch = (pitch + 3) & ~3;
-      
-      total_height = dim * 4;
-
-      for ( face = 0 ; face < 6 ; face++) {
-	 GLuint x = initial_offsets[face][0] * dim;
-	 GLuint y = initial_offsets[face][1] * dim;
-	 GLuint d = dim;
-	 
-	 t->intel.base.dirty_images[face] = ~0;
-
-	 assert(tObj->Image[face][firstLevel]->Width == dim);
-	 assert(tObj->Image[face][firstLevel]->Height == dim);
-
-	 for (i = 0; i < numLevels; i++) {
-	    t->intel.image[face][i].image = tObj->Image[face][firstLevel + i];
-	    if (!t->intel.image[face][i].image) {
-	       fprintf(stderr, "no image %d %d\n", face, i);
-	       break;		/* can't happen */
-	    }
-	 
-	    t->intel.image[face][i].offset = 
-	       y * pitch + x * t->intel.texelBytes;
-	    t->intel.image[face][i].internalFormat = baseImage->_BaseFormat;
-
-	    d >>= 1;
-	    x += step_offsets[face][0] * d;
-	    y += step_offsets[face][1] * d;
-	 }
-      }
-      break;
-   }
+/* The i915 (and related graphics cores) do not support GL_CLAMP.  The
+ * Intel drivers for "other operating systems" implement GL_CLAMP as
+ * GL_CLAMP_TO_EDGE, so the same is done here.
+ */
+static GLuint
+translate_wrap_mode(GLenum wrap)
+{
+   switch (wrap) {
+   case GL_REPEAT:
+      return TEXCOORDMODE_WRAP;
+   case GL_CLAMP:
+   case GL_CLAMP_TO_EDGE:
+      return TEXCOORDMODE_CLAMP;        /* not really correct */
+   case GL_CLAMP_TO_BORDER:
+      return TEXCOORDMODE_CLAMP_BORDER;
+   case GL_MIRRORED_REPEAT:
+      return TEXCOORDMODE_MIRROR;
    default:
-      pitch = tObj->Image[0][firstLevel]->Width * t->intel.texelBytes;
-      pitch = (pitch + 3) & ~3;
-      t->intel.base.dirty_images[0] = ~0;
-
-      for ( total_height = i = 0 ; i < numLevels ; i++ ) {
-	 t->intel.image[0][i].image = tObj->Image[0][firstLevel + i];
-	 if (!t->intel.image[0][i].image) 
-	    break;
-	 
-	 t->intel.image[0][i].offset = total_height * pitch;
-	 t->intel.image[0][i].internalFormat = baseImage->_BaseFormat;
-	 if (t->intel.image[0][i].image->IsCompressed)
-	 {
-	   if (t->intel.image[0][i].image->Height > 4)
-	     total_height += t->intel.image[0][i].image->Height/4;
-	   else
-	     total_height += 1;
-	 }
-	 else
-	   total_height += MAX2(2, t->intel.image[0][i].image->Height);
-      }
-      break;
+      return TEXCOORDMODE_WRAP;
    }
-
-   t->intel.Pitch = pitch;
-   t->intel.base.totalSize = total_height*pitch;
-   t->intel.max_level = i-1;
-   t->Setup[I830_TEXREG_TM0S1] = 
-      (((tObj->Image[0][firstLevel]->Height - 1) << TM0S1_HEIGHT_SHIFT) |
-       ((tObj->Image[0][firstLevel]->Width - 1) << TM0S1_WIDTH_SHIFT) |
-       textureFormat);
-   t->Setup[I830_TEXREG_TM0S2] = 
-      (((pitch / 4) - 1) << TM0S2_PITCH_SHIFT) |
-      TM0S2_CUBE_FACE_ENA_MASK;
-   t->Setup[I830_TEXREG_TM0S3] &= ~TM0S3_MAX_MIP_MASK;
-   t->Setup[I830_TEXREG_TM0S3] &= ~TM0S3_MIN_MIP_MASK;
-   t->Setup[I830_TEXREG_TM0S3] |= ((numLevels - 1)*4) << TM0S3_MIN_MIP_SHIFT;
-   t->intel.dirty = I830_UPLOAD_TEX_ALL;
-
-   return intelUploadTexImages( &i830->intel, &t->intel, 0 );
 }
 
 
-static void i830_import_tex_unit( i830ContextPtr i830, 
-			   i830TextureObjectPtr t,
-			   GLuint unit )
+/* Recalculate all state from scratch.  Perhaps not the most
+ * efficient, but this has gotten complex enough that we need
+ * something which is understandable and reliable.
+ */
+static GLboolean
+i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
 {
-   if(INTEL_DEBUG&DEBUG_TEXTURE)
-      fprintf(stderr, "%s unit(%d)\n", __FUNCTION__, unit);
-   
-   if (i830->intel.CurrentTexObj[unit]) 
-      i830->intel.CurrentTexObj[unit]->base.bound &= ~(1U << unit);
-
-   i830->intel.CurrentTexObj[unit] = (intelTextureObjectPtr)t;
-   t->intel.base.bound |= (1 << unit);
-
-   I830_STATECHANGE( i830, I830_UPLOAD_TEX(unit) );
-
-   i830->state.Tex[unit][I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 | 
-					       (LOAD_TEXTURE_MAP0 << unit) | 4);
-   i830->state.Tex[unit][I830_TEXREG_TM0S0] = (TM0S0_USE_FENCE |
-					       t->intel.TextureOffset);
-
-   i830->state.Tex[unit][I830_TEXREG_TM0S1] = t->Setup[I830_TEXREG_TM0S1];
-   i830->state.Tex[unit][I830_TEXREG_TM0S2] = t->Setup[I830_TEXREG_TM0S2];
-
-   i830->state.Tex[unit][I830_TEXREG_TM0S3] &= TM0S3_LOD_BIAS_MASK;
-   i830->state.Tex[unit][I830_TEXREG_TM0S3] |= (t->Setup[I830_TEXREG_TM0S3] &
-						~TM0S3_LOD_BIAS_MASK);
-
-   i830->state.Tex[unit][I830_TEXREG_TM0S4] = t->Setup[I830_TEXREG_TM0S4];
-   i830->state.Tex[unit][I830_TEXREG_MCS] = (t->Setup[I830_TEXREG_MCS] & 
-					     ~MAP_UNIT_MASK);   
-   i830->state.Tex[unit][I830_TEXREG_CUBE] = t->Setup[I830_TEXREG_CUBE];
-   i830->state.Tex[unit][I830_TEXREG_MCS] |= MAP_UNIT(unit);
-
-   t->intel.dirty &= ~I830_UPLOAD_TEX(unit);
-}
-
+   GLcontext *ctx = &intel->ctx;
+   struct i830_context *i830 = i830_context(ctx);
+   struct gl_texture_unit *tUnit = &ctx->Texture.Unit[unit];
+   struct gl_texture_object *tObj = tUnit->_Current;
+   struct intel_texture_object *intelObj = intel_texture_object(tObj);
+   struct gl_texture_image *firstImage;
+   GLuint *state = i830->state.Tex[unit], format, pitch;
+   GLint lodbias;
 
+   memset(state, 0, sizeof(i830->state.Tex[unit])); /* whole array */
 
-static GLboolean enable_tex_common( GLcontext *ctx, GLuint unit )
-{
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-   struct gl_texture_object *tObj = texUnit->_Current;
-   i830TextureObjectPtr t = (i830TextureObjectPtr)tObj->DriverData;
+   /* We need to refcount these. */
 
-   if (0) fprintf(stderr, "%s\n", __FUNCTION__);
+   if (i830->state.tex_buffer[unit] != NULL) {
+       dri_bo_unreference(i830->state.tex_buffer[unit]);
+       i830->state.tex_buffer[unit] = NULL;
+   }
 
-   /* Fallback if there's a texture border */
-   if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) {
-      fprintf(stderr, "Texture border\n");
+   if (!intelObj->imageOverride && !intel_finalize_mipmap_tree(intel, unit))
       return GL_FALSE;
-   }
 
-   /* Upload teximages (not pipelined)
+   /* Get first image here, since intelObj->firstLevel will get set in
+    * the intel_finalize_mipmap_tree() call above.
     */
-   if (t->intel.base.dirty_images[0]) {
-      if (!i830SetTexImages( i830, tObj )) {
-	 return GL_FALSE;
+   firstImage = tObj->Image[0][intelObj->firstLevel];
+
+   if (intelObj->imageOverride) {
+      i830->state.tex_buffer[unit] = NULL;
+      i830->state.tex_offset[unit] = intelObj->textureOffset;
+
+      switch (intelObj->depthOverride) {
+      case 32:
+	 format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
+	 break;
+      case 24:
+      default:
+	 format = MAPSURF_32BIT | MT_32BIT_XRGB8888;
+	 break;
+      case 16:
+	 format = MAPSURF_16BIT | MT_16BIT_RGB565;
+	 break;
       }
-   }
-
-   /* Update state if this is a different texture object to last
-    * time.
-    */
-   if (i830->intel.CurrentTexObj[unit] != &t->intel || 
-       (t->intel.dirty & I830_UPLOAD_TEX(unit))) {
-      i830_import_tex_unit( i830, t, unit);
-   }
-
-   I830_ACTIVESTATE(i830, I830_UPLOAD_TEX(unit), GL_TRUE);
-
-   return GL_TRUE;
-}
-
-static GLboolean enable_tex_rect( GLcontext *ctx, GLuint unit )
-{
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
-   GLuint mcs = i830->state.Tex[unit][I830_TEXREG_MCS];
 
-   mcs &= ~TEXCOORDS_ARE_NORMAL;
-   mcs |= TEXCOORDS_ARE_IN_TEXELUNITS;
+      pitch = intelObj->pitchOverride;
+   } else {
+      dri_bo_reference(intelObj->mt->region->buffer);
+      i830->state.tex_buffer[unit] = intelObj->mt->region->buffer;
+      i830->state.tex_offset[unit] = intel_miptree_image_offset(intelObj->mt,
+								0, intelObj->
+								firstLevel);
 
-   if ((mcs != i830->state.Tex[unit][I830_TEXREG_MCS])
-       || (0 != i830->state.Tex[unit][I830_TEXREG_CUBE])) {
-      I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit));
-      i830->state.Tex[unit][I830_TEXREG_MCS] = mcs;
-      i830->state.Tex[unit][I830_TEXREG_CUBE] = 0;
+      format = translate_texture_format(firstImage->TexFormat->MesaFormat);
+      pitch = intelObj->mt->pitch * intelObj->mt->cpp;
    }
 
-   return GL_TRUE;
-}
+   state[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
+                               (LOAD_TEXTURE_MAP0 << unit) | 4);
 
+/*    state[I830_TEXREG_TM0S0] = (TM0S0_USE_FENCE | */
+/* 			       t->intel.TextureOffset); */
 
-static GLboolean enable_tex_2d( GLcontext *ctx, GLuint unit )
-{
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
-   GLuint mcs = i830->state.Tex[unit][I830_TEXREG_MCS];
 
-   mcs &= ~TEXCOORDS_ARE_IN_TEXELUNITS;
-   mcs |= TEXCOORDS_ARE_NORMAL;
+   state[I830_TEXREG_TM0S1] =
+      (((firstImage->Height - 1) << TM0S1_HEIGHT_SHIFT) |
+       ((firstImage->Width - 1) << TM0S1_WIDTH_SHIFT) | format);
+
+   state[I830_TEXREG_TM0S2] =
+      ((((pitch / 4) - 1) << TM0S2_PITCH_SHIFT) | TM0S2_CUBE_FACE_ENA_MASK);
 
-   if ((mcs != i830->state.Tex[unit][I830_TEXREG_MCS])
-       || (0 != i830->state.Tex[unit][I830_TEXREG_CUBE])) {
-      I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit));
-      i830->state.Tex[unit][I830_TEXREG_MCS] = mcs;
-      i830->state.Tex[unit][I830_TEXREG_CUBE] = 0;
+   {
+      if (tObj->Target == GL_TEXTURE_CUBE_MAP)
+         state[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(unit) |
+                                    CUBE_NEGX_ENABLE |
+                                    CUBE_POSX_ENABLE |
+                                    CUBE_NEGY_ENABLE |
+                                    CUBE_POSY_ENABLE |
+                                    CUBE_NEGZ_ENABLE | CUBE_POSZ_ENABLE);
+      else
+         state[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(unit));
    }
 
-   return GL_TRUE;
-}
 
- 
-static GLboolean enable_tex_cube( GLcontext *ctx, GLuint unit )
-{
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-   struct gl_texture_object *tObj = texUnit->_Current;
-   i830TextureObjectPtr t = (i830TextureObjectPtr)tObj->DriverData;
-   GLuint mcs = i830->state.Tex[unit][I830_TEXREG_MCS];
-   const GLuint cube = CUBE_NEGX_ENABLE | CUBE_POSX_ENABLE
-     | CUBE_NEGY_ENABLE | CUBE_POSY_ENABLE
-     | CUBE_NEGZ_ENABLE | CUBE_POSZ_ENABLE;
-   GLuint face;
-
-   mcs &= ~TEXCOORDS_ARE_IN_TEXELUNITS;
-   mcs |= TEXCOORDS_ARE_NORMAL;
-
-   if ((mcs != i830->state.Tex[unit][I830_TEXREG_MCS])
-       || (cube != i830->state.Tex[unit][I830_TEXREG_CUBE])) {
-      I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit));
-      i830->state.Tex[unit][I830_TEXREG_MCS] = mcs;
-      i830->state.Tex[unit][I830_TEXREG_CUBE] = cube;
-   }
 
-   /* Upload teximages (not pipelined)
-    */
-   if ( t->intel.base.dirty_images[0] || t->intel.base.dirty_images[1] ||
-        t->intel.base.dirty_images[2] || t->intel.base.dirty_images[3] ||
-        t->intel.base.dirty_images[4] || t->intel.base.dirty_images[5] ) {
-      i830SetTexImages( i830, tObj );
-   }
 
-   /* upload (per face) */
-   for (face = 0; face < 6; face++) {
-      if (t->intel.base.dirty_images[face]) {
-	 if (!intelUploadTexImages( &i830->intel, &t->intel, face )) {
-	    return GL_FALSE;
-	 }
+   {
+      GLuint minFilt, mipFilt, magFilt;
+
+      switch (tObj->MinFilter) {
+      case GL_NEAREST:
+         minFilt = FILTER_NEAREST;
+         mipFilt = MIPFILTER_NONE;
+         break;
+      case GL_LINEAR:
+         minFilt = FILTER_LINEAR;
+         mipFilt = MIPFILTER_NONE;
+         break;
+      case GL_NEAREST_MIPMAP_NEAREST:
+         minFilt = FILTER_NEAREST;
+         mipFilt = MIPFILTER_NEAREST;
+         break;
+      case GL_LINEAR_MIPMAP_NEAREST:
+         minFilt = FILTER_LINEAR;
+         mipFilt = MIPFILTER_NEAREST;
+         break;
+      case GL_NEAREST_MIPMAP_LINEAR:
+         minFilt = FILTER_NEAREST;
+         mipFilt = MIPFILTER_LINEAR;
+         break;
+      case GL_LINEAR_MIPMAP_LINEAR:
+         minFilt = FILTER_LINEAR;
+         mipFilt = MIPFILTER_LINEAR;
+         break;
+      default:
+         return GL_FALSE;
       }
-   }
-
 
-   return GL_TRUE;
-}
+      if (tObj->MaxAnisotropy > 1.0) {
+         minFilt = FILTER_ANISOTROPIC;
+         magFilt = FILTER_ANISOTROPIC;
+      }
+      else {
+         switch (tObj->MagFilter) {
+         case GL_NEAREST:
+            magFilt = FILTER_NEAREST;
+            break;
+         case GL_LINEAR:
+            magFilt = FILTER_LINEAR;
+            break;
+         default:
+            return GL_FALSE;
+         }
+      }
 
+      lodbias = (int) ((tUnit->LodBias + tObj->LodBias) * 16.0);
+      if (lodbias < -64)
+          lodbias = -64;
+      if (lodbias > 63)
+          lodbias = 63;
+      /* Shift as unsigned: left-shifting a negative GLint is undefined. */
+      state[I830_TEXREG_TM0S3] = (((GLuint) lodbias << TM0S3_LOD_BIAS_SHIFT) &
+                                  TM0S3_LOD_BIAS_MASK);
+#if 0
+      /* YUV conversion:
+       */
+      if (firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR ||
+          firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR_REV)
+         state[I830_TEXREG_TM0S3] |= SS2_COLORSPACE_CONVERSION;
+#endif
+
+      state[I830_TEXREG_TM0S3] |= ((intelObj->lastLevel -
+                                    intelObj->firstLevel) *
+                                   4) << TM0S3_MIN_MIP_SHIFT;
+
+      state[I830_TEXREG_TM0S3] |= ((minFilt << TM0S3_MIN_FILTER_SHIFT) |
+                                   (mipFilt << TM0S3_MIP_FILTER_SHIFT) |
+                                   (magFilt << TM0S3_MAG_FILTER_SHIFT));
+   }
 
-static GLboolean disable_tex( GLcontext *ctx, GLuint unit )
-{
-   i830ContextPtr i830 = I830_CONTEXT(ctx);
+   {
+      GLenum ws = tObj->WrapS;
+      GLenum wt = tObj->WrapT;
 
-   /* This is happening too often.  I need to conditionally send diffuse
-    * state to the card.  Perhaps a diffuse dirty flag of some kind.
-    * Will need to change this logic if more than 2 texture units are
-    * used.  We need to only do this up to the last unit enabled, or unit
-    * one if nothing is enabled.
-    */
 
-   if ( i830->intel.CurrentTexObj[unit] != NULL ) {
-      /* The old texture is no longer bound to this texture unit.
-       * Mark it as such.
+      /* 3D textures not available on i830
        */
-
-      i830->intel.CurrentTexObj[unit]->base.bound &= ~(1U << 0);
-      i830->intel.CurrentTexObj[unit] = NULL;
+      if (tObj->Target == GL_TEXTURE_3D)
+         return GL_FALSE;
+
+      state[I830_TEXREG_MCS] = (_3DSTATE_MAP_COORD_SET_CMD |
+                                MAP_UNIT(unit) |
+                                ENABLE_TEXCOORD_PARAMS |
+                                ss3 |
+                                ENABLE_ADDR_V_CNTL |
+                                TEXCOORD_ADDR_V_MODE(translate_wrap_mode(wt))
+                                | ENABLE_ADDR_U_CNTL |
+                                TEXCOORD_ADDR_U_MODE(translate_wrap_mode
+                                                     (ws)));
    }
 
-   return GL_TRUE;
-}
 
-static GLboolean i830UpdateTexUnit( GLcontext *ctx, GLuint unit )
-{
-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   state[I830_TEXREG_TM0S4] = INTEL_PACKCOLOR8888(tObj->_BorderChan[0],
+                                                  tObj->_BorderChan[1],
+                                                  tObj->_BorderChan[2],
+                                                  tObj->_BorderChan[3]);
 
-   if (texUnit->_ReallyEnabled &&
-       INTEL_CONTEXT(ctx)->intelScreen->tex.size < 2048 * 1024)
-      return GL_FALSE;
 
-   switch(texUnit->_ReallyEnabled) {
-   case TEXTURE_1D_BIT:
-   case TEXTURE_2D_BIT:
-      return (enable_tex_common( ctx, unit ) &&
-	      enable_tex_2d( ctx, unit ));
-   case TEXTURE_RECT_BIT:
-      return (enable_tex_common( ctx, unit ) &&
-	      enable_tex_rect( ctx, unit ));
-   case TEXTURE_CUBE_BIT:
-      return (enable_tex_common( ctx, unit ) &&
-	      enable_tex_cube( ctx, unit ));
-   case 0:
-      return disable_tex( ctx, unit );
-   default:
-      return GL_FALSE;
-   }
+   I830_ACTIVESTATE(i830, I830_UPLOAD_TEX(unit), GL_TRUE);
+   /* memcmp was already disabled, but definitely won't work as the
+    * region might now change and that wouldn't be detected:
+    */
+   I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit));
+   return GL_TRUE;
 }
 
 
-void i830UpdateTextureState( intelContextPtr intel )
-{
-   i830ContextPtr i830 = I830_CONTEXT(intel);
-   GLcontext *ctx = &intel->ctx;
-   GLboolean ok;
-
-   if (0) fprintf(stderr, "%s\n", __FUNCTION__);
 
-   I830_ACTIVESTATE(i830, I830_UPLOAD_TEX_ALL, GL_FALSE);
 
-   ok = (i830UpdateTexUnit( ctx, 0 ) &&
-	 i830UpdateTexUnit( ctx, 1 ) &&
-	 i830UpdateTexUnit( ctx, 2 ) &&
-	 i830UpdateTexUnit( ctx, 3 ));
+void
+i830UpdateTextureState(struct intel_context *intel)
+{
+   struct i830_context *i830 = i830_context(&intel->ctx);
+   GLboolean ok = GL_TRUE;
+   GLuint i;
+
+   for (i = 0; i < I830_TEX_UNITS && ok; i++) {
+      switch (intel->ctx.Texture.Unit[i]._ReallyEnabled) {
+      case TEXTURE_1D_BIT:
+      case TEXTURE_2D_BIT:
+      case TEXTURE_CUBE_BIT:
+         ok = i830_update_tex_unit(intel, i, TEXCOORDS_ARE_NORMAL);
+         break;
+      case TEXTURE_RECT_BIT:
+         ok = i830_update_tex_unit(intel, i, TEXCOORDS_ARE_IN_TEXELUNITS);
+         break;
+      case 0:{
+         /* Unit disabled: clear its upload flag and drop the buffer ref. */
+         if (i830->state.active & I830_UPLOAD_TEX(i))
+            I830_ACTIVESTATE(i830, I830_UPLOAD_TEX(i), GL_FALSE);
+
+         if (i830->state.tex_buffer[i] != NULL) {
+            dri_bo_unreference(i830->state.tex_buffer[i]);
+            i830->state.tex_buffer[i] = NULL;
+         }
+         break;
+      }
+      case TEXTURE_3D_BIT:
+      default:
+         ok = GL_FALSE;
+         break;
+      }
+   }
 
-   FALLBACK( intel, I830_FALLBACK_TEXTURE, !ok );
+   FALLBACK(intel, I830_FALLBACK_TEXTURE, !ok);
 
    if (ok)
-      i830EmitTextureBlend( i830 );
+      i830EmitTextureBlend(i830);
 }
-
-
-
diff --git a/i915/i830_vtbl.c b/i915/i830_vtbl.c
index d40cf70..c5a85fe 100644
--- a/i915/i830_vtbl.c
+++ b/i915/i830_vtbl.c
@@ -25,17 +25,19 @@
  * 
  **************************************************************************/
 
+#include "glapi.h"
 
 #include "i830_context.h"
 #include "i830_reg.h"
-
 #include "intel_batchbuffer.h"
-
+#include "intel_regions.h"
 #include "tnl/t_context.h"
 #include "tnl/t_vertex.h"
 
-static GLboolean i830_check_vertex_size( intelContextPtr intel,
-					 GLuint expected );
+#define FILE_DEBUG_FLAG DEBUG_STATE
+
+static GLboolean i830_check_vertex_size(struct intel_context *intel,
+                                        GLuint expected);
 
 #define SZ_TO_HW(sz)  ((sz-2)&0x3)
 #define EMIT_SZ(sz)   (EMIT_1F + (sz) - 1)
@@ -59,10 +61,16 @@ do {									\
 #define VRTX_TEX_SET_FMT(n, x)          ((x)<<((n)*2))
 #define TEXBIND_SET(n, x) 		((x)<<((n)*4))
 
-static void i830_render_start( intelContextPtr intel )
+static void
+i830_render_prevalidate(struct intel_context *intel)
+{
+}
+
+static void
+i830_render_start(struct intel_context *intel)
 {
    GLcontext *ctx = &intel->ctx;
-   i830ContextPtr i830 = I830_CONTEXT(intel);
+   struct i830_context *i830 = i830_context(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *VB = &tnl->vb;
    DECLARE_RENDERINPUTS(index_bitset);
@@ -70,7 +78,7 @@ static void i830_render_start( intelContextPtr intel )
    GLuint v2 = _3DSTATE_VFT1_CMD;
    GLuint mcsb1 = 0;
 
-   RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset );
+   RENDERINPUTS_COPY(index_bitset, tnl->render_inputs_bitset);
 
    /* Important:
     */
@@ -80,196 +88,215 @@ static void i830_render_start( intelContextPtr intel )
    /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
     * build up a hardware vertex.
     */
-   if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
-      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, VFT0_XYZW );
+   if (RENDERINPUTS_TEST_RANGE(index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX)) {
+      EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, VFT0_XYZW);
       intel->coloroffset = 4;
    }
    else {
-      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, VFT0_XYZ );
+      EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, VFT0_XYZ);
       intel->coloroffset = 3;
    }
 
-   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) {
-      EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F, VFT0_POINT_WIDTH );
+   if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_POINTSIZE)) {
+      EMIT_ATTR(_TNL_ATTRIB_POINTSIZE, EMIT_1F, VFT0_POINT_WIDTH);
    }
 
-   EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, VFT0_DIFFUSE );
-      
+   EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, VFT0_DIFFUSE);
+
    intel->specoffset = 0;
-   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 ) ||
-       RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) {
-      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
+   if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_COLOR1) ||
+       RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_FOG)) {
+      if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_COLOR1)) {
          intel->specoffset = intel->coloroffset + 1;
-         EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, VFT0_SPEC );
+         EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, VFT0_SPEC);
       }
       else
-         EMIT_PAD( 3 );
+         EMIT_PAD(3);
 
-      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG ))
-         EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, VFT0_SPEC );
+      if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_FOG))
+         EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1UB_1F, VFT0_SPEC);
       else
-         EMIT_PAD( 1 );
+         EMIT_PAD(1);
    }
 
-   if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
+   if (RENDERINPUTS_TEST_RANGE(index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX)) {
       int i, count = 0;
 
       for (i = 0; i < I830_TEX_UNITS; i++) {
-         if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) {
+         if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_TEX(i))) {
             GLuint sz = VB->TexCoordPtr[i]->size;
             GLuint emit;
-            GLuint mcs = (i830->state.Tex[i][I830_TEXREG_MCS] & 
+            GLuint mcs = (i830->state.Tex[i][I830_TEXREG_MCS] &
                           ~TEXCOORDTYPE_MASK);
 
-	    switch (sz) {
-	    case 1: 
-	    case 2: 
-	       emit = EMIT_2F; 
-	       sz = 2; 
-	       mcs |= TEXCOORDTYPE_CARTESIAN; 
-	       break;
-	    case 3:
-	       emit = EMIT_3F; 
-	       sz = 3;
-	       mcs |= TEXCOORDTYPE_VECTOR;
-	       break;
-	    case 4: 
-	       emit = EMIT_3F_XYW; 
-	       sz = 3;     
-	       mcs |= TEXCOORDTYPE_HOMOGENEOUS;
-	       break;
-	    default: 
-	       continue;
-	    };
-	      
-
-	    EMIT_ATTR( _TNL_ATTRIB_TEX0+i, emit, 0 );	       
-	    v2 |= VRTX_TEX_SET_FMT(count, SZ_TO_HW(sz));
-	    mcsb1 |= (count+8)<<(i*4);
-
-	    if (mcs != i830->state.Tex[i][I830_TEXREG_MCS]) {
-	       I830_STATECHANGE(i830, I830_UPLOAD_TEX(i));
-	       i830->state.Tex[i][I830_TEXREG_MCS] = mcs;
-	    }
-
-	    count++;
-	 }
+            switch (sz) {
+            case 1:
+            case 2:
+               emit = EMIT_2F;
+               sz = 2;
+               mcs |= TEXCOORDTYPE_CARTESIAN;
+               break;
+            case 3:
+               emit = EMIT_3F;
+               sz = 3;
+               mcs |= TEXCOORDTYPE_VECTOR;
+               break;
+            case 4:
+               emit = EMIT_3F_XYW;
+               sz = 3;
+               mcs |= TEXCOORDTYPE_HOMOGENEOUS;
+               break;
+            default:
+               continue;
+            };
+
+
+            EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, emit, 0);
+            v2 |= VRTX_TEX_SET_FMT(count, SZ_TO_HW(sz));
+            mcsb1 |= (count + 8) << (i * 4);
+
+            if (mcs != i830->state.Tex[i][I830_TEXREG_MCS]) {
+               I830_STATECHANGE(i830, I830_UPLOAD_TEX(i));
+               i830->state.Tex[i][I830_TEXREG_MCS] = mcs;
+            }
+
+            count++;
+         }
       }
 
       v0 |= VFT0_TEX_COUNT(count);
    }
-   
+
    /* Only need to change the vertex emit code if there has been a
     * statechange to a new hardware vertex format:
     */
    if (v0 != i830->state.Ctx[I830_CTXREG_VF] ||
        v2 != i830->state.Ctx[I830_CTXREG_VF2] ||
        mcsb1 != i830->state.Ctx[I830_CTXREG_MCSB1] ||
-       !RENDERINPUTS_EQUAL( index_bitset, i830->last_index_bitset )) {
-    
-      I830_STATECHANGE( i830, I830_UPLOAD_CTX );
+       !RENDERINPUTS_EQUAL(index_bitset, i830->last_index_bitset)) {
+      int k;
+
+      I830_STATECHANGE(i830, I830_UPLOAD_CTX);
 
       /* Must do this *after* statechange, so as not to affect
        * buffered vertices reliant on the old state:
        */
-      intel->vertex_size = 
-	 _tnl_install_attrs( ctx, 
-			     intel->vertex_attrs, 
-			     intel->vertex_attr_count,
-			     intel->ViewportMatrix.m, 0 );
+      intel->vertex_size =
+         _tnl_install_attrs(ctx,
+                            intel->vertex_attrs,
+                            intel->vertex_attr_count,
+                            intel->ViewportMatrix.m, 0);
 
       intel->vertex_size >>= 2;
 
       i830->state.Ctx[I830_CTXREG_VF] = v0;
       i830->state.Ctx[I830_CTXREG_VF2] = v2;
       i830->state.Ctx[I830_CTXREG_MCSB1] = mcsb1;
-      RENDERINPUTS_COPY( i830->last_index_bitset, index_bitset );
+      RENDERINPUTS_COPY(i830->last_index_bitset, index_bitset);
 
-      assert(i830_check_vertex_size( intel, intel->vertex_size ));
+      k = i830_check_vertex_size(intel, intel->vertex_size);
+      assert(k);
    }
 }
 
-static void i830_reduced_primitive_state( intelContextPtr intel,
-					  GLenum rprim )
+static void
+i830_reduced_primitive_state(struct intel_context *intel, GLenum rprim)
 {
-    i830ContextPtr i830 = I830_CONTEXT(intel);
-    GLuint st1 = i830->state.Stipple[I830_STPREG_ST1];
-
-    st1 &= ~ST1_ENABLE;
-
-    switch (rprim) {
-    case GL_TRIANGLES:
-       if (intel->ctx.Polygon.StippleFlag &&
-	   intel->hw_stipple)
-	  st1 |= ST1_ENABLE;
-       break;
-    case GL_LINES:
-    case GL_POINTS:
-    default:
-       break;
-    }
-
-    i830->intel.reduced_primitive = rprim;
-
-    if (st1 != i830->state.Stipple[I830_STPREG_ST1]) {
-       I830_STATECHANGE(i830, I830_UPLOAD_STIPPLE);
-       i830->state.Stipple[I830_STPREG_ST1] = st1;
-    }
+   struct i830_context *i830 = i830_context(&intel->ctx);
+   GLuint st1 = i830->state.Stipple[I830_STPREG_ST1];
+
+   st1 &= ~ST1_ENABLE;
+
+   switch (rprim) {
+   case GL_TRIANGLES:
+      if (intel->ctx.Polygon.StippleFlag && intel->hw_stipple)
+         st1 |= ST1_ENABLE;
+      break;
+   case GL_LINES:
+   case GL_POINTS:
+   default:
+      break;
+   }
+
+   i830->intel.reduced_primitive = rprim;
+
+   if (st1 != i830->state.Stipple[I830_STPREG_ST1]) {
+      INTEL_FIREVERTICES(intel);
+
+      I830_STATECHANGE(i830, I830_UPLOAD_STIPPLE);
+      i830->state.Stipple[I830_STPREG_ST1] = st1;
+   }
 }
 
 /* Pull apart the vertex format registers and figure out how large a
  * vertex is supposed to be. 
  */
-static GLboolean i830_check_vertex_size( intelContextPtr intel,
-					 GLuint expected )
+static GLboolean
+i830_check_vertex_size(struct intel_context *intel, GLuint expected)
 {
-   i830ContextPtr i830 = I830_CONTEXT(intel);
+   struct i830_context *i830 = i830_context(&intel->ctx);
    int vft0 = i830->current->Ctx[I830_CTXREG_VF];
    int vft1 = i830->current->Ctx[I830_CTXREG_VF2];
    int nrtex = (vft0 & VFT0_TEX_COUNT_MASK) >> VFT0_TEX_COUNT_SHIFT;
    int i, sz = 0;
 
    switch (vft0 & VFT0_XYZW_MASK) {
-   case VFT0_XY: sz = 2; break;
-   case VFT0_XYZ: sz = 3; break;
-   case VFT0_XYW: sz = 3; break;
-   case VFT0_XYZW: sz = 4; break;
-   default: 
+   case VFT0_XY:
+      sz = 2;
+      break;
+   case VFT0_XYZ:
+      sz = 3;
+      break;
+   case VFT0_XYW:
+      sz = 3;
+      break;
+   case VFT0_XYZW:
+      sz = 4;
+      break;
+   default:
       fprintf(stderr, "no xyzw specified\n");
       return 0;
    }
 
-   if (vft0 & VFT0_SPEC) sz++;
-   if (vft0 & VFT0_DIFFUSE) sz++;
-   if (vft0 & VFT0_DEPTH_OFFSET) sz++;
-   if (vft0 & VFT0_POINT_WIDTH) sz++;
-	
-   for (i = 0 ; i < nrtex ; i++) { 
+   if (vft0 & VFT0_SPEC)
+      sz++;
+   if (vft0 & VFT0_DIFFUSE)
+      sz++;
+   if (vft0 & VFT0_DEPTH_OFFSET)
+      sz++;
+   if (vft0 & VFT0_POINT_WIDTH)
+      sz++;
+
+   for (i = 0; i < nrtex; i++) {
       switch (vft1 & VFT1_TEX0_MASK) {
-      case TEXCOORDFMT_2D: sz += 2; break;
-      case TEXCOORDFMT_3D: sz += 3; break;
-      case TEXCOORDFMT_4D: sz += 4; break;
-      case TEXCOORDFMT_1D: sz += 1; break;
+      case TEXCOORDFMT_2D:
+         sz += 2;
+         break;
+      case TEXCOORDFMT_3D:
+         sz += 3;
+         break;
+      case TEXCOORDFMT_4D:
+         sz += 4;
+         break;
+      case TEXCOORDFMT_1D:
+         sz += 1;
+         break;
       }
       vft1 >>= VFT1_TEX1_SHIFT;
    }
-	
-   if (sz != expected) 
+
+   if (sz != expected)
       fprintf(stderr, "vertex size mismatch %d/%d\n", sz, expected);
-   
+
    return sz == expected;
 }
 
-static void i830_emit_invarient_state( intelContextPtr intel )
+static void
+i830_emit_invarient_state(struct intel_context *intel)
 {
    BATCH_LOCALS;
 
-   BEGIN_BATCH( 40 );
-
-   OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(0));
-   OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(1));
-   OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(2));
-   OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(3));
+   BEGIN_BATCH(40, IGNORE_CLIPRECTS);
 
    OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
    OUT_BATCH(0);
@@ -282,37 +309,35 @@ static void i830_emit_invarient_state( intelContextPtr intel )
 
    OUT_BATCH(_3DSTATE_FOG_MODE_CMD);
    OUT_BATCH(FOGFUNC_ENABLE |
-	     FOG_LINEAR_CONST | 
-	     FOGSRC_INDEX_Z | 
-	     ENABLE_FOG_DENSITY);
+             FOG_LINEAR_CONST | FOGSRC_INDEX_Z | ENABLE_FOG_DENSITY);
    OUT_BATCH(0);
    OUT_BATCH(0);
 
 
    OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
-	     MAP_UNIT(0) |
-	     DISABLE_TEX_STREAM_BUMP |
-	     ENABLE_TEX_STREAM_COORD_SET |
-	     TEX_STREAM_COORD_SET(0) |
-	     ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(0));
+             MAP_UNIT(0) |
+             DISABLE_TEX_STREAM_BUMP |
+             ENABLE_TEX_STREAM_COORD_SET |
+             TEX_STREAM_COORD_SET(0) |
+             ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(0));
    OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
-	     MAP_UNIT(1) |
-	     DISABLE_TEX_STREAM_BUMP |
-	     ENABLE_TEX_STREAM_COORD_SET |
-	     TEX_STREAM_COORD_SET(1) |
-	     ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(1));
+             MAP_UNIT(1) |
+             DISABLE_TEX_STREAM_BUMP |
+             ENABLE_TEX_STREAM_COORD_SET |
+             TEX_STREAM_COORD_SET(1) |
+             ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(1));
    OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
-	     MAP_UNIT(2) |
-	     DISABLE_TEX_STREAM_BUMP |
-	     ENABLE_TEX_STREAM_COORD_SET |
-	     TEX_STREAM_COORD_SET(2) |
-	     ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(2));
+             MAP_UNIT(2) |
+             DISABLE_TEX_STREAM_BUMP |
+             ENABLE_TEX_STREAM_COORD_SET |
+             TEX_STREAM_COORD_SET(2) |
+             ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(2));
    OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
-	     MAP_UNIT(3) |
-	     DISABLE_TEX_STREAM_BUMP |
-	     ENABLE_TEX_STREAM_COORD_SET |
-	     TEX_STREAM_COORD_SET(3) |
-	     ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(3));
+             MAP_UNIT(3) |
+             DISABLE_TEX_STREAM_BUMP |
+             ENABLE_TEX_STREAM_COORD_SET |
+             TEX_STREAM_COORD_SET(3) |
+             ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(3));
 
    OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
    OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(0));
@@ -324,21 +349,13 @@ static void i830_emit_invarient_state( intelContextPtr intel )
    OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(3));
 
    OUT_BATCH(_3DSTATE_RASTER_RULES_CMD |
-	     ENABLE_POINT_RASTER_RULE |
-	     OGL_POINT_RASTER_RULE |
-	     ENABLE_LINE_STRIP_PROVOKE_VRTX |
-	     ENABLE_TRI_FAN_PROVOKE_VRTX |
-	     ENABLE_TRI_STRIP_PROVOKE_VRTX |
-	     LINE_STRIP_PROVOKE_VRTX(1) |
-	     TRI_FAN_PROVOKE_VRTX(2) | 
-	     TRI_STRIP_PROVOKE_VRTX(2));
-
-   OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | 
-	     DISABLE_SCISSOR_RECT);
-
-   OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
+             ENABLE_POINT_RASTER_RULE |
+             OGL_POINT_RASTER_RULE |
+             ENABLE_LINE_STRIP_PROVOKE_VRTX |
+             ENABLE_TRI_FAN_PROVOKE_VRTX |
+             ENABLE_TRI_STRIP_PROVOKE_VRTX |
+             LINE_STRIP_PROVOKE_VRTX(1) |
+             TRI_FAN_PROVOKE_VRTX(2) | TRI_STRIP_PROVOKE_VRTX(2));
 
    OUT_BATCH(_3DSTATE_VERTEX_TRANSFORM);
    OUT_BATCH(DISABLE_VIEWPORT_TRANSFORM | DISABLE_PERSPECTIVE_DIVIDE);
@@ -349,45 +366,46 @@ static void i830_emit_invarient_state( intelContextPtr intel )
 
 
    OUT_BATCH(_3DSTATE_COLOR_FACTOR_CMD);
-   OUT_BATCH(0x80808080);	/* .5 required in alpha for GL_DOT3_RGBA_EXT */
+   OUT_BATCH(0x80808080);       /* .5 required in alpha for GL_DOT3_RGBA_EXT */
 
    ADVANCE_BATCH();
 }
 
 
 #define emit( intel, state, size )			\
-do {							\
-   int k;						\
-   BEGIN_BATCH( size / sizeof(GLuint));			\
-   for (k = 0 ; k < size / sizeof(GLuint) ; k++)	\
-      OUT_BATCH(state[k]);				\
-   ADVANCE_BATCH();					\
-} while (0);
-
-static GLuint get_state_size( struct i830_hw_state *state )
+   intel_batchbuffer_data(intel->batch, state, size, IGNORE_CLIPRECTS )
+
+static GLuint
+get_dirty(struct i830_hw_state *state)
 {
-   GLuint dirty = state->active & ~state->emitted;
+   return state->active & ~state->emitted;
+}
+
+static GLuint
+get_state_size(struct i830_hw_state *state)
+{
+   GLuint dirty = get_dirty(state);
    GLuint sz = 0;
    GLuint i;
 
    if (dirty & I830_UPLOAD_INVARIENT)
       sz += 40 * sizeof(int);
 
-   if (dirty & I830_UPLOAD_CTX) 
+   if (dirty & I830_UPLOAD_CTX)
       sz += sizeof(state->Ctx);
 
-   if (dirty & I830_UPLOAD_BUFFERS) 
+   if (dirty & I830_UPLOAD_BUFFERS)
       sz += sizeof(state->Buffer);
 
-   if (dirty & I830_UPLOAD_STIPPLE) 
+   if (dirty & I830_UPLOAD_STIPPLE)
       sz += sizeof(state->Stipple);
 
    for (i = 0; i < I830_TEX_UNITS; i++) {
-      if ((dirty & I830_UPLOAD_TEX(i)))  
-	 sz += sizeof(state->Tex[i]); 
+      if ((dirty & I830_UPLOAD_TEX(i)))
+         sz += sizeof(state->Tex[i]);
 
-      if (dirty & I830_UPLOAD_TEXBLEND(i)) 
-	 sz += state->TexBlendWordsUsed[i] * 4;
+      if (dirty & I830_UPLOAD_TEXBLEND(i))
+         sz += state->TexBlendWordsUsed[i] * 4;
    }
 
    return sz;
@@ -396,139 +414,307 @@ static GLuint get_state_size( struct i830_hw_state *state )
 
 /* Push the state into the sarea and/or texture memory.
  */
-static void i830_emit_state( intelContextPtr intel )
+static void
+i830_emit_state(struct intel_context *intel)
 {
-   i830ContextPtr i830 = I830_CONTEXT(intel);
+   struct i830_context *i830 = i830_context(&intel->ctx);
    struct i830_hw_state *state = i830->current;
-   int i;
-   GLuint dirty = state->active & ~state->emitted;
-   GLuint counter = intel->batch.counter;
+   int i, ret, count;
+   GLuint dirty;
+   GET_CURRENT_CONTEXT(ctx);
    BATCH_LOCALS;
 
-   if (intel->batch.space < get_state_size(state)) {
-      intelFlushBatch(intel, GL_TRUE);
-      dirty = state->active & ~state->emitted;
-      counter = intel->batch.counter;
+   /* We don't hold the lock at this point, so want to make sure that
+    * there won't be a buffer wrap between the state emits and the primitive
+    * emit header.
+    *
+    * It might be better to talk about explicit places where
+    * scheduling is allowed, rather than assume that it is whenever a
+    * batchbuffer fills up.
+    *
+    * Set the space as LOOP_CLIPRECTS now, since that's what our primitives
+    * will be emitted under.
+    */
+   intel_batchbuffer_require_space(intel->batch, get_state_size(state) + 8,
+				   LOOP_CLIPRECTS);
+   count = 0;
+ again:
+   dirty = get_dirty(state);
+
+   ret = 0;
+   if (dirty & I830_UPLOAD_BUFFERS) {
+     ret |= dri_bufmgr_check_aperture_space(state->draw_region->buffer);
+     if (state->depth_region)  /* optional: the buffer emit below tests it too */
+       ret |= dri_bufmgr_check_aperture_space(state->depth_region->buffer);
    }
+   for (i = 0; i < I830_TEX_UNITS; i++)
+     if (dirty & I830_UPLOAD_TEX(i)) {
+	if (state->tex_buffer[i]) {
+	  ret |= dri_bufmgr_check_aperture_space(state->tex_buffer[i]);
+	}
+     }
+
+   if (ret) {
+       if (count == 0) {
+	   count++;
+	   intel_batchbuffer_flush(intel->batch);
+	   goto again;
+       } else {
+	   _mesa_error(ctx, GL_OUT_OF_MEMORY, "i830 emit state");
+	   assert(0);
+       }
+   }
+
+
+   /* Do this here as we may have flushed the batchbuffer above,
+    * causing more state to be dirty!
+    */
+   dirty = get_dirty(state);
+   state->emitted |= dirty;
+   assert(get_dirty(state) == 0);
 
    if (dirty & I830_UPLOAD_INVARIENT) {
-      if (VERBOSE) fprintf(stderr, "I830_UPLOAD_INVARIENT:\n"); 
-      i830_emit_invarient_state( intel );
+      DBG("I830_UPLOAD_INVARIENT:\n");
+      i830_emit_invarient_state(intel);
    }
 
    if (dirty & I830_UPLOAD_CTX) {
-      if (VERBOSE) fprintf(stderr, "I830_UPLOAD_CTX:\n"); 
-      emit( i830, state->Ctx, sizeof(state->Ctx) );
+      DBG("I830_UPLOAD_CTX:\n");
+      emit(intel, state->Ctx, sizeof(state->Ctx));
+
    }
 
    if (dirty & I830_UPLOAD_BUFFERS) {
-      if (VERBOSE) fprintf(stderr, "I830_UPLOAD_BUFFERS:\n"); 
-      emit( i830, state->Buffer, sizeof(state->Buffer) );
-   }
+      DBG("I830_UPLOAD_BUFFERS:\n");
+      BEGIN_BATCH(I830_DEST_SETUP_SIZE + 2, IGNORE_CLIPRECTS);
+      OUT_BATCH(state->Buffer[I830_DESTREG_CBUFADDR0]);
+      OUT_BATCH(state->Buffer[I830_DESTREG_CBUFADDR1]);
+      OUT_RELOC(state->draw_region->buffer,
+                DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+                state->draw_region->draw_offset);
+
+      if (state->depth_region) {
+         OUT_BATCH(state->Buffer[I830_DESTREG_DBUFADDR0]);
+         OUT_BATCH(state->Buffer[I830_DESTREG_DBUFADDR1]);
+         OUT_RELOC(state->depth_region->buffer,
+                   DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+                   state->depth_region->draw_offset);
+      }
 
+      OUT_BATCH(state->Buffer[I830_DESTREG_DV0]);
+      OUT_BATCH(state->Buffer[I830_DESTREG_DV1]);
+      OUT_BATCH(state->Buffer[I830_DESTREG_SENABLE]);
+      OUT_BATCH(state->Buffer[I830_DESTREG_SR0]);
+      OUT_BATCH(state->Buffer[I830_DESTREG_SR1]);
+      OUT_BATCH(state->Buffer[I830_DESTREG_SR2]);
+      ADVANCE_BATCH();
+   }
+   
    if (dirty & I830_UPLOAD_STIPPLE) {
-      if (VERBOSE) fprintf(stderr, "I830_UPLOAD_STIPPLE:\n"); 
-      emit( i830, state->Stipple, sizeof(state->Stipple) );
+      DBG("I830_UPLOAD_STIPPLE:\n");
+      emit(intel, state->Stipple, sizeof(state->Stipple));
    }
 
    for (i = 0; i < I830_TEX_UNITS; i++) {
-      if ((dirty & I830_UPLOAD_TEX(i))) { 
- 	 if (VERBOSE) fprintf(stderr, "I830_UPLOAD_TEX(%d):\n", i); 
-	 emit( i830, state->Tex[i], sizeof(state->Tex[i])); 
-      } 
+      if ((dirty & I830_UPLOAD_TEX(i))) {
+         DBG("I830_UPLOAD_TEX(%d):\n", i);
+         BEGIN_BATCH(I830_TEX_SETUP_SIZE + 1, IGNORE_CLIPRECTS);
+         OUT_BATCH(state->Tex[i][I830_TEXREG_TM0LI]);
+         /* Next dword: a relocation against tex_buffer[i] if set, else raw. */
+         if (state->tex_buffer[i]) {
+            OUT_RELOC(state->tex_buffer[i],
+                      DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+                      state->tex_offset[i] | TM0S0_USE_FENCE);
+         }
+         else if (state == &i830->meta) {
+            assert(i == 0);
+            OUT_BATCH(0);
+         }
+         else {
+            OUT_BATCH(state->tex_offset[i]);
+         }
+
+         OUT_BATCH(state->Tex[i][I830_TEXREG_TM0S1]);
+         OUT_BATCH(state->Tex[i][I830_TEXREG_TM0S2]);
+         OUT_BATCH(state->Tex[i][I830_TEXREG_TM0S3]);
+         OUT_BATCH(state->Tex[i][I830_TEXREG_TM0S4]);
+         OUT_BATCH(state->Tex[i][I830_TEXREG_MCS]);
+         OUT_BATCH(state->Tex[i][I830_TEXREG_CUBE]);
+         ADVANCE_BATCH();   /* close the BEGIN_BATCH above, as other emits do */
+      }
 
       if (dirty & I830_UPLOAD_TEXBLEND(i)) {
-	 if (VERBOSE) fprintf(stderr, "I830_UPLOAD_TEXBLEND(%d):\n", i); 
-	 emit( i830, state->TexBlend[i], 
-	       state->TexBlendWordsUsed[i] * 4 );
+         DBG("I830_UPLOAD_TEXBLEND(%d): %d words\n", i,
+             state->TexBlendWordsUsed[i]);
+         emit(intel, state->TexBlend[i], state->TexBlendWordsUsed[i] * 4);
       }
    }
 
-   state->emitted |= dirty;
-   intel->batch.last_emit_state = counter;
-   assert(counter == intel->batch.counter);
+   intel->batch->dirty_state &= ~dirty;
+   assert(get_dirty(state) == 0);
+   assert((intel->batch->dirty_state & (1<<1)) == 0);
 }
 
-static void i830_destroy_context( intelContextPtr intel )
+static void
+i830_destroy_context(struct intel_context *intel)
 {
+   GLuint i;
+   struct i830_context *i830 = i830_context(&intel->ctx);
+
+   for (i = 0; i < I830_TEX_UNITS; i++) {
+      if (i830->state.tex_buffer[i] != NULL) {
+	 dri_bo_unreference(i830->state.tex_buffer[i]);
+	 i830->state.tex_buffer[i] = NULL;
+      }
+   }
+
    _tnl_free_vertices(&intel->ctx);
 }
 
-static void
-i830_set_color_region(intelContextPtr intel, const intelRegion *region)
+/* Bind color/depth regions to `state` and recompute its Buffer[] dest words. */
+void
+i830_state_draw_region(struct intel_context *intel,
+		       struct i830_hw_state *state,
+		       struct intel_region *color_region,
+		       struct intel_region *depth_region)
 {
-   i830ContextPtr i830 = I830_CONTEXT(intel);
-   I830_STATECHANGE( i830, I830_UPLOAD_BUFFERS );
-   i830->state.Buffer[I830_DESTREG_CBUFADDR1] =
-      (BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(region->pitch) | BUF_3D_USE_FENCE);
-   i830->state.Buffer[I830_DESTREG_CBUFADDR2] = region->offset;
+   struct i830_context *i830 = i830_context(&intel->ctx);
+   GLuint value;
+
+   ASSERT(state == &i830->state || state == &i830->meta);
+
+   if (state->draw_region != color_region) {
+      intel_region_release(&state->draw_region);
+      intel_region_reference(&state->draw_region, color_region);
+   }
+   if (state->depth_region != depth_region) {
+      intel_region_release(&state->depth_region);
+      intel_region_reference(&state->depth_region, depth_region);
+   }
+
+   /*
+    * Set stride/cpp values
+    */
+   if (color_region) {
+      state->Buffer[I830_DESTREG_CBUFADDR0] = _3DSTATE_BUF_INFO_CMD;
+      state->Buffer[I830_DESTREG_CBUFADDR1] =
+         (BUF_3D_ID_COLOR_BACK |
+          BUF_3D_PITCH(color_region->pitch * color_region->cpp) |
+          BUF_3D_USE_FENCE);
+   }
+
+   if (depth_region) {
+      state->Buffer[I830_DESTREG_DBUFADDR0] = _3DSTATE_BUF_INFO_CMD;
+      state->Buffer[I830_DESTREG_DBUFADDR1] =
+         (BUF_3D_ID_DEPTH |
+          BUF_3D_PITCH(depth_region->pitch * depth_region->cpp) |
+          BUF_3D_USE_FENCE);
+   }
+
+   /*
+    * Compute/set I830_DESTREG_DV1 value
+    */
+   value = (DSTORG_HORT_BIAS(0x8) |     /* .5 */
+            DSTORG_VERT_BIAS(0x8) | DEPTH_IS_Z);    /* .5 */
+            
+   if (color_region && color_region->cpp == 4) {
+      value |= DV_PF_8888;
+   }
+   else {
+      value |= DV_PF_565;
+   }
+   if (depth_region && depth_region->cpp == 4) {
+      value |= DEPTH_FRMT_24_FIXED_8_OTHER;
+   }
+   else {
+      value |= DEPTH_FRMT_16_FIXED;
+   }
+   state->Buffer[I830_DESTREG_DV1] = value;
+
+   I830_STATECHANGE(i830, I830_UPLOAD_BUFFERS);
+
+
 }
 
 
 static void
-i830_set_z_region(intelContextPtr intel, const intelRegion *region)
+i830_set_draw_region(struct intel_context *intel,
+                     struct intel_region *color_regions[],
+                     struct intel_region *depth_region,
+		     GLuint num_regions)
 {
-   i830ContextPtr i830 = I830_CONTEXT(intel);
-   I830_STATECHANGE( i830, I830_UPLOAD_BUFFERS );
-   i830->state.Buffer[I830_DESTREG_DBUFADDR1] =
-      (BUF_3D_ID_DEPTH | BUF_3D_PITCH(region->pitch) | BUF_3D_USE_FENCE);
-   i830->state.Buffer[I830_DESTREG_DBUFADDR2] = region->offset;
+   struct i830_context *i830 = i830_context(&intel->ctx);
+   i830_state_draw_region(intel, &i830->state, color_regions[0], depth_region);
 }
 
-
+#if 0
 static void
 i830_update_color_z_regions(intelContextPtr intel,
-                            const intelRegion *colorRegion,
-                            const intelRegion *depthRegion)
+                            const intelRegion * colorRegion,
+                            const intelRegion * depthRegion)
 {
    i830ContextPtr i830 = I830_CONTEXT(intel);
 
    i830->state.Buffer[I830_DESTREG_CBUFADDR1] =
-      (BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(colorRegion->pitch) | BUF_3D_USE_FENCE);
+      (BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(colorRegion->pitch) |
+       BUF_3D_USE_FENCE);
    i830->state.Buffer[I830_DESTREG_CBUFADDR2] = colorRegion->offset;
 
    i830->state.Buffer[I830_DESTREG_DBUFADDR1] =
       (BUF_3D_ID_DEPTH | BUF_3D_PITCH(depthRegion->pitch) | BUF_3D_USE_FENCE);
    i830->state.Buffer[I830_DESTREG_DBUFADDR2] = depthRegion->offset;
 }
+#endif
 
 
 /* This isn't really handled at the moment.
  */
-static void i830_lost_hardware( intelContextPtr intel )
+static void
+i830_new_batch(struct intel_context *intel)
 {
-   I830_CONTEXT(intel)->state.emitted = 0;
+   struct i830_context *i830 = i830_context(&intel->ctx);
+   i830->state.emitted = 0;
+
+   /* Check that we didn't just wrap our batchbuffer at a bad time. */
+   assert(!intel->no_batch_wrap);
 }
 
 
 
-static void i830_emit_flush( intelContextPtr intel )
+static GLuint
+i830_flush_cmd(void)
 {
-   BATCH_LOCALS;
-
-   BEGIN_BATCH(2);
-   OUT_BATCH( MI_FLUSH | FLUSH_MAP_CACHE ); 
-   OUT_BATCH( 0 );
-   ADVANCE_BATCH();
+   return MI_FLUSH | FLUSH_MAP_CACHE;
 }
 
 
+static void 
+i830_assert_not_dirty( struct intel_context *intel )
+{
+   struct i830_context *i830 = i830_context(&intel->ctx);
+   struct i830_hw_state *state = i830->current;
+   assert(!get_dirty(state));
+}
 
+static void
+i830_note_unlock( struct intel_context *intel )
+{
+    /* nothing */
+}
 
-void i830InitVtbl( i830ContextPtr i830 )
+void
+i830InitVtbl(struct i830_context *i830)
 {
-   i830->intel.vtbl.alloc_tex_obj = i830AllocTexObj;
    i830->intel.vtbl.check_vertex_size = i830_check_vertex_size;
-   i830->intel.vtbl.clear_with_tris = i830ClearWithTris;
-   i830->intel.vtbl.rotate_window = i830RotateWindow;
    i830->intel.vtbl.destroy = i830_destroy_context;
    i830->intel.vtbl.emit_state = i830_emit_state;
-   i830->intel.vtbl.lost_hardware = i830_lost_hardware;
+   i830->intel.vtbl.new_batch = i830_new_batch;
    i830->intel.vtbl.reduced_primitive_state = i830_reduced_primitive_state;
-   i830->intel.vtbl.set_color_region = i830_set_color_region;
-   i830->intel.vtbl.set_z_region = i830_set_z_region;
-   i830->intel.vtbl.update_color_z_regions = i830_update_color_z_regions;
+   i830->intel.vtbl.set_draw_region = i830_set_draw_region;
    i830->intel.vtbl.update_texture_state = i830UpdateTextureState;
-   i830->intel.vtbl.emit_flush = i830_emit_flush;
+   i830->intel.vtbl.flush_cmd = i830_flush_cmd;
    i830->intel.vtbl.render_start = i830_render_start;
+   i830->intel.vtbl.render_prevalidate = i830_render_prevalidate;
+   i830->intel.vtbl.assert_not_dirty = i830_assert_not_dirty;
+   i830->intel.vtbl.note_unlock = i830_note_unlock; 
 }
diff --git a/i915/i915_context.c b/i915/i915_context.c
index 2bc1cae..bd9f1d5 100644
--- a/i915/i915_context.c
+++ b/i915/i915_context.c
@@ -36,151 +36,150 @@
 #include "swrast/swrast.h"
 #include "swrast_setup/swrast_setup.h"
 #include "tnl/tnl.h"
-#include "vbo/vbo.h"
-
 
 #include "utils.h"
 #include "i915_reg.h"
 
+#include "intel_regions.h"
+#include "intel_batchbuffer.h"
+#include "intel_tris.h"
+#include "intel_span.h"
+#include "intel_pixel.h"
+
 /***************************************
  * Mesa's Driver Functions
  ***************************************/
 
-static const struct dri_extension i915_extensions[] =
-{
-    { "GL_ARB_depth_texture",              NULL },
-    { "GL_ARB_fragment_program",           NULL },
-    { "GL_ARB_shadow",                     NULL },
-    { "GL_ARB_texture_env_crossbar",       NULL },
-    { "GL_EXT_shadow_funcs",               NULL },
-    /* ARB extn won't work if not enabled */
-    { "GL_SGIX_depth_texture",             NULL },
-    { NULL,                                NULL }
+static const struct dri_extension i915_extensions[] = {
+   {"GL_ARB_depth_texture", NULL},
+   {"GL_ARB_fragment_program", NULL},
+   {"GL_ARB_shadow", NULL},
+   {"GL_ARB_texture_non_power_of_two", NULL},
+   {"GL_EXT_shadow_funcs", NULL},
+   /* ARB extn won't work if not enabled */
+   {"GL_SGIX_depth_texture", NULL},
+   {NULL, NULL}
 };
 
 /* Override intel default.
  */
-static void i915InvalidateState( GLcontext *ctx, GLuint new_state )
+static void
+i915InvalidateState(GLcontext * ctx, GLuint new_state)
 {
-   _swrast_InvalidateState( ctx, new_state );
-   _swsetup_InvalidateState( ctx, new_state );
-   _vbo_InvalidateState( ctx, new_state );
-   _tnl_InvalidateState( ctx, new_state );
-   _tnl_invalidate_vertex_state( ctx, new_state );
-   INTEL_CONTEXT(ctx)->NewGLState |= new_state;
+   _swrast_InvalidateState(ctx, new_state);
+   _swsetup_InvalidateState(ctx, new_state);
+   _vbo_InvalidateState(ctx, new_state);
+   _tnl_InvalidateState(ctx, new_state);
+   _tnl_invalidate_vertex_state(ctx, new_state);
+   intel_context(ctx)->NewGLState |= new_state;
 
    /* Todo: gather state values under which tracked parameters become
     * invalidated, add callbacks for things like
     * ProgramLocalParameters, etc.
     */
    {
-      struct i915_fragment_program *p = 
-	 (struct i915_fragment_program *)ctx->FragmentProgram._Current;
+      struct i915_fragment_program *p =
+         (struct i915_fragment_program *) ctx->FragmentProgram._Current;
       if (p && p->nr_params)
-	 p->params_uptodate = 0;
+         p->params_uptodate = 0;
    }
 
-   if (new_state & (_NEW_FOG|_NEW_HINT|_NEW_PROGRAM))
+   if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM))
       i915_update_fog(ctx);
 }
 
 
-static void i915InitDriverFunctions( struct dd_function_table *functions )
+static void
+i915InitDriverFunctions(struct dd_function_table *functions)
 {
-   intelInitDriverFunctions( functions );
-   i915InitStateFunctions( functions );
-   i915InitTextureFuncs( functions );
-   i915InitFragProgFuncs( functions );
+   intelInitDriverFunctions(functions);
+   i915InitStateFunctions(functions);
+   i915InitTextureFuncs(functions);
+   i915InitFragProgFuncs(functions);
    functions->UpdateState = i915InvalidateState;
 }
 
 
+extern const struct tnl_pipeline_stage *intel_pipeline[];
 
-GLboolean i915CreateContext( const __GLcontextModes *mesaVis,
-			    __DRIcontextPrivate *driContextPriv,
-			    void *sharedContextPrivate)
+GLboolean
+i915CreateContext(const __GLcontextModes * mesaVis,
+                  __DRIcontextPrivate * driContextPriv,
+                  void *sharedContextPrivate)
 {
    struct dd_function_table functions;
-   i915ContextPtr i915 = (i915ContextPtr) CALLOC_STRUCT(i915_context);
-   intelContextPtr intel = &i915->intel;
+   struct i915_context *i915 =
+      (struct i915_context *) CALLOC_STRUCT(i915_context);
+   struct intel_context *intel = &i915->intel;
    GLcontext *ctx = &intel->ctx;
-   GLuint i;
 
-   if (!i915) return GL_FALSE;
+   if (!i915)
+      return GL_FALSE;
+
+   if (0)
+      _mesa_printf("\ntexmem-0-3 branch\n\n");
 
-   i915InitVtbl( i915 );
+   i915InitVtbl(i915);
+   i915InitMetaFuncs(i915);
 
-   i915InitDriverFunctions( &functions );
+   i915InitDriverFunctions(&functions);
 
-   if (!intelInitContext( intel, mesaVis, driContextPriv,
-			  sharedContextPrivate, &functions )) {
+   if (!intelInitContext(intel, mesaVis, driContextPriv,
+                         sharedContextPrivate, &functions)) {
       FREE(i915);
       return GL_FALSE;
    }
 
+   /* Initialize swrast, tnl driver tables: */
+   intelInitSpanFuncs(ctx);
+   intelInitTriFuncs(ctx);
+
+   /* Install the customized pipeline: */
+   _tnl_destroy_pipeline(ctx);
+   _tnl_install_pipeline(ctx, intel_pipeline);
+
    ctx->Const.MaxTextureUnits = I915_TEX_UNITS;
    ctx->Const.MaxTextureImageUnits = I915_TEX_UNITS;
    ctx->Const.MaxTextureCoordUnits = I915_TEX_UNITS;
 
-   intel->nr_heaps = 1;
-   intel->texture_heaps[0] = 
-      driCreateTextureHeap( 0, intel,
-			    intel->intelScreen->tex.size,
-			    12,
-			    I830_NR_TEX_REGIONS,
-			    intel->sarea->texList,
-			    (unsigned *) & intel->sarea->texAge,
-			    & intel->swapped,
-			    sizeof( struct i915_texture_object ),
-			    (destroy_texture_object_t *)intelDestroyTexObj );
-
-   /* FIXME: driCalculateMaxTextureLevels assumes that mipmaps are
-    * tightly packed, but they're not in Intel graphics
-    * hardware.
+
+   /* Advertise the full hardware capabilities.  The new memory
+    * manager should cope much better with overload situations:
     */
+   ctx->Const.MaxTextureLevels = 12;
+   ctx->Const.Max3DTextureLevels = 9;
+   ctx->Const.MaxCubeTextureLevels = 12;
+   ctx->Const.MaxTextureRectSize = (1 << 11);
    ctx->Const.MaxTextureUnits = I915_TEX_UNITS;
-   i = driQueryOptioni( &intel->optionCache, "allow_large_textures");
-   driCalculateMaxTextureLevels( intel->texture_heaps,
-				 intel->nr_heaps,
-				 &intel->ctx.Const,
-				 4,
-				 11, /* max 2D texture size is 2048x2048 */
-				 8,  /* 3D texture */
-				 11, /* cube texture. */
-				 11, /* rect texture */
-				 12,
-				 GL_FALSE,
-				 i );
 
    /* GL_ARB_fragment_program limits - don't think Mesa actually
     * validates programs against these, and in any case one ARB
     * instruction can translate to more than one HW instruction, so
     * we'll still have to check and fallback each time.
     */
-   
    ctx->Const.FragmentProgram.MaxNativeTemps = I915_MAX_TEMPORARY;
-   ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* 8 tex, 2 color, fog */
+   ctx->Const.FragmentProgram.MaxNativeAttribs = 11;    /* 8 tex, 2 color, fog */
    ctx->Const.FragmentProgram.MaxNativeParameters = I915_MAX_CONSTANT;
    ctx->Const.FragmentProgram.MaxNativeAluInstructions = I915_MAX_ALU_INSN;
    ctx->Const.FragmentProgram.MaxNativeTexInstructions = I915_MAX_TEX_INSN;
-   ctx->Const.FragmentProgram.MaxNativeInstructions = (I915_MAX_ALU_INSN + 
-						I915_MAX_TEX_INSN);
-   ctx->Const.FragmentProgram.MaxNativeTexIndirections = I915_MAX_TEX_INDIRECT;
+   ctx->Const.FragmentProgram.MaxNativeInstructions = (I915_MAX_ALU_INSN +
+                                                       I915_MAX_TEX_INSN);
+   ctx->Const.FragmentProgram.MaxNativeTexIndirections =
+      I915_MAX_TEX_INDIRECT;
    ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* I don't think we have one */
+
    ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
    ctx->FragmentProgram._UseTexEnvProgram = GL_TRUE;
 
-
-   driInitExtensions( ctx, i915_extensions, GL_FALSE );
+   driInitExtensions(ctx, i915_extensions, GL_FALSE);
 
 
-   _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 
-		       36 * sizeof(GLfloat) );
+   _tnl_init_vertices(ctx, ctx->Const.MaxArrayLockSize + 12,
+                      36 * sizeof(GLfloat));
 
    intel->verts = TNL_CONTEXT(ctx)->clipspace.vertex_buf;
 
-   i915InitState( i915 );
+   i915InitState(i915);
 
    return GL_TRUE;
 }
-
diff --git a/i915/i915_context.h b/i915/i915_context.h
index ec15501..c6958dd 100644
--- a/i915/i915_context.h
+++ b/i915/i915_context.h
@@ -29,6 +29,7 @@
 #define I915CONTEXT_INC
 
 #include "intel_context.h"
+#include "i915_reg.h"
 
 #define I915_FALLBACK_TEXTURE		 0x1000
 #define I915_FALLBACK_COLORMASK		 0x2000
@@ -46,6 +47,7 @@
 #define I915_UPLOAD_CONSTANTS        0x10
 #define I915_UPLOAD_FOG              0x20
 #define I915_UPLOAD_INVARIENT        0x40
+#define I915_UPLOAD_DEFAULTS         0x80
 #define I915_UPLOAD_TEX(i)           (0x00010000<<(i))
 #define I915_UPLOAD_TEX_ALL          (0x00ff0000)
 #define I915_UPLOAD_TEX_0_SHIFT      16
@@ -55,10 +57,8 @@
  */
 #define I915_DESTREG_CBUFADDR0 0
 #define I915_DESTREG_CBUFADDR1 1
-#define I915_DESTREG_CBUFADDR2 2
 #define I915_DESTREG_DBUFADDR0 3
 #define I915_DESTREG_DBUFADDR1 4
-#define I915_DESTREG_DBUFADDR2 5
 #define I915_DESTREG_DV0 6
 #define I915_DESTREG_DV1 7
 #define I915_DESTREG_SENABLE 8
@@ -89,7 +89,6 @@
 #define I915_STPREG_ST1        1
 #define I915_STP_SETUP_SIZE    2
 
-#define I915_TEXREG_MS2        0
 #define I915_TEXREG_MS3        1
 #define I915_TEXREG_MS4        2
 #define I915_TEXREG_SS2        3
@@ -97,23 +96,34 @@
 #define I915_TEXREG_SS4        5
 #define I915_TEX_SETUP_SIZE    6
 
+#define I915_DEFREG_C0    0
+#define I915_DEFREG_C1    1
+#define I915_DEFREG_S0    2
+#define I915_DEFREG_S1    3
+#define I915_DEFREG_Z0    4
+#define I915_DEFREG_Z1    5
+#define I915_DEF_SETUP_SIZE    6
+
+
 #define I915_MAX_CONSTANT      32
 #define I915_CONSTANT_SIZE     (2+(4*I915_MAX_CONSTANT))
 
 
 #define I915_PROGRAM_SIZE      192
 
+#define I915_MAX_INSN          (I915_MAX_TEX_INSN+I915_MAX_ALU_INSN)
 
 /* Hardware version of a parsed fragment program.  "Derived" from the
  * mesa fragment_program struct.
  */
-struct i915_fragment_program {
+struct i915_fragment_program
+{
    struct gl_fragment_program FragProg;
 
    GLboolean translated;
    GLboolean params_uptodate;
    GLboolean on_hardware;
-   GLboolean error;		/* If program is malformed for any reason. */
+   GLboolean error;             /* If program is malformed for any reason. */
 
    GLuint nr_tex_indirect;
    GLuint nr_tex_insn;
@@ -135,52 +145,40 @@ struct i915_fragment_program {
    GLuint constant_flags[I915_MAX_CONSTANT];
    GLuint nr_constants;
 
-   GLuint *csr;			/* Cursor, points into program.
-				 */
+   GLuint *csr;                 /* Cursor, points into program.
+                                 */
 
-   GLuint *decl;		/* Cursor, points into declarations.
-				 */
-   
-   GLuint decl_s;		/* flags for which s regs need to be decl'd */
-   GLuint decl_t;		/* flags for which t regs need to be decl'd */
+   GLuint *decl;                /* Cursor, points into declarations.
+                                 */
 
-   GLuint temp_flag;		/* Tracks temporary regs which are in
-				 * use.
-				 */
+   GLuint decl_s;               /* flags for which s regs need to be decl'd */
+   GLuint decl_t;               /* flags for which t regs need to be decl'd */
 
-   GLuint utemp_flag;		/* Tracks TYPE_U temporary regs which are in
-				 * use.
-				 */
+   GLuint temp_flag;            /* Tracks temporary regs which are in
+                                 * use.
+                                 */
 
+   GLuint utemp_flag;           /* Tracks TYPE_U temporary regs which are in
+                                 * use.
+                                 */
 
 
+   /* Track which R registers are "live" for each instruction.
+    * A register is live between the time it's written to and the last time
+    * it's read. */
+   GLuint usedRegs[I915_MAX_INSN];
+
    /* Helpers for i915_fragprog.c:
     */
    GLuint wpos_tex;
    GLboolean depth_written;
 
-   struct { 
-      GLuint reg;		/* Hardware constant idx */
-      const GLfloat *values; 	/* Pointer to tracked values */
+   struct
+   {
+      GLuint reg;               /* Hardware constant idx */
+      const GLfloat *values;    /* Pointer to tracked values */
    } param[I915_MAX_CONSTANT];
    GLuint nr_params;
-      
-
-
-
-   /* Helpers for i915_texprog.c:
-    */
-   GLuint src_texture;		/* Reg containing sampled texture color,
-				 * else UREG_BAD.
-				 */
-
-   GLuint src_previous;		/* Reg containing color from previous 
-				 * stage.  May need to be decl'd.
-				 */
-
-   GLuint last_tex_stage;	/* Number of last enabled texture unit */
-
-   struct vertex_buffer *VB;
 };
 
 
@@ -188,67 +186,68 @@ struct i915_fragment_program {
 
 
 
-struct i915_texture_object
-{
-   struct intel_texture_object intel;
-   GLenum lastTarget;
-   GLboolean refs_border_color;
-   GLuint Setup[I915_TEX_SETUP_SIZE];
-};
 
 #define I915_TEX_UNITS 8
 
 
-struct i915_hw_state {
+struct i915_hw_state
+{
    GLuint Ctx[I915_CTX_SETUP_SIZE];
    GLuint Buffer[I915_DEST_SETUP_SIZE];
    GLuint Stipple[I915_STP_SETUP_SIZE];
    GLuint Fog[I915_FOG_SETUP_SIZE];
+   GLuint Defaults[I915_DEF_SETUP_SIZE];
    GLuint Tex[I915_TEX_UNITS][I915_TEX_SETUP_SIZE];
    GLuint Constant[I915_CONSTANT_SIZE];
    GLuint ConstantSize;
    GLuint Program[I915_PROGRAM_SIZE];
    GLuint ProgramSize;
-   GLuint active;		/* I915_UPLOAD_* */
-   GLuint emitted;		/* I915_UPLOAD_* */
+
+   /* Region pointers for relocation: 
+    */
+   struct intel_region *draw_region;
+   struct intel_region *depth_region;
+/*    struct intel_region *tex_region[I915_TEX_UNITS]; */
+
+   /* Regions aren't actually that appropriate here as the memory may
+    * be from a PBO or FBO.  Will have to do this for draw and depth for
+    * FBO's...
+    */
+   dri_bo *tex_buffer[I915_TEX_UNITS];
+   GLuint tex_offset[I915_TEX_UNITS];
+
+
+   GLuint active;               /* I915_UPLOAD_* */
+   GLuint emitted;              /* I915_UPLOAD_* */
 };
 
 #define I915_FOG_PIXEL  2
 #define I915_FOG_VERTEX 1
 #define I915_FOG_NONE   0
 
-struct i915_context 
+struct i915_context
 {
    struct intel_context intel;
 
    GLuint last_ReallyEnabled;
    GLuint vertex_fog;
+   GLuint lodbias_ss2[MAX_TEXTURE_UNITS];
+
 
-   struct i915_fragment_program tex_program;
    struct i915_fragment_program *current_program;
 
    struct i915_hw_state meta, initial, state, *current;
 };
 
 
-typedef struct i915_context *i915ContextPtr;
-typedef struct i915_texture_object *i915TextureObjectPtr;
-
-#define I915_CONTEXT(ctx)	((i915ContextPtr)(ctx))
-
-
-
 #define I915_STATECHANGE(i915, flag)					\
 do {									\
-   if (0) fprintf(stderr, "I915_STATECHANGE %x in %s\n", flag, __FUNCTION__);	\
    INTEL_FIREVERTICES( &(i915)->intel );					\
    (i915)->state.emitted &= ~(flag);					\
 } while (0)
 
 #define I915_ACTIVESTATE(i915, flag, mode)			\
 do {								\
-   if (0) fprintf(stderr, "I915_ACTIVESTATE %x %d in %s\n",	\
-		  flag, mode, __FUNCTION__);			\
    INTEL_FIREVERTICES( &(i915)->intel );				\
    if (mode)							\
       (i915)->state.active |= (flag);				\
@@ -260,7 +259,13 @@ do {								\
 /*======================================================================
  * i915_vtbl.c
  */
-extern void i915InitVtbl( i915ContextPtr i915 );
+extern void i915InitVtbl(struct i915_context *i915);
+
+extern void
+i915_state_draw_region(struct intel_context *intel,
+                       struct i915_hw_state *state,
+                       struct intel_region *color_region,
+                       struct intel_region *depth_region);
 
 
 
@@ -289,70 +294,58 @@ do {									\
 /*======================================================================
  * i915_context.c
  */
-extern GLboolean i915CreateContext( const __GLcontextModes *mesaVis,
-				    __DRIcontextPrivate *driContextPriv,
-				    void *sharedContextPrivate);
-
-
-/*======================================================================
- * i915_texprog.c
- */
-extern void i915ValidateTextureProgram( i915ContextPtr i915 );
+extern GLboolean i915CreateContext(const __GLcontextModes * mesaVis,
+                                   __DRIcontextPrivate * driContextPriv,
+                                   void *sharedContextPrivate);
 
 
 /*======================================================================
  * i915_debug.c
  */
-extern void i915_disassemble_program( const GLuint *program, GLuint sz );
-extern void i915_print_ureg( const char *msg, GLuint ureg );
+extern void i915_disassemble_program(const GLuint * program, GLuint sz);
+extern void i915_print_ureg(const char *msg, GLuint ureg);
 
 
 /*======================================================================
  * i915_state.c
  */
-extern void i915InitStateFunctions( struct dd_function_table *functions );
-extern void i915InitState( i915ContextPtr i915 );
-extern void i915_update_fog(GLcontext *ctxx);
+extern void i915InitStateFunctions(struct dd_function_table *functions);
+extern void i915InitState(struct i915_context *i915);
+extern void i915_update_fog(GLcontext * ctx);
 
 
 /*======================================================================
  * i915_tex.c
  */
-extern void i915UpdateTextureState( intelContextPtr intel );
-extern void i915InitTextureFuncs( struct dd_function_table *functions );
-extern intelTextureObjectPtr i915AllocTexObj( struct gl_texture_object *texObj );
+extern void i915UpdateTextureState(struct intel_context *intel);
+extern void i915InitTextureFuncs(struct dd_function_table *functions);
 
 /*======================================================================
  * i915_metaops.c
  */
-extern GLboolean
-i915TryTextureReadPixels( GLcontext *ctx,
-			  GLint x, GLint y, GLsizei width, GLsizei height,
-			  GLenum format, GLenum type,
-			  const struct gl_pixelstore_attrib *pack,
-			  GLvoid *pixels );
-
-extern GLboolean
-i915TryTextureDrawPixels( GLcontext *ctx,
-			  GLint x, GLint y, GLsizei width, GLsizei height,
-			  GLenum format, GLenum type,
-			  const struct gl_pixelstore_attrib *unpack,
-			  const GLvoid *pixels );
+void i915InitMetaFuncs(struct i915_context *i915);
 
-extern void 
-i915ClearWithTris( intelContextPtr intel, GLbitfield mask,
-		   GLboolean all, GLint cx, GLint cy, GLint cw, GLint ch);
-
-
-extern void
-i915RotateWindow(intelContextPtr intel, __DRIdrawablePrivate *dPriv,
-                 GLuint srcBuf);
 
 /*======================================================================
  * i915_fragprog.c
  */
-extern void i915ValidateFragmentProgram( i915ContextPtr i915 );
-extern void i915InitFragProgFuncs( struct dd_function_table *functions );
-	
-#endif
+extern void i915ValidateFragmentProgram(struct i915_context *i915);
+extern void i915InitFragProgFuncs(struct dd_function_table *functions);
+
+/*======================================================================
+ * Inline conversion functions.  These are better-typed than the
+ * macros used previously:
+ */
+static INLINE struct i915_context *
+i915_context(GLcontext * ctx)
+{
+   return (struct i915_context *) ctx;
+}
+
 
+
+#define I915_CONTEXT(ctx)	i915_context(ctx)
+
+
+
+#endif
diff --git a/i915/i915_debug.c b/i915/i915_debug.c
index 054b561..8eb1c5b 100644
--- a/i915/i915_debug.c
+++ b/i915/i915_debug.c
@@ -25,275 +25,824 @@
  * 
  **************************************************************************/
 
+#include "imports.h"
+
 #include "i915_reg.h"
 #include "i915_context.h"
-#include <stdio.h>
-
-
-static const char *opcodes[0x20] = {
-   "NOP",
-   "ADD",
-   "MOV",
-   "MUL",
-   "MAD",
-   "DP2ADD",
-   "DP3",
-   "DP4",
-   "FRC",
-   "RCP",
-   "RSQ",
-   "EXP",
-   "LOG",
-   "CMP",
-   "MIN",
-   "MAX",
-   "FLR",
-   "MOD",
-   "TRC",
-   "SGE",
-   "SLT",
-   "TEXLD",
-   "TEXLDP",
-   "TEXLDB",
-   "TEXKILL",
-   "DCL",
-   "0x1a",
-   "0x1b",
-   "0x1c",
-   "0x1d",
-   "0x1e",
-   "0x1f",
-};
-
-
-static const int args[0x20] = {
-   0,				/* 0 nop */
-   2,				/* 1 add */
-   1,				/* 2 mov */
-   2,				/* 3 m ul */
-   3, 				/* 4 mad */
-   3,				/* 5 dp2add */
-   2,				/* 6 dp3 */
-   2,				/* 7 dp4 */
-   1,				/* 8 frc */
-   1,				/* 9 rcp */
-   1,				/* a rsq */
-   1,				/* b exp */
-   1,				/* c log */
-   3,				/* d cmp */
-   2,				/* e min */
-   2,				/* f max */
-   1,				/* 10 flr */
-   1,				/* 11 mod */
-   1,				/* 12 trc */
-   2,				/* 13 sge */
-   2,				/* 14 slt */
-   1,
-   1,
-   1,
-   1,
-   0,
-   0,
-   0,
-   0,
-   0,
-   0,
-   0,
-};
-
-
-static const char *regname[0x8] = {
-   "R",
-   "T",
-   "CONST",
-   "S",
-   "OC",
-   "OD",
-   "U",
-   "UNKNOWN",
-};
-
-static void print_reg_type_nr( GLuint type, GLuint nr )
+#include "i915_debug.h"
+
+#define PRINTF( ... ) _mesa_printf( __VA_ARGS__ )
+
+static GLboolean debug( struct debug_stream *stream, const char *name, GLuint len )
 {
-   switch (type) {
-   case REG_TYPE_T:
-      switch (nr) {
-      case T_DIFFUSE: fprintf(stderr, "T_DIFFUSE"); return;
-      case T_SPECULAR: fprintf(stderr, "T_SPECULAR"); return;
-      case T_FOG_W: fprintf(stderr, "T_FOG_W"); return;
-      default: fprintf(stderr, "T_TEX%d", nr); return;
-      }
-   case REG_TYPE_OC:
-      if (nr == 0) {
-	 fprintf(stderr, "oC");
-	 return;
-      }
-      break;
-   case REG_TYPE_OD:
-      if (nr == 0) {
-	 fprintf(stderr, "oD");
-	 return;
-      }
-      break;
-   default:
-      break;
+   GLuint i;
+   GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
+   /* Generic dump: print the 'len' dwords at the current offset, then skip them. */
+   if (len == 0) {
+      PRINTF("Error - zero length packet (0x%08x)\n", ptr[0]); /* the offending dword, not the stream's first byte */
+      assert(0);
+      return GL_FALSE;
    }
 
-   fprintf(stderr, "%s[%d]", regname[type], nr);
-}
+   if (stream->print_addresses)
+      PRINTF("%08x:  ", stream->offset);
+
 
-#define REG_SWIZZLE_MASK 0x7777
-#define REG_NEGATE_MASK 0x8888
+   PRINTF("%s (%d dwords):\n", name, len);
+   for (i = 0; i < len; i++)
+      PRINTF("\t0x%08x\n",  ptr[i]);
+   PRINTF("\n");
 
-#define REG_SWIZZLE_XYZW ((SRC_X << A2_SRC2_CHANNEL_X_SHIFT) |	\
-		      (SRC_Y << A2_SRC2_CHANNEL_Y_SHIFT) |	\
-		      (SRC_Z << A2_SRC2_CHANNEL_Z_SHIFT) |	\
-		      (SRC_W << A2_SRC2_CHANNEL_W_SHIFT))
+   stream->offset += len * sizeof(GLuint);
+   /* GL_TRUE: packet consumed and offset advanced (GL_FALSE only for len == 0). */
+   return GL_TRUE;
+}
 
 
-static void print_reg_neg_swizzle( GLuint reg )
+static const char *get_prim_name( GLuint val )
 {
-   int i;
-
-   if ((reg & REG_SWIZZLE_MASK) == REG_SWIZZLE_XYZW &&
-       (reg & REG_NEGATE_MASK) == 0)
-      return;
-
-   fprintf(stderr, ".");
-
-   for (i = 3 ; i >= 0; i--) {
-      if (reg & (1<<((i*4)+3))) 
-	 fprintf(stderr, "-");
-	 
-      switch ((reg>>(i*4)) & 0x7) {
-      case 0: fprintf(stderr, "x"); break;
-      case 1: fprintf(stderr, "y"); break;
-      case 2: fprintf(stderr, "z"); break;
-      case 3: fprintf(stderr, "w"); break;
-      case 4: fprintf(stderr, "0"); break;
-      case 5: fprintf(stderr, "1"); break;
-      default: fprintf(stderr, "?"); break;
-      }
+   switch (val & PRIM3D_MASK) {
+   case PRIM3D_TRILIST: return "TRILIST"; break;
+   case PRIM3D_TRISTRIP: return "TRISTRIP"; break;
+   case PRIM3D_TRISTRIP_RVRSE: return "TRISTRIP_RVRSE"; break;
+   case PRIM3D_TRIFAN: return "TRIFAN"; break;
+   case PRIM3D_POLY: return "POLY"; break;
+   case PRIM3D_LINELIST: return "LINELIST"; break;
+   case PRIM3D_LINESTRIP: return "LINESTRIP"; break;
+   case PRIM3D_RECTLIST: return "RECTLIST"; break;
+   case PRIM3D_POINTLIST: return "POINTLIST"; break;
+   case PRIM3D_DIB: return "DIB"; break;
+   case PRIM3D_CLEAR_RECT: return "CLEAR_RECT"; break;
+   case PRIM3D_ZONE_INIT: return "ZONE_INIT"; break;
+   default: return "????"; break;
    }
 }
 
-
-static void print_src_reg( GLuint dword )
+static GLboolean debug_prim( struct debug_stream *stream, const char *name, 
+			     GLboolean dump_floats,
+			     GLuint len )
 {
-   GLuint nr = (dword >> A2_SRC2_NR_SHIFT) & REG_NR_MASK;
-   GLuint type = (dword >> A2_SRC2_TYPE_SHIFT) & REG_TYPE_MASK;
-   print_reg_type_nr( type, nr );
-   print_reg_neg_swizzle( dword );
+   GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
+   const char *prim = get_prim_name( ptr[0] );
+   GLuint i;
+   
+
+
+   PRINTF("%s %s (%d dwords):\n", name, prim, len);
+   PRINTF("\t0x%08x\n",  ptr[0]);   
+   for (i = 1; i < len; i++) {
+      if (dump_floats)
+	 PRINTF("\t0x%08x // %f\n",  ptr[i], *(GLfloat *)&ptr[i]);   
+      else
+	 PRINTF("\t0x%08x\n",  ptr[i]);   
+   }
+
+      
+   PRINTF("\n");
+
+   stream->offset += len * sizeof(GLuint);
+   
+   return GL_TRUE;
 }
+   
+
 
-void i915_print_ureg( const char *msg, GLuint ureg )
+
+static GLboolean debug_program( struct debug_stream *stream, const char *name, GLuint len )
 {
-   fprintf(stderr, "%s: ", msg);
-   print_src_reg( ureg >> 8 );
-   fprintf(stderr, "\n");
+   GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
+   /* Program packet: print a header line, disassemble 'len' dwords, then skip them. */
+   if (len == 0) {
+      PRINTF("Error - zero length packet (0x%08x)\n", ptr[0]); /* the offending dword, not the stream's first byte */
+      assert(0);
+      return GL_FALSE;
+   }
+
+   if (stream->print_addresses)
+      PRINTF("%08x:  ", stream->offset);
+
+   PRINTF("%s (%d dwords):\n", name, len);
+   i915_disassemble_program( ptr, len );
+
+   stream->offset += len * sizeof(GLuint);
+   return GL_TRUE;
 }
 
-static void print_dest_reg( GLuint dword )
+
+static GLboolean debug_chain( struct debug_stream *stream, const char *name, GLuint len )
 {
-   GLuint nr = (dword >> A0_DEST_NR_SHIFT) & REG_NR_MASK;
-   GLuint type = (dword >> A0_DEST_TYPE_SHIFT) & REG_TYPE_MASK;
-   print_reg_type_nr( type, nr );
-   if ((dword & A0_DEST_CHANNEL_ALL) == A0_DEST_CHANNEL_ALL)
-      return;
-   fprintf(stderr, ".");
-   if (dword & A0_DEST_CHANNEL_X) fprintf(stderr, "x");
-   if (dword & A0_DEST_CHANNEL_Y) fprintf(stderr, "y");
-   if (dword & A0_DEST_CHANNEL_Z) fprintf(stderr, "z");
-   if (dword & A0_DEST_CHANNEL_W) fprintf(stderr, "w");
+   GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
+   GLuint old_offset = stream->offset + len * sizeof(GLuint);
+   GLuint i;
+
+   PRINTF("%s (%d dwords):\n", name, len);
+   for (i = 0; i < len; i++)
+      PRINTF("\t0x%08x\n",  ptr[i]);
+
+   stream->offset = ptr[1] & ~0x3;
+   
+   if (stream->offset < old_offset)
+      PRINTF("\n... skipping backwards from 0x%x --> 0x%x ...\n\n", 
+		   old_offset, stream->offset );
+   else
+      PRINTF("\n... skipping from 0x%x --> 0x%x ...\n\n", 
+		   old_offset, stream->offset );
+
+
+   return GL_TRUE;
 }
 
 
-#define GET_SRC0_REG(r0, r1) ((r0<<14)|(r1>>A1_SRC0_CHANNEL_W_SHIFT))
-#define GET_SRC1_REG(r0, r1) ((r0<<8)|(r1>>A2_SRC1_CHANNEL_W_SHIFT))
-#define GET_SRC2_REG(r)      (r)
+/* Dump a 3DPRIM packet whose 16-bit index list is terminated by 0xffff. */
+static GLboolean debug_variable_length_prim( struct debug_stream *stream )
+{
+   GLuint *dwords = (GLuint *)(stream->ptr + stream->offset);
+   const GLushort *indices = (const GLushort *)(dwords + 1);
+   const char *prim_name = get_prim_name( dwords[0] );
+   GLuint nr_indices = 0, nr_dwords, k;
+
+   /* Count indices up to, but not including, the 0xffff terminator. */
+   while (indices[nr_indices] != 0xffff)
+      nr_indices++;
+   /* Header dword, then the indices plus terminator packed two per dword. */
+   nr_dwords = 1 + (nr_indices + 2) / 2;
+   PRINTF("3DPRIM, %s variable length %d indicies (%d dwords):\n", prim_name, nr_indices, nr_dwords);
+   for (k = 0; k < nr_dwords; k++)
+      PRINTF("\t0x%08x\n",  dwords[k]);
+   PRINTF("\n");
+   stream->offset += nr_dwords * sizeof(GLuint);
+   return GL_TRUE;
+}
 
 
-static void print_arith_op( GLuint opcode, const GLuint *program )
+#define BITS( dw, hi, lo, ... )	/* print label, then bits hi..lo of dw */	\
+do {							\
+   unsigned himask = 0xffffffffU >> (31 - (hi)); /* 32-bit constant: ~0UL is 64 bits wide on LP64 and never masked anything */ \
+   PRINTF("\t\t ");				\
+   PRINTF(__VA_ARGS__);			\
+   PRINTF(": 0x%x\n", ((dw) & himask) >> (lo));	\
+} while (0)
+
+#define MBZ( dw, hi, lo) do {	/* presumably "must be zero": asserts on a bitfield of dw */	\
+   unsigned x = (dw) >> (lo);				\
+   unsigned lomask = (1 << (lo)) - 1;			\
+   unsigned himask;					\
+   himask = (1UL << (hi)) - 1;	/* NOTE(review): this mask stops below bit 'hi' */	\
+   assert ((x & himask & ~lomask) == 0); /* NOTE(review): x is already shifted down by 'lo' but is masked at unshifted positions - confirm intent */ \
+} while (0)
+
+#define FLAG( dw, bit, ... )			\
+do {	/* print the printf-style label only when bit 'bit' of dw is set */	\
+   if (((dw) >> (bit)) & 1) {				\
+      PRINTF("\t\t ");				\
+      PRINTF(__VA_ARGS__);			\
+      PRINTF("\n");				\
+   }							\
+} while (0)
+
+static GLboolean debug_load_immediate( struct debug_stream *stream,
+				       const char *name,
+				       GLuint len )
 {
-   if (opcode != A0_NOP) {
-      print_dest_reg(program[0]);
-      if (program[0] & A0_DEST_SATURATE)
-	 fprintf(stderr, " = SATURATE ");
-      else
-	 fprintf(stderr, " = ");
+   GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
+   GLuint bits = (ptr[0] >> 4) & 0xff;
+   GLuint j = 0;
+   
+   PRINTF("%s (%d dwords, flags: %x):\n", name, len, bits);
+   PRINTF("\t0x%08x\n",  ptr[j++]);
+
+   if (bits & (1<<0)) {
+      PRINTF("\t  LIS0: 0x%08x\n", ptr[j]);
+      PRINTF("\t vb address: 0x%08x\n", (ptr[j] & ~0x3));
+      BITS(ptr[j], 0, 0, "vb invalidate disable");
+      j++;
+   }
+   if (bits & (1<<1)) {
+      PRINTF("\t  LIS1: 0x%08x\n", ptr[j]);
+      BITS(ptr[j], 29, 24, "vb dword width");
+      BITS(ptr[j], 21, 16, "vb dword pitch");
+      BITS(ptr[j], 15, 0, "vb max index");
+      j++;
    }
+   if (bits & (1<<2)) {
+      int i;
+      PRINTF("\t  LIS2: 0x%08x\n", ptr[j]);
+      for (i = 0; i < 8; i++) {
+	 unsigned tc = (ptr[j] >> (i * 4)) & 0xf;
+	 if (tc != 0xf)
+	    BITS(tc, 3, 0, "tex coord %d", i);
+      }
+      j++;
+   }
+   if (bits & (1<<3)) {
+      PRINTF("\t  LIS3: 0x%08x\n", ptr[j]);
+      j++;
+   }
+   if (bits & (1<<4)) {
+      PRINTF("\t  LIS4: 0x%08x\n", ptr[j]);
+      BITS(ptr[j], 31, 23, "point width");
+      BITS(ptr[j], 22, 19, "line width");
+      FLAG(ptr[j], 18, "alpha flatshade");
+      FLAG(ptr[j], 17, "fog flatshade");
+      FLAG(ptr[j], 16, "spec flatshade");
+      FLAG(ptr[j], 15, "rgb flatshade");
+      BITS(ptr[j], 14, 13, "cull mode");
+      FLAG(ptr[j], 12, "vfmt: point width");
+      FLAG(ptr[j], 11, "vfmt: specular/fog");
+      FLAG(ptr[j], 10, "vfmt: rgba");
+      FLAG(ptr[j], 9, "vfmt: depth offset");
+      BITS(ptr[j], 8, 6, "vfmt: position (2==xyzw)");
+      FLAG(ptr[j], 5, "force dflt diffuse");
+      FLAG(ptr[j], 4, "force dflt specular");
+      FLAG(ptr[j], 3, "local depth offset enable");
+      FLAG(ptr[j], 2, "vfmt: fp32 fog coord");
+      FLAG(ptr[j], 1, "sprite point");
+      FLAG(ptr[j], 0, "antialiasing");
+      j++;
+   }
+   if (bits & (1<<5)) {
+      PRINTF("\t  LIS5: 0x%08x\n", ptr[j]);
+      BITS(ptr[j], 31, 28, "rgba write disables");
+      FLAG(ptr[j], 27,     "force dflt point width");
+      FLAG(ptr[j], 26,     "last pixel enable");
+      FLAG(ptr[j], 25,     "global z offset enable");
+      FLAG(ptr[j], 24,     "fog enable");
+      BITS(ptr[j], 23, 16, "stencil ref");
+      BITS(ptr[j], 15, 13, "stencil test");
+      BITS(ptr[j], 12, 10, "stencil fail op");
+      BITS(ptr[j], 9, 7,   "stencil pass z fail op");
+      BITS(ptr[j], 6, 4,   "stencil pass z pass op");
+      FLAG(ptr[j], 3,      "stencil write enable");
+      FLAG(ptr[j], 2,      "stencil test enable");
+      FLAG(ptr[j], 1,      "color dither enable");
+      FLAG(ptr[j], 0,      "logiop enable");
+      j++;
+   }
+   if (bits & (1<<6)) {
+      PRINTF("\t  LIS6: 0x%08x\n", ptr[j]);
+      FLAG(ptr[j], 31,      "alpha test enable");
+      BITS(ptr[j], 30, 28,  "alpha func");
+      BITS(ptr[j], 27, 20,  "alpha ref");
+      FLAG(ptr[j], 19,      "depth test enable");
+      BITS(ptr[j], 18, 16,  "depth func");
+      FLAG(ptr[j], 15,      "blend enable");
+      BITS(ptr[j], 14, 12,  "blend func");
+      BITS(ptr[j], 11, 8,   "blend src factor");
+      BITS(ptr[j], 7,  4,   "blend dst factor");
+      FLAG(ptr[j], 3,       "depth write enable");
+      FLAG(ptr[j], 2,       "color write enable");
+      BITS(ptr[j], 1,  0,   "provoking vertex"); 
+      j++;
+   }
+
 
-   fprintf(stderr, "%s ", opcodes[opcode]);
+   PRINTF("\n");
 
-   print_src_reg(GET_SRC0_REG(program[0], program[1]));
-   if (args[opcode] == 1) {
-      fprintf(stderr, "\n");
-      return;
+   assert(j == len);
+
+   stream->offset += len * sizeof(GLuint);
+   
+   return GL_TRUE;
+}
+ 
+
+
+static GLboolean debug_load_indirect( struct debug_stream *stream,
+				      const char *name,
+				      GLuint len )
+{
+   GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
+   GLuint bits = (ptr[0] >> 8) & 0x3f;
+   GLuint i, j = 0;
+   
+   PRINTF("%s (%d dwords):\n", name, len);
+   PRINTF("\t0x%08x\n",  ptr[j++]);
+
+   for (i = 0; i < 6; i++) {
+      if (bits & (1<<i)) {
+	 switch (1<<(8+i)) {
+	 case LI0_STATE_STATIC_INDIRECT:
+	    PRINTF("        STATIC: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
+	    PRINTF("                0x%08x\n", ptr[j++]);
+	    break;
+	 case LI0_STATE_DYNAMIC_INDIRECT:
+	    PRINTF("       DYNAMIC: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
+	    break;
+	 case LI0_STATE_SAMPLER:
+	    PRINTF("       SAMPLER: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
+	    PRINTF("                0x%08x\n", ptr[j++]);
+	    break;
+	 case LI0_STATE_MAP:
+	    PRINTF("           MAP: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
+	    PRINTF("                0x%08x\n", ptr[j++]);
+	    break;
+	 case LI0_STATE_PROGRAM:
+	    PRINTF("       PROGRAM: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
+	    PRINTF("                0x%08x\n", ptr[j++]);
+	    break;
+	 case LI0_STATE_CONSTANTS:
+	    PRINTF("     CONSTANTS: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
+	    PRINTF("                0x%08x\n", ptr[j++]);
+	    break;
+	 default:
+	    assert(0);
+	    break;
+	 }
+      }
    }
 
-   fprintf(stderr, ", ");
-   print_src_reg(GET_SRC1_REG(program[1], program[2]));
-   if (args[opcode] == 2) { 
-      fprintf(stderr, "\n");
-      return;
+   if (bits == 0) {
+      PRINTF("\t  DUMMY: 0x%08x\n", ptr[j++]);
    }
 
-   fprintf(stderr, ", ");
-   print_src_reg(GET_SRC2_REG(program[2]));
-   fprintf(stderr, "\n");
-   return;
+   PRINTF("\n");
+
+
+   assert(j == len);
+
+   stream->offset += len * sizeof(GLuint);
+   
+   return GL_TRUE;
+}
+ 	
+static void BR13( struct debug_stream *stream,
+		  GLuint val )
+{
+   PRINTF("\t0x%08x\n",  val);
+   FLAG(val, 30, "clipping enable");
+   BITS(val, 25, 24, "color depth (3==32bpp)");
+   BITS(val, 23, 16, "raster op");
+   BITS(val, 15, 0,  "dest pitch");
 }
 
 
-static void print_tex_op( GLuint opcode, const GLuint *program )
+static void BR2223( struct debug_stream *stream,
+		    GLuint val22, GLuint val23 )
 {
-   print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL);
-   fprintf(stderr, " = ");
+   union { GLuint val; short field[2]; } BR22, BR23;
 
-   fprintf(stderr, "%s ", opcodes[opcode]);
+   BR22.val = val22;
+   BR23.val = val23;
 
-   fprintf(stderr, "S[%d],", 
-	   program[0] & T0_SAMPLER_NR_MASK);
+   PRINTF("\t0x%08x\n",  val22);
+   BITS(val22, 31, 16, "dest y1");
+   BITS(val22, 15, 0,  "dest x1");
 
-   print_reg_type_nr( (program[1]>>T1_ADDRESS_REG_TYPE_SHIFT) & REG_TYPE_MASK,
-		      (program[1]>>T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK );
-   fprintf(stderr, "\n");
+   PRINTF("\t0x%08x\n",  val23);
+   BITS(val23, 31, 16, "dest y2");
+   BITS(val23, 15, 0,  "dest x2");
+
+   /* The blit engine may produce unexpected results when these aren't met */
+   assert(BR22.field[0] < BR23.field[0]);
+   assert(BR22.field[1] < BR23.field[1]);
 }
 
-static void print_dcl_op( GLuint opcode, const GLuint *program )
+static void BR09( struct debug_stream *stream,
+		  GLuint val )
 {
-   fprintf(stderr, "%s ", opcodes[opcode]);
-   print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL);
-   fprintf(stderr, "\n");
+   PRINTF("\t0x%08x -- dest address\n",  val);
 }
 
+static void BR26( struct debug_stream *stream,
+		  GLuint val )
+{  /* Print one blit dword raw, then its two 16-bit halves labelled as the source origin. */
+   PRINTF("\t0x%08x\n",  val);
+   BITS(val, 31, 16, "src y1");   /* high half */
+   BITS(val, 15, 0,  "src x1");   /* low half */
+}
+
+static void BR11( struct debug_stream *stream,
+		  GLuint val )
+{  /* Print one blit dword raw, then its low 16 bits labelled as the source pitch. */
+   PRINTF("\t0x%08x\n",  val);
+   BITS(val, 15, 0,  "src pitch");   /* bits 31:16 are not decoded here */
+}
 
-void i915_disassemble_program( const GLuint *program, GLuint sz )
+static void BR12( struct debug_stream *stream,
+		  GLuint val )
 {
-   GLuint size = program[0] & 0x1ff;
-   GLint i;
+   PRINTF("\t0x%08x -- src address\n",  val);
+}
+
+static void BR16( struct debug_stream *stream,
+		  GLuint val )
+{  /* Print one blit dword raw, annotated as the colour value; no fields are split out. */
+   PRINTF("\t0x%08x -- color\n",  val);
+}
+   
+static GLboolean debug_copy_blit( struct debug_stream *stream,
+				  const char *name,
+				  GLuint len )
+{  /* Print the copy-blit packet of len dwords at the stream cursor, then step the cursor past it. */
+   GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
+   int j = 0;   /* index of the next dword to decode */
+
+   PRINTF("%s (%d dwords):\n", name, len);
+   PRINTF("\t0x%08x\n",  ptr[j++]);   /* command header dword, printed raw */
+   
+   BR13(stream, ptr[j++]);
+   BR2223(stream, ptr[j], ptr[j+1]);   /* the dest rectangle takes two dwords */
+   j += 2;
+   BR09(stream, ptr[j++]);
+   BR26(stream, ptr[j++]);
+   BR11(stream, ptr[j++]);
+   BR12(stream, ptr[j++]);
+
+   stream->offset += len * sizeof(GLuint);   /* cursor is a byte offset */
+   assert(j == len);   /* exactly 8 dwords are decoded above, so len must be 8 */
+   return GL_TRUE;
+}
+
+static GLboolean debug_color_blit( struct debug_stream *stream,
+				  const char *name,
+				  GLuint len )
+{  /* Print the colour-fill blit packet of len dwords at the stream cursor, then step the cursor past it. */
+   GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
+   int j = 0;   /* index of the next dword to decode */
+
+   PRINTF("%s (%d dwords):\n", name, len);
+   PRINTF("\t0x%08x\n",  ptr[j++]);   /* command header dword, printed raw */
+
+   BR13(stream, ptr[j++]);
+   BR2223(stream, ptr[j], ptr[j+1]);   /* the dest rectangle takes two dwords */
+   j += 2;
+   BR09(stream, ptr[j++]);
+   BR16(stream, ptr[j++]);
+
+   stream->offset += len * sizeof(GLuint);   /* cursor is a byte offset */
+   assert(j == len);   /* exactly 6 dwords are decoded above, so len must be 6 */
+   return GL_TRUE;
+}
+
+static GLboolean debug_modes4( struct debug_stream *stream,
+				  const char *name,
+				  GLuint len )
+{  /* Print the single-dword MODES4 packet at the stream cursor, field by field, then step past it. */
+   GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
+   int j = 0;   /* index of the next dword to decode */
+
+   PRINTF("%s (%d dwords):\n", name, len);
+   PRINTF("\t0x%08x\n",  ptr[j]);   /* the command dword itself carries all the fields */
+   BITS(ptr[j], 21, 18, "logicop func");
+   FLAG(ptr[j], 17, "stencil test mask modify-enable");
+   FLAG(ptr[j], 16, "stencil write mask modify-enable");
+   BITS(ptr[j], 15, 8, "stencil test mask");
+   BITS(ptr[j], 7, 0,  "stencil write mask");
+   j++;
+
+   stream->offset += len * sizeof(GLuint);   /* cursor is a byte offset */
+   assert(j == len);   /* only one dword is decoded, so len must be 1 */
+   return GL_TRUE;
+}
+
+static GLboolean debug_map_state( struct debug_stream *stream,
+				  const char *name,
+				  GLuint len )
+{  /* Print a map-state packet: header, map mask, then one 3-dword TMn record per loop pass. */
+   GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
+   int j = 0;   /* index of the next dword to decode */
+
+   PRINTF("%s (%d dwords):\n", name, len);
+   PRINTF("\t0x%08x\n",  ptr[j++]);   /* command header dword, printed raw */
+   
+   {
+      PRINTF("\t0x%08x\n",  ptr[j]);
+      BITS(ptr[j], 15, 0,   "map mask");
+      j++;
+   }
+
+   while (j < len) {   /* 3 dwords per pass; the assert below trips if len is not 2 + 3*k */
+      {
+	 PRINTF("\t  TMn.0: 0x%08x\n", ptr[j]);
+	 PRINTF("\t map address: 0x%08x\n", (ptr[j] & ~0x3));   /* low two bits are flags, masked off */
+	 FLAG(ptr[j], 1, "vertical line stride");
+	 FLAG(ptr[j], 0, "vertical line stride offset");
+	 j++;
+      }
+
+      {
+	 PRINTF("\t  TMn.1: 0x%08x\n", ptr[j]);
+	 BITS(ptr[j], 31, 21, "height");
+	 BITS(ptr[j], 20, 10, "width");
+	 BITS(ptr[j], 9, 7, "surface format");
+	 BITS(ptr[j], 6, 3, "texel format");
+	 FLAG(ptr[j], 2, "use fence regs");
+	 FLAG(ptr[j], 1, "tiled surface");
+	 FLAG(ptr[j], 0, "tile walk ymajor");
+	 j++;
+      }
+      {
+	 PRINTF("\t  TMn.2: 0x%08x\n", ptr[j]);
+	 BITS(ptr[j], 31, 21, "dword pitch");
+	 BITS(ptr[j], 20, 15, "cube face enables");
+	 BITS(ptr[j], 14, 9, "max lod");
+	 FLAG(ptr[j], 8,     "mip layout right");
+	 BITS(ptr[j], 7, 0, "depth");
+	 j++;
+      }
+   }
+
+   stream->offset += len * sizeof(GLuint);   /* cursor is a byte offset */
+   assert(j == len);
+   return GL_TRUE;
+}
+
+static GLboolean debug_sampler_state( struct debug_stream *stream,
+				  const char *name,
+				  GLuint len )
+{  /* Print a sampler-state packet: header, sampler mask, then one 3-dword TSn record per loop pass. */
+   GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
+   int j = 0;   /* index of the next dword to decode */
+
+   PRINTF("%s (%d dwords):\n", name, len);
+   PRINTF("\t0x%08x\n",  ptr[j++]);   /* command header dword, printed raw */
+   
+   {
+      PRINTF("\t0x%08x\n",  ptr[j]);
+      BITS(ptr[j], 15, 0,   "sampler mask");
+      j++;
+   }
+
+   while (j < len) {   /* 3 dwords per pass; the assert below trips if len is not 2 + 3*k */
+      {
+	 PRINTF("\t  TSn.0: 0x%08x\n", ptr[j]);
+	 FLAG(ptr[j], 31, "reverse gamma");
+	 FLAG(ptr[j], 30, "planar to packed");
+	 FLAG(ptr[j], 29, "yuv->rgb");
+	 BITS(ptr[j], 28, 27, "chromakey index");
+	 BITS(ptr[j], 26, 22, "base mip level");
+	 BITS(ptr[j], 21, 20, "mip mode filter");
+	 BITS(ptr[j], 19, 17, "mag mode filter");
+	 BITS(ptr[j], 16, 14, "min mode filter");
+	 BITS(ptr[j], 13, 5,  "lod bias (s4.4)");
+	 FLAG(ptr[j], 4,      "shadow enable");
+	 FLAG(ptr[j], 3,      "max-aniso-4");
+	 BITS(ptr[j], 2, 0,   "shadow func");
+	 j++;
+      }
+
+      {
+	 PRINTF("\t  TSn.1: 0x%08x\n", ptr[j]);
+	 BITS(ptr[j], 31, 24, "min lod");
+	 MBZ( ptr[j], 23, 18 );
+	 FLAG(ptr[j], 17,     "kill pixel enable");
+	 FLAG(ptr[j], 16,     "keyed tex filter mode");
+	 FLAG(ptr[j], 15,     "chromakey enable");
+	 BITS(ptr[j], 14, 12, "tcx wrap mode");
+	 BITS(ptr[j], 11, 9,  "tcy wrap mode");
+	 BITS(ptr[j], 8,  6,  "tcz wrap mode");
+	 FLAG(ptr[j], 5,      "normalized coords");
+	 BITS(ptr[j], 4,  1,  "map (surface) index");
+	 FLAG(ptr[j], 0,      "EAST deinterlacer enable");
+	 j++;
+      }
+      {
+	 PRINTF("\t  TSn.2: 0x%08x  (default color)\n", ptr[j]);   /* printed whole, no sub-fields */
+	 j++;
+      }
+   }
+
+   stream->offset += len * sizeof(GLuint);   /* cursor is a byte offset */
+   assert(j == len);
+   return GL_TRUE;
+}
+
+static GLboolean debug_dest_vars( struct debug_stream *stream,
+				  const char *name,
+				  GLuint len )
+{  /* Print a dest-buffer-variables packet (header + one field dword), then step the cursor past it. */
+   GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
+   int j = 0;   /* index of the next dword to decode */
+
+   PRINTF("%s (%d dwords):\n", name, len);
+   PRINTF("\t0x%08x\n",  ptr[j++]);   /* command header dword, printed raw */
+
+   {
+      PRINTF("\t0x%08x\n",  ptr[j]);
+      FLAG(ptr[j], 31,     "early classic ztest");
+      FLAG(ptr[j], 30,     "opengl tex default color");
+      FLAG(ptr[j], 29,     "bypass iz");
+      FLAG(ptr[j], 28,     "lod preclamp");
+      BITS(ptr[j], 27, 26, "dither pattern");
+      FLAG(ptr[j], 25,     "linear gamma blend");
+      FLAG(ptr[j], 24,     "debug dither");
+      BITS(ptr[j], 23, 20, "dstorg x");
+      BITS(ptr[j], 19, 16, "dstorg y");
+      MBZ (ptr[j], 15, 15 );
+      BITS(ptr[j], 14, 12, "422 write select");
+      BITS(ptr[j], 11, 8,  "cbuf format");
+      BITS(ptr[j], 3, 2,   "zbuf format");
+      FLAG(ptr[j], 1,      "vert line stride");
+      FLAG(ptr[j], 0,      "vert line stride offset");   /* bit 0; was decoding bit 1 twice */
+      j++;
+   }
    
-   fprintf(stderr, "BEGIN\n");
+   stream->offset += len * sizeof(GLuint);   /* cursor is a byte offset */
+   assert(j == len);   /* two dwords are decoded, so len must be 2 */
+   return GL_TRUE;
+}
 
-   if (size+2 != sz) {
-      fprintf(stderr, "%s: program size mismatch %d/%d\n", __FUNCTION__,
-	      size+2, sz);
-      exit(1);
+static GLboolean debug_buf_info( struct debug_stream *stream,
+				  const char *name,
+				  GLuint len )
+{  /* Print a buffer-info packet (header, flags/pitch dword, base address), then step the cursor past it. */
+   GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
+   int j = 0;   /* index of the next dword to decode */
+
+   PRINTF("%s (%d dwords):\n", name, len);
+   PRINTF("\t0x%08x\n",  ptr[j++]);   /* command header dword, printed raw */
+
+   {
+      PRINTF("\t0x%08x\n",  ptr[j]);
+      BITS(ptr[j], 28, 28, "aux buffer id");
+      BITS(ptr[j], 27, 24, "buffer id (7=depth, 3=back)");
+      FLAG(ptr[j], 23,     "use fence regs");
+      FLAG(ptr[j], 22,     "tiled surface");
+      FLAG(ptr[j], 21,     "tile walk ymajor");
+      MBZ (ptr[j], 20, 14);
+      BITS(ptr[j], 13, 2,  "dword pitch");
+      MBZ (ptr[j], 1,  0);   /* only bits 1:0; bit 2 is the lowest pitch bit decoded just above */
+      j++;
    }
+   
+   PRINTF("\t0x%08x -- buffer base address\n",  ptr[j++]);
+
+   stream->offset += len * sizeof(GLuint);   /* cursor is a byte offset */
+   assert(j == len);   /* three dwords are decoded, so len must be 3 */
+   return GL_TRUE;
+}
 
-   program ++;
-   for (i = 1 ; i < sz ; i+=3, program+=3) {
-      GLuint opcode = program[0] & (0x1f<<24);
-
-      if ((GLint) opcode >= A0_NOP && opcode <= A0_SLT)
-	 print_arith_op(opcode >> 24, program);
-      else if (opcode >= T0_TEXLD && opcode <= T0_TEXKILL)
-	 print_tex_op(opcode >> 24, program);
-      else if (opcode == D0_DCL)
-	 print_dcl_op(opcode >> 24, program);
-      else 
-	 fprintf(stderr, "Unknown opcode 0x%x\n", opcode);
+static GLboolean i915_debug_packet( struct debug_stream *stream )
+{  /* Classify the command dword at the stream cursor and hand it to the matching printer. */
+   GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
+   GLuint cmd = *ptr;
+   
+   switch (((cmd >> 29) & 0x7)) {   /* bits 31:29 select the top-level command group */
+   case 0x0:
+      switch ((cmd >> 23) & 0x3f) {   /* bits 28:23 select the MI_* opcode */
+      case 0x0:
+	 return debug(stream, "MI_NOOP", 1);
+      case 0x3:
+	 return debug(stream, "MI_WAIT_FOR_EVENT", 1);
+      case 0x4:
+	 return debug(stream, "MI_FLUSH", 1);
+      case 0xA:
+	 debug(stream, "MI_BATCH_BUFFER_END", 1);
+	 return GL_FALSE;   /* false return: i915_dump_batchbuffer leaves its loop on this */
+      case 0x22:
+	 return debug(stream, "MI_LOAD_REGISTER_IMM", 3);
+      case 0x31:
+	 return debug_chain(stream, "MI_BATCH_BUFFER_START", 2);
+      default:
+	 break;
+      }
+      break;   /* unknown MI opcode: ends at the assert(0) after the outer switch */
+   case 0x1:
+      break;   /* group 1 is not decoded; also ends at the trailing assert(0) */
+   case 0x2:
+      switch ((cmd >> 22) & 0xff) {	 
+      case 0x50:
+	 return debug_color_blit(stream, "XY_COLOR_BLT", (cmd & 0xff) + 2);   /* length = low byte + 2 */
+      case 0x53:
+	 return debug_copy_blit(stream, "XY_SRC_COPY_BLT", (cmd & 0xff) + 2);
+      default:
+	 return debug(stream, "blit command", (cmd & 0xff) + 2);
+      }
+      break;
+   case 0x3:
+      switch ((cmd >> 24) & 0x1f) {	 
+      case 0x6:
+	 return debug(stream, "3DSTATE_ANTI_ALIASING", 1);
+      case 0x7:
+	 return debug(stream, "3DSTATE_RASTERIZATION_RULES", 1);
+      case 0x8:
+	 return debug(stream, "3DSTATE_BACKFACE_STENCIL_OPS", 2);
+      case 0x9:
+	 return debug(stream, "3DSTATE_BACKFACE_STENCIL_MASKS", 1);
+      case 0xb:
+	 return debug(stream, "3DSTATE_INDEPENDENT_ALPHA_BLEND", 1);
+      case 0xc:
+	 return debug(stream, "3DSTATE_MODES5", 1);	 
+      case 0xd:
+	 return debug_modes4(stream, "3DSTATE_MODES4", 1);
+      case 0x15:
+	 return debug(stream, "3DSTATE_FOG_COLOR", 1);
+      case 0x16:
+	 return debug(stream, "3DSTATE_COORD_SET_BINDINGS", 1);
+      case 0x1c:
+	 /* 3DState16NP */
+	 switch((cmd >> 19) & 0x1f) {
+	 case 0x10:
+	    return debug(stream, "3DSTATE_SCISSOR_ENABLE", 1);
+	 case 0x11:
+	    return debug(stream, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE", 1);
+	 default:
+	    break;
+	 }
+	 break;
+      case 0x1d:
+	 /* 3DStateMW */
+	 switch ((cmd >> 16) & 0xff) {   /* each case derives the packet length from the low bits of cmd, plus 2 */
+	 case 0x0:
+	    return debug_map_state(stream, "3DSTATE_MAP_STATE", (cmd & 0x1f) + 2);
+	 case 0x1:
+	    return debug_sampler_state(stream, "3DSTATE_SAMPLER_STATE", (cmd & 0x1f) + 2);
+	 case 0x4:
+	    return debug_load_immediate(stream, "3DSTATE_LOAD_STATE_IMMEDIATE", (cmd & 0xf) + 2);
+	 case 0x5:
+	    return debug_program(stream, "3DSTATE_PIXEL_SHADER_PROGRAM", (cmd & 0x1ff) + 2);
+	 case 0x6:
+	    return debug(stream, "3DSTATE_PIXEL_SHADER_CONSTANTS", (cmd & 0xff) + 2);
+	 case 0x7:
+	    return debug_load_indirect(stream, "3DSTATE_LOAD_INDIRECT", (cmd & 0xff) + 2);
+	 case 0x80:
+	    return debug(stream, "3DSTATE_DRAWING_RECTANGLE", (cmd & 0xffff) + 2);
+	 case 0x81:
+	    return debug(stream, "3DSTATE_SCISSOR_RECTANGLE", (cmd & 0xffff) + 2);
+	 case 0x83:
+	    return debug(stream, "3DSTATE_SPAN_STIPPLE", (cmd & 0xffff) + 2);
+	 case 0x85:
+	    return debug_dest_vars(stream, "3DSTATE_DEST_BUFFER_VARS", (cmd & 0xffff) + 2);
+	 case 0x88:
+	    return debug(stream, "3DSTATE_CONSTANT_BLEND_COLOR", (cmd & 0xffff) + 2);
+	 case 0x89:
+	    return debug(stream, "3DSTATE_FOG_MODE", (cmd & 0xffff) + 2);
+	 case 0x8e:
+	    return debug_buf_info(stream, "3DSTATE_BUFFER_INFO", (cmd & 0xffff) + 2);
+	 case 0x97:
+	    return debug(stream, "3DSTATE_DEPTH_OFFSET_SCALE", (cmd & 0xffff) + 2);
+	 case 0x98:
+	    return debug(stream, "3DSTATE_DEFAULT_Z", (cmd & 0xffff) + 2);
+	 case 0x99:
+	    return debug(stream, "3DSTATE_DEFAULT_DIFFUSE", (cmd & 0xffff) + 2);
+	 case 0x9a:
+	    return debug(stream, "3DSTATE_DEFAULT_SPECULAR", (cmd & 0xffff) + 2);
+	 case 0x9c:
+	    return debug(stream, "3DSTATE_CLEAR_PARAMETERS", (cmd & 0xffff) + 2);
+	 default:
+	    assert(0);
+	    return 0;
+	 }
+	 break;
+      case 0x1e:
+	 if (cmd & (1 << 23))
+	    return debug(stream, "???", (cmd & 0xffff) + 1);
+	 else
+	    return debug(stream, "", 1);
+	 break;
+      case 0x1f:
+	 if ((cmd & (1 << 23)) == 0)	
+	    return debug_prim(stream, "3DPRIM (inline)", 1, (cmd & 0x1ffff) + 2);
+	 else if (cmd & (1 << 17)) 
+	 {
+	    if ((cmd & 0xffff) == 0)
+	       return debug_variable_length_prim(stream);
+	    else
+	       return debug_prim(stream, "3DPRIM (indexed)", 0, (((cmd & 0xffff) + 1) / 2) + 1);
+	 }
+	 else
+	    return debug_prim(stream, "3DPRIM  (indirect sequential)", 0, 2); 
+	 break;
+      default:
+	 return debug(stream, "", 0);   /* NOTE(review): length 0 -- whether the cursor advances depends on debug(), not shown here */
+      }   /* NOTE(review): no break follows, so a break out of the 0x1c sub-switch falls into the outer default's assert(0) */
+   default:
+      assert(0);
+      return 0;
+   }
+
+   assert(0);   /* reached only via the `break`s in groups 0x0 and 0x1 */
+   return 0;
+}
+
+
+
+void
+i915_dump_batchbuffer( GLuint *start,
+		       GLuint *end )
+{  /* Walk the dwords in [start, end), printing one decoded packet per loop pass. */
+   struct debug_stream stream;
+   GLuint bytes = (end - start) * 4;   /* pointer difference is in dwords; the cursor counts bytes */
+   GLboolean done = GL_FALSE;   /* never set below; the loop exits via break or the offset test */
+
+   PRINTF("\n\nBATCH: (%d)\n", bytes / 4);
+   stream.offset = 0;
+   stream.ptr = (char *)start;
+   stream.end = (char *)end;   /* was left uninitialised; set so every field of stream is defined */
+   stream.print_addresses = 0;
+
+   while (!done &&
+	  stream.offset < bytes &&
+	  stream.offset >= 0)   /* always true: offset is declared unsigned in i915_debug.h */
+   {
+      if (!i915_debug_packet( &stream ))   /* false return (e.g. MI_BATCH_BUFFER_END) ends the dump */
+	 break;
+
+      assert(stream.offset <= bytes &&
+	     stream.offset >= 0);
    }
 
-   fprintf(stderr, "END\n\n");
+   PRINTF("END-BATCH\n\n\n");
 }
+
+
diff --git a/i965/intel_tex.h b/i915/i915_debug.h
index e389d52..0643a8c 100644
--- a/i965/intel_tex.h
+++ b/i915/i915_debug.h
@@ -1,8 +1,8 @@
 /**************************************************************************
  * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -25,18 +25,31 @@
  * 
  **************************************************************************/
 
-#ifndef INTELTEX_INC
-#define INTELTEX_INC
+/* Authors:  Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef I915_DEBUG_H
+#define I915_DEBUG_H
+
+struct i915_context;
+
+struct debug_stream 
+{
+   unsigned offset;		/* current gtt offset; the decoder uses it as a byte offset from ptr */
+   char *ptr;		/* pointer to gtt offset zero */
+   char *end;		/* presumably the end of the buffer (old comment duplicated ptr's) -- TODO confirm */
+   unsigned print_addresses;   /* i915_dump_batchbuffer clears this to 0 */
+};
 
-#include "mtypes.h"
-#include "intel_context.h"
 
 
-void intelInitTextureFuncs( struct dd_function_table *functions );
+extern void i915_disassemble_program(const unsigned *program, unsigned sz);
+extern void i915_print_ureg(const char *msg, unsigned ureg);
 
 
-GLuint intel_finalize_mipmap_tree( struct intel_context *intel,
-				   struct gl_texture_object *tObj );
+void
+i915_dump_batchbuffer( unsigned *start,
+		       unsigned *end );
 
 
 #endif
diff --git a/i915/i915_debug_fp.c b/i915/i915_debug_fp.c
new file mode 100644
index 0000000..84347a0
--- /dev/null
+++ b/i915/i915_debug_fp.c
@@ -0,0 +1,333 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include <stdio.h>
+
+#include "i915_reg.h"
+#include "i915_debug.h"
+#include "main/imports.h"
+#include "shader/program.h"
+#include "shader/prog_instruction.h"
+#include "shader/prog_print.h"
+
+#define PRINTF( ... ) _mesa_printf( __VA_ARGS__ )
+
+static const char *opcodes[0x20] = {   /* mnemonic for each 5-bit opcode value; indexed by (dword0 >> 24) & 0x1f */
+   "NOP",
+   "ADD",
+   "MOV",
+   "MUL",
+   "MAD",
+   "DP2ADD",
+   "DP3",
+   "DP4",
+   "FRC",
+   "RCP",
+   "RSQ",
+   "EXP",
+   "LOG",
+   "CMP",
+   "MIN",
+   "MAX",
+   "FLR",
+   "MOD",
+   "TRC",
+   "SGE",
+   "SLT",
+   "TEXLD",
+   "TEXLDP",
+   "TEXLDB",
+   "TEXKILL",
+   "DCL",
+   "0x1a",   /* 0x1a-0x1f have no mnemonic; the raw value is the placeholder */
+   "0x1b",
+   "0x1c",
+   "0x1d",
+   "0x1e",
+   "0x1f",
+};
+
+
+static const int args[0x20] = {   /* source-operand count per opcode, same index order as opcodes[] */
+   0,                           /* 0 nop */
+   2,                           /* 1 add */
+   1,                           /* 2 mov */
+   2,                           /* 3 mul */
+   3,                           /* 4 mad */
+   3,                           /* 5 dp2add */
+   2,                           /* 6 dp3 */
+   2,                           /* 7 dp4 */
+   1,                           /* 8 frc */
+   1,                           /* 9 rcp */
+   1,                           /* a rsq */
+   1,                           /* b exp */
+   1,                           /* c log */
+   3,                           /* d cmp */
+   2,                           /* e min */
+   2,                           /* f max */
+   1,                           /* 10 flr */
+   1,                           /* 11 mod */
+   1,                           /* 12 trc */
+   2,                           /* 13 sge */
+   2,                           /* 14 slt */
+   1,                           /* 15 texld */
+   1,                           /* 16 texldp */
+   1,                           /* 17 texldb */
+   1,                           /* 18 texkill */
+   0,                           /* 19 dcl */
+   0,                           /* 1a-1f: opcodes with no mnemonic */
+   0,
+   0,
+   0,
+   0,
+   0,
+};
+
+
+static const char *regname[0x8] = {   /* printable prefix per register-type value, used as "%s[%d]" */
+   "R",
+   "T",
+   "CONST",
+   "S",
+   "OC",
+   "OD",
+   "U",
+   "UNKNOWN",
+};
+
+static void
+print_reg_type_nr(GLuint type, GLuint nr)
+{  /* Print a register by symbolic name where one exists, else as regname[type][nr]. */
+   switch (type) {
+   case REG_TYPE_T:
+      switch (nr) {
+      case T_DIFFUSE:
+         PRINTF("T_DIFFUSE");
+         return;
+      case T_SPECULAR:
+         PRINTF("T_SPECULAR");
+         return;
+      case T_FOG_W:
+         PRINTF("T_FOG_W");
+         return;
+      default:
+         PRINTF("T_TEX%d", nr);   /* every other T register is printed as a texcoord slot */
+         return;
+      }
+   case REG_TYPE_OC:
+      if (nr == 0) {
+         PRINTF("oC");
+         return;
+      }
+      break;   /* nonzero nr falls back to the generic form below */
+   case REG_TYPE_OD:
+      if (nr == 0) {
+         PRINTF("oD");
+         return;
+      }
+      break;   /* nonzero nr falls back to the generic form below */
+   default:
+      break;
+   }
+
+   PRINTF("%s[%d]", regname[type], nr);   /* assumes type < 8 (callers mask with REG_TYPE_MASK) -- TODO confirm */
+}
+
+#define REG_SWIZZLE_MASK 0x7777
+#define REG_NEGATE_MASK 0x8888
+
+#define REG_SWIZZLE_XYZW ((SRC_X << A2_SRC2_CHANNEL_X_SHIFT) |	\
+		      (SRC_Y << A2_SRC2_CHANNEL_Y_SHIFT) |	\
+		      (SRC_Z << A2_SRC2_CHANNEL_Z_SHIFT) |	\
+		      (SRC_W << A2_SRC2_CHANNEL_W_SHIFT))
+
+
+static void
+print_reg_neg_swizzle(GLuint reg)
+{  /* Print ".<4 channels>" for a source operand, or nothing when it is the plain un-negated XYZW selection. */
+   int i;
+
+   if ((reg & REG_SWIZZLE_MASK) == REG_SWIZZLE_XYZW &&
+       (reg & REG_NEGATE_MASK) == 0)
+      return;   /* identity swizzle, no negation: keep the output terse */
+
+   PRINTF(".");
+
+   for (i = 3; i >= 0; i--) {   /* one nibble per channel, highest nibble printed first */
+      if (reg & (1 << ((i * 4) + 3)))   /* top bit of the nibble is the negate flag */
+         PRINTF("-");
+
+      switch ((reg >> (i * 4)) & 0x7) {   /* low three bits select the source component or a constant */
+      case 0:
+         PRINTF("x");
+         break;
+      case 1:
+         PRINTF("y");
+         break;
+      case 2:
+         PRINTF("z");
+         break;
+      case 3:
+         PRINTF("w");
+         break;
+      case 4:
+         PRINTF("0");
+         break;
+      case 5:
+         PRINTF("1");
+         break;
+      default:
+         PRINTF("?");   /* selector values 6 and 7 have no printable meaning here */
+         break;
+      }
+   }
+}
+
+
+static void
+print_src_reg(GLuint dword)
+{  /* Print a source operand laid out in the SRC2 position: register name, then its negate/swizzle suffix. */
+   GLuint nr = (dword >> A2_SRC2_NR_SHIFT) & REG_NR_MASK;
+   GLuint type = (dword >> A2_SRC2_TYPE_SHIFT) & REG_TYPE_MASK;
+   print_reg_type_nr(type, nr);
+   print_reg_neg_swizzle(dword);   /* reads only the low 16 bits of dword */
+}
+
+
+static void
+print_dest_reg(GLuint dword)
+{  /* Print the destination register, plus a ".xyzw"-style write mask unless all four channels are written. */
+   GLuint nr = (dword >> A0_DEST_NR_SHIFT) & REG_NR_MASK;
+   GLuint type = (dword >> A0_DEST_TYPE_SHIFT) & REG_TYPE_MASK;
+   print_reg_type_nr(type, nr);
+   if ((dword & A0_DEST_CHANNEL_ALL) == A0_DEST_CHANNEL_ALL)
+      return;   /* full write mask: no suffix */
+   PRINTF(".");
+   if (dword & A0_DEST_CHANNEL_X)
+      PRINTF("x");
+   if (dword & A0_DEST_CHANNEL_Y)
+      PRINTF("y");
+   if (dword & A0_DEST_CHANNEL_Z)
+      PRINTF("z");
+   if (dword & A0_DEST_CHANNEL_W)
+      PRINTF("w");
+}
+
+
+#define GET_SRC0_REG(r0, r1) (((r0)<<14)|((r1)>>A1_SRC0_CHANNEL_W_SHIFT))  /* src0 is split across two dwords */
+#define GET_SRC1_REG(r0, r1) (((r0)<<8)|((r1)>>A2_SRC1_CHANNEL_W_SHIFT))   /* src1 is split across two dwords */
+#define GET_SRC2_REG(r)      (r)                                           /* src2 is already in place */
+
+
+static void
+print_arith_op(GLuint opcode, const GLuint * program)
+{  /* Print one 3-dword arithmetic instruction as "dest = [SATURATE] OP src0[, src1[, src2]]". */
+   if (opcode != A0_NOP) {   /* NOP has no destination to show */
+      print_dest_reg(program[0]);
+      if (program[0] & A0_DEST_SATURATE)
+         PRINTF(" = SATURATE ");
+      else
+         PRINTF(" = ");
+   }
+
+   PRINTF("%s ", opcodes[opcode]);
+
+   print_src_reg(GET_SRC0_REG(program[0], program[1]));
+   if (args[opcode] == 1) {
+      PRINTF("\n");
+      return;
+   }
+
+   PRINTF(", ");
+   print_src_reg(GET_SRC1_REG(program[1], program[2]));
+   if (args[opcode] == 2) {
+      PRINTF("\n");
+      return;
+   }
+
+   PRINTF(", ");   /* NOTE(review): opcodes with args == 0 (e.g. NOP) also get here and print three sources */
+   print_src_reg(GET_SRC2_REG(program[2]));
+   PRINTF("\n");
+   return;
+}
+
+
+static void
+print_tex_op(GLuint opcode, const GLuint * program)
+{  /* Print one texture instruction as "dest = OP S[sampler],<address register>". */
+   print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL);   /* force the full mask so no ".xyzw" suffix is printed */
+   PRINTF(" = ");
+
+   PRINTF("%s ", opcodes[opcode]);
+
+   PRINTF("S[%d],", program[0] & T0_SAMPLER_NR_MASK);
+
+   print_reg_type_nr((program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) &
+                     REG_TYPE_MASK,
+                     (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK);   /* address register is in dword 1 */
+   PRINTF("\n");
+}
+
+static void
+print_dcl_op(GLuint opcode, const GLuint * program)
+{  /* Print one declaration instruction as "OP <register>". */
+   PRINTF("%s ", opcodes[opcode]);
+   print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL);   /* force the full mask so no ".xyzw" suffix is printed */
+   PRINTF("\n");
+}
+
+
+void
+i915_disassemble_program(const GLuint * program, GLuint sz)
+{  /* Print a BEGIN/END-bracketed listing of a program of sz dwords: one header dword, then 3-dword instructions. */
+   GLuint size = program[0] & 0x1ff;   /* length field taken from the low 9 bits of the header */
+   GLint i;
+
+   PRINTF("\t\tBEGIN\n");
+
+   assert(size + 2 == sz);   /* the header's length field must agree with the caller's dword count */
+
+   program++;   /* step over the header dword */
+   for (i = 1; i < sz; i += 3, program += 3) {
+      GLuint opcode = program[0] & (0x1f << 24);   /* kept in place (bits 28:24) to compare against the A0_/T0_/D0_ constants */
+
+      PRINTF("\t\t");
+
+      if ((GLint) opcode >= A0_NOP && opcode <= A0_SLT)
+         print_arith_op(opcode >> 24, program);   /* shifted down to index opcodes[] and args[] */
+      else if (opcode >= T0_TEXLD && opcode <= T0_TEXKILL)
+         print_tex_op(opcode >> 24, program);
+      else if (opcode == D0_DCL)
+         print_dcl_op(opcode >> 24, program);
+      else
+         PRINTF("Unknown opcode 0x%x\n", opcode);
+   }
+
+   PRINTF("\t\tEND\n\n");
+}
+
+
diff --git a/i915/i915_fragprog.c b/i915/i915_fragprog.c
index a28c8bb..1876218 100644
--- a/i915/i915_fragprog.c
+++ b/i915/i915_fragprog.c
@@ -29,40 +29,56 @@
 #include "macros.h"
 #include "enums.h"
 
+#include "shader/prog_instruction.h"
+#include "shader/prog_parameter.h"
+#include "shader/program.h"
+#include "shader/programopt.h"
+
 #include "tnl/tnl.h"
 #include "tnl/t_context.h"
+
 #include "intel_batchbuffer.h"
 
 #include "i915_reg.h"
 #include "i915_context.h"
 #include "i915_program.h"
 
-#include "prog_instruction.h"
-#include "prog_parameter.h"
-#include "program.h"
-#include "programopt.h"
-
-
+static const GLfloat sin_quad_constants[2][4] = {
+   {
+      2.0,
+      -1.0,
+      .5,
+      .75
+   },
+   {
+      4.0,
+      -4.0,
+      1.0 / (2.0 * M_PI),
+      .2225
+   }
+};
 
-/* 1, -1/3!, 1/5!, -1/7! */
-static const GLfloat sin_constants[4] = {  1.0, 
-					   -1.0/(3*2*1),
-					   1.0/(5*4*3*2*1),
-					   -1.0/(7*6*5*4*3*2*1) };
+static const GLfloat sin_constants[4] = { 1.0,   /* 1, -1/3!, 1/5!, -1/7! */
+   -1.0 / (3 * 2 * 1),
+   1.0 / (5 * 4 * 3 * 2 * 1),
+   -1.0 / (7 * 6 * 5 * 4 * 3 * 2 * 1)
+};
 
 /* 1, -1/2!, 1/4!, -1/6! */
-static const GLfloat cos_constants[4] = {  1.0, 
-					   -1.0/(2*1),
-					   1.0/(4*3*2*1),
-					   -1.0/(6*5*4*3*2*1) };
+static const GLfloat cos_constants[4] = { 1.0,
+   -1.0 / (2 * 1),
+   1.0 / (4 * 3 * 2 * 1),
+   -1.0 / (6 * 5 * 4 * 3 * 2 * 1)
+};
 
 /**
  * Retrieve a ureg for the given source register.  Will emit
  * constants, apply swizzling and negation as needed.
  */
-static GLuint src_vector( struct i915_fragment_program *p,
-			  const struct prog_src_register *source,
-			  const struct gl_fragment_program *program )
+static GLuint
+src_vector(struct i915_fragment_program *p,
+           const struct prog_src_register *source,
+           const struct gl_fragment_program *program)
 {
    GLuint src;
 
@@ -70,136 +86,152 @@ static GLuint src_vector( struct i915_fragment_program *p,
 
       /* Registers:
        */
-      case PROGRAM_TEMPORARY:
-	 if (source->Index >= I915_MAX_TEMPORARY) {
-	    i915_program_error( p, "Exceeded max temporary reg" );
-	    return 0;
-	 }
-	 src = UREG( REG_TYPE_R, source->Index );
+   case PROGRAM_TEMPORARY:
+      if (source->Index >= I915_MAX_TEMPORARY) {
+         i915_program_error(p, "Exceeded max temporary reg");
+         return 0;
+      }
+      src = UREG(REG_TYPE_R, source->Index);
+      break;
+   case PROGRAM_INPUT:
+      switch (source->Index) {
+      case FRAG_ATTRIB_WPOS:
+         src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL);
          break;
-      case PROGRAM_INPUT:
-	 switch (source->Index) {
-	 case FRAG_ATTRIB_WPOS:
-	    src = i915_emit_decl( p,  REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL ); 
-	    break;
-	 case FRAG_ATTRIB_COL0:
-	    src = i915_emit_decl( p,  REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL ); 
-	    break;
-	 case FRAG_ATTRIB_COL1:
-	    src = i915_emit_decl( p,  REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ ); 
-	    src = swizzle( src, X, Y, Z, ONE );
-	    break;
-	 case FRAG_ATTRIB_FOGC:
-	    src = i915_emit_decl( p,  REG_TYPE_T, T_FOG_W, D0_CHANNEL_W ); 
-	    src = swizzle( src, W, W, W, W );
-	    break;
-	 case FRAG_ATTRIB_TEX0:
-	 case FRAG_ATTRIB_TEX1:
-	 case FRAG_ATTRIB_TEX2:
-	 case FRAG_ATTRIB_TEX3:
-	 case FRAG_ATTRIB_TEX4:
-	 case FRAG_ATTRIB_TEX5:
-	 case FRAG_ATTRIB_TEX6:
-	 case FRAG_ATTRIB_TEX7:
-	    src = i915_emit_decl( p,  REG_TYPE_T, 
-				 T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0),
-				 D0_CHANNEL_ALL ); 
-	    break;
-
-	 default:
-	    i915_program_error( p, "Bad source->Index" ); 
-	    return 0;
-	 }
+      case FRAG_ATTRIB_COL0:
+         src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
+         break;
+      case FRAG_ATTRIB_COL1:
+         src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
+         src = swizzle(src, X, Y, Z, ONE);
+         break;
+      case FRAG_ATTRIB_FOGC:
+         src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
+         src = swizzle(src, W, ZERO, ZERO, ONE);
+         break;
+      case FRAG_ATTRIB_TEX0:
+      case FRAG_ATTRIB_TEX1:
+      case FRAG_ATTRIB_TEX2:
+      case FRAG_ATTRIB_TEX3:
+      case FRAG_ATTRIB_TEX4:
+      case FRAG_ATTRIB_TEX5:
+      case FRAG_ATTRIB_TEX6:
+      case FRAG_ATTRIB_TEX7:
+         src = i915_emit_decl(p, REG_TYPE_T,
+                              T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0),
+                              D0_CHANNEL_ALL);
          break;
-
-	 /* Various paramters and env values.  All emitted to
-	  * hardware as program constants.
-	  */
-      case PROGRAM_LOCAL_PARAM:
-         src = i915_emit_param4fv( 
-	    p, program->Base.LocalParams[source->Index]);
-	 break;
-
-      case PROGRAM_ENV_PARAM:
-         src = i915_emit_param4fv( 
-	    p, p->ctx->FragmentProgram.Parameters[source->Index]);
-	 break;
-
-      case PROGRAM_CONSTANT:
-      case PROGRAM_STATE_VAR:
-      case PROGRAM_NAMED_PARAM:
-         src = i915_emit_param4fv( 
-	    p, program->Base.Parameters->ParameterValues[source->Index] );
-	 break;
 
       default:
-	 i915_program_error( p, "Bad source->File" ); 
-	 return 0;
+         i915_program_error(p, "Bad source->Index");
+         return 0;
+      }
+      break;
+
+      /* Various parameters and env values.  All emitted to
+       * hardware as program constants.
+       */
+   case PROGRAM_LOCAL_PARAM:
+      src = i915_emit_param4fv(p, program->Base.LocalParams[source->Index]);
+      break;
+
+   case PROGRAM_ENV_PARAM:
+      src =
+         i915_emit_param4fv(p,
+                            p->ctx->FragmentProgram.Parameters[source->
+                                                               Index]);
+      break;
+
+   case PROGRAM_CONSTANT:
+   case PROGRAM_STATE_VAR:
+   case PROGRAM_NAMED_PARAM:
+      src =
+         i915_emit_param4fv(p,
+                            program->Base.Parameters->ParameterValues[source->
+                                                                      Index]);
+      break;
+
+   default:
+      i915_program_error(p, "Bad source->File");
+      return 0;
    }
 
-   src = swizzle(src, 
-		 GET_SWZ(source->Swizzle, 0),
-		 GET_SWZ(source->Swizzle, 1),
-		 GET_SWZ(source->Swizzle, 2),
-		 GET_SWZ(source->Swizzle, 3));
+   src = swizzle(src,
+                 GET_SWZ(source->Swizzle, 0),
+                 GET_SWZ(source->Swizzle, 1),
+                 GET_SWZ(source->Swizzle, 2), GET_SWZ(source->Swizzle, 3));
 
    if (source->NegateBase)
-      src = negate( src, 
-		    GET_BIT(source->NegateBase, 0),
-		    GET_BIT(source->NegateBase, 1),
-		    GET_BIT(source->NegateBase, 2),
-		    GET_BIT(source->NegateBase, 3));
+      src = negate(src,
+                   GET_BIT(source->NegateBase, 0),
+                   GET_BIT(source->NegateBase, 1),
+                   GET_BIT(source->NegateBase, 2),
+                   GET_BIT(source->NegateBase, 3));
 
    return src;
 }
 
 
-static GLuint get_result_vector( struct i915_fragment_program *p,
-				 const struct prog_instruction *inst )
+static GLuint
+get_result_vector(struct i915_fragment_program *p,
+                  const struct prog_instruction *inst)
 {
    switch (inst->DstReg.File) {
    case PROGRAM_OUTPUT:
       switch (inst->DstReg.Index) {
-      case FRAG_RESULT_COLR: 
-	 return UREG(REG_TYPE_OC, 0);
-      case FRAG_RESULT_DEPR: 
-	 p->depth_written = 1;
-	 return UREG(REG_TYPE_OD, 0);
-      default: 
-	 i915_program_error( p, "Bad inst->DstReg.Index" ); 
-	 return 0;
+      case FRAG_RESULT_COLR:
+         return UREG(REG_TYPE_OC, 0);
+      case FRAG_RESULT_DEPR:
+         p->depth_written = 1;
+         return UREG(REG_TYPE_OD, 0);
+      default:
+         i915_program_error(p, "Bad inst->DstReg.Index");
+         return 0;
       }
    case PROGRAM_TEMPORARY:
       return UREG(REG_TYPE_R, inst->DstReg.Index);
    default:
-      i915_program_error( p, "Bad inst->DstReg.File" ); 
+      i915_program_error(p, "Bad inst->DstReg.File");
       return 0;
    }
 }
-   
-static GLuint get_result_flags( const struct prog_instruction *inst )
+
+static GLuint
+get_result_flags(const struct prog_instruction *inst)
 {
    GLuint flags = 0;
 
-   if (inst->SaturateMode == SATURATE_ZERO_ONE) flags |= A0_DEST_SATURATE;
-   if (inst->DstReg.WriteMask & WRITEMASK_X) flags |= A0_DEST_CHANNEL_X;
-   if (inst->DstReg.WriteMask & WRITEMASK_Y) flags |= A0_DEST_CHANNEL_Y;
-   if (inst->DstReg.WriteMask & WRITEMASK_Z) flags |= A0_DEST_CHANNEL_Z;
-   if (inst->DstReg.WriteMask & WRITEMASK_W) flags |= A0_DEST_CHANNEL_W;
+   if (inst->SaturateMode == SATURATE_ZERO_ONE)
+      flags |= A0_DEST_SATURATE;
+   if (inst->DstReg.WriteMask & WRITEMASK_X)
+      flags |= A0_DEST_CHANNEL_X;
+   if (inst->DstReg.WriteMask & WRITEMASK_Y)
+      flags |= A0_DEST_CHANNEL_Y;
+   if (inst->DstReg.WriteMask & WRITEMASK_Z)
+      flags |= A0_DEST_CHANNEL_Z;
+   if (inst->DstReg.WriteMask & WRITEMASK_W)
+      flags |= A0_DEST_CHANNEL_W;
 
    return flags;
 }
 
-static GLuint translate_tex_src_target( struct i915_fragment_program *p,
-				     GLubyte bit )
+static GLuint
+translate_tex_src_target(struct i915_fragment_program *p, GLubyte bit)
 {
    switch (bit) {
-   case TEXTURE_1D_INDEX:   return D0_SAMPLE_TYPE_2D;
-   case TEXTURE_2D_INDEX:   return D0_SAMPLE_TYPE_2D;
-   case TEXTURE_RECT_INDEX: return D0_SAMPLE_TYPE_2D;
-   case TEXTURE_3D_INDEX:   return D0_SAMPLE_TYPE_VOLUME;
-   case TEXTURE_CUBE_INDEX: return D0_SAMPLE_TYPE_CUBE;
-   default: i915_program_error(p, "TexSrcBit"); return 0;
+   case TEXTURE_1D_INDEX:
+      return D0_SAMPLE_TYPE_2D;
+   case TEXTURE_2D_INDEX:
+      return D0_SAMPLE_TYPE_2D;
+   case TEXTURE_RECT_INDEX:
+      return D0_SAMPLE_TYPE_2D;
+   case TEXTURE_3D_INDEX:
+      return D0_SAMPLE_TYPE_VOLUME;
+   case TEXTURE_CUBE_INDEX:
+      return D0_SAMPLE_TYPE_CUBE;
+   default:
+      i915_program_error(p, "TexSrcBit");
+      return 0;
    }
 }
 
@@ -211,7 +243,7 @@ do {								\
    GLuint coord = src_vector( p, &inst->SrcReg[0], program);	\
    /* Texel lookup */						\
 								\
-   i915_emit_texld( p,						\
+   i915_emit_texld( p, get_live_regs(p, inst),						\
 	       get_result_vector( p, inst ),			\
 	       get_result_flags( inst ),			\
 	       sampler,						\
@@ -234,6 +266,43 @@ do {									\
 #define EMIT_2ARG_ARITH( OP ) EMIT_ARITH( OP, 2 )
 #define EMIT_3ARG_ARITH( OP ) EMIT_ARITH( OP, 3 )
 
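+/* Fill p->usedRegs[i] with the live-temporary bitmask at instruction i,
+ * walking the program backwards.  TODO: consider moving this into core.
+ */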
+static void calc_live_regs( struct i915_fragment_program *p )
+{
+    const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current;
+    GLuint regsUsed = 0xffff0000;
+    GLint i;
+   
+    for (i = program->Base.NumInstructions - 1; i >= 0; i--) {
+        struct prog_instruction *inst = &program->Base.Instructions[i];
+        int opArgs = _mesa_num_inst_src_regs(inst->Opcode);
+        int a;
+
+        /* Register is written to: unmark as live for this and preceding ops */
+        if (inst->DstReg.File == PROGRAM_TEMPORARY)
+            regsUsed &= ~(1 << inst->DstReg.Index);
+
+        for (a = 0; a < opArgs; a++) {
+            /* Register is read from: mark as live for this and preceding ops */
+            if (inst->SrcReg[a].File == PROGRAM_TEMPORARY)
+                regsUsed |= 1 << inst->SrcReg[a].Index;
+        }
+
+        p->usedRegs[i] = regsUsed;
+    }
+}
+
+static GLuint get_live_regs( struct i915_fragment_program *p, 
+                             const struct prog_instruction *inst )
+{
+    const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current;
+    GLuint nr = inst - program->Base.Instructions;
+
+    return p->usedRegs[nr];
+}
+ 
 
 /* Possible concerns:
  *
@@ -246,9 +315,11 @@ do {									\
  * can lead to confusion -- hopefully we cope with it ok now.
  *
  */
-static void upload_program( struct i915_fragment_program *p )
+static void
+upload_program(struct i915_fragment_program *p)
 {
-   const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current;
+   const struct gl_fragment_program *program =
+      p->ctx->FragmentProgram._Current;
    const struct prog_instruction *inst = program->Base.Instructions;
 
 /*    _mesa_debug_fp_inst(program->Base.NumInstructions, inst); */
@@ -258,510 +329,551 @@ static void upload_program( struct i915_fragment_program *p )
     * this being uploaded to hardware.
     */
    if (inst[0].Opcode == OPCODE_END) {
-      GLuint tmp = i915_get_utemp( p );
-      i915_emit_arith( p,
-		      A0_MOV,
-		      UREG(REG_TYPE_OC, 0), 
-		      A0_DEST_CHANNEL_ALL, 0,
-		      swizzle(tmp,ONE,ZERO,ONE,ONE), 0, 0);
+      GLuint tmp = i915_get_utemp(p);
+      i915_emit_arith(p,
+                      A0_MOV,
+                      UREG(REG_TYPE_OC, 0),
+                      A0_DEST_CHANNEL_ALL, 0,
+                      swizzle(tmp, ONE, ZERO, ONE, ONE), 0, 0);
       return;
    }
 
+   if (program->Base.NumInstructions > I915_MAX_INSN) {
+       i915_program_error( p, "Exceeded max instructions" );
+       return;
+    }
+
+   /* Not always needed:
+    */
+   calc_live_regs(p);
+
    while (1) {
       GLuint src0, src1, src2, flags;
-      GLuint tmp = 0;
+      GLuint tmp = 0, consts0 = 0, consts1 = 0;
 
       switch (inst->Opcode) {
-      case OPCODE_ABS: 
-	 src0 = src_vector( p, &inst->SrcReg[0], program);
-	 i915_emit_arith( p, 
-			 A0_MAX,
-			 get_result_vector( p, inst ), 
-			 get_result_flags( inst ), 0,
-			 src0, negate(src0, 1,1,1,1), 0);
-	 break;
-
-      case OPCODE_ADD: 
-	 EMIT_2ARG_ARITH( A0_ADD );
-	 break;
-
-      case OPCODE_CMP: 
-	 src0 = src_vector( p, &inst->SrcReg[0], program);
-	 src1 = src_vector( p, &inst->SrcReg[1], program);
-	 src2 = src_vector( p, &inst->SrcReg[2], program);
-	 i915_emit_arith( p, 
-			 A0_CMP,
-			 get_result_vector( p, inst ), 
-			 get_result_flags( inst ), 0,
-			 src0, src2, src1);	/* NOTE: order of src2, src1 */
-	 break;
+      case OPCODE_ABS:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         i915_emit_arith(p,
+                         A0_MAX,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+                         src0, negate(src0, 1, 1, 1, 1), 0);
+         break;
 
-      case OPCODE_COS:
-	 src0 = src_vector( p, &inst->SrcReg[0], program);
-	 tmp = i915_get_utemp( p );
+      case OPCODE_ADD:
+         EMIT_2ARG_ARITH(A0_ADD);
+         break;
 
-	 i915_emit_arith( p, 
-			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_X, 0,
-			 src0, 
-			 i915_emit_const1f(p, 1.0/(M_PI * 2)),
-			 0);
+      case OPCODE_CMP:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         src1 = src_vector(p, &inst->SrcReg[1], program);
+         src2 = src_vector(p, &inst->SrcReg[2], program);
+         i915_emit_arith(p, A0_CMP, get_result_vector(p, inst), get_result_flags(inst), 0, src0, src2, src1);   /* NOTE: order of src2, src1 */
+         break;
 
-	 i915_emit_arith( p, 
-			 A0_MOD,
+      case OPCODE_COS:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         tmp = i915_get_utemp(p);
+	 consts0 = i915_emit_const4fv(p, sin_quad_constants[0]);
+	 consts1 = i915_emit_const4fv(p, sin_quad_constants[1]);
+
+	 /* Reduce range from repeating about [-pi,pi] to [-1,1] */
+         i915_emit_arith(p,
+                         A0_MAD,
+                         tmp, A0_DEST_CHANNEL_X, 0,
+                         src0,
+			 swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */
+			 swizzle(consts0, W, ZERO, ZERO, ZERO)); /* .75 */
+
+         i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
+
+	 i915_emit_arith(p,
+			 A0_MAD,
 			 tmp, A0_DEST_CHANNEL_X, 0,
-			 tmp, 
-			 0, 0 );
+			 tmp,
+			 swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */
+			 swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */
 
-	 /* By choosing different taylor constants, could get rid of this mul:
+	 /* Compute COS with the same calculation used for SIN, but a
+	  * different source range has been mapped to [-1,1] this time.
 	  */
-	 i915_emit_arith( p, 
-			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_X, 0,
-			 tmp, 
-			 i915_emit_const1f(p, (M_PI * 2)),
+
+	 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */
+	 i915_emit_arith(p,
+                         A0_MAX,
+			 tmp, A0_DEST_CHANNEL_Y, 0,
+			 swizzle(tmp, ZERO, X, ZERO, ZERO),
+			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
 			 0);
 
-	 /* 
-	  * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
-	  * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1
-	  * t0 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
-	  * result = DP4 t0, cos_constants
-	  */
-	 i915_emit_arith( p, 
+	 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */
+	 i915_emit_arith(p,
 			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_XY, 0,
-			 swizzle(tmp, X,X,ONE,ONE), 
-			 swizzle(tmp, X,ONE,ONE,ONE), 0);
+			 tmp, A0_DEST_CHANNEL_Y, 0,
+			 swizzle(tmp, ZERO, X, ZERO, ZERO),
+			 tmp,
+			 0);
 
-	 i915_emit_arith( p, 
-			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_XYZ, 0,
-			 swizzle(tmp, X,Y,X,ONE), 
-			 swizzle(tmp, X,X,ONE,ONE), 0);
+	 /* tmp.x = tmp.xy DP sin_quad_constants[1].xy */
+         i915_emit_arith(p,
+                         A0_DP3,
+                         tmp, A0_DEST_CHANNEL_X, 0,
+			 tmp,
+                         swizzle(consts1, X, Y, ZERO, ZERO),
+			 0);
 
-	 i915_emit_arith( p, 
-			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_XYZ, 0,
-			 swizzle(tmp, X,X,Z,ONE), 
-			 swizzle(tmp, Z,ONE,ONE,ONE), 0);
-	    
-	 i915_emit_arith( p, 
-			 A0_DP4,
-			 get_result_vector( p, inst ), 
-			 get_result_flags( inst ), 0,
-			 swizzle(tmp, ONE,Z,Y,X),
-			 i915_emit_const4fv( p, cos_constants ), 0);
-
-	 break;
-
-      case OPCODE_DP3: 
-	 EMIT_2ARG_ARITH( A0_DP3 );
-	 break;
-
-      case OPCODE_DP4: 
-	 EMIT_2ARG_ARITH( A0_DP4 );
-	 break;
-
-      case OPCODE_DPH:  
-	 src0 = src_vector( p, &inst->SrcReg[0], program);
-	 src1 = src_vector( p, &inst->SrcReg[1], program);
-
-	 i915_emit_arith( p, 
-			 A0_DP4,
-			 get_result_vector( p, inst ), 
-			 get_result_flags( inst ), 0,
-			 swizzle(src0, X,Y,Z,ONE), src1, 0);
-	 break;
-
-      case OPCODE_DST: 
-	 src0 = src_vector( p, &inst->SrcReg[0], program);
-	 src1 = src_vector( p, &inst->SrcReg[1], program);
-
-	 /* result[0] = 1    * 1;
-	  * result[1] = a[1] * b[1];
-	  * result[2] = a[2] * 1;
-	  * result[3] = 1    * b[3];
+	 /* tmp.x now contains a first approximation (y).  Now, weight it
+	  * against tmp.y**2 to get closer.
 	  */
-	 i915_emit_arith( p, 
-			 A0_MUL,
-			 get_result_vector( p, inst ), 
-			 get_result_flags( inst ), 0,
-			 swizzle(src0, ONE, Y, Z,   ONE), 
-			 swizzle(src1, ONE, Y, ONE, W  ),
+	 i915_emit_arith(p,
+                         A0_MAX,
+			 tmp, A0_DEST_CHANNEL_Y, 0,
+			 swizzle(tmp, ZERO, X, ZERO, ZERO),
+			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
 			 0);
-	 break;
 
-      case OPCODE_EX2: 
-	 src0 = src_vector( p, &inst->SrcReg[0], program);
+	 /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */
+	 i915_emit_arith(p,
+			 A0_MAD,
+			 tmp, A0_DEST_CHANNEL_Y, 0,
+			 swizzle(tmp, ZERO, X, ZERO, ZERO),
+			 swizzle(tmp, ZERO, Y, ZERO, ZERO),
+			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0));
+
+	 /* result = .2225 * tmp.y + tmp.x = .2225 * (y * abs(y) - y) + y */
+	 i915_emit_arith(p,
+			 A0_MAD,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+			 swizzle(consts1, W, W, W, W),
+			 swizzle(tmp, Y, Y, Y, Y),
+			 swizzle(tmp, X, X, X, X));
+         break;
 
-	 i915_emit_arith( p, 
-			 A0_EXP,
-			 get_result_vector( p, inst ), 
-			 get_result_flags( inst ), 0,
-			 swizzle(src0,X,X,X,X), 0, 0);
-	 break;
+      case OPCODE_DP3:
+         EMIT_2ARG_ARITH(A0_DP3);
+         break;
 
-      case OPCODE_FLR: 
-	 EMIT_1ARG_ARITH( A0_FLR );
-	 break;
+      case OPCODE_DP4:
+         EMIT_2ARG_ARITH(A0_DP4);
+         break;
+
+      case OPCODE_DPH:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         src1 = src_vector(p, &inst->SrcReg[1], program);
+
+         i915_emit_arith(p,
+                         A0_DP4,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+                         swizzle(src0, X, Y, Z, ONE), src1, 0);
+         break;
+
+      case OPCODE_DST:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         src1 = src_vector(p, &inst->SrcReg[1], program);
+
+         /* result[0] = 1    * 1;
+          * result[1] = a[1] * b[1];
+          * result[2] = a[2] * 1;
+          * result[3] = 1    * b[3];
+          */
+         i915_emit_arith(p,
+                         A0_MUL,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+                         swizzle(src0, ONE, Y, Z, ONE),
+                         swizzle(src1, ONE, Y, ONE, W), 0);
+         break;
 
-      case OPCODE_FRC: 
-	 EMIT_1ARG_ARITH( A0_FRC );
-	 break;
+      case OPCODE_EX2:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+
+         i915_emit_arith(p,
+                         A0_EXP,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+                         swizzle(src0, X, X, X, X), 0, 0);
+         break;
+
+      case OPCODE_FLR:
+         EMIT_1ARG_ARITH(A0_FLR);
+         break;
+
+      case OPCODE_FRC:
+         EMIT_1ARG_ARITH(A0_FRC);
+         break;
 
       case OPCODE_KIL:
-	 src0 = src_vector( p, &inst->SrcReg[0], program);
-	 tmp = i915_get_utemp( p );
-
-	 i915_emit_texld( p,
-			 tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */
-			 0,
-			 src0,
-			 T0_TEXKILL );
-	 break;
-
-      case OPCODE_LG2: 
-	 src0 = src_vector( p, &inst->SrcReg[0], program);
-
-	 i915_emit_arith( p, 
-			 A0_LOG,
-			 get_result_vector( p, inst ), 
-			 get_result_flags( inst ), 0,
-			 swizzle(src0,X,X,X,X), 0, 0);
-	 break;
-
-      case OPCODE_LIT: 
-	 src0 = src_vector( p, &inst->SrcReg[0], program);
-	 tmp = i915_get_utemp( p );
-
-	 /* tmp = max( a.xyzw, a.00zw )
-	  * XXX: Clamp tmp.w to -128..128
-	  * tmp.y = log(tmp.y)
-	  * tmp.y = tmp.w * tmp.y
-	  * tmp.y = exp(tmp.y)
-	  * result = cmp (a.11-x1, a.1x01, a.1xy1 )
-	  */
-	 i915_emit_arith( p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, 
-			 src0, swizzle(src0, ZERO, ZERO, Z, W), 0 );
-
-	 i915_emit_arith( p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, 
-			 swizzle(tmp, Y, Y, Y, Y), 0, 0 );
-
-	 i915_emit_arith( p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, 
-			 swizzle(tmp, ZERO, Y, ZERO, ZERO), 
-			 swizzle(tmp, ZERO, W, ZERO, ZERO), 0 );
-
-	 i915_emit_arith( p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, 
-			 swizzle(tmp, Y, Y, Y, Y), 0, 0 );
-
-	 i915_emit_arith( p, A0_CMP,
-			 get_result_vector( p, inst ), 
-			 get_result_flags( inst ), 0,
-			 negate(swizzle(tmp, ONE, ONE, X, ONE),0,0,1,0),
-			 swizzle(tmp, ONE, X, ZERO, ONE),
-			 swizzle(tmp, ONE, X, Y, ONE));
-		     
-	 break;
-
-      case OPCODE_LRP: 
-	 src0 = src_vector( p, &inst->SrcReg[0], program);
-	 src1 = src_vector( p, &inst->SrcReg[1], program);
-	 src2 = src_vector( p, &inst->SrcReg[2], program);
-	 flags = get_result_flags( inst );
-	 tmp = i915_get_utemp( p );
-
-	 /* b*a + c*(1-a)
-	  *
-	  * b*a + c - ca 
-	  *
-	  * tmp = b*a + c, 
-	  * result = (-c)*a + tmp 
-	  */
-	 i915_emit_arith( p, A0_MAD, tmp, 
-			 flags & A0_DEST_CHANNEL_ALL, 0,
-			 src1, src0, src2 );
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         tmp = i915_get_utemp(p);
 
-	 i915_emit_arith( p, A0_MAD, 
-			 get_result_vector( p, inst ), 
-			 flags, 0, 
-			 negate(src2, 1,1,1,1), src0, tmp );
-	 break;
+         i915_emit_texld(p, get_live_regs(p, inst),
+                         tmp, A0_DEST_CHANNEL_ALL,   /* use a dummy dest reg */
+                         0, src0, T0_TEXKILL);
+         break;
+
+      case OPCODE_LG2:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+
+         i915_emit_arith(p,
+                         A0_LOG,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+                         swizzle(src0, X, X, X, X), 0, 0);
+         break;
+
+      case OPCODE_LIT:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         tmp = i915_get_utemp(p);
+
+         /* tmp = max( a.xyzw, a.00zw )
+          * XXX: Clamp tmp.w to -128..128
+          * tmp.y = log(tmp.y)
+          * tmp.y = tmp.w * tmp.y
+          * tmp.y = exp(tmp.y)
+          * result = cmp (a.11-x1, a.1x01, a.1xy1 )
+          */
+         i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
+                         src0, swizzle(src0, ZERO, ZERO, Z, W), 0);
+
+         i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
+                         swizzle(tmp, Y, Y, Y, Y), 0, 0);
+
+         i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
+                         swizzle(tmp, ZERO, Y, ZERO, ZERO),
+                         swizzle(tmp, ZERO, W, ZERO, ZERO), 0);
+
+         i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
+                         swizzle(tmp, Y, Y, Y, Y), 0, 0);
+
+         i915_emit_arith(p, A0_CMP,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+                         negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
+                         swizzle(tmp, ONE, X, ZERO, ONE),
+                         swizzle(tmp, ONE, X, Y, ONE));
+
+         break;
+
+      case OPCODE_LRP:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         src1 = src_vector(p, &inst->SrcReg[1], program);
+         src2 = src_vector(p, &inst->SrcReg[2], program);
+         flags = get_result_flags(inst);
+         tmp = i915_get_utemp(p);
+
+         /* b*a + c*(1-a)
+          *
+          * b*a + c - ca 
+          *
+          * tmp = b*a + c, 
+          * result = (-c)*a + tmp 
+          */
+         i915_emit_arith(p, A0_MAD, tmp,
+                         flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);
+
+         i915_emit_arith(p, A0_MAD,
+                         get_result_vector(p, inst),
+                         flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
+         break;
 
       case OPCODE_MAD:
-	 EMIT_3ARG_ARITH( A0_MAD );
-	 break;
+         EMIT_3ARG_ARITH(A0_MAD);
+         break;
 
       case OPCODE_MAX:
-	 EMIT_2ARG_ARITH( A0_MAX );
-	 break;
-
-      case OPCODE_MIN: 
-	 src0 = src_vector( p, &inst->SrcReg[0], program);
-	 src1 = src_vector( p, &inst->SrcReg[1], program);
-	 tmp = i915_get_utemp( p );
-	 flags = get_result_flags( inst );
-
-	 i915_emit_arith( p, 
-			 A0_MAX,
-			 tmp, flags & A0_DEST_CHANNEL_ALL, 0,
-			 negate(src0,1,1,1,1), 
-			 negate(src1,1,1,1,1), 0);
-
-	 i915_emit_arith( p,
-			 A0_MOV,
-			 get_result_vector( p, inst ), 
-			 flags, 0,
-			 negate(tmp, 1,1,1,1), 0, 0);
-	 break;
-
-      case OPCODE_MOV: 
-	 EMIT_1ARG_ARITH( A0_MOV );
-	 break;
-
-      case OPCODE_MUL: 
-	 EMIT_2ARG_ARITH( A0_MUL );
-	 break;
-
-      case OPCODE_POW: 
-	 src0 = src_vector( p, &inst->SrcReg[0], program);
-	 src1 = src_vector( p, &inst->SrcReg[1], program);
-	 tmp = i915_get_utemp( p );
-	 flags = get_result_flags( inst );
-
-	 /* XXX: masking on intermediate values, here and elsewhere.
-	  */
-	 i915_emit_arith( p, 
-			 A0_LOG,
-			 tmp, A0_DEST_CHANNEL_X, 0,
-			 swizzle(src0,X,X,X,X), 0, 0);
+         EMIT_2ARG_ARITH(A0_MAX);
+         break;
 
-	 i915_emit_arith( p,
-			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_X, 0,
-			 tmp, src1, 0);
+      case OPCODE_MIN:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         src1 = src_vector(p, &inst->SrcReg[1], program);
+         tmp = i915_get_utemp(p);
+         flags = get_result_flags(inst);
+
+         i915_emit_arith(p,
+                         A0_MAX,
+                         tmp, flags & A0_DEST_CHANNEL_ALL, 0,
+                         negate(src0, 1, 1, 1, 1),
+                         negate(src1, 1, 1, 1, 1), 0);
+
+         i915_emit_arith(p,
+                         A0_MOV,
+                         get_result_vector(p, inst),
+                         flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0);
+         break;
+
+      case OPCODE_MOV:
+         EMIT_1ARG_ARITH(A0_MOV);
+         break;
+
+      case OPCODE_MUL:
+         EMIT_2ARG_ARITH(A0_MUL);
+         break;
+
+      case OPCODE_POW:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         src1 = src_vector(p, &inst->SrcReg[1], program);
+         tmp = i915_get_utemp(p);
+         flags = get_result_flags(inst);
+
+         /* XXX: masking on intermediate values, here and elsewhere.
+          */
+         i915_emit_arith(p,
+                         A0_LOG,
+                         tmp, A0_DEST_CHANNEL_X, 0,
+                         swizzle(src0, X, X, X, X), 0, 0);
 
+         i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);
 
-	 i915_emit_arith( p,
-			 A0_EXP,
-			 get_result_vector( p, inst ), 
-			 flags, 0,
-			 swizzle(tmp,X,X,X,X), 0, 0);
 
-	 break;
+         i915_emit_arith(p,
+                         A0_EXP,
+                         get_result_vector(p, inst),
+                         flags, 0, swizzle(tmp, X, X, X, X), 0, 0);
 
-      case OPCODE_RCP: 
-	 src0 = src_vector( p, &inst->SrcReg[0], program);
+         break;
+
+      case OPCODE_RCP:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+
+         i915_emit_arith(p,
+                         A0_RCP,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+                         swizzle(src0, X, X, X, X), 0, 0);
+         break;
 
-	 i915_emit_arith( p, 
-			 A0_RCP,
-			 get_result_vector( p, inst ), 
-			 get_result_flags( inst ), 0,
-			 swizzle(src0,X,X,X,X), 0, 0);
-	 break;
+      case OPCODE_RSQ:
 
-      case OPCODE_RSQ: 
+         src0 = src_vector(p, &inst->SrcReg[0], program);
 
-	 src0 = src_vector( p, &inst->SrcReg[0], program);
+         i915_emit_arith(p,
+                         A0_RSQ,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+                         swizzle(src0, X, X, X, X), 0, 0);
+         break;
 
-	 i915_emit_arith( p, 
-			 A0_RSQ,
-			 get_result_vector( p, inst ), 
-			 get_result_flags( inst ), 0,
-			 swizzle(src0,X,X,X,X), 0, 0);
-	 break;
-	 
       case OPCODE_SCS:
-	 src0 = src_vector( p, &inst->SrcReg[0], program);
-	 tmp = i915_get_utemp( p );
-
-	 /* 
-	  * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
-	  * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
-	  * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
-	  * scs.x = DP4 t1, sin_constants
-	  * t1 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
-	  * scs.y = DP4 t1, cos_constants
-	  */
-	 i915_emit_arith( p, 
-			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_XY, 0,
-			 swizzle(src0, X,X,ONE,ONE), 
-			 swizzle(src0, X,ONE,ONE,ONE), 0);
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         tmp = i915_get_utemp(p);
+
+         /* 
+          * t0.xy = MUL x.xx11, x.x111  ; x^2, x, 1, 1
+          * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
+          * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
+          * scs.x = DP4 t1, sin_constants
+          * t1 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
+          * scs.y = DP4 t1, cos_constants
+          */
+         i915_emit_arith(p,
+                         A0_MUL,
+                         tmp, A0_DEST_CHANNEL_XY, 0,
+                         swizzle(src0, X, X, ONE, ONE),
+                         swizzle(src0, X, ONE, ONE, ONE), 0);
+
+         i915_emit_arith(p,
+                         A0_MUL,
+                         tmp, A0_DEST_CHANNEL_ALL, 0,
+                         swizzle(tmp, X, Y, X, Y),
+                         swizzle(tmp, X, X, ONE, ONE), 0);
+
+         if (inst->DstReg.WriteMask & WRITEMASK_Y) {
+            GLuint tmp1;
+
+            if (inst->DstReg.WriteMask & WRITEMASK_X)
+               tmp1 = i915_get_utemp(p);
+            else
+               tmp1 = tmp;
+
+            i915_emit_arith(p,
+                            A0_MUL,
+                            tmp1, A0_DEST_CHANNEL_ALL, 0,
+                            swizzle(tmp, X, Y, Y, W),
+                            swizzle(tmp, X, Z, ONE, ONE), 0);
+
+            i915_emit_arith(p,
+                            A0_DP4,
+                            get_result_vector(p, inst),
+                            A0_DEST_CHANNEL_Y, 0,
+                            swizzle(tmp1, W, Z, Y, X),
+                            i915_emit_const4fv(p, sin_constants), 0);
+         }
+
+         if (inst->DstReg.WriteMask & WRITEMASK_X) {
+            i915_emit_arith(p,
+                            A0_MUL,
+                            tmp, A0_DEST_CHANNEL_XYZ, 0,
+                            swizzle(tmp, X, X, Z, ONE),
+                            swizzle(tmp, Z, ONE, ONE, ONE), 0);
+
+            i915_emit_arith(p,
+                            A0_DP4,
+                            get_result_vector(p, inst),
+                            A0_DEST_CHANNEL_X, 0,
+                            swizzle(tmp, ONE, Z, Y, X),
+                            i915_emit_const4fv(p, cos_constants), 0);
+         }
+         break;
 
-	 i915_emit_arith( p, 
-			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_ALL, 0,
-			 swizzle(tmp, X,Y,X,Y), 
-			 swizzle(tmp, X,X,ONE,ONE), 0);
-
-	 if (inst->DstReg.WriteMask & WRITEMASK_Y) {
-	    GLuint tmp1;
-	    
-	    if (inst->DstReg.WriteMask & WRITEMASK_X)
-	       tmp1 = i915_get_utemp( p );
-	    else
-	       tmp1 = tmp;
-
-	    i915_emit_arith( p, 
-			    A0_MUL,
-			    tmp1, A0_DEST_CHANNEL_ALL, 0,
-			    swizzle(tmp, X,Y,Y,W), 
-			    swizzle(tmp, X,Z,ONE,ONE), 0);
-	    
-	    i915_emit_arith( p, 
-			    A0_DP4,
-			    get_result_vector( p, inst ), 
-			    A0_DEST_CHANNEL_Y, 0,
-			    swizzle(tmp1, W,Z,Y,X),
-			    i915_emit_const4fv( p, sin_constants ), 0);
-	 }
-
-	 if (inst->DstReg.WriteMask & WRITEMASK_X) {
-	    i915_emit_arith( p, 
-			    A0_MUL,
-			    tmp, A0_DEST_CHANNEL_XYZ, 0,
-			    swizzle(tmp, X,X,Z,ONE), 
-			    swizzle(tmp, Z,ONE,ONE,ONE), 0);
-	    
-	    i915_emit_arith( p, 
-			    A0_DP4,
-			    get_result_vector( p, inst ), 
-			    A0_DEST_CHANNEL_X, 0,
-			    swizzle(tmp, ONE,Z,Y,X), 
-			    i915_emit_const4fv( p, cos_constants ), 0);
-	 }
-	 break;
-
-      case OPCODE_SGE: 
-	 EMIT_2ARG_ARITH( A0_SGE );
-	 break;
+      case OPCODE_SGE:
+         EMIT_2ARG_ARITH(A0_SGE);
+         break;
 
       case OPCODE_SIN:
-	 src0 = src_vector( p, &inst->SrcReg[0], program);
-	 tmp = i915_get_utemp( p );
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         tmp = i915_get_utemp(p);
+	 consts0 = i915_emit_const4fv(p, sin_quad_constants[0]);
+	 consts1 = i915_emit_const4fv(p, sin_quad_constants[1]);
+
+	 /* Reduce range from repeating about [-pi,pi] to [-1,1] */
+         i915_emit_arith(p,
+                         A0_MAD,
+                         tmp, A0_DEST_CHANNEL_X, 0,
+                         src0,
+			 swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */
+			 swizzle(consts0, Z, ZERO, ZERO, ZERO)); /* .5 */
+
+         i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
+
+	 i915_emit_arith(p,
+			 A0_MAD,
+			 tmp, A0_DEST_CHANNEL_X, 0,
+			 tmp,
+			 swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */
+			 swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */
+
+	 /* Compute sin using a quadratic and quartic.  It gives continuity
+	  * that repeating the Taylor series lacks every 2*pi, and has
+	  * reduced error.
+	  *
+	  * The idea was described at:
+	  * http://www.devmaster.net/forums/showthread.php?t=5784
+	  */
+
+	 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */
+	 i915_emit_arith(p,
+                         A0_MAX,
+			 tmp, A0_DEST_CHANNEL_Y, 0,
+			 swizzle(tmp, ZERO, X, ZERO, ZERO),
+			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
+			 0);
 
-	 i915_emit_arith( p, 
+	 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */
+	 i915_emit_arith(p,
 			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_X, 0,
-			 src0, 
-			 i915_emit_const1f(p, 1.0/(M_PI * 2)),
+			 tmp, A0_DEST_CHANNEL_Y, 0,
+			 swizzle(tmp, ZERO, X, ZERO, ZERO),
+			 tmp,
 			 0);
 
-	 i915_emit_arith( p, 
-			 A0_MOD,
-			 tmp, A0_DEST_CHANNEL_X, 0,
-			 tmp, 
-			 0, 0 );
+	 /* tmp.x = tmp.xy DP sin_quad_constants[1].xy */
+         i915_emit_arith(p,
+                         A0_DP3,
+                         tmp, A0_DEST_CHANNEL_X, 0,
+			 tmp,
+                         swizzle(consts1, X, Y, ZERO, ZERO),
+			 0);
 
-	 /* By choosing different taylor constants, could get rid of this mul:
+	 /* tmp.x now contains a first approximation (y).  Now, weight it
+	  * against tmp.y**2 to get closer.
 	  */
-	 i915_emit_arith( p, 
-			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_X, 0,
-			 tmp, 
-			 i915_emit_const1f(p, (M_PI * 2)),
+	 i915_emit_arith(p,
+                         A0_MAX,
+			 tmp, A0_DEST_CHANNEL_Y, 0,
+			 swizzle(tmp, ZERO, X, ZERO, ZERO),
+			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
 			 0);
 
-	 /* 
-	  * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
-	  * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
-	  * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
-	  * result = DP4 t1.wzyx, sin_constants
-	  */
-	 i915_emit_arith( p, 
-			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_XY, 0,
-			 swizzle(tmp, X,X,ONE,ONE), 
-			 swizzle(tmp, X,ONE,ONE,ONE), 0);
+	 /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */
+	 i915_emit_arith(p,
+			 A0_MAD,
+			 tmp, A0_DEST_CHANNEL_Y, 0,
+			 swizzle(tmp, ZERO, X, ZERO, ZERO),
+			 swizzle(tmp, ZERO, Y, ZERO, ZERO),
+			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0));
 
-	 i915_emit_arith( p, 
-			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_ALL, 0,
-			 swizzle(tmp, X,Y,X,Y), 
-			 swizzle(tmp, X,X,ONE,ONE), 0);
+	 /* result = .2225 * tmp.y + tmp.x = .2225 * (y * abs(y) - y) + y */
+	 i915_emit_arith(p,
+			 A0_MAD,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+			 swizzle(consts1, W, W, W, W),
+			 swizzle(tmp, Y, Y, Y, Y),
+			 swizzle(tmp, X, X, X, X));
 
-	 i915_emit_arith( p, 
-			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_ALL, 0,
-			 swizzle(tmp, X,Y,Y,W), 
-			 swizzle(tmp, X,Z,ONE,ONE), 0);
-	    
-	 i915_emit_arith( p, 
-			 A0_DP4,
-			 get_result_vector( p, inst ), 
-			 get_result_flags( inst ), 0,
-			 swizzle(tmp, W, Z, Y, X ),
-			 i915_emit_const4fv( p, sin_constants ), 0);
-	 break;
-
-      case OPCODE_SLT: 
-	 EMIT_2ARG_ARITH( A0_SLT );
-	 break;
-
-      case OPCODE_SUB: 
-	 src0 = src_vector( p, &inst->SrcReg[0], program);
-	 src1 = src_vector( p, &inst->SrcReg[1], program);
-
-	 i915_emit_arith( p, 
-			 A0_ADD,
-			 get_result_vector( p, inst ), 
-			 get_result_flags( inst ), 0,
-			 src0, negate(src1, 1,1,1,1), 0);
-	 break;
-
-      case OPCODE_SWZ: 
-	 EMIT_1ARG_ARITH( A0_MOV ); /* extended swizzle handled natively */
-	 break;
-
-      case OPCODE_TEX: 
-	 EMIT_TEX( T0_TEXLD );
-	 break;
+         break;
+
+      case OPCODE_SLT:
+         EMIT_2ARG_ARITH(A0_SLT);
+         break;
+
+      case OPCODE_SUB:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         src1 = src_vector(p, &inst->SrcReg[1], program);
+
+         i915_emit_arith(p,
+                         A0_ADD,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+                         src0, negate(src1, 1, 1, 1, 1), 0);
+         break;
+
+      case OPCODE_SWZ:
+         EMIT_1ARG_ARITH(A0_MOV);       /* extended swizzle handled natively */
+         break;
+
+      case OPCODE_TEX:
+         EMIT_TEX(T0_TEXLD);
+         break;
 
       case OPCODE_TXB:
-	 EMIT_TEX( T0_TEXLDB );
-	 break;
+         EMIT_TEX(T0_TEXLDB);
+         break;
 
       case OPCODE_TXP:
-	 EMIT_TEX( T0_TEXLDP );
-	 break;
+         EMIT_TEX(T0_TEXLDP);
+         break;
 
       case OPCODE_XPD:
-	 /* Cross product:
-	  *      result.x = src0.y * src1.z - src0.z * src1.y;
-	  *      result.y = src0.z * src1.x - src0.x * src1.z;
-	  *      result.z = src0.x * src1.y - src0.y * src1.x;
-	  *      result.w = undef;
-	  */
-	 src0 = src_vector( p, &inst->SrcReg[0], program);
-	 src1 = src_vector( p, &inst->SrcReg[1], program);
-	 tmp = i915_get_utemp( p );
-	 
-	 i915_emit_arith( p, 
-			 A0_MUL,
-			 tmp, A0_DEST_CHANNEL_ALL, 0,
-			 swizzle(src0,Z,X,Y,ONE), 
-			 swizzle(src1,Y,Z,X,ONE), 0);
-
-	 i915_emit_arith( p, 
-			 A0_MAD,
-			 get_result_vector( p, inst ), 
-			 get_result_flags( inst ), 0,
-			 swizzle(src0,Y,Z,X,ONE), 
-			 swizzle(src1,Z,X,Y,ONE), 
-			 negate(tmp,1,1,1,0));
-	 break;
+         /* Cross product:
+          *      result.x = src0.y * src1.z - src0.z * src1.y;
+          *      result.y = src0.z * src1.x - src0.x * src1.z;
+          *      result.z = src0.x * src1.y - src0.y * src1.x;
+          *      result.w = undef;
+          */
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         src1 = src_vector(p, &inst->SrcReg[1], program);
+         tmp = i915_get_utemp(p);
+
+         i915_emit_arith(p,
+                         A0_MUL,
+                         tmp, A0_DEST_CHANNEL_ALL, 0,
+                         swizzle(src0, Z, X, Y, ONE),
+                         swizzle(src1, Y, Z, X, ONE), 0);
+
+         i915_emit_arith(p,
+                         A0_MAD,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+                         swizzle(src0, Y, Z, X, ONE),
+                         swizzle(src1, Z, X, Y, ONE),
+                         negate(tmp, 1, 1, 1, 0));
+         break;
 
       case OPCODE_END:
-	 return;
-	 
+         return;
+
       default:
-	 i915_program_error( p, "bad opcode" );
-	 return;
+         i915_program_error(p, "bad opcode");
+         return;
       }
 
       inst++;
-      i915_release_utemps( p ); 
+      i915_release_utemps(p);
    }
 }
 
@@ -769,21 +881,22 @@ static void upload_program( struct i915_fragment_program *p )
  * emit, just move the value into its correct position at the end of
  * the program:
  */
-static void fixup_depth_write( struct i915_fragment_program *p )
+static void
+fixup_depth_write(struct i915_fragment_program *p)
 {
    if (p->depth_written) {
       GLuint depth = UREG(REG_TYPE_OD, 0);
 
-      i915_emit_arith( p, 
-		      A0_MOV,
-		      depth, A0_DEST_CHANNEL_W, 0,
-		      swizzle(depth,X,Y,Z,Z), 
-		      0, 0);
+      i915_emit_arith(p,
+                      A0_MOV,
+                      depth, A0_DEST_CHANNEL_W, 0,
+                      swizzle(depth, X, Y, Z, Z), 0, 0);
    }
 }
 
 
-static void check_wpos( struct i915_fragment_program *p )
+static void
+check_wpos(struct i915_fragment_program *p)
 {
    GLuint inputs = p->FragProg.Base.InputsRead;
    GLint i;
@@ -791,12 +904,12 @@ static void check_wpos( struct i915_fragment_program *p )
    p->wpos_tex = -1;
 
    for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
-      if (inputs & FRAG_BIT_TEX(i)) 
-	 continue;
+      if (inputs & FRAG_BIT_TEX(i))
+         continue;
       else if (inputs & FRAG_BIT_WPOS) {
-	 p->wpos_tex = i;
-	 inputs &= ~FRAG_BIT_WPOS;
-      }   
+         p->wpos_tex = i;
+         inputs &= ~FRAG_BIT_WPOS;
+      }
    }
 
    if (inputs & FRAG_BIT_WPOS) {
@@ -805,139 +918,131 @@ static void check_wpos( struct i915_fragment_program *p )
 }
 
 
-static void translate_program( struct i915_fragment_program *p )
+static void
+translate_program(struct i915_fragment_program *p)
 {
-   i915ContextPtr i915 = I915_CONTEXT(p->ctx);
-   
-   i915_init_program( i915, p );
-   check_wpos( p ); 
-   upload_program( p );
-   fixup_depth_write( p );
-   i915_fini_program( p ); 
-   
+   struct i915_context *i915 = I915_CONTEXT(p->ctx);
+
+   i915_init_program(i915, p);
+   check_wpos(p);
+   upload_program(p);
+   fixup_depth_write(p);
+   i915_fini_program(p);
+
    p->translated = 1;
 }
 
 
-static void track_params( struct i915_fragment_program *p )
+static void
+track_params(struct i915_fragment_program *p)
 {
    GLint i;
 
    if (p->nr_params)
-      _mesa_load_state_parameters(p->ctx, p->FragProg.Base.Parameters); 
+      _mesa_load_state_parameters(p->ctx, p->FragProg.Base.Parameters);
 
    for (i = 0; i < p->nr_params; i++) {
       GLint reg = p->param[i].reg;
-      COPY_4V( p->constant[reg], p->param[i].values );
+      COPY_4V(p->constant[reg], p->param[i].values);
    }
-   
+
    p->params_uptodate = 1;
-   p->on_hardware = 0;		/* overkill */
+   p->on_hardware = 0;          /* overkill */
 }
 
 
-static void i915BindProgram( GLcontext *ctx,
-			    GLenum target, 
-			    struct gl_program *prog )
+static void
+i915BindProgram(GLcontext * ctx, GLenum target, struct gl_program *prog)
 {
    if (target == GL_FRAGMENT_PROGRAM_ARB) {
-      i915ContextPtr i915 = I915_CONTEXT(ctx);
-      struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
+      struct i915_context *i915 = I915_CONTEXT(ctx);
+      struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
+
+      if (i915->current_program == p)
+         return;
 
-      if (i915->current_program == p) 
-	 return;
-      
       if (i915->current_program) {
-	 i915->current_program->on_hardware = 0;
-	 i915->current_program->params_uptodate = 0;
+         i915->current_program->on_hardware = 0;
+         i915->current_program->params_uptodate = 0;
       }
-      
+
       i915->current_program = p;
 
       assert(p->on_hardware == 0);
       assert(p->params_uptodate == 0);
 
-      /* Hack: make sure fog is correctly enabled according to this
-       * fragment program's fog options.
-       */
-      ctx->Driver.Enable( ctx, GL_FRAGMENT_PROGRAM_ARB, 
-			  ctx->FragmentProgram.Enabled );
    }
 }
 
-static struct gl_program *i915NewProgram( GLcontext *ctx,
-				      GLenum target, 
-				      GLuint id )
+static struct gl_program *
+i915NewProgram(GLcontext * ctx, GLenum target, GLuint id)
 {
    switch (target) {
    case GL_VERTEX_PROGRAM_ARB:
-      return _mesa_init_vertex_program( ctx, CALLOC_STRUCT(gl_vertex_program),
-					target, id );
-
-   case GL_FRAGMENT_PROGRAM_ARB: {
-      struct i915_fragment_program *prog = CALLOC_STRUCT(i915_fragment_program);
-      if (prog) {
-	 i915_init_program( I915_CONTEXT(ctx), prog );
-
-	 return _mesa_init_fragment_program( ctx, &prog->FragProg,
-					     target, id );
+      return _mesa_init_vertex_program(ctx, CALLOC_STRUCT(gl_vertex_program),
+                                       target, id);
+
+   case GL_FRAGMENT_PROGRAM_ARB:{
+         struct i915_fragment_program *prog =
+            CALLOC_STRUCT(i915_fragment_program);
+         if (prog) {
+            i915_init_program(I915_CONTEXT(ctx), prog);
+
+            return _mesa_init_fragment_program(ctx, &prog->FragProg,
+                                               target, id);
+         }
+         else
+            return NULL;
       }
-      else
-	 return NULL;
-   }
 
    default:
       /* Just fallback:
        */
-      return _mesa_new_program( ctx, target, id );
+      return _mesa_new_program(ctx, target, id);
    }
 }
 
-static void i915DeleteProgram( GLcontext *ctx,
-			      struct gl_program *prog )
+static void
+i915DeleteProgram(GLcontext * ctx, struct gl_program *prog)
 {
    if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
-      i915ContextPtr i915 = I915_CONTEXT(ctx);
-      struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
-      
-      if (i915->current_program == p) 
-	 i915->current_program = 0;
+      struct i915_context *i915 = I915_CONTEXT(ctx);
+      struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
+
+      if (i915->current_program == p)
+         i915->current_program = 0;
    }
 
-   _mesa_delete_program( ctx, prog );
+   _mesa_delete_program(ctx, prog);
 }
 
 
-static GLboolean i915IsProgramNative( GLcontext *ctx,
-				     GLenum target, 
-				     struct gl_program *prog )
+static GLboolean
+i915IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
 {
    if (target == GL_FRAGMENT_PROGRAM_ARB) {
-      struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
+      struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
 
       if (!p->translated)
-	 translate_program( p );
-      
+         translate_program(p);
+
       return !p->error;
    }
    else
       return GL_TRUE;
 }
 
-static void i915ProgramStringNotify( GLcontext *ctx,
-				    GLenum target,
-				    struct gl_program *prog )
+static void
+i915ProgramStringNotify(GLcontext * ctx,
+                        GLenum target, struct gl_program *prog)
 {
    if (target == GL_FRAGMENT_PROGRAM_ARB) {
-      struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
+      struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
       p->translated = 0;
 
       /* Hack: make sure fog is correctly enabled according to this
        * fragment program's fog options.
        */
-      ctx->Driver.Enable( ctx, GL_FRAGMENT_PROGRAM_ARB, 
-			  ctx->FragmentProgram.Enabled );
-
       if (p->FragProg.FogOption) {
          /* add extra instructions to do fog, then turn off FogOption field */
          _mesa_append_fog_code(ctx, &p->FragProg);
@@ -949,28 +1054,28 @@ static void i915ProgramStringNotify( GLcontext *ctx,
 }
 
 
-void i915ValidateFragmentProgram( i915ContextPtr i915 )
+void
+i915ValidateFragmentProgram(struct i915_context *i915)
 {
    GLcontext *ctx = &i915->intel.ctx;
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   struct intel_context *intel = intel_context(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *VB = &tnl->vb;
 
-   struct i915_fragment_program *p = 
-      (struct i915_fragment_program *)ctx->FragmentProgram._Current;
+   struct i915_fragment_program *p =
+      (struct i915_fragment_program *) ctx->FragmentProgram._Current;
 
    const GLuint inputsRead = p->FragProg.Base.InputsRead;
    GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
    GLuint s2 = S2_TEXCOORD_NONE;
    int i, offset = 0;
 
-   if (i915->current_program != p) 
-   {
+   if (i915->current_program != p) {
       if (i915->current_program) {
-	 i915->current_program->on_hardware = 0;
-	 i915->current_program->params_uptodate = 0;
+         i915->current_program->on_hardware = 0;
+         i915->current_program->params_uptodate = 0;
       }
-      
+
       i915->current_program = p;
    }
 
@@ -979,8 +1084,8 @@ void i915ValidateFragmentProgram( i915ContextPtr i915 )
     */
    VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
 
-   if (!p->translated) 
-      translate_program( p );
+   if (!p->translated)
+      translate_program(p);
 
    intel->vertex_attr_count = 0;
    intel->wpos_offset = 0;
@@ -989,31 +1094,31 @@ void i915ValidateFragmentProgram( i915ContextPtr i915 )
    intel->specoffset = 0;
 
    if (inputsRead & FRAG_BITS_TEX_ANY) {
-      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16 );
+      EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16);
    }
    else {
-      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12 );
+      EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12);
    }
 
    if (inputsRead & FRAG_BIT_COL0) {
       intel->coloroffset = offset / 4;
-      EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4 );
+      EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4);
    }
-   
-   if ((inputsRead & (FRAG_BIT_COL1|FRAG_BIT_FOGC)) || 
+
+   if ((inputsRead & (FRAG_BIT_COL1 | FRAG_BIT_FOGC)) ||
        i915->vertex_fog != I915_FOG_NONE) {
 
       if (inputsRead & FRAG_BIT_COL1) {
-	 intel->specoffset = offset / 4;
-	 EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, S4_VFMT_SPEC_FOG, 3 );
+         intel->specoffset = offset / 4;
+         EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, S4_VFMT_SPEC_FOG, 3);
       }
       else
-	 EMIT_PAD(3);
+         EMIT_PAD(3);
 
-      if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE) 
-	 EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, S4_VFMT_SPEC_FOG, 1 );
+      if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE)
+         EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1UB_1F, S4_VFMT_SPEC_FOG, 1);
       else
-	 EMIT_PAD( 1 );
+         EMIT_PAD(1);
    }
 
    /* XXX this was disabled, but enabling this code helped fix the Glean
@@ -1021,63 +1126,66 @@ void i915ValidateFragmentProgram( i915ContextPtr i915 )
     */
 #if 1
    if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE) {
-      EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4 );
+      EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4);
    }
 #endif
 
    for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
       if (inputsRead & FRAG_BIT_TEX(i)) {
-	 int sz = VB->TexCoordPtr[i]->size;
-	    
-	 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
-	 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
+         int sz = VB->TexCoordPtr[i]->size;
 
-	 EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_SZ(sz), 0, sz * 4 );
+         s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
+         s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
+
+         EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4);
       }
       else if (i == p->wpos_tex) {
-	
-	 /* If WPOS is required, duplicate the XYZ position data in an
-	  * unused texture coordinate:
-	  */
-	 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
-	 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(3));
 
-	 intel->wpos_offset = offset;
-	 intel->wpos_size = 3 * sizeof(GLuint);
+         /* If WPOS is required, duplicate the XYZ position data in an
+          * unused texture coordinate:
+          */
+         s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
+         s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(3));
+
+         intel->wpos_offset = offset;
+         intel->wpos_size = 3 * sizeof(GLuint);
 
-	 EMIT_PAD( intel->wpos_size );
-      }   
+         EMIT_PAD(intel->wpos_size);
+      }
    }
 
    if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] ||
        s4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
-    
-      I915_STATECHANGE( i915, I915_UPLOAD_CTX );
+      int k;
+
+      I915_STATECHANGE(i915, I915_UPLOAD_CTX);
 
       /* Must do this *after* statechange, so as not to affect
        * buffered vertices reliant on the old state:
        */
-      intel->vertex_size = _tnl_install_attrs( &intel->ctx, 
-					       intel->vertex_attrs, 
-					       intel->vertex_attr_count,
-					       intel->ViewportMatrix.m, 0 ); 
+      intel->vertex_size = _tnl_install_attrs(&intel->ctx,
+                                              intel->vertex_attrs,
+                                              intel->vertex_attr_count,
+                                              intel->ViewportMatrix.m, 0);
 
       intel->vertex_size >>= 2;
 
       i915->state.Ctx[I915_CTXREG_LIS2] = s2;
       i915->state.Ctx[I915_CTXREG_LIS4] = s4;
 
-      assert(intel->vtbl.check_vertex_size( intel, intel->vertex_size ));
+      k = intel->vtbl.check_vertex_size(intel, intel->vertex_size);
+      assert(k);
    }
 
-   if (!p->params_uptodate) 
-      track_params( p );
+   if (!p->params_uptodate)
+      track_params(p);
 
-   if (!p->on_hardware) 
-      i915_upload_program( i915, p );
+   if (!p->on_hardware)
+      i915_upload_program(i915, p);
 }
 
-void i915InitFragProgFuncs( struct dd_function_table *functions )
+void
+i915InitFragProgFuncs(struct dd_function_table *functions)
 {
    functions->BindProgram = i915BindProgram;
    functions->NewProgram = i915NewProgram;
diff --git a/i915/i915_metaops.c b/i915/i915_metaops.c
index 1be7ac4..73aa634 100644
--- a/i915/i915_metaops.c
+++ b/i915/i915_metaops.c
@@ -34,128 +34,169 @@
 #include "intel_screen.h"
 #include "intel_batchbuffer.h"
 #include "intel_ioctl.h"
-#include "intel_rotate.h"
+#include "intel_regions.h"
 
 #include "i915_context.h"
 #include "i915_reg.h"
 
-/* A large amount of state doesn't need to be uploaded.
+/* We touch almost everything:
  */
-#define ACTIVE (I915_UPLOAD_INVARIENT |         \
-		I915_UPLOAD_PROGRAM | 		\
-		I915_UPLOAD_STIPPLE |		\
+#define ACTIVE (I915_UPLOAD_INVARIENT | 	\
 		I915_UPLOAD_CTX |		\
 		I915_UPLOAD_BUFFERS |		\
-		I915_UPLOAD_TEX(0))		
+		I915_UPLOAD_STIPPLE |		\
+                I915_UPLOAD_PROGRAM | 		\
+                I915_UPLOAD_FOG | 		\
+		I915_UPLOAD_TEX(0))
 
-#define SET_STATE( i915, STATE )			\
+#define SET_STATE( i915, STATE )		\
 do {						\
    i915->current->emitted &= ~ACTIVE;		\
-   i915->current = &i915->STATE;			\
+   i915->current = &i915->STATE;		\
    i915->current->emitted &= ~ACTIVE;		\
 } while (0)
 
-/* Operations where the 3D engine is decoupled temporarily from the
- * current GL state and used for other purposes than simply rendering
- * incoming triangles.
- */
-static void set_initial_state( i915ContextPtr i915 )
-{
-   memcpy(&i915->meta, &i915->initial, sizeof(i915->meta) );
-   i915->meta.active = ACTIVE;
-   i915->meta.emitted = 0;
-}
 
-
-static void set_no_depth_stencil_write( i915ContextPtr i915 )
+static void
+meta_no_stencil_write(struct intel_context *intel)
 {
+   struct i915_context *i915 = i915_context(&intel->ctx);
+
    /* ctx->Driver.Enable( ctx, GL_STENCIL_TEST, GL_FALSE )
     */
-   i915->meta.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_TEST_ENABLE | 
-				       S5_STENCIL_WRITE_ENABLE);
+   i915->meta.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_TEST_ENABLE |
+                                         S5_STENCIL_WRITE_ENABLE);
+
+   i915->meta.emitted &= ~I915_UPLOAD_CTX;
+}
+
+static void
+meta_no_depth_write(struct intel_context *intel)
+{
+   struct i915_context *i915 = i915_context(&intel->ctx);
 
    /* ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_FALSE )
     */
    i915->meta.Ctx[I915_CTXREG_LIS6] &= ~(S6_DEPTH_TEST_ENABLE |
-				       S6_DEPTH_WRITE_ENABLE);
+                                         S6_DEPTH_WRITE_ENABLE);
 
    i915->meta.emitted &= ~I915_UPLOAD_CTX;
 }
 
+static void
+meta_depth_replace(struct intel_context *intel)
+{
+   struct i915_context *i915 = i915_context(&intel->ctx);
+
+   /* ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_TRUE )
+    * ctx->Driver.DepthMask( ctx, GL_TRUE )
+    */
+   i915->meta.Ctx[I915_CTXREG_LIS6] |= (S6_DEPTH_TEST_ENABLE |
+                                        S6_DEPTH_WRITE_ENABLE);
+
+   /* ctx->Driver.DepthFunc( ctx, GL_ALWAYS )
+    */
+   i915->meta.Ctx[I915_CTXREG_LIS6] &= ~S6_DEPTH_TEST_FUNC_MASK;
+   i915->meta.Ctx[I915_CTXREG_LIS6] |=
+      COMPAREFUNC_ALWAYS << S6_DEPTH_TEST_FUNC_SHIFT;
+
+   i915->meta.emitted &= ~I915_UPLOAD_CTX;
+}
+
+
 /* Set stencil unit to replace always with the reference value.
  */
-static void set_stencil_replace( i915ContextPtr i915,
-				 GLuint s_mask,
-				 GLuint s_clear)
+static void
+meta_stencil_replace(struct intel_context *intel,
+                     GLuint s_mask, GLuint s_clear)
 {
+   struct i915_context *i915 = i915_context(&intel->ctx);
    GLuint op = STENCILOP_REPLACE;
    GLuint func = COMPAREFUNC_ALWAYS;
 
    /* ctx->Driver.Enable( ctx, GL_STENCIL_TEST, GL_TRUE )
     */
-   i915->meta.Ctx[I915_CTXREG_LIS5] |= (S5_STENCIL_TEST_ENABLE | 
-				      S5_STENCIL_WRITE_ENABLE);
-
-
-   /* ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_FALSE )
-    */
-   i915->meta.Ctx[I915_CTXREG_LIS6] &= ~(S6_DEPTH_TEST_ENABLE |
-				       S6_DEPTH_WRITE_ENABLE);
-
+   i915->meta.Ctx[I915_CTXREG_LIS5] |= (S5_STENCIL_TEST_ENABLE |
+                                        S5_STENCIL_WRITE_ENABLE);
 
    /* ctx->Driver.StencilMask( ctx, s_mask )
     */
    i915->meta.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_WRITE_MASK;
 
    i915->meta.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_WRITE_MASK |
-					STENCIL_WRITE_MASK(s_mask));
-
+                                          STENCIL_WRITE_MASK(s_mask));
 
    /* ctx->Driver.StencilOp( ctx, GL_REPLACE, GL_REPLACE, GL_REPLACE )
     */
    i915->meta.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_FAIL_MASK |
-				       S5_STENCIL_PASS_Z_FAIL_MASK |
-				       S5_STENCIL_PASS_Z_PASS_MASK);
+                                         S5_STENCIL_PASS_Z_FAIL_MASK |
+                                         S5_STENCIL_PASS_Z_PASS_MASK);
 
    i915->meta.Ctx[I915_CTXREG_LIS5] |= ((op << S5_STENCIL_FAIL_SHIFT) |
-				      (op << S5_STENCIL_PASS_Z_FAIL_SHIFT) |
-				      (op << S5_STENCIL_PASS_Z_PASS_SHIFT));
+                                        (op << S5_STENCIL_PASS_Z_FAIL_SHIFT) |
+                                        (op << S5_STENCIL_PASS_Z_PASS_SHIFT));
 
 
    /* ctx->Driver.StencilFunc( ctx, GL_ALWAYS, s_ref, ~0 )
     */
    i915->meta.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_TEST_MASK;
    i915->meta.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_TEST_MASK |
-					STENCIL_TEST_MASK(0xff));
+                                          STENCIL_TEST_MASK(0xff));
 
    i915->meta.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_REF_MASK |
-				       S5_STENCIL_TEST_FUNC_MASK);
-					
-   i915->meta.Ctx[I915_CTXREG_LIS5] |= ((s_clear << S5_STENCIL_REF_SHIFT) |  
-				      (func << S5_STENCIL_TEST_FUNC_SHIFT)); 
+                                         S5_STENCIL_TEST_FUNC_MASK);
+
+   i915->meta.Ctx[I915_CTXREG_LIS5] |= ((s_clear << S5_STENCIL_REF_SHIFT) |
+                                        (func << S5_STENCIL_TEST_FUNC_SHIFT));
 
 
    i915->meta.emitted &= ~I915_UPLOAD_CTX;
 }
 
 
-static void set_color_mask( i915ContextPtr i915, GLboolean state )
+static void
+meta_color_mask(struct intel_context *intel, GLboolean state)
 {
+   struct i915_context *i915 = i915_context(&intel->ctx);
    const GLuint mask = (S5_WRITEDISABLE_RED |
-			S5_WRITEDISABLE_GREEN |
-			S5_WRITEDISABLE_BLUE |
-			S5_WRITEDISABLE_ALPHA);
+                        S5_WRITEDISABLE_GREEN |
+                        S5_WRITEDISABLE_BLUE | S5_WRITEDISABLE_ALPHA);
 
    /* Copy colormask state from "regular" hw context.
     */
    if (state) {
       i915->meta.Ctx[I915_CTXREG_LIS5] &= ~mask;
-      i915->meta.Ctx[I915_CTXREG_LIS5] |= 
-	 (i915->state.Ctx[I915_CTXREG_LIS5] & mask);
+      i915->meta.Ctx[I915_CTXREG_LIS5] |=
+         (i915->state.Ctx[I915_CTXREG_LIS5] & mask);
    }
-   else 
+   else
       i915->meta.Ctx[I915_CTXREG_LIS5] |= mask;
-      
+
+   i915->meta.emitted &= ~I915_UPLOAD_CTX;
+}
+
+
+
+static void
+meta_import_pixel_state(struct intel_context *intel)
+{
+   struct i915_context *i915 = i915_context(&intel->ctx);
+   memcpy(i915->meta.Fog, i915->state.Fog, I915_FOG_SETUP_SIZE * 4);
+
+   i915->meta.Ctx[I915_CTXREG_LIS5] = i915->state.Ctx[I915_CTXREG_LIS5];
+   i915->meta.Ctx[I915_CTXREG_LIS6] = i915->state.Ctx[I915_CTXREG_LIS6];
+   i915->meta.Ctx[I915_CTXREG_STATE4] = i915->state.Ctx[I915_CTXREG_STATE4];
+   i915->meta.Ctx[I915_CTXREG_BLENDCOLOR1] =
+      i915->state.Ctx[I915_CTXREG_BLENDCOLOR1];
+   i915->meta.Ctx[I915_CTXREG_IAB] = i915->state.Ctx[I915_CTXREG_IAB];
+
+   i915->meta.Buffer[I915_DESTREG_SENABLE] =
+      i915->state.Buffer[I915_DESTREG_SENABLE];
+   i915->meta.Buffer[I915_DESTREG_SR1] = i915->state.Buffer[I915_DESTREG_SR1];
+   i915->meta.Buffer[I915_DESTREG_SR2] = i915->state.Buffer[I915_DESTREG_SR2];
+
+   i915->meta.emitted &= ~I915_UPLOAD_FOG;
+   i915->meta.emitted &= ~I915_UPLOAD_BUFFERS;
    i915->meta.emitted &= ~I915_UPLOAD_CTX;
 }
 
@@ -212,69 +253,64 @@ static void set_color_mask( i915ContextPtr i915, GLboolean state )
 
 
 
-static void set_no_texture( i915ContextPtr i915 )
+static void
+meta_no_texture(struct intel_context *intel)
 {
+   struct i915_context *i915 = i915_context(&intel->ctx);
+
    static const GLuint prog[] = {
       _3DSTATE_PIXEL_SHADER_PROGRAM,
 
       /* Declare incoming diffuse color:
        */
-      (D0_DCL |
-       D0_DECL_REG( REG_T_DIFFUSE ) |
-       D0_CHANNEL_ALL),
+      (D0_DCL | D0_DECL_REG(REG_T_DIFFUSE) | D0_CHANNEL_ALL),
       D1_MBZ,
       D2_MBZ,
 
       /* output-color = mov(t_diffuse)
        */
       (A0_MOV |
-       A0_DEST_REG( REG_OC ) |
-       A0_DEST_CHANNEL_ALL |
-       A0_SRC0_REG( REG_T_DIFFUSE )),
+       A0_DEST_REG(REG_OC) |
+       A0_DEST_CHANNEL_ALL | A0_SRC0_REG(REG_T_DIFFUSE)),
       (A1_SRC0_XYZW),
       0,
    };
 
-   
-   memcpy( i915->meta.Program, prog, sizeof(prog) );
+
+   memcpy(i915->meta.Program, prog, sizeof(prog));
    i915->meta.ProgramSize = sizeof(prog) / sizeof(*prog);
    i915->meta.Program[0] |= i915->meta.ProgramSize - 2;
    i915->meta.emitted &= ~I915_UPLOAD_PROGRAM;
 }
 
-
-static void enable_texture_blend_replace( i915ContextPtr i915 )
+static void
+meta_texture_blend_replace(struct intel_context *intel)
 {
+   struct i915_context *i915 = i915_context(&intel->ctx);
+
    static const GLuint prog[] = {
       _3DSTATE_PIXEL_SHADER_PROGRAM,
 
       /* Declare the sampler:
        */
-      (D0_DCL |
-       D0_DECL_REG( REG_S(0) ) |
-       D0_SAMPLE_TYPE_2D |
-       D0_CHANNEL_NONE),
+      (D0_DCL | D0_DECL_REG(REG_S(0)) | D0_SAMPLE_TYPE_2D | D0_CHANNEL_NONE),
       D1_MBZ,
       D2_MBZ,
 
       /* Declare the interpolated texture coordinate:
        */
-      (D0_DCL |
-       D0_DECL_REG( REG_T_TEX(0) ) |
-       D0_CHANNEL_ALL),
+      (D0_DCL | D0_DECL_REG(REG_T_TEX(0)) | D0_CHANNEL_ALL),
       D1_MBZ,
       D2_MBZ,
 
       /* output-color = texld(sample0, texcoord0) 
        */
-      (T0_TEXLD | 
-       T0_DEST_REG( REG_OC ) |
-       T0_SAMPLER( 0 )),
+      (T0_TEXLD | T0_DEST_REG(REG_OC) | T0_SAMPLER(0)),
       T1_ADDRESS_REG(REG_TYPE_T, 0),
       T2_MBZ
    };
 
-   memcpy( i915->meta.Program, prog, sizeof(prog) );
+   memcpy(i915->meta.Program, prog, sizeof(prog));
    i915->meta.ProgramSize = sizeof(prog) / sizeof(*prog);
    i915->meta.Program[0] |= i915->meta.ProgramSize - 2;
    i915->meta.emitted &= ~I915_UPLOAD_PROGRAM;
@@ -287,425 +323,186 @@ static void enable_texture_blend_replace( i915ContextPtr i915 )
 /* Set up an arbitary piece of memory as a rectangular texture
  * (including the front or back buffer).
  */
-static void set_tex_rect_source( i915ContextPtr i915,
-				 GLuint offset,
-				 GLuint width, 
-				 GLuint height,
-				 GLuint pitch, /* in bytes! */
-				 GLuint textureFormat )
+static GLboolean
+meta_tex_rect_source(struct intel_context *intel,
+                     dri_bo *buffer,
+                     GLuint offset,
+                     GLuint pitch, GLuint height, GLenum format, GLenum type)
 {
+   struct i915_context *i915 = i915_context(&intel->ctx);
    GLuint unit = 0;
    GLint numLevels = 1;
    GLuint *state = i915->meta.Tex[0];
+   GLuint textureFormat;
+   GLuint cpp;
 
-#if 0
-   printf("TexRect source offset 0x%x  pitch %d\n", offset, pitch);
-#endif
+   /* A full implementation of this would do the upload through
+    * glTexImage2d, and get all the conversion operations at that
+    * point.  We are restricted, but still at least have access to the
+    * fragment program swizzle.
+    */
+   switch (format) {
+   case GL_BGRA:
+      switch (type) {
+      case GL_UNSIGNED_INT_8_8_8_8_REV:
+      case GL_UNSIGNED_BYTE:
+         textureFormat = (MAPSURF_32BIT | MT_32BIT_ARGB8888);
+         cpp = 4;
+         break;
+      default:
+         return GL_FALSE;
+      }
+      break;
+   case GL_RGBA:
+      switch (type) {
+      case GL_UNSIGNED_INT_8_8_8_8_REV:
+      case GL_UNSIGNED_BYTE:
+         textureFormat = (MAPSURF_32BIT | MT_32BIT_ABGR8888);
+         cpp = 4;
+         break;
+      default:
+         return GL_FALSE;
+      }
+      break;
+   case GL_BGR:
+      switch (type) {
+      case GL_UNSIGNED_SHORT_5_6_5_REV:
+         textureFormat = (MAPSURF_16BIT | MT_16BIT_RGB565);
+         cpp = 2;
+         break;
+      default:
+         return GL_FALSE;
+      }
+      break;
+   case GL_RGB:
+      switch (type) {
+      case GL_UNSIGNED_SHORT_5_6_5:
+         textureFormat = (MAPSURF_16BIT | MT_16BIT_RGB565);
+         cpp = 2;
+         break;
+      default:
+         return GL_FALSE;
+      }
+      break;
 
-/*    fprintf(stderr, "%s: offset: %x w: %d h: %d pitch %d format %x\n", */
-/* 	   __FUNCTION__, offset, width, height, pitch, textureFormat ); */
+   default:
+      return GL_FALSE;
+   }
+
+
+   if ((pitch * cpp) & 3) {
+      _mesa_printf("%s: texture is not dword pitch\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+/*    intel_region_release(&i915->meta.tex_region[0]); */
+/*    intel_region_reference(&i915->meta.tex_region[0], region); */
+   i915->meta.tex_buffer[0] = buffer;
+   i915->meta.tex_offset[0] = offset;
 
-   state[I915_TEXREG_MS2] = offset;
    state[I915_TEXREG_MS3] = (((height - 1) << MS3_HEIGHT_SHIFT) |
-			    ((width - 1) << MS3_WIDTH_SHIFT) |
-			    textureFormat |
-			    MS3_USE_FENCE_REGS);
+                             ((pitch - 1) << MS3_WIDTH_SHIFT) |
+                             textureFormat | MS3_USE_FENCE_REGS);
 
-   state[I915_TEXREG_MS4] = ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) | 
-			    ((((numLevels-1) * 4)) << MS4_MAX_LOD_SHIFT));
+   state[I915_TEXREG_MS4] = (((((pitch * cpp) / 4) - 1) << MS4_PITCH_SHIFT) |
+                             MS4_CUBE_FACE_ENA_MASK |
+                             ((((numLevels - 1) * 4)) << MS4_MAX_LOD_SHIFT));
 
    state[I915_TEXREG_SS2] = ((FILTER_NEAREST << SS2_MIN_FILTER_SHIFT) |
-			    (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) |
-			    (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT));
+                             (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) |
+                             (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT));
+
    state[I915_TEXREG_SS3] = ((TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT) |
-			    (TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT) |
-			    (TEXCOORDMODE_WRAP << SS3_TCZ_ADDR_MODE_SHIFT) |
-			    (unit<<SS3_TEXTUREMAP_INDEX_SHIFT));
+                             (TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT) |
+                             (TEXCOORDMODE_WRAP << SS3_TCZ_ADDR_MODE_SHIFT) |
+                             (unit << SS3_TEXTUREMAP_INDEX_SHIFT));
 
    state[I915_TEXREG_SS4] = 0;
 
    i915->meta.emitted &= ~I915_UPLOAD_TEX(0);
+   return GL_TRUE;
 }
 
 
-/* Select between front and back draw buffers.
+/**
+ * Set the color and depth drawing region for meta ops.
  */
-static void set_draw_region( i915ContextPtr i915, const intelRegion *region )
+static void
+meta_draw_region(struct intel_context *intel,
+                 struct intel_region *color_region,
+                 struct intel_region *depth_region)
 {
-#if 0
-   printf("Rotate into region: offset 0x%x  pitch %d\n",
-          region->offset, region->pitch);
-#endif
-   i915->meta.Buffer[I915_DESTREG_CBUFADDR1] =
-      (BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(region->pitch) | BUF_3D_USE_FENCE);
-   i915->meta.Buffer[I915_DESTREG_CBUFADDR2] = region->offset;
-   i915->meta.emitted &= ~I915_UPLOAD_BUFFERS;
+   struct i915_context *i915 = i915_context(&intel->ctx);
+   i915_state_draw_region(intel, &i915->meta, color_region, depth_region);
 }
 
 
-#if 0
-/* Setup an arbitary draw format, useful for targeting texture or agp
- * memory.
- */
-static void set_draw_format( i915ContextPtr i915,
-			     GLuint format,
-			     GLuint depth_format)
+static void
+set_vertex_format(struct intel_context *intel)
 {
-   i915->meta.Buffer[I915_DESTREG_DV1] = (DSTORG_HORT_BIAS(0x8) | /* .5 */
-					DSTORG_VERT_BIAS(0x8) | /* .5 */
-					format |
-					LOD_PRECLAMP_OGL |
-					TEX_DEFAULT_COLOR_OGL |
-					depth_format);
-
-   i915->meta.emitted &= ~I915_UPLOAD_BUFFERS;
-/*    fprintf(stderr, "%s: DV1: %x\n",  */
-/* 	   __FUNCTION__, i915->meta.Buffer[I915_DESTREG_DV1]); */
-}
-#endif
+   struct i915_context *i915 = i915_context(&intel->ctx);
 
-static void set_vertex_format( i915ContextPtr i915 )
-{
-   i915->meta.Ctx[I915_CTXREG_LIS2] = 
+   i915->meta.Ctx[I915_CTXREG_LIS2] =
       (S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
-       S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) | 
+       S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
        S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
        S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
        S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
-       S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) | 
+       S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
        S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
        S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
 
    i915->meta.Ctx[I915_CTXREG_LIS4] &= ~S4_VFMT_MASK;
 
-   i915->meta.Ctx[I915_CTXREG_LIS4] |= 
-      (S4_VFMT_COLOR |
-       S4_VFMT_SPEC_FOG |
-       S4_VFMT_XYZW);
+   i915->meta.Ctx[I915_CTXREG_LIS4] |= (S4_VFMT_COLOR | S4_VFMT_XYZ);
 
    i915->meta.emitted &= ~I915_UPLOAD_CTX;
-
 }
 
 
-static void draw_quad(i915ContextPtr i915, 
-		      GLfloat x0, GLfloat x1,
-		      GLfloat y0, GLfloat y1, 
-		      GLubyte red, GLubyte green,
-		      GLubyte blue, GLubyte alpha,
-		      GLfloat s0, GLfloat s1,
-		      GLfloat t0, GLfloat t1 )
-{
-   GLuint vertex_size = 8;
-   GLuint *vb = intelEmitInlinePrimitiveLocked( &i915->intel, 
-						PRIM3D_TRIFAN, 
-						4 * vertex_size,
-						vertex_size );
-   intelVertex tmp;
-   int i;
-
-   if (0)
-      fprintf(stderr, "%s: %f,%f-%f,%f 0x%x%x%x%x %f,%f-%f,%f\n",
-	      __FUNCTION__,
-	      x0,y0,x1,y1,red,green,blue,alpha,s0,t0,s1,t1);
-
-
-   /* initial vertex, left bottom */
-   tmp.v.x = x0;
-   tmp.v.y = y0;
-   tmp.v.z = 1.0;
-   tmp.v.w = 1.0; 
-   tmp.v.color.red = red;
-   tmp.v.color.green = green;
-   tmp.v.color.blue = blue;
-   tmp.v.color.alpha = alpha;
-   tmp.v.specular.red = 0;
-   tmp.v.specular.green = 0;
-   tmp.v.specular.blue = 0;
-   tmp.v.specular.alpha = 0;
-   tmp.v.u0 = s0;
-   tmp.v.v0 = t0;
-
-   for (i = 0 ; i < vertex_size ; i++)
-      vb[i] = tmp.ui[i];
-
-   /* right bottom */
-   vb += vertex_size;
-   tmp.v.x = x1;
-   tmp.v.u0 = s1;
-   for (i = 0 ; i < vertex_size ; i++)
-      vb[i] = tmp.ui[i];
-
-   /* right top */
-   vb += vertex_size;
-   tmp.v.y = y1;
-   tmp.v.v0 = t1;
-   for (i = 0 ; i < vertex_size ; i++)
-      vb[i] = tmp.ui[i];
-
-   /* left top */
-   vb += vertex_size;
-   tmp.v.x = x0;
-   tmp.v.u0 = s0;
-   for (i = 0 ; i < vertex_size ; i++)
-      vb[i] = tmp.ui[i];
-}
-
 
-static void draw_poly(i915ContextPtr i915, 
-		      GLubyte red, GLubyte green, GLubyte blue, GLubyte alpha,
-                      GLuint numVerts,
-                      /*const*/ GLfloat verts[][2],
-                      /*const*/ GLfloat texcoords[][2])
+/* Operations where the 3D engine is decoupled temporarily from the
+ * current GL state and used for other purposes than simply rendering
+ * incoming triangles.
+ */
+static void
+install_meta_state(struct intel_context *intel)
 {
-   GLuint vertex_size = 8;
-   GLuint *vb = intelEmitInlinePrimitiveLocked( &i915->intel, 
-						PRIM3D_TRIFAN, 
-						numVerts * vertex_size,
-						vertex_size );
-   intelVertex tmp;
-   int i, k;
-
-   /* initial constant vertex fields */
-   tmp.v.z = 1.0;
-   tmp.v.w = 1.0; 
-   tmp.v.color.red = red;
-   tmp.v.color.green = green;
-   tmp.v.color.blue = blue;
-   tmp.v.color.alpha = alpha;
-   tmp.v.specular.red = 0;
-   tmp.v.specular.green = 0;
-   tmp.v.specular.blue = 0;
-   tmp.v.specular.alpha = 0;
-
-   for (k = 0; k < numVerts; k++) {
-      tmp.v.x = verts[k][0];
-      tmp.v.y = verts[k][1];
-      tmp.v.u0 = texcoords[k][0];
-      tmp.v.v0 = texcoords[k][1];
-
-      for (i = 0 ; i < vertex_size ; i++)
-         vb[i] = tmp.ui[i];
-
-      vb += vertex_size;
-   }
-}
+   struct i915_context *i915 = i915_context(&intel->ctx);
+   memcpy(&i915->meta, &i915->initial, sizeof(i915->meta));
+   i915->meta.active = ACTIVE;
+   i915->meta.emitted = 0;
 
+   SET_STATE(i915, meta);
+   set_vertex_format(intel);
+   meta_no_texture(intel);
+}
 
-void 
-i915ClearWithTris(intelContextPtr intel, GLbitfield buffers,
-		  GLboolean allFoo,
-		  GLint cxFoo, GLint cyFoo, GLint cwFoo, GLint chFoo)
+static void
+leave_meta_state(struct intel_context *intel)
 {
-   i915ContextPtr i915 = I915_CONTEXT( intel );
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;
-   intelScreenPrivate *screen = intel->intelScreen;
-   int x0, y0, x1, y1;
-   GLint cx, cy, cw, ch;
-   GLboolean all;
-
-   SET_STATE( i915, meta ); 
-   set_initial_state( i915 ); 
-   set_no_texture( i915 ); 
-   set_vertex_format( i915 ); 
-
-   LOCK_HARDWARE(intel);
-
-   /* get clear bounds after locking */
-   cx = intel->ctx.DrawBuffer->_Xmin;
-   cy = intel->ctx.DrawBuffer->_Ymin;
-   cw = intel->ctx.DrawBuffer->_Xmax - cx;
-   ch = intel->ctx.DrawBuffer->_Ymax - cy;
-   all = (cw == intel->ctx.DrawBuffer->Width &&
-          ch == intel->ctx.DrawBuffer->Height);
-
-   if (!all) {
-      x0 = cx;
-      y0 = cy;
-      x1 = x0 + cw;
-      y1 = y0 + ch;
-   } else {
-      x0 = 0;
-      y0 = 0;
-      x1 = x0 + dPriv->w;
-      y1 = y0 + dPriv->h;
-   }
-
-   /* Don't do any clipping to screen - these are window coordinates.
-    * The active cliprects will be applied as for any other geometry.
-    */
-
-   if (buffers & BUFFER_BIT_FRONT_LEFT) { 
-      set_no_depth_stencil_write( i915 );
-      set_color_mask( i915, GL_TRUE );
-      set_draw_region( i915, &screen->front );
-
-      draw_quad(i915, x0, x1, y0, y1,
-		intel->clear_red, intel->clear_green, 
- 		intel->clear_blue, intel->clear_alpha, 
-		0, 0, 0, 0);
-   }
-
-   if (buffers & BUFFER_BIT_BACK_LEFT) {
-      set_no_depth_stencil_write( i915 );
-      set_color_mask( i915, GL_TRUE );
-      set_draw_region( i915, &screen->back );
-
-      draw_quad(i915, x0, x1, y0, y1,
-		intel->clear_red, intel->clear_green,
-		intel->clear_blue, intel->clear_alpha,
-		0, 0, 0, 0);
-   }
-
-   if (buffers & BUFFER_BIT_STENCIL) {
-      set_stencil_replace( i915, 
-			   intel->ctx.Stencil.WriteMask[0], 
-			   intel->ctx.Stencil.Clear);
-      
-      set_color_mask( i915, GL_FALSE );
-      set_draw_region( i915, &screen->front ); /* could be either? */
-
-      draw_quad( i915, x0, x1, y0, y1, 0, 0, 0, 0, 0, 0, 0, 0 );
-   }
-
-   UNLOCK_HARDWARE(intel);
-
-   SET_STATE( i915, state );
+   struct i915_context *i915 = i915_context(&intel->ctx);
+   intel_region_release(&i915->meta.draw_region);
+   intel_region_release(&i915->meta.depth_region);
+/*    intel_region_release(&i915->meta.tex_region[0]); */
+   SET_STATE(i915, state);
 }
 
 
-/**
- * Copy the window contents named by dPriv to the rotated (or reflected)
- * color buffer.
- * srcBuf is BUFFER_BIT_FRONT_LEFT or BUFFER_BIT_BACK_LEFT to indicate the source.
- */
+
 void
-i915RotateWindow(intelContextPtr intel, __DRIdrawablePrivate *dPriv,
-                 GLuint srcBuf)
+i915InitMetaFuncs(struct i915_context *i915)
 {
-   i915ContextPtr i915 = I915_CONTEXT( intel );
-   intelScreenPrivate *screen = intel->intelScreen;
-   const GLuint cpp = screen->cpp;
-   drm_clip_rect_t fullRect;
-   GLuint textureFormat, srcOffset, srcPitch;
-   const drm_clip_rect_t *clipRects;
-   int numClipRects;
-   int i;
-
-   int xOrig, yOrig;
-   int origNumClipRects;
-   drm_clip_rect_t *origRects;
-
-   /*
-    * set up hardware state
-    */
-   intelFlush( &intel->ctx );
-
-   SET_STATE( i915, meta ); 
-   set_initial_state( i915 ); 
-   set_no_texture( i915 ); 
-   set_vertex_format( i915 ); 
-   set_no_depth_stencil_write( i915 );
-   set_color_mask( i915, GL_TRUE );
-
-   LOCK_HARDWARE(intel);
-
-   /* save current drawing origin and cliprects (restored at end) */
-   xOrig = intel->drawX;
-   yOrig = intel->drawY;
-   origNumClipRects = intel->numClipRects;
-   origRects = intel->pClipRects;
-
-   if (!intel->numClipRects)
-      goto done;
-
-   /*
-    * set drawing origin, cliprects for full-screen access to rotated screen
-    */
-   fullRect.x1 = 0;
-   fullRect.y1 = 0;
-   fullRect.x2 = screen->rotatedWidth;
-   fullRect.y2 = screen->rotatedHeight;
-   intel->drawX = 0;
-   intel->drawY = 0;
-   intel->numClipRects = 1;
-   intel->pClipRects = &fullRect;
-
-   set_draw_region( i915, &screen->rotated );
-
-   if (cpp == 4)
-      textureFormat = MAPSURF_32BIT | MT_32BIT_ARGB8888;
-   else
-      textureFormat = MAPSURF_16BIT | MT_16BIT_RGB565;
-
-   if (srcBuf == BUFFER_BIT_FRONT_LEFT) {
-      srcPitch = screen->front.pitch;   /* in bytes */
-      srcOffset = screen->front.offset; /* bytes */
-      clipRects = dPriv->pClipRects;
-      numClipRects = dPriv->numClipRects;
-   }
-   else {
-      srcPitch = screen->back.pitch;   /* in bytes */
-      srcOffset = screen->back.offset; /* bytes */
-      clipRects = dPriv->pBackClipRects;
-      numClipRects = dPriv->numBackClipRects;
-   }
-
-   /* set the whole screen up as a texture to avoid alignment issues */
-   set_tex_rect_source(i915,
-                       srcOffset,
-                       screen->width,
-		       screen->height,
-                       srcPitch,
-                       textureFormat);
-
-   enable_texture_blend_replace(i915);
-
-   /*
-    * loop over the source window's cliprects
-    */
-   for (i = 0; i < numClipRects; i++) {
-      int srcX0 = clipRects[i].x1;
-      int srcY0 = clipRects[i].y1;
-      int srcX1 = clipRects[i].x2;
-      int srcY1 = clipRects[i].y2;
-      GLfloat verts[4][2], tex[4][2];
-      int j;
-
-      /* build vertices for four corners of clip rect */
-      verts[0][0] = srcX0;  verts[0][1] = srcY0;
-      verts[1][0] = srcX1;  verts[1][1] = srcY0;
-      verts[2][0] = srcX1;  verts[2][1] = srcY1;
-      verts[3][0] = srcX0;  verts[3][1] = srcY1;
-
-      /* .. and texcoords */
-      tex[0][0] = srcX0;  tex[0][1] = srcY0;
-      tex[1][0] = srcX1;  tex[1][1] = srcY0;
-      tex[2][0] = srcX1;  tex[2][1] = srcY1;
-      tex[3][0] = srcX0;  tex[3][1] = srcY1;
-
-      /* transform coords to rotated screen coords */
-      for (j = 0; j < 4; j++) {
-         matrix23TransformCoordf(&screen->rotMatrix,
-                                 &verts[j][0], &verts[j][1]);
-      }
-
-      /* draw polygon to map source image to dest region */
-      draw_poly(i915, 255, 255, 255, 255, 4, verts, tex);
-
-   } /* cliprect loop */
-
-   intelFlushBatchLocked( intel, GL_FALSE, GL_FALSE, GL_FALSE );
-
- done:
-   /* restore original drawing origin and cliprects */
-   intel->drawX = xOrig;
-   intel->drawY = yOrig;
-   intel->numClipRects = origNumClipRects;
-   intel->pClipRects = origRects;
-
-   UNLOCK_HARDWARE(intel);
-
-   SET_STATE( i915, state );
+   i915->intel.vtbl.install_meta_state = install_meta_state;
+   i915->intel.vtbl.leave_meta_state = leave_meta_state;
+   i915->intel.vtbl.meta_no_depth_write = meta_no_depth_write;
+   i915->intel.vtbl.meta_no_stencil_write = meta_no_stencil_write;
+   i915->intel.vtbl.meta_stencil_replace = meta_stencil_replace;
+   i915->intel.vtbl.meta_depth_replace = meta_depth_replace;
+   i915->intel.vtbl.meta_color_mask = meta_color_mask;
+   i915->intel.vtbl.meta_no_texture = meta_no_texture;
+   i915->intel.vtbl.meta_texture_blend_replace = meta_texture_blend_replace;
+   i915->intel.vtbl.meta_tex_rect_source = meta_tex_rect_source;
+   i915->intel.vtbl.meta_draw_region = meta_draw_region;
+   i915->intel.vtbl.meta_import_pixel_state = meta_import_pixel_state;
 }
-
diff --git a/i915/i915_program.c b/i915/i915_program.c
index 6849112..f79d00d 100644
--- a/i915/i915_program.c
+++ b/i915/i915_program.c
@@ -72,58 +72,62 @@
 
 #define I915_CONSTFLAG_PARAM 0x1f
 
-GLuint i915_get_temp( struct i915_fragment_program *p )
+GLuint
+i915_get_temp(struct i915_fragment_program *p)
 {
-   int bit = ffs( ~p->temp_flag );
+   int bit = ffs(~p->temp_flag);
    if (!bit) {
       fprintf(stderr, "%s: out of temporaries\n", __FILE__);
       exit(1);
    }
 
-   p->temp_flag |= 1<<(bit-1);
-   return UREG(REG_TYPE_R, (bit-1));
+   p->temp_flag |= 1 << (bit - 1);
+   return UREG(REG_TYPE_R, (bit - 1));
 }
 
 
-GLuint i915_get_utemp( struct i915_fragment_program *p )
+GLuint
+i915_get_utemp(struct i915_fragment_program * p)
 {
-   int bit = ffs( ~p->utemp_flag );
+   int bit = ffs(~p->utemp_flag);
    if (!bit) {
       fprintf(stderr, "%s: out of temporaries\n", __FILE__);
       exit(1);
    }
 
-   p->utemp_flag |= 1<<(bit-1);
-   return UREG(REG_TYPE_U, (bit-1));
+   p->utemp_flag |= 1 << (bit - 1);
+   return UREG(REG_TYPE_U, (bit - 1));
 }
 
-void i915_release_utemps( struct i915_fragment_program *p )
+void
+i915_release_utemps(struct i915_fragment_program *p)
 {
    p->utemp_flag = ~0x7;
 }
 
 
-GLuint i915_emit_decl( struct i915_fragment_program *p,
-		      GLuint type, GLuint nr, GLuint d0_flags )
+GLuint
+i915_emit_decl(struct i915_fragment_program *p,
+               GLuint type, GLuint nr, GLuint d0_flags)
 {
    GLuint reg = UREG(type, nr);
 
    if (type == REG_TYPE_T) {
-      if (p->decl_t & (1<<nr))
-	 return reg;
+      if (p->decl_t & (1 << nr))
+         return reg;
 
-      p->decl_t |= (1<<nr);
+      p->decl_t |= (1 << nr);
    }
    else if (type == REG_TYPE_S) {
-      if (p->decl_s & (1<<nr))
-	 return reg;
+      if (p->decl_s & (1 << nr))
+         return reg;
 
-      p->decl_s |= (1<<nr);
+      p->decl_s |= (1 << nr);
    }
-   else 
+   else
       return reg;
 
-   *(p->decl++) = (D0_DCL | D0_DEST( reg ) | d0_flags);
+   *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags);
    *(p->decl++) = D1_MBZ;
    *(p->decl++) = D2_MBZ;
 
@@ -131,24 +135,26 @@ GLuint i915_emit_decl( struct i915_fragment_program *p,
    return reg;
 }
 
-GLuint i915_emit_arith( struct i915_fragment_program *p,
-		       GLuint op,
-		       GLuint dest,
-		       GLuint mask,
-		       GLuint saturate,
-		       GLuint src0,
-		       GLuint src1,
-		       GLuint src2 )
+GLuint
+i915_emit_arith(struct i915_fragment_program * p,
+                GLuint op,
+                GLuint dest,
+                GLuint mask,
+                GLuint saturate, GLuint src0, GLuint src1, GLuint src2)
 {
    GLuint c[3];
    GLuint nr_const = 0;
 
    assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);
-   assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)));
+   dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest));
+   assert(dest);
 
-   if (GET_UREG_TYPE(src0) == REG_TYPE_CONST) c[nr_const++] = 0;
-   if (GET_UREG_TYPE(src1) == REG_TYPE_CONST) c[nr_const++] = 1;
-   if (GET_UREG_TYPE(src2) == REG_TYPE_CONST) c[nr_const++] = 2;
+   if (GET_UREG_TYPE(src0) == REG_TYPE_CONST)
+      c[nr_const++] = 0;
+   if (GET_UREG_TYPE(src1) == REG_TYPE_CONST)
+      c[nr_const++] = 1;
+   if (GET_UREG_TYPE(src2) == REG_TYPE_CONST)
+      c[nr_const++] = 2;
 
    /* Recursively call this function to MOV additional const values
     * into temporary registers.  Use utemp registers for this -
@@ -164,57 +170,67 @@ GLuint i915_emit_arith( struct i915_fragment_program *p,
       old_utemp_flag = p->utemp_flag;
 
       first = GET_UREG_NR(s[c[0]]);
-      for (i = 1 ; i < nr_const ; i++) {
-	 if (GET_UREG_NR(s[c[i]]) != first) {
-	    GLuint tmp = i915_get_utemp(p);
-
-	    i915_emit_arith( p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0,
-			    s[c[i]], 0, 0 );
-	    s[c[i]] = tmp;
-	 }
+      for (i = 1; i < nr_const; i++) {
+         if (GET_UREG_NR(s[c[i]]) != first) {
+            GLuint tmp = i915_get_utemp(p);
+
+            i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0,
+                            s[c[i]], 0, 0);
+            s[c[i]] = tmp;
+         }
       }
 
       src0 = s[0];
       src1 = s[1];
       src2 = s[2];
-      p->utemp_flag = old_utemp_flag; /* restore */
+      p->utemp_flag = old_utemp_flag;   /* restore */
    }
 
-   *(p->csr++) = (op | 
-		  A0_DEST( dest ) |
-		  mask | 
-		  saturate |
-		  A0_SRC0( src0 ));
-   *(p->csr++) = (A1_SRC0( src0 ) |
-		  A1_SRC1( src1 ));
-   *(p->csr++) = (A2_SRC1( src1 ) |
-		  A2_SRC2( src2 ));
+   *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0));
+   *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1));
+   *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2));
 
    p->nr_alu_insn++;
    return dest;
 }
 
+static GLuint get_free_rreg (struct i915_fragment_program *p, 
+                             GLuint live_regs)
+{
+    int bit = ffs(~live_regs);
+    if (!bit) {
+        i915_program_error(p, "Can't find free R reg");
+        return UREG_BAD;
+    }
+    return UREG(REG_TYPE_R, bit - 1);
+}
+
 GLuint i915_emit_texld( struct i915_fragment_program *p,
+			GLuint live_regs,               
 			GLuint dest,
 			GLuint destmask,
 			GLuint sampler,
 			GLuint coord,
 			GLuint op )
 {
-   if (coord != UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord))) {
-      /* No real way to work around this in the general case - need to
-       * allocate and declare a new temporary register (a utemp won't
-       * do).  Will fallback for now.
-       */
-      i915_program_error(p, "Can't (yet) swizzle TEX arguments");
-      return 0;
-   }
-
-   /* Don't worry about saturate as we only support  
+    if (coord != UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord))) {
+        /* With the help of the "needed registers" table created earlier, pick
+         * a register we can MOV the swizzled TC to (since TEX doesn't support
+         * swizzled sources) */
+        GLuint swizCoord = get_free_rreg(p, live_regs);
+        if (swizCoord == UREG_BAD) 
+            return 0;
+
+        i915_emit_arith( p, A0_MOV, swizCoord, A0_DEST_CHANNEL_ALL, 0, coord, 0, 0 );
+        coord = swizCoord;
+    }
+
+   /* Don't worry about saturate as we only support texture formats
+    * that are always in the 0..1 range.
     */
    if (destmask != A0_DEST_CHANNEL_ALL) {
       GLuint tmp = i915_get_utemp(p);
-      i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, op );
+      i915_emit_texld( p, 0, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, op );
       i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 );
       return dest;
    }
@@ -239,24 +255,28 @@ GLuint i915_emit_texld( struct i915_fragment_program *p,
 }
 
 
-GLuint i915_emit_const1f( struct i915_fragment_program *p, GLfloat c0 )
+GLuint
+i915_emit_const1f(struct i915_fragment_program * p, GLfloat c0)
 {
    GLint reg, idx;
 
-   if (c0 == 0.0) return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO);
-   if (c0 == 1.0) return swizzle(UREG(REG_TYPE_R, 0), ONE,  ONE,  ONE,  ONE );
+   if (c0 == 0.0)
+      return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO);
+   if (c0 == 1.0)
+      return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE);
 
    for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
       if (p->constant_flags[reg] == I915_CONSTFLAG_PARAM)
-	 continue;
+         continue;
       for (idx = 0; idx < 4; idx++) {
-	 if (!(p->constant_flags[reg] & (1<<idx)) ||
-	     p->constant[reg][idx] == c0) {
-	    p->constant[reg][idx] = c0;
-	    p->constant_flags[reg] |= 1<<idx;
-	    if (reg+1 > p->nr_constants) p->nr_constants = reg+1;
-	    return swizzle(UREG(REG_TYPE_CONST, reg),idx,ZERO,ZERO,ONE);
-	 }
+         if (!(p->constant_flags[reg] & (1 << idx)) ||
+             p->constant[reg][idx] == c0) {
+            p->constant[reg][idx] = c0;
+            p->constant_flags[reg] |= 1 << idx;
+            if (reg + 1 > p->nr_constants)
+               p->nr_constants = reg + 1;
+            return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE);
+         }
       }
    }
 
@@ -265,29 +285,35 @@ GLuint i915_emit_const1f( struct i915_fragment_program *p, GLfloat c0 )
    return 0;
 }
 
-GLuint i915_emit_const2f( struct i915_fragment_program *p, 
-			 GLfloat c0, GLfloat c1 )
+GLuint
+i915_emit_const2f(struct i915_fragment_program * p, GLfloat c0, GLfloat c1)
 {
    GLint reg, idx;
 
-   if (c0 == 0.0) return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W);
-   if (c0 == 1.0) return swizzle(i915_emit_const1f(p, c1), ONE,  X, Z, W); 
+   if (c0 == 0.0)
+      return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W);
+   if (c0 == 1.0)
+      return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W);
 
-   if (c1 == 0.0) return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W);
-   if (c1 == 1.0) return swizzle(i915_emit_const1f(p, c0), X, ONE,  Z, W);
+   if (c1 == 0.0)
+      return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W);
+   if (c1 == 1.0)
+      return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W);
 
    for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
       if (p->constant_flags[reg] == 0xf ||
-	  p->constant_flags[reg] == I915_CONSTFLAG_PARAM)
-	 continue;
+          p->constant_flags[reg] == I915_CONSTFLAG_PARAM)
+         continue;
       for (idx = 0; idx < 3; idx++) {
-	 if (!(p->constant_flags[reg] & (3<<idx))) {
-	    p->constant[reg][idx] = c0;
-	    p->constant[reg][idx+1] = c1;
-	    p->constant_flags[reg] |= 3<<idx;
-	    if (reg+1 > p->nr_constants) p->nr_constants = reg+1;
-	    return swizzle(UREG(REG_TYPE_CONST, reg),idx,idx+1,ZERO,ONE);
-	 }
+         if (!(p->constant_flags[reg] & (3 << idx))) {
+            p->constant[reg][idx] = c0;
+            p->constant[reg][idx + 1] = c1;
+            p->constant_flags[reg] |= 3 << idx;
+            if (reg + 1 > p->nr_constants)
+               p->nr_constants = reg + 1;
+            return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO,
+                           ONE);
+         }
       }
    }
 
@@ -298,27 +324,28 @@ GLuint i915_emit_const2f( struct i915_fragment_program *p,
 
 
 
-GLuint i915_emit_const4f( struct i915_fragment_program *p, 
-			 GLfloat c0, GLfloat c1, GLfloat c2, GLfloat c3 )
+GLuint
+i915_emit_const4f(struct i915_fragment_program * p,
+                  GLfloat c0, GLfloat c1, GLfloat c2, GLfloat c3)
 {
    GLint reg;
 
    for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
       if (p->constant_flags[reg] == 0xf &&
-	  p->constant[reg][0] == c0 &&
-	  p->constant[reg][1] == c1 &&
-	  p->constant[reg][2] == c2 &&
-	  p->constant[reg][3] == c3) {
-	 return UREG(REG_TYPE_CONST, reg);
+          p->constant[reg][0] == c0 &&
+          p->constant[reg][1] == c1 &&
+          p->constant[reg][2] == c2 && p->constant[reg][3] == c3) {
+         return UREG(REG_TYPE_CONST, reg);
       }
       else if (p->constant_flags[reg] == 0) {
-	 p->constant[reg][0] = c0;
-	 p->constant[reg][1] = c1;
-	 p->constant[reg][2] = c2;
-	 p->constant[reg][3] = c3;
-	 p->constant_flags[reg] = 0xf;
-	 if (reg+1 > p->nr_constants) p->nr_constants = reg+1;
-	 return UREG(REG_TYPE_CONST, reg);
+         p->constant[reg][0] = c0;
+         p->constant[reg][1] = c1;
+         p->constant[reg][2] = c2;
+         p->constant[reg][3] = c3;
+         p->constant_flags[reg] = 0xf;
+         if (reg + 1 > p->nr_constants)
+            p->nr_constants = reg + 1;
+         return UREG(REG_TYPE_CONST, reg);
       }
    }
 
@@ -328,34 +355,36 @@ GLuint i915_emit_const4f( struct i915_fragment_program *p,
 }
 
 
-GLuint i915_emit_const4fv( struct i915_fragment_program *p, const GLfloat *c )
+GLuint
+i915_emit_const4fv(struct i915_fragment_program * p, const GLfloat * c)
 {
-   return i915_emit_const4f( p, c[0], c[1], c[2], c[3] );
+   return i915_emit_const4f(p, c[0], c[1], c[2], c[3]);
 }
 
 
-GLuint i915_emit_param4fv( struct i915_fragment_program *p, 
-			  const GLfloat *values )
+GLuint
+i915_emit_param4fv(struct i915_fragment_program * p, const GLfloat * values)
 {
    GLint reg, i;
 
    for (i = 0; i < p->nr_params; i++) {
       if (p->param[i].values == values)
-	 return UREG(REG_TYPE_CONST, p->param[i].reg);
+         return UREG(REG_TYPE_CONST, p->param[i].reg);
    }
 
 
    for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
       if (p->constant_flags[reg] == 0) {
-	 p->constant_flags[reg] = I915_CONSTFLAG_PARAM;
-	 i = p->nr_params++;
+         p->constant_flags[reg] = I915_CONSTFLAG_PARAM;
+         i = p->nr_params++;
 
-	 p->param[i].values = values;
-	 p->param[i].reg = reg;
-	 p->params_uptodate = 0;
+         p->param[i].values = values;
+         p->param[i].reg = reg;
+         p->params_uptodate = 0;
 
-	 if (reg+1 > p->nr_constants) p->nr_constants = reg+1;
-	 return UREG(REG_TYPE_CONST, reg);
+         if (reg + 1 > p->nr_constants)
+            p->nr_constants = reg + 1;
+         return UREG(REG_TYPE_CONST, reg);
       }
    }
 
@@ -366,30 +395,31 @@ GLuint i915_emit_param4fv( struct i915_fragment_program *p,
 
 
 
-
-void i915_program_error( struct i915_fragment_program *p, const char *msg )
+void
+i915_program_error(struct i915_fragment_program *p, const char *msg)
 {
    _mesa_problem(NULL, "i915_program_error: %s", msg);
    p->error = 1;
 }
 
-void i915_init_program( i915ContextPtr i915, struct i915_fragment_program *p )
+
+void
+i915_init_program(struct i915_context *i915, struct i915_fragment_program *p)
 {
    GLcontext *ctx = &i915->intel.ctx;
-   TNLcontext *tnl = TNL_CONTEXT( ctx );
-   
+
    p->translated = 0;
    p->params_uptodate = 0;
    p->on_hardware = 0;
    p->error = 0;
 
-   p->nr_tex_indirect = 1;	/* correct? */
+   p->nr_tex_indirect = 1;      /* correct? */
    p->nr_tex_insn = 0;
    p->nr_alu_insn = 0;
    p->nr_decl_insn = 0;
 
-   p->ctx = ctx;  
-   memset( p->constant_flags, 0, sizeof(p->constant_flags) );
+   p->ctx = ctx;
+   memset(p->constant_flags, 0, sizeof(p->constant_flags));
 
    p->nr_constants = 0;
    p->csr = p->program;
@@ -402,21 +432,17 @@ void i915_init_program( i915ContextPtr i915, struct i915_fragment_program *p )
    p->depth_written = 0;
    p->nr_params = 0;
 
-   p->src_texture = UREG_BAD;
-   p->src_previous = UREG(REG_TYPE_T, T_DIFFUSE);
-   p->last_tex_stage = 0;
-   p->VB = &tnl->vb;
-
    *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;
 }
 
 
-void i915_fini_program( struct i915_fragment_program *p )
+void
+i915_fini_program(struct i915_fragment_program *p)
 {
    GLuint program_size = p->csr - p->program;
    GLuint decl_size = p->decl - p->declarations;
-   
-   if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) 
+
+   if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT)
       i915_program_error(p, "Exceeded max nr indirect texture lookups");
 
    if (p->nr_tex_insn > I915_MAX_TEX_INSN)
@@ -446,22 +472,24 @@ void i915_fini_program( struct i915_fragment_program *p )
    p->declarations[0] |= program_size + decl_size - 2;
 }
 
-void i915_upload_program( i915ContextPtr i915, struct i915_fragment_program *p )
+void
+i915_upload_program(struct i915_context *i915,
+                    struct i915_fragment_program *p)
 {
    GLuint program_size = p->csr - p->program;
    GLuint decl_size = p->decl - p->declarations;
 
-   FALLBACK( &i915->intel, I915_FALLBACK_PROGRAM, p->error );
+   FALLBACK(&i915->intel, I915_FALLBACK_PROGRAM, p->error);
 
    /* Could just go straight to the batchbuffer from here:
     */
    if (i915->state.ProgramSize != (program_size + decl_size) ||
-       memcmp(i915->state.Program + decl_size, p->program, 
-	      program_size*sizeof(int)) != 0) {
-      I915_STATECHANGE( i915, I915_UPLOAD_PROGRAM );
-      memcpy(i915->state.Program, p->declarations, decl_size*sizeof(int));
+       memcmp(i915->state.Program + decl_size, p->program,
+              program_size * sizeof(int)) != 0) {
+      I915_STATECHANGE(i915, I915_UPLOAD_PROGRAM);
+      memcpy(i915->state.Program, p->declarations, decl_size * sizeof(int));
       memcpy(i915->state.Program + decl_size, p->program,
-	     program_size*sizeof(int));
+             program_size * sizeof(int));
       i915->state.ProgramSize = decl_size + program_size;
    }
 
@@ -470,30 +498,28 @@ void i915_upload_program( i915ContextPtr i915, struct i915_fragment_program *p )
     */
    if (p->nr_constants) {
       GLuint nr = p->nr_constants;
-      
-      I915_ACTIVESTATE( i915, I915_UPLOAD_CONSTANTS, 1 );
-      I915_STATECHANGE( i915, I915_UPLOAD_CONSTANTS );
+
+      I915_ACTIVESTATE(i915, I915_UPLOAD_CONSTANTS, 1);
+      I915_STATECHANGE(i915, I915_UPLOAD_CONSTANTS);
 
       i915->state.Constant[0] = _3DSTATE_PIXEL_SHADER_CONSTANTS | ((nr) * 4);
-      i915->state.Constant[1] = (1<<(nr-1)) | ((1<<(nr-1))-1);
-      
-      memcpy(&i915->state.Constant[2], p->constant, 4*sizeof(int)*(nr));
+      i915->state.Constant[1] = (1 << (nr - 1)) | ((1 << (nr - 1)) - 1);
+
+      memcpy(&i915->state.Constant[2], p->constant, 4 * sizeof(int) * (nr));
       i915->state.ConstantSize = 2 + (nr) * 4;
 
       if (0) {
-	 GLuint i;
-	 for (i = 0; i < nr; i++) {
-	    fprintf(stderr, "const[%d]: %f %f %f %f\n", i, 
-		    p->constant[i][0],
-		    p->constant[i][1],
-		    p->constant[i][2],
-		    p->constant[i][3]);
-	 }
+         GLuint i;
+         for (i = 0; i < nr; i++) {
+            fprintf(stderr, "const[%d]: %f %f %f %f\n", i,
+                    p->constant[i][0],
+                    p->constant[i][1], p->constant[i][2], p->constant[i][3]);
+         }
       }
    }
    else {
-      I915_ACTIVESTATE( i915, I915_UPLOAD_CONSTANTS, 0 );
-   }  
+      I915_ACTIVESTATE(i915, I915_UPLOAD_CONSTANTS, 0);
+   }
 
    p->on_hardware = 1;
 }
diff --git a/i915/i915_program.h b/i915/i915_program.h
index 8891a17..14a3f08 100644
--- a/i915/i915_program.h
+++ b/i915/i915_program.h
@@ -48,11 +48,11 @@
 #define UREG_CHANNEL_W_NEGATE_SHIFT   11
 #define UREG_CHANNEL_W_SHIFT          8
 #define UREG_CHANNEL_ZERO_NEGATE_MBZ  5
-#define UREG_CHANNEL_ZERO_SHIFT       4      
+#define UREG_CHANNEL_ZERO_SHIFT       4
 #define UREG_CHANNEL_ONE_NEGATE_MBZ   1
-#define UREG_CHANNEL_ONE_SHIFT        0      
+#define UREG_CHANNEL_ONE_SHIFT        0
 
-#define UREG_BAD          0xffffffff /* not a valid ureg */
+#define UREG_BAD          0xffffffff    /* not a valid ureg */
 
 #define X    SRC_X
 #define Y    SRC_Y
@@ -84,78 +84,76 @@
 
 /* One neat thing about the UREG representation:  
  */
-static __inline int swizzle( int reg, int x, int y, int z, int w )
+static INLINE int
+swizzle(int reg, int x, int y, int z, int w)
 {
    return ((reg & ~UREG_XYZW_CHANNEL_MASK) |
-	   CHANNEL_SRC( GET_CHANNEL_SRC( reg, x ), 0 ) |
-	   CHANNEL_SRC( GET_CHANNEL_SRC( reg, y ), 1 ) |
-	   CHANNEL_SRC( GET_CHANNEL_SRC( reg, z ), 2 ) |
-	   CHANNEL_SRC( GET_CHANNEL_SRC( reg, w ), 3 ));
+           CHANNEL_SRC(GET_CHANNEL_SRC(reg, x), 0) |
+           CHANNEL_SRC(GET_CHANNEL_SRC(reg, y), 1) |
+           CHANNEL_SRC(GET_CHANNEL_SRC(reg, z), 2) |
+           CHANNEL_SRC(GET_CHANNEL_SRC(reg, w), 3));
 }
 
 /* Another neat thing about the UREG representation:  
  */
-static __inline int negate( int reg, int x, int y, int z, int w )
+static INLINE int
+negate(int reg, int x, int y, int z, int w)
 {
-   return reg ^ (((x&1)<<UREG_CHANNEL_X_NEGATE_SHIFT)|
-		 ((y&1)<<UREG_CHANNEL_Y_NEGATE_SHIFT)|
-		 ((z&1)<<UREG_CHANNEL_Z_NEGATE_SHIFT)|
-		 ((w&1)<<UREG_CHANNEL_W_NEGATE_SHIFT));
+   return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) |
+                 ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) |
+                 ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) |
+                 ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT));
 }
 
 
-extern GLuint i915_get_temp( struct i915_fragment_program *p );
-extern GLuint i915_get_utemp( struct i915_fragment_program *p );
-extern void i915_release_utemps( struct i915_fragment_program *p );
+extern GLuint i915_get_temp(struct i915_fragment_program *p);
+extern GLuint i915_get_utemp(struct i915_fragment_program *p);
+extern void i915_release_utemps(struct i915_fragment_program *p);
 
 
-extern GLuint i915_emit_texld( struct i915_fragment_program *p,
-			      GLuint dest,
-			      GLuint destmask,
-			      GLuint sampler,
-			      GLuint coord,
-			      GLuint op );
+extern GLuint i915_emit_texld(struct i915_fragment_program *p,
+                              GLuint live_regs,
+                              GLuint dest,
+                              GLuint destmask,
+                              GLuint sampler, GLuint coord, GLuint op);
 
-extern GLuint i915_emit_arith( struct i915_fragment_program *p,
-			      GLuint op,
-			      GLuint dest,
-			      GLuint mask,
-			      GLuint saturate,
-			      GLuint src0,
-			      GLuint src1,
-			      GLuint src2 );
+extern GLuint i915_emit_arith(struct i915_fragment_program *p,
+                              GLuint op,
+                              GLuint dest,
+                              GLuint mask,
+                              GLuint saturate,
+                              GLuint src0, GLuint src1, GLuint src2);
 
-extern GLuint i915_emit_decl( struct i915_fragment_program *p,
-			     GLuint type, GLuint nr, GLuint d0_flags );
+extern GLuint i915_emit_decl(struct i915_fragment_program *p,
+                             GLuint type, GLuint nr, GLuint d0_flags);
 
 
-extern GLuint i915_emit_const1f( struct i915_fragment_program *p, 
-				GLfloat c0 );
+extern GLuint i915_emit_const1f(struct i915_fragment_program *p, GLfloat c0);
 
-extern GLuint i915_emit_const2f( struct i915_fragment_program *p, 
-				GLfloat c0, GLfloat c1 );
+extern GLuint i915_emit_const2f(struct i915_fragment_program *p,
+                                GLfloat c0, GLfloat c1);
 
-extern GLuint i915_emit_const4fv( struct i915_fragment_program *p,
-				 const GLfloat *c );
+extern GLuint i915_emit_const4fv(struct i915_fragment_program *p,
+                                 const GLfloat * c);
 
-extern GLuint i915_emit_const4f( struct i915_fragment_program *p, 
-				GLfloat c0, GLfloat c1, 
-				GLfloat c2, GLfloat c3 );
+extern GLuint i915_emit_const4f(struct i915_fragment_program *p,
+                                GLfloat c0, GLfloat c1,
+                                GLfloat c2, GLfloat c3);
 
 
-extern GLuint i915_emit_param4fv( struct i915_fragment_program *p, 
-				 const GLfloat *values );
+extern GLuint i915_emit_param4fv(struct i915_fragment_program *p,
+                                 const GLfloat * values);
 
-extern void i915_program_error( struct i915_fragment_program *p,
-                                const char *msg );
+extern void i915_program_error(struct i915_fragment_program *p,
+                               const char *msg);
 
-extern void i915_init_program( i915ContextPtr i915,
-			      struct i915_fragment_program *p );
+extern void i915_init_program(struct i915_context *i915,
+                              struct i915_fragment_program *p);
 
-extern void i915_upload_program( i915ContextPtr i915, 
-				struct i915_fragment_program *p );
+extern void i915_upload_program(struct i915_context *i915,
+                                struct i915_fragment_program *p);
 
-extern void i915_fini_program( struct i915_fragment_program *p );
+extern void i915_fini_program(struct i915_fragment_program *p);
 
 
 
diff --git a/i915/i915_reg.h b/i915/i915_reg.h
index 694cd4c..b5585e7 100644
--- a/i915/i915_reg.h
+++ b/i915/i915_reg.h
@@ -34,8 +34,6 @@
 
 #define I915_SET_FIELD( var, mask, value ) (var &= ~(mask), var |= value)
 
-#define CMD_3D (0x3<<29)
-
 #define PRIM3D_INLINE		(CMD_3D | (0x1f<<24))
 #define PRIM3D_TRILIST		(0x0<<18)
 #define PRIM3D_TRISTRIP 	(0x1<<18)
@@ -112,6 +110,20 @@
 /* 3DSTATE_CHROMA_KEY */
 
 /* 3DSTATE_CLEAR_PARAMETERS, p150 */
+/* 
+ * Sets the color, depth and stencil clear values used by the
+ * CLEAR_RECT and ZONE_INIT primitive types, respectively.  These
+ * primitives override most 3d state and only take a minimal x/y
+ * vertex.  The color/z/stencil information is supplied here and
+ * therefore cannot vary per vertex.
+ */
+#define _3DSTATE_CLEAR_PARAMETERS	(CMD_3D | (0x1d<<24) | (0x9c<<16) | 5)
+/* Dword 1 */
+#define CLEARPARAM_CLEAR_RECT		(1 << 16)
+#define CLEARPARAM_ZONE_INIT		(0 << 16)
+#define CLEARPARAM_WRITE_COLOR		(1 << 2)
+#define CLEARPARAM_WRITE_DEPTH		(1 << 1)
+#define CLEARPARAM_WRITE_STENCIL	(1 << 0)
 
 /* 3DSTATE_CONSTANT_BLEND_COLOR, p153 */
 #define _3DSTATE_CONST_BLEND_COLOR_CMD	(CMD_3D | (0x1d<<24) | (0x88<<16))
@@ -424,9 +436,22 @@
 
 #define S7_DEPTH_OFFSET_CONST_MASK     ~0
 
+
+/* Helper macros for blend factors
+ */
+#define DST_BLND_FACT(f) ((f)<<S6_CBUF_DST_BLEND_FACT_SHIFT)
+#define SRC_BLND_FACT(f) ((f)<<S6_CBUF_SRC_BLEND_FACT_SHIFT)
+#define DST_ABLND_FACT(f) ((f)<<IAB_DST_FACTOR_SHIFT)
+#define SRC_ABLND_FACT(f) ((f)<<IAB_SRC_FACTOR_SHIFT)
+
+
+
+
 /* 3DSTATE_MAP_DEINTERLACER_PARAMETERS */
-/* 3DSTATE_MAP_PALETTE_LOAD_32, p206 */
 
+/* 3DSTATE_MAP_PALETTE_LOAD_32, p206 */
+#define _3DSTATE_MAP_PALETTE_LOAD_32    (CMD_3D|(0x1d<<24)|(0x8f<<16))
+/* subsequent dwords up to length (max 16) are ARGB8888 color values */
 
 /* _3DSTATE_MODES_4, p218 */
 #define _3DSTATE_MODES_4_CMD		(CMD_3D|(0x0d<<24))
@@ -435,7 +460,7 @@
 #define LOGICOP_MASK			(0xf<<18)
 #define MODE4_ENABLE_STENCIL_TEST_MASK	((1<<17)|(0xff00))
 #define ENABLE_STENCIL_TEST_MASK	(1<<17)
-#define STENCIL_TEST_MASK(x)		((x)<<8)
+#define STENCIL_TEST_MASK(x)		(((x)&0xff)<<8)
 #define MODE4_ENABLE_STENCIL_WRITE_MASK	((1<<16)|(0x00ff))
 #define ENABLE_STENCIL_WRITE_MASK	(1<<16)
 #define STENCIL_WRITE_MASK(x)		((x)&0xff)
@@ -458,7 +483,7 @@
 
 
 #define I915_MAX_TEX_INDIRECT 4
-#define I915_MAX_TEX_INSN     32     
+#define I915_MAX_TEX_INSN     32
 #define I915_MAX_ALU_INSN     64
 #define I915_MAX_DECL_INSN    27
 #define I915_MAX_TEMPORARY    16
@@ -470,33 +495,33 @@
  */
 #define _3DSTATE_PIXEL_SHADER_PROGRAM    (CMD_3D|(0x1d<<24)|(0x5<<16))
 
-#define REG_TYPE_R                 0 /* temporary regs, no need to
-				      * dcl, must be written before
-				      * read -- Preserved between
-				      * phases. 
-				      */
-#define REG_TYPE_T                 1 /* Interpolated values, must be
-				      * dcl'ed before use.
-				      *
-				      * 0..7: texture coord,
-				      * 8: diffuse spec,
-				      * 9: specular color,
-				      * 10: fog parameter in w.
-				      */
-#define REG_TYPE_CONST             2 /* Restriction: only one const
-				      * can be referenced per
-				      * instruction, though it may be
-				      * selected for multiple inputs.
-				      * Constants not initialized
-				      * default to zero.
-				      */
-#define REG_TYPE_S                 3 /* sampler */
-#define REG_TYPE_OC                4 /* output color (rgba) */
-#define REG_TYPE_OD                5 /* output depth (w), xyz are
-				      * temporaries.  If not written,
-				      * interpolated depth is used?
-				      */
-#define REG_TYPE_U                 6 /* unpreserved temporaries */
+#define REG_TYPE_R                 0    /* temporary regs, no need to
+                                         * dcl, must be written before
+                                         * read -- Preserved between
+                                         * phases. 
+                                         */
+#define REG_TYPE_T                 1    /* Interpolated values, must be
+                                         * dcl'ed before use.
+                                         *
+                                         * 0..7: texture coord,
+                                         * 8: diffuse spec,
+                                         * 9: specular color,
+                                         * 10: fog parameter in w.
+                                         */
+#define REG_TYPE_CONST             2    /* Restriction: only one const
+                                         * can be referenced per
+                                         * instruction, though it may be
+                                         * selected for multiple inputs.
+                                         * Constants not initialized
+                                         * default to zero.
+                                         */
+#define REG_TYPE_S                 3    /* sampler */
+#define REG_TYPE_OC                4    /* output color (rgba) */
+#define REG_TYPE_OD                5    /* output depth (w), xyz are
+                                         * temporaries.  If not written,
+                                         * interpolated depth is used?
+                                         */
+#define REG_TYPE_U                 6    /* unpreserved temporaries */
 #define REG_TYPE_MASK              0x7
 #define REG_NR_MASK                0xf
 
@@ -513,34 +538,34 @@
 #define T_TEX7     7
 #define T_DIFFUSE  8
 #define T_SPECULAR 9
-#define T_FOG_W    10		/* interpolated fog is in W coord */
+#define T_FOG_W    10           /* interpolated fog is in W coord */
 
 /* Arithmetic instructions */
 
 /* .replicate_swizzle == selection and replication of a particular
  * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww 
  */
-#define A0_NOP    (0x0<<24)		/* no operation */
-#define A0_ADD    (0x1<<24)		/* dst = src0 + src1 */
-#define A0_MOV    (0x2<<24)		/* dst = src0 */
-#define A0_MUL    (0x3<<24)		/* dst = src0 * src1 */
-#define A0_MAD    (0x4<<24)		/* dst = src0 * src1 + src2 */
-#define A0_DP2ADD (0x5<<24)		/* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */
-#define A0_DP3    (0x6<<24)		/* dst.xyzw = src0.xyz dot src1.xyz */
-#define A0_DP4    (0x7<<24)		/* dst.xyzw = src0.xyzw dot src1.xyzw */
-#define A0_FRC    (0x8<<24)		/* dst = src0 - floor(src0) */
-#define A0_RCP    (0x9<<24)		/* dst.xyzw = 1/(src0.replicate_swizzle) */
-#define A0_RSQ    (0xa<<24)		/* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */
-#define A0_EXP    (0xb<<24)		/* dst.xyzw = exp2(src0.replicate_swizzle) */
-#define A0_LOG    (0xc<<24)		/* dst.xyzw = log2(abs(src0.replicate_swizzle)) */
-#define A0_CMP    (0xd<<24)		/* dst = (src0 >= 0.0) ? src1 : src2 */
-#define A0_MIN    (0xe<<24)		/* dst = (src0 < src1) ? src0 : src1 */
-#define A0_MAX    (0xf<<24)		/* dst = (src0 >= src1) ? src0 : src1 */
-#define A0_FLR    (0x10<<24)		/* dst = floor(src0) */
-#define A0_MOD    (0x11<<24)		/* dst = src0 fmod 1.0 */
-#define A0_TRC    (0x12<<24)		/* dst = int(src0) */
-#define A0_SGE    (0x13<<24)		/* dst = src0 >= src1 ? 1.0 : 0.0 */
-#define A0_SLT    (0x14<<24)		/* dst = src0 < src1 ? 1.0 : 0.0 */
+#define A0_NOP    (0x0<<24)     /* no operation */
+#define A0_ADD    (0x1<<24)     /* dst = src0 + src1 */
+#define A0_MOV    (0x2<<24)     /* dst = src0 */
+#define A0_MUL    (0x3<<24)     /* dst = src0 * src1 */
+#define A0_MAD    (0x4<<24)     /* dst = src0 * src1 + src2 */
+#define A0_DP2ADD (0x5<<24)     /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */
+#define A0_DP3    (0x6<<24)     /* dst.xyzw = src0.xyz dot src1.xyz */
+#define A0_DP4    (0x7<<24)     /* dst.xyzw = src0.xyzw dot src1.xyzw */
+#define A0_FRC    (0x8<<24)     /* dst = src0 - floor(src0) */
+#define A0_RCP    (0x9<<24)     /* dst.xyzw = 1/(src0.replicate_swizzle) */
+#define A0_RSQ    (0xa<<24)     /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */
+#define A0_EXP    (0xb<<24)     /* dst.xyzw = exp2(src0.replicate_swizzle) */
+#define A0_LOG    (0xc<<24)     /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */
+#define A0_CMP    (0xd<<24)     /* dst = (src0 >= 0.0) ? src1 : src2 */
+#define A0_MIN    (0xe<<24)     /* dst = (src0 < src1) ? src0 : src1 */
+#define A0_MAX    (0xf<<24)     /* dst = (src0 >= src1) ? src0 : src1 */
+#define A0_FLR    (0x10<<24)    /* dst = floor(src0) */
+#define A0_MOD    (0x11<<24)    /* dst = src0 fmod 1.0 */
+#define A0_TRC    (0x12<<24)    /* dst = int(src0) */
+#define A0_SGE    (0x13<<24)    /* dst = src0 >= src1 ? 1.0 : 0.0 */
+#define A0_SLT    (0x14<<24)    /* dst = src0 < src1 ? 1.0 : 0.0 */
 #define A0_DEST_SATURATE                 (1<<22)
 #define A0_DEST_TYPE_SHIFT                19
 /* Allow: R, OC, OD, U */
@@ -599,23 +624,23 @@
 
 
 /* Texture instructions */
-#define T0_TEXLD     (0x15<<24)	/* Sample texture using predeclared
-				 * sampler and address, and output
-				 * filtered texel data to destination
-				 * register */
-#define T0_TEXLDP    (0x16<<24)	/* Same as texld but performs a
-				 * perspective divide of the texture
-				 * coordinate .xyz values by .w before
-				 * sampling. */
-#define T0_TEXLDB    (0x17<<24)	/* Same as texld but biases the
-				 * computed LOD by w.  Only S4.6 two's
-				 * comp is used.  This implies that a
-				 * float to fixed conversion is
-				 * done. */
-#define T0_TEXKILL   (0x18<<24)	/* Does not perform a sampling
-				 * operation.  Simply kills the pixel
-				 * if any channel of the address
-				 * register is < 0.0. */
+#define T0_TEXLD     (0x15<<24) /* Sample texture using predeclared
+                                 * sampler and address, and output
+                                 * filtered texel data to destination
+                                 * register */
+#define T0_TEXLDP    (0x16<<24) /* Same as texld but performs a
+                                 * perspective divide of the texture
+                                 * coordinate .xyz values by .w before
+                                 * sampling. */
+#define T0_TEXLDB    (0x17<<24) /* Same as texld but biases the
+                                 * computed LOD by w.  Only S4.6 two's
+                                 * comp is used.  This implies that a
+                                 * float to fixed conversion is
+                                 * done. */
+#define T0_TEXKILL   (0x18<<24) /* Does not perform a sampling
+                                 * operation.  Simply kills the pixel
+                                 * if any channel of the address
+                                 * register is < 0.0. */
 #define T0_DEST_TYPE_SHIFT                19
 /* Allow: R, OC, OD, U */
 /* Note: U (unpreserved) regs do not retain their values between
@@ -627,18 +652,18 @@
  */
 #define T0_DEST_NR_SHIFT                 14
 /* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
-#define T0_SAMPLER_NR_SHIFT              0 /* This field ignored for TEXKILL */
+#define T0_SAMPLER_NR_SHIFT              0      /* This field ignored for TEXKILL */
 #define T0_SAMPLER_NR_MASK               (0xf<<0)
 
-#define T1_ADDRESS_REG_TYPE_SHIFT        24 /* Reg to use as texture coord */
+#define T1_ADDRESS_REG_TYPE_SHIFT        24     /* Reg to use as texture coord */
 /* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */
 #define T1_ADDRESS_REG_NR_SHIFT          17
 #define T2_MBZ                           0
 
 /* Declaration instructions */
-#define D0_DCL       (0x19<<24)	/* Declare a t (interpolated attrib)
-				 * register or an s (sampler)
-				 * register. */
+#define D0_DCL       (0x19<<24) /* Declare a t (interpolated attrib)
+                                 * register or an s (sampler)
+                                 * register. */
 #define D0_SAMPLE_TYPE_SHIFT              22
 #define D0_SAMPLE_TYPE_2D                 (0x0<<22)
 #define D0_SAMPLE_TYPE_CUBE               (0x1<<22)
@@ -695,12 +720,12 @@
 #define    MAPSURF_4BIT_INDEXED		   (7<<7)
 #define MS3_MT_FORMAT_MASK         (0x7 << 3)
 #define MS3_MT_FORMAT_SHIFT        3
-#define    MT_4BIT_IDX_ARGB8888	           (7<<3) /* SURFACE_4BIT_INDEXED */
-#define    MT_8BIT_I8		           (0<<3) /* SURFACE_8BIT */
+#define    MT_4BIT_IDX_ARGB8888	           (7<<3)       /* SURFACE_4BIT_INDEXED */
+#define    MT_8BIT_I8		           (0<<3)       /* SURFACE_8BIT */
 #define    MT_8BIT_L8		           (1<<3)
 #define    MT_8BIT_A8		           (4<<3)
 #define    MT_8BIT_MONO8	           (5<<3)
-#define    MT_16BIT_RGB565 		   (0<<3) /* SURFACE_16BIT */
+#define    MT_16BIT_RGB565 		   (0<<3)       /* SURFACE_16BIT */
 #define    MT_16BIT_ARGB1555		   (1<<3)
 #define    MT_16BIT_ARGB4444		   (2<<3)
 #define    MT_16BIT_AY88		   (3<<3)
@@ -709,7 +734,7 @@
 #define    MT_16BIT_I16	                   (7<<3)
 #define    MT_16BIT_L16	                   (8<<3)
 #define    MT_16BIT_A16	                   (9<<3)
-#define    MT_32BIT_ARGB8888		   (0<<3) /* SURFACE_32BIT */
+#define    MT_32BIT_ARGB8888		   (0<<3)       /* SURFACE_32BIT */
 #define    MT_32BIT_ABGR8888		   (1<<3)
 #define    MT_32BIT_XRGB8888		   (2<<3)
 #define    MT_32BIT_XBGR8888		   (3<<3)
@@ -725,11 +750,11 @@
 #define    MT_32BIT_xI824	           (0xD<<3)
 #define    MT_32BIT_xA824	           (0xE<<3)
 #define    MT_32BIT_xL824	           (0xF<<3)
-#define    MT_422_YCRCB_SWAPY	           (0<<3) /* SURFACE_422 */
+#define    MT_422_YCRCB_SWAPY	           (0<<3)       /* SURFACE_422 */
 #define    MT_422_YCRCB_NORMAL	           (1<<3)
 #define    MT_422_YCRCB_SWAPUV	           (2<<3)
 #define    MT_422_YCRCB_SWAPUVY	           (3<<3)
-#define    MT_COMPRESS_DXT1		   (0<<3) /* SURFACE_COMPRESSED */
+#define    MT_COMPRESS_DXT1		   (0<<3)       /* SURFACE_COMPRESSED */
 #define    MT_COMPRESS_DXT2_3	           (1<<3)
 #define    MT_COMPRESS_DXT4_5	           (2<<3)
 #define    MT_COMPRESS_FXT1		   (3<<3)
@@ -751,7 +776,7 @@
 #define MS4_MIP_LAYOUT_LEGACY           (0<<8)
 #define MS4_MIP_LAYOUT_BELOW_LPT        (0<<8)
 #define MS4_MIP_LAYOUT_RIGHT_LPT        (1<<8)
-#define MS4_VOLUME_DEPTH_SHIFT          0    
+#define MS4_VOLUME_DEPTH_SHIFT          0
 #define MS4_VOLUME_DEPTH_MASK           (0xff<<0)
 
 /* p244 */
@@ -779,7 +804,7 @@
 #define   FILTER_4X4_1    	3
 #define   FILTER_4X4_2    	4
 #define   FILTER_4X4_FLAT 	5
-#define   FILTER_6X5_MONO   	6 /* XXX - check */
+#define   FILTER_6X5_MONO   	6       /* XXX - check */
 #define SS2_MIN_FILTER_SHIFT          14
 #define SS2_MIN_FILTER_MASK           (0x7<<14)
 #define SS2_LOD_BIAS_SHIFT            5
@@ -826,10 +851,14 @@
 #define ST1_ENABLE               (1<<16)
 #define ST1_MASK                 (0xffff)
 
+#define _3DSTATE_DEFAULT_Z          ((0x3<<29)|(0x1d<<24)|(0x98<<16))
+#define _3DSTATE_DEFAULT_DIFFUSE    ((0x3<<29)|(0x1d<<24)|(0x99<<16))
+#define _3DSTATE_DEFAULT_SPECULAR   ((0x3<<29)|(0x1d<<24)|(0x9a<<16))
+
 
-#define MI_FLUSH           ((0<<29)|(4<<23))
-#define FLUSH_MAP_CACHE    (1<<0)
-#define FLUSH_RENDER_CACHE (1<<1)
+#define MI_FLUSH                   ((0<<29)|(4<<23))
+#define FLUSH_MAP_CACHE            (1<<0)
+#define INHIBIT_FLUSH_RENDER_CACHE (1<<2)
 
 
 #endif
diff --git a/i915/i915_state.c b/i915/i915_state.c
index 0d5ca32..c814f8d 100644
--- a/i915/i915_state.c
+++ b/i915/i915_state.c
@@ -36,97 +36,101 @@
 
 #include "texmem.h"
 
+#include "drivers/common/driverfuncs.h"
+
+#include "intel_fbo.h"
 #include "intel_screen.h"
 #include "intel_batchbuffer.h"
 
 #include "i915_context.h"
 #include "i915_reg.h"
 
-
+#define FILE_DEBUG_FLAG DEBUG_STATE
 
 static void
-i915StencilFuncSeparate(GLcontext *ctx, GLenum face, GLenum func, GLint ref,
+i915StencilFuncSeparate(GLcontext * ctx, GLenum face, GLenum func, GLint ref,
                         GLuint mask)
 {
-   i915ContextPtr i915 = I915_CONTEXT(ctx);
-   int test = intel_translate_compare_func( func );
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+   int test = intel_translate_compare_func(func);
 
    mask = mask & 0xff;
 
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s : func: %s, ref : 0x%x, mask: 0x%x\n", __FUNCTION__,
-	      _mesa_lookup_enum_by_nr(func), ref, mask);
+   DBG("%s : func: %s, ref : 0x%x, mask: 0x%x\n", __FUNCTION__,
+       _mesa_lookup_enum_by_nr(func), ref, mask);
 
 
    I915_STATECHANGE(i915, I915_UPLOAD_CTX);
    i915->state.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_TEST_MASK;
    i915->state.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_TEST_MASK |
-					 STENCIL_TEST_MASK(mask));
+                                           STENCIL_TEST_MASK(mask));
 
    i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_REF_MASK |
-					S5_STENCIL_TEST_FUNC_MASK);
-					
-   i915->state.Ctx[I915_CTXREG_LIS5] |= ((ref << S5_STENCIL_REF_SHIFT) |  
- 				       (test << S5_STENCIL_TEST_FUNC_SHIFT)); 
+                                          S5_STENCIL_TEST_FUNC_MASK);
+
+   i915->state.Ctx[I915_CTXREG_LIS5] |= ((ref << S5_STENCIL_REF_SHIFT) |
+                                         (test <<
+                                          S5_STENCIL_TEST_FUNC_SHIFT));
 }
 
 static void
-i915StencilMaskSeparate(GLcontext *ctx, GLenum face, GLuint mask)
+i915StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask)
 {
-   i915ContextPtr i915 = I915_CONTEXT(ctx);
-
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s : mask 0x%x\n", __FUNCTION__, mask);
+   struct i915_context *i915 = I915_CONTEXT(ctx);
 
+   DBG("%s : mask 0x%x\n", __FUNCTION__, mask);
+   
    mask = mask & 0xff;
 
    I915_STATECHANGE(i915, I915_UPLOAD_CTX);
    i915->state.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_WRITE_MASK;
    i915->state.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_WRITE_MASK |
-					STENCIL_WRITE_MASK(mask));
+                                           STENCIL_WRITE_MASK(mask));
 }
 
 
 static void
-i915StencilOpSeparate(GLcontext *ctx, GLenum face, GLenum fail, GLenum zfail,
+i915StencilOpSeparate(GLcontext * ctx, GLenum face, GLenum fail, GLenum zfail,
                       GLenum zpass)
 {
-   i915ContextPtr i915 = I915_CONTEXT(ctx);
-   int fop = intel_translate_stencil_op(fail); 
-   int dfop = intel_translate_stencil_op(zfail); 
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+   int fop = intel_translate_stencil_op(fail);
+   int dfop = intel_translate_stencil_op(zfail);
    int dpop = intel_translate_stencil_op(zpass);
 
 
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s: fail : %s, zfail: %s, zpass : %s\n", __FUNCTION__,
-	      _mesa_lookup_enum_by_nr(fail),
-	      _mesa_lookup_enum_by_nr(zfail),
-	      _mesa_lookup_enum_by_nr(zpass));
+   DBG("%s: fail : %s, zfail: %s, zpass : %s\n", __FUNCTION__,
+       _mesa_lookup_enum_by_nr(fail),
+       _mesa_lookup_enum_by_nr(zfail), _mesa_lookup_enum_by_nr(zpass));
 
    I915_STATECHANGE(i915, I915_UPLOAD_CTX);
 
    i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_FAIL_MASK |
-					S5_STENCIL_PASS_Z_FAIL_MASK |
-					S5_STENCIL_PASS_Z_PASS_MASK);
+                                          S5_STENCIL_PASS_Z_FAIL_MASK |
+                                          S5_STENCIL_PASS_Z_PASS_MASK);
 
    i915->state.Ctx[I915_CTXREG_LIS5] |= ((fop << S5_STENCIL_FAIL_SHIFT) |
-				       (dfop << S5_STENCIL_PASS_Z_FAIL_SHIFT) |
-				       (dpop << S5_STENCIL_PASS_Z_PASS_SHIFT));
+                                         (dfop <<
+                                          S5_STENCIL_PASS_Z_FAIL_SHIFT) |
+                                         (dpop <<
+                                          S5_STENCIL_PASS_Z_PASS_SHIFT));
 }
 
-static void i915AlphaFunc(GLcontext *ctx, GLenum func, GLfloat ref)
+static void
+i915AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref)
 {
-   i915ContextPtr i915 = I915_CONTEXT(ctx);
-   int test = intel_translate_compare_func( func );
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+   int test = intel_translate_compare_func(func);
    GLubyte refByte;
 
    UNCLAMPED_FLOAT_TO_UBYTE(refByte, ref);
 
    I915_STATECHANGE(i915, I915_UPLOAD_CTX);
    i915->state.Ctx[I915_CTXREG_LIS6] &= ~(S6_ALPHA_TEST_FUNC_MASK |
-					S6_ALPHA_REF_MASK);
+                                          S6_ALPHA_REF_MASK);
    i915->state.Ctx[I915_CTXREG_LIS6] |= ((test << S6_ALPHA_TEST_FUNC_SHIFT) |
-				       (((GLuint)refByte) << S6_ALPHA_REF_SHIFT));
+                                         (((GLuint) refByte) <<
+                                          S6_ALPHA_REF_SHIFT));
 }
 
 /* This function makes sure that the proper enables are
@@ -135,41 +139,45 @@ static void i915AlphaFunc(GLcontext *ctx, GLenum func, GLfloat ref)
  * could change the LogicOp or Independant Alpha Blend without subsequent
  * calls to glEnable.
  */
-static void i915EvalLogicOpBlendState(GLcontext *ctx)
+static void
+i915EvalLogicOpBlendState(GLcontext * ctx)
 {
-   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   struct i915_context *i915 = I915_CONTEXT(ctx);
 
    I915_STATECHANGE(i915, I915_UPLOAD_CTX);
 
    if (RGBA_LOGICOP_ENABLED(ctx)) {
       i915->state.Ctx[I915_CTXREG_LIS5] |= S5_LOGICOP_ENABLE;
       i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_CBUF_BLEND_ENABLE;
-   } else {
+   }
+   else {
       i915->state.Ctx[I915_CTXREG_LIS5] &= ~S5_LOGICOP_ENABLE;
 
       if (ctx->Color.BlendEnabled) {
-	 i915->state.Ctx[I915_CTXREG_LIS6] |= S6_CBUF_BLEND_ENABLE;
-      } else {
-	 i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_CBUF_BLEND_ENABLE;
+         i915->state.Ctx[I915_CTXREG_LIS6] |= S6_CBUF_BLEND_ENABLE;
+      }
+      else {
+         i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_CBUF_BLEND_ENABLE;
       }
    }
 }
 
-static void i915BlendColor(GLcontext *ctx, const GLfloat color[4])
+static void
+i915BlendColor(GLcontext * ctx, const GLfloat color[4])
 {
-   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   struct i915_context *i915 = I915_CONTEXT(ctx);
    GLubyte r, g, b, a;
 
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
+   DBG("%s\n", __FUNCTION__);
+   
    UNCLAMPED_FLOAT_TO_UBYTE(r, color[RCOMP]);
    UNCLAMPED_FLOAT_TO_UBYTE(g, color[GCOMP]);
    UNCLAMPED_FLOAT_TO_UBYTE(b, color[BCOMP]);
    UNCLAMPED_FLOAT_TO_UBYTE(a, color[ACOMP]);
 
    I915_STATECHANGE(i915, I915_UPLOAD_CTX);
-   i915->state.Ctx[I915_CTXREG_BLENDCOLOR1] = (a<<24) | (r<<16) | (g<<8) | b;
+   i915->state.Ctx[I915_CTXREG_BLENDCOLOR1] =
+      (a << 24) | (r << 16) | (g << 8) | b;
 }
 
 
@@ -180,31 +188,37 @@ static void i915BlendColor(GLcontext *ctx, const GLfloat color[4])
 
 
 
-static GLuint translate_blend_equation( GLenum mode )
+static GLuint
+translate_blend_equation(GLenum mode)
 {
    switch (mode) {
-   case GL_FUNC_ADD: return BLENDFUNC_ADD; 
-   case GL_MIN: return BLENDFUNC_MIN; 
-   case GL_MAX: return BLENDFUNC_MAX; 
-   case GL_FUNC_SUBTRACT: return BLENDFUNC_SUBTRACT; 
-   case GL_FUNC_REVERSE_SUBTRACT: return BLENDFUNC_REVERSE_SUBTRACT; 
-   default: return 0;
+   case GL_FUNC_ADD:
+      return BLENDFUNC_ADD;
+   case GL_MIN:
+      return BLENDFUNC_MIN;
+   case GL_MAX:
+      return BLENDFUNC_MAX;
+   case GL_FUNC_SUBTRACT:
+      return BLENDFUNC_SUBTRACT;
+   case GL_FUNC_REVERSE_SUBTRACT:
+      return BLENDFUNC_REVERSE_SUBTRACT;
+   default:
+      return 0;
    }
 }
 
-static void i915UpdateBlendState( GLcontext *ctx )
+static void
+i915UpdateBlendState(GLcontext * ctx)
 {
-   i915ContextPtr i915 = I915_CONTEXT(ctx);
-   GLuint iab = (i915->state.Ctx[I915_CTXREG_IAB] & 
-		 ~(IAB_SRC_FACTOR_MASK |
-		   IAB_DST_FACTOR_MASK |
-		   (BLENDFUNC_MASK << IAB_FUNC_SHIFT) |
-		   IAB_ENABLE));
-
-   GLuint lis6 = (i915->state.Ctx[I915_CTXREG_LIS6] & 
-		  ~(S6_CBUF_SRC_BLEND_FACT_MASK |
-		    S6_CBUF_DST_BLEND_FACT_MASK |
-		    S6_CBUF_BLEND_FUNC_MASK));
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+   GLuint iab = (i915->state.Ctx[I915_CTXREG_IAB] &
+                 ~(IAB_SRC_FACTOR_MASK |
+                   IAB_DST_FACTOR_MASK |
+                   (BLENDFUNC_MASK << IAB_FUNC_SHIFT) | IAB_ENABLE));
+
+   GLuint lis6 = (i915->state.Ctx[I915_CTXREG_LIS6] &
+                  ~(S6_CBUF_SRC_BLEND_FACT_MASK |
+                    S6_CBUF_DST_BLEND_FACT_MASK | S6_CBUF_BLEND_FUNC_MASK));
 
    GLuint eqRGB = ctx->Color.BlendEquationRGB;
    GLuint eqA = ctx->Color.BlendEquationA;
@@ -221,15 +235,15 @@ static void i915UpdateBlendState( GLcontext *ctx )
       srcA = dstA = GL_ONE;
    }
 
-   lis6 |= SRC_BLND_FACT(intel_translate_blend_factor(srcRGB)); 
-   lis6 |= DST_BLND_FACT(intel_translate_blend_factor(dstRGB)); 
-   lis6 |= translate_blend_equation( eqRGB ) << S6_CBUF_BLEND_FUNC_SHIFT;
+   lis6 |= SRC_BLND_FACT(intel_translate_blend_factor(srcRGB));
+   lis6 |= DST_BLND_FACT(intel_translate_blend_factor(dstRGB));
+   lis6 |= translate_blend_equation(eqRGB) << S6_CBUF_BLEND_FUNC_SHIFT;
 
-   iab |= SRC_ABLND_FACT(intel_translate_blend_factor(srcA)); 
-   iab |= DST_ABLND_FACT(intel_translate_blend_factor(dstA)); 
-   iab |= translate_blend_equation( eqA ) << IAB_FUNC_SHIFT;
+   iab |= SRC_ABLND_FACT(intel_translate_blend_factor(srcA));
+   iab |= DST_ABLND_FACT(intel_translate_blend_factor(dstA));
+   iab |= translate_blend_equation(eqA) << IAB_FUNC_SHIFT;
 
-   if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) 
+   if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB)
       iab |= IAB_ENABLE;
 
    if (iab != i915->state.Ctx[I915_CTXREG_IAB] ||
@@ -244,41 +258,41 @@ static void i915UpdateBlendState( GLcontext *ctx )
 }
 
 
-static void i915BlendFuncSeparate(GLcontext *ctx, GLenum srcRGB, 
-				 GLenum dstRGB, GLenum srcA,
-				 GLenum dstA )
-{  
-   i915UpdateBlendState( ctx );
+static void
+i915BlendFuncSeparate(GLcontext * ctx, GLenum srcRGB,
+                      GLenum dstRGB, GLenum srcA, GLenum dstA)
+{
+   i915UpdateBlendState(ctx);
 }
 
 
-static void i915BlendEquationSeparate(GLcontext *ctx, GLenum eqRGB,
-				     GLenum eqA) 
+static void
+i915BlendEquationSeparate(GLcontext * ctx, GLenum eqRGB, GLenum eqA)
 {
-   i915UpdateBlendState( ctx );
+   i915UpdateBlendState(ctx);
 }
 
 
-static void i915DepthFunc(GLcontext *ctx, GLenum func)
+static void
+i915DepthFunc(GLcontext * ctx, GLenum func)
 {
-   i915ContextPtr i915 = I915_CONTEXT(ctx);
-   int test = intel_translate_compare_func( func );
-
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s\n", __FUNCTION__);
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+   int test = intel_translate_compare_func(func);
 
+   DBG("%s\n", __FUNCTION__);
+   
    I915_STATECHANGE(i915, I915_UPLOAD_CTX);
    i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_DEPTH_TEST_FUNC_MASK;
    i915->state.Ctx[I915_CTXREG_LIS6] |= test << S6_DEPTH_TEST_FUNC_SHIFT;
 }
 
-static void i915DepthMask(GLcontext *ctx, GLboolean flag)
+static void
+i915DepthMask(GLcontext * ctx, GLboolean flag)
 {
-   i915ContextPtr i915 = I915_CONTEXT(ctx);
-
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s flag (%d)\n", __FUNCTION__, flag);
+   struct i915_context *i915 = I915_CONTEXT(ctx);
 
+   DBG("%s flag (%d)\n", __FUNCTION__, flag);
+   
    I915_STATECHANGE(i915, I915_UPLOAD_CTX);
 
    if (flag && ctx->Depth.Test)
@@ -293,14 +307,15 @@ static void i915DepthMask(GLcontext *ctx, GLboolean flag)
  * The i915 supports a 4x4 stipple natively, GL wants 32x32.
  * Fortunately stipple is usually a repeating pattern.
  */
-static void i915PolygonStipple( GLcontext *ctx, const GLubyte *mask )
+static void
+i915PolygonStipple(GLcontext * ctx, const GLubyte * mask)
 {
-   i915ContextPtr i915 = I915_CONTEXT(ctx);
-   const GLubyte *m = mask;
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+   const GLubyte *m;
    GLubyte p[4];
-   int i,j,k;
+   int i, j, k;
    int active = (ctx->Polygon.StippleFlag &&
-		 i915->intel.reduced_primitive == GL_TRIANGLES);
+                 i915->intel.reduced_primitive == GL_TRIANGLES);
    GLuint newMask;
 
    if (active) {
@@ -308,23 +323,32 @@ static void i915PolygonStipple( GLcontext *ctx, const GLubyte *mask )
       i915->state.Stipple[I915_STPREG_ST1] &= ~ST1_ENABLE;
    }
 
-   p[0] = mask[12] & 0xf; p[0] |= p[0] << 4;
-   p[1] = mask[8] & 0xf; p[1] |= p[1] << 4;
-   p[2] = mask[4] & 0xf; p[2] |= p[2] << 4;
-   p[3] = mask[0] & 0xf; p[3] |= p[3] << 4;
-
-   for (k = 0 ; k < 8 ; k++)
-      for (j = 3 ; j >= 0; j--)
-	 for (i = 0 ; i < 4 ; i++, m++)
-	    if (*m != p[j]) {
-	       i915->intel.hw_stipple = 0;
-	       return;
-	    }
+   /* Use the already unpacked stipple data from the context rather than the
+    * uninterpreted mask passed in.
+    */
+   mask = (const GLubyte *)ctx->PolygonStipple;
+   m = mask;
+
+   p[0] = mask[12] & 0xf;
+   p[0] |= p[0] << 4;
+   p[1] = mask[8] & 0xf;
+   p[1] |= p[1] << 4;
+   p[2] = mask[4] & 0xf;
+   p[2] |= p[2] << 4;
+   p[3] = mask[0] & 0xf;
+   p[3] |= p[3] << 4;
+
+   for (k = 0; k < 8; k++)
+      for (j = 3; j >= 0; j--)
+         for (i = 0; i < 4; i++, m++)
+            if (*m != p[j]) {
+               i915->intel.hw_stipple = 0;
+               return;
+            }
 
    newMask = (((p[0] & 0xf) << 0) |
-	      ((p[1] & 0xf) << 4) |
-	      ((p[2] & 0xf) << 8) |
-	      ((p[3] & 0xf) << 12));
+              ((p[1] & 0xf) << 4) |
+              ((p[2] & 0xf) << 8) | ((p[3] & 0xf) << 12));
 
 
    if (newMask == 0xffff || newMask == 0x0) {
@@ -345,49 +369,54 @@ static void i915PolygonStipple( GLcontext *ctx, const GLubyte *mask )
 /* =============================================================
  * Hardware clipping
  */
-static void i915Scissor(GLcontext *ctx, GLint x, GLint y, 
-			GLsizei w, GLsizei h)
+static void
+i915Scissor(GLcontext * ctx, GLint x, GLint y, GLsizei w, GLsizei h)
 {
-   i915ContextPtr i915 = I915_CONTEXT(ctx);
-   intelScreenPrivate *screen = i915->intel.intelScreen;
+   struct i915_context *i915 = I915_CONTEXT(ctx);
    int x1, y1, x2, y2;
 
-   if (!i915->intel.driDrawable)
+   if (!ctx->DrawBuffer)
       return;
 
-   x1 = x;
-   y1 = i915->intel.driDrawable->h - (y + h);
-   x2 = x + w - 1;
-   y2 = y1 + h - 1;
-
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "[%s] x(%d) y(%d) w(%d) h(%d)\n", __FUNCTION__,
-	      x, y, w, h);
-
-   if (x1 < 0) x1 = 0;
-   if (y1 < 0) y1 = 0;
-   if (x2 < 0) x2 = 0;
-   if (y2 < 0) y2 = 0;
-
-   if (x2 >= screen->width) x2 = screen->width-1;
-   if (y2 >= screen->height) y2 = screen->height-1;
-   if (x1 >= screen->width) x1 = screen->width-1;
-   if (y1 >= screen->height) y1 = screen->height-1;
+   DBG("%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h);
 
+   if (ctx->DrawBuffer->Name == 0) {
+      x1 = x;
+      y1 = ctx->DrawBuffer->Height - (y + h);
+      x2 = x + w - 1;
+      y2 = y1 + h - 1;
+      DBG("%s %d..%d,%d..%d (inverted)\n", __FUNCTION__, x1, x2, y1, y2);
+   }
+   else {
+      /* FBO - not inverted
+       */
+      x1 = x;
+      y1 = y;
+      x2 = x + w - 1;
+      y2 = y + h - 1;
+      DBG("%s %d..%d,%d..%d (not inverted)\n", __FUNCTION__, x1, x2, y1, y2);
+   }
+   
+   x1 = CLAMP(x1, 0, ctx->DrawBuffer->Width - 1);
+   y1 = CLAMP(y1, 0, ctx->DrawBuffer->Height - 1);
+   x2 = CLAMP(x2, 0, ctx->DrawBuffer->Width - 1);
+   y2 = CLAMP(y2, 0, ctx->DrawBuffer->Height - 1);
+   
+   DBG("%s %d..%d,%d..%d (clamped)\n", __FUNCTION__, x1, x2, y1, y2);
 
    I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS);
    i915->state.Buffer[I915_DESTREG_SR1] = (y1 << 16) | (x1 & 0xffff);
    i915->state.Buffer[I915_DESTREG_SR2] = (y2 << 16) | (x2 & 0xffff);
 }
 
-static void i915LogicOp(GLcontext *ctx, GLenum opcode)
+static void
+i915LogicOp(GLcontext * ctx, GLenum opcode)
 {
-   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   struct i915_context *i915 = I915_CONTEXT(ctx);
    int tmp = intel_translate_logic_op(opcode);
 
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
+   DBG("%s\n", __FUNCTION__);
+   
    I915_STATECHANGE(i915, I915_UPLOAD_CTX);
    i915->state.Ctx[I915_CTXREG_STATE4] &= ~LOGICOP_MASK;
    i915->state.Ctx[I915_CTXREG_STATE4] |= LOGIC_OP_FUNC(tmp);
@@ -395,13 +424,14 @@ static void i915LogicOp(GLcontext *ctx, GLenum opcode)
 
 
 
-static void i915CullFaceFrontFace(GLcontext *ctx, GLenum unused)
+static void
+i915CullFaceFrontFace(GLcontext * ctx, GLenum unused)
 {
-   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   struct i915_context *i915 = I915_CONTEXT(ctx);
    GLuint mode;
 
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s\n", __FUNCTION__);
+   DBG("%s %d\n", __FUNCTION__,
+       ctx->DrawBuffer ? ctx->DrawBuffer->Name : 0);
 
    if (!ctx->Polygon.CullFlag) {
       mode = S4_CULLMODE_NONE;
@@ -409,10 +439,12 @@ static void i915CullFaceFrontFace(GLcontext *ctx, GLenum unused)
    else if (ctx->Polygon.CullFaceMode != GL_FRONT_AND_BACK) {
       mode = S4_CULLMODE_CW;
 
+      if (ctx->DrawBuffer && ctx->DrawBuffer->Name != 0)
+         mode ^= (S4_CULLMODE_CW ^ S4_CULLMODE_CCW);
       if (ctx->Polygon.CullFaceMode == GL_FRONT)
-	 mode ^= (S4_CULLMODE_CW ^ S4_CULLMODE_CCW);
+         mode ^= (S4_CULLMODE_CW ^ S4_CULLMODE_CCW);
       if (ctx->Polygon.FrontFace != GL_CCW)
-	 mode ^= (S4_CULLMODE_CW ^ S4_CULLMODE_CCW);
+         mode ^= (S4_CULLMODE_CW ^ S4_CULLMODE_CCW);
    }
    else {
       mode = S4_CULLMODE_BOTH;
@@ -423,16 +455,16 @@ static void i915CullFaceFrontFace(GLcontext *ctx, GLenum unused)
    i915->state.Ctx[I915_CTXREG_LIS4] |= mode;
 }
 
-static void i915LineWidth( GLcontext *ctx, GLfloat widthf )
+static void
+i915LineWidth(GLcontext * ctx, GLfloat widthf)
 {
-   i915ContextPtr i915 = I915_CONTEXT( ctx );
+   struct i915_context *i915 = I915_CONTEXT(ctx);
    int lis4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_LINE_WIDTH_MASK;
    int width;
 
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   width = (int)(widthf * 2);
+   DBG("%s\n", __FUNCTION__);
+   
+   width = (int) (widthf * 2);
    CLAMP_SELF(width, 1, 0xf);
    lis4 |= width << S4_LINE_WIDTH_SHIFT;
 
@@ -442,15 +474,15 @@ static void i915LineWidth( GLcontext *ctx, GLfloat widthf )
    }
 }
 
-static void i915PointSize(GLcontext *ctx, GLfloat size)
+static void
+i915PointSize(GLcontext * ctx, GLfloat size)
 {
-   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   struct i915_context *i915 = I915_CONTEXT(ctx);
    int lis4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_POINT_WIDTH_MASK;
-   GLint point_size = (int)size;
-
-   if (INTEL_DEBUG&DEBUG_DRI)
-     fprintf(stderr, "%s\n", __FUNCTION__);
+   GLint point_size = (int) size;
 
+   DBG("%s\n", __FUNCTION__);
+   
    CLAMP_SELF(point_size, 1, 255);
    lis4 |= point_size << S4_POINT_WIDTH_SHIFT;
 
@@ -465,20 +497,24 @@ static void i915PointSize(GLcontext *ctx, GLfloat size)
  * Color masks
  */
 
-static void i915ColorMask(GLcontext *ctx,
-			 GLboolean r, GLboolean g,
-			 GLboolean b, GLboolean a)
+static void
+i915ColorMask(GLcontext * ctx,
+              GLboolean r, GLboolean g, GLboolean b, GLboolean a)
 {
-   i915ContextPtr i915 = I915_CONTEXT( ctx );
+   struct i915_context *i915 = I915_CONTEXT(ctx);
    GLuint tmp = i915->state.Ctx[I915_CTXREG_LIS5] & ~S5_WRITEDISABLE_MASK;
 
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s r(%d) g(%d) b(%d) a(%d)\n", __FUNCTION__, r, g, b, a);
+   DBG("%s r(%d) g(%d) b(%d) a(%d)\n", __FUNCTION__, r, g, b,
+       a);
 
-   if (!r) tmp |= S5_WRITEDISABLE_RED;
-   if (!g) tmp |= S5_WRITEDISABLE_GREEN;
-   if (!b) tmp |= S5_WRITEDISABLE_BLUE;
-   if (!a) tmp |= S5_WRITEDISABLE_ALPHA;
+   if (!r)
+      tmp |= S5_WRITEDISABLE_RED;
+   if (!g)
+      tmp |= S5_WRITEDISABLE_GREEN;
+   if (!b)
+      tmp |= S5_WRITEDISABLE_BLUE;
+   if (!a)
+      tmp |= S5_WRITEDISABLE_ALPHA;
 
    if (tmp != i915->state.Ctx[I915_CTXREG_LIS5]) {
       I915_STATECHANGE(i915, I915_UPLOAD_CTX);
@@ -486,54 +522,55 @@ static void i915ColorMask(GLcontext *ctx,
    }
 }
 
-static void update_specular( GLcontext *ctx )
+static void
+update_specular(GLcontext * ctx)
 {
    /* A hack to trigger the rebuild of the fragment program.
     */
-   INTEL_CONTEXT(ctx)->NewGLState |= _NEW_TEXTURE;
-   I915_CONTEXT(ctx)->tex_program.translated = 0; 
+   intel_context(ctx)->NewGLState |= _NEW_TEXTURE;
 }
 
-static void i915LightModelfv(GLcontext *ctx, GLenum pname, 
-			     const GLfloat *param)
+static void
+i915LightModelfv(GLcontext * ctx, GLenum pname, const GLfloat * param)
 {
-   if (INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
+   DBG("%s\n", __FUNCTION__);
+   
    if (pname == GL_LIGHT_MODEL_COLOR_CONTROL) {
-      update_specular( ctx );
+      update_specular(ctx);
    }
 }
 
-static void i915ShadeModel(GLcontext *ctx, GLenum mode)
+static void
+i915ShadeModel(GLcontext * ctx, GLenum mode)
 {
-   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   struct i915_context *i915 = I915_CONTEXT(ctx);
    I915_STATECHANGE(i915, I915_UPLOAD_CTX);
 
    if (mode == GL_SMOOTH) {
-     i915->state.Ctx[I915_CTXREG_LIS4] &= ~(S4_FLATSHADE_ALPHA | 
-					  S4_FLATSHADE_COLOR | 
-					  S4_FLATSHADE_SPECULAR);
-   } else {
-     i915->state.Ctx[I915_CTXREG_LIS4] |= (S4_FLATSHADE_ALPHA | 
-					 S4_FLATSHADE_COLOR | 
-					 S4_FLATSHADE_SPECULAR);
+      i915->state.Ctx[I915_CTXREG_LIS4] &= ~(S4_FLATSHADE_ALPHA |
+                                             S4_FLATSHADE_COLOR |
+                                             S4_FLATSHADE_SPECULAR);
+   }
+   else {
+      i915->state.Ctx[I915_CTXREG_LIS4] |= (S4_FLATSHADE_ALPHA |
+                                            S4_FLATSHADE_COLOR |
+                                            S4_FLATSHADE_SPECULAR);
    }
 }
 
 /* =============================================================
  * Fog
  */
-void i915_update_fog( GLcontext *ctx )
+void
+i915_update_fog(GLcontext * ctx)
 {
-   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   struct i915_context *i915 = I915_CONTEXT(ctx);
    GLenum mode;
    GLboolean enabled;
    GLboolean try_pixel_fog;
-   
+
    if (ctx->FragmentProgram._Active) {
       /* Pull in static fog state from program */
-      
       mode = ctx->FragmentProgram._Current->FogOption;
       enabled = (mode != GL_NONE);
       try_pixel_fog = 0;
@@ -544,7 +581,7 @@ void i915_update_fog( GLcontext *ctx )
 #if 0
       /* XXX - DISABLED -- Need ortho fallback */
       try_pixel_fog = (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT
-                       &&ctx->Hint.Fog == GL_NICEST);
+                       && ctx->Hint.Fog == GL_NICEST);
 #else
       try_pixel_fog = 0;
 #endif
@@ -557,48 +594,49 @@ void i915_update_fog( GLcontext *ctx )
       I915_STATECHANGE(i915, I915_UPLOAD_FOG);
       i915->state.Fog[I915_FOGREG_MODE1] &= ~FMC1_FOGFUNC_MASK;
       i915->vertex_fog = I915_FOG_PIXEL;
-	 
+
       switch (mode) {
       case GL_LINEAR:
-	 if (ctx->Fog.End <= ctx->Fog.Start) {
-	    /* XXX - this won't work with fragment programs.  Need to
-	     * either fallback or append fog instructions to end of
-	     * program in the case of linear fog.
-	     */
-	    i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_VERTEX;
-	    i915->vertex_fog = I915_FOG_VERTEX;
-	 }
-	 else {
+         if (ctx->Fog.End <= ctx->Fog.Start) {
+            /* XXX - this won't work with fragment programs.  Need to
+             * either fall back or append fog instructions to end of
+             * program in the case of linear fog.
+             */
+            printf("vertex fog!\n");
+            i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_VERTEX;
+            i915->vertex_fog = I915_FOG_VERTEX;
+         }
+         else {
             GLfloat c2 = 1.0 / (ctx->Fog.End - ctx->Fog.Start);
             GLfloat c1 = ctx->Fog.End * c2;
 
-	    i915->state.Fog[I915_FOGREG_MODE1] &= ~FMC1_C1_MASK;
-	    i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_PIXEL_LINEAR;
-	    i915->state.Fog[I915_FOGREG_MODE1] |= 
-	       ((GLuint)(c1 * FMC1_C1_ONE)) & FMC1_C1_MASK;
-
-	    if (i915->state.Fog[I915_FOGREG_MODE1] & FMC1_FOGINDEX_Z) {
-	       i915->state.Fog[I915_FOGREG_MODE2]
-                  = (GLuint)(c2 * FMC2_C2_ONE);
-	    }
-	    else {
-	       fi_type fi;
-	       fi.f = c2; 
-	       i915->state.Fog[I915_FOGREG_MODE2] = fi.i; 
-	    }
-	 }
-	 break;
+            i915->state.Fog[I915_FOGREG_MODE1] &= ~FMC1_C1_MASK;
+            i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_PIXEL_LINEAR;
+            i915->state.Fog[I915_FOGREG_MODE1] |=
+               ((GLuint) (c1 * FMC1_C1_ONE)) & FMC1_C1_MASK;
+
+            if (i915->state.Fog[I915_FOGREG_MODE1] & FMC1_FOGINDEX_Z) {
+               i915->state.Fog[I915_FOGREG_MODE2]
+                  = (GLuint) (c2 * FMC2_C2_ONE);
+            }
+            else {
+               fi_type fi;
+               fi.f = c2;
+               i915->state.Fog[I915_FOGREG_MODE2] = fi.i;
+            }
+         }
+         break;
       case GL_EXP:
-	 i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_PIXEL_EXP;
-	 break;
+         i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_PIXEL_EXP;
+         break;
       case GL_EXP2:
-	 i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_PIXEL_EXP2;
-	 break;
+         i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_PIXEL_EXP2;
+         break;
       default:
-	 break;
+         break;
       }
    }
-   else /* if (i915->vertex_fog != I915_FOG_VERTEX) */ {      
+   else { /* if (i915->vertex_fog != I915_FOG_VERTEX) */
       I915_STATECHANGE(i915, I915_UPLOAD_FOG);
       i915->state.Fog[I915_FOGREG_MODE1] &= ~FMC1_FOGFUNC_MASK;
       i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_VERTEX;
@@ -620,38 +658,38 @@ void i915_update_fog( GLcontext *ctx )
 }
 
 static void
-i915Fogfv(GLcontext *ctx, GLenum pname, const GLfloat *param)
+i915Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param)
 {
-   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   struct i915_context *i915 = I915_CONTEXT(ctx);
 
    switch (pname) {
-   case GL_FOG_COORDINATE_SOURCE_EXT: 
+   case GL_FOG_COORDINATE_SOURCE_EXT:
    case GL_FOG_MODE:
    case GL_FOG_START:
-   case GL_FOG_END: 
+   case GL_FOG_END:
       break;
 
    case GL_FOG_DENSITY:
       I915_STATECHANGE(i915, I915_UPLOAD_FOG);
 
       if (i915->state.Fog[I915_FOGREG_MODE1] & FMC1_FOGINDEX_Z) {
-	 i915->state.Fog[I915_FOGREG_MODE3]
-            = (GLuint)(ctx->Fog.Density * FMC3_D_ONE);
+         i915->state.Fog[I915_FOGREG_MODE3] =
+            (GLuint) (ctx->Fog.Density * FMC3_D_ONE);
       }
       else {
-	 union { float f; int i; } fi;
-	 fi.f = ctx->Fog.Density; 
-	 i915->state.Fog[I915_FOGREG_MODE3] = fi.i; 
+         fi_type fi;
+         fi.f = ctx->Fog.Density;
+         i915->state.Fog[I915_FOGREG_MODE3] = fi.i;
       }
       break;
 
-   case GL_FOG_COLOR: 
+   case GL_FOG_COLOR:
       I915_STATECHANGE(i915, I915_UPLOAD_FOG);
-      i915->state.Fog[I915_FOGREG_COLOR] = 
-	 (_3DSTATE_FOG_COLOR_CMD | 
-	  ((GLubyte)(ctx->Fog.Color[0]*255.0F) << 16) |
-	  ((GLubyte)(ctx->Fog.Color[1]*255.0F) << 8) |
-	  ((GLubyte)(ctx->Fog.Color[2]*255.0F) << 0));
+      i915->state.Fog[I915_FOGREG_COLOR] =
+         (_3DSTATE_FOG_COLOR_CMD |
+          ((GLubyte) (ctx->Fog.Color[0] * 255.0F) << 16) |
+          ((GLubyte) (ctx->Fog.Color[1] * 255.0F) << 8) |
+          ((GLubyte) (ctx->Fog.Color[2] * 255.0F) << 0));
       break;
 
    default:
@@ -659,7 +697,8 @@ i915Fogfv(GLcontext *ctx, GLenum pname, const GLfloat *param)
    }
 }
 
-static void i915Hint(GLcontext *ctx, GLenum target, GLenum state)
+static void
+i915Hint(GLcontext * ctx, GLenum target, GLenum state)
 {
    switch (target) {
    case GL_FOG_HINT:
@@ -672,25 +711,26 @@ static void i915Hint(GLcontext *ctx, GLenum target, GLenum state)
 /* =============================================================
  */
 
-static void i915Enable(GLcontext *ctx, GLenum cap, GLboolean state)
+static void
+i915Enable(GLcontext * ctx, GLenum cap, GLboolean state)
 {
-   i915ContextPtr i915 = I915_CONTEXT(ctx);
+   struct i915_context *i915 = I915_CONTEXT(ctx);
 
-   switch(cap) {
+   switch (cap) {
    case GL_TEXTURE_2D:
       break;
 
    case GL_LIGHTING:
    case GL_COLOR_SUM:
-      update_specular( ctx );
+      update_specular(ctx);
       break;
 
    case GL_ALPHA_TEST:
       I915_STATECHANGE(i915, I915_UPLOAD_CTX);
       if (state)
-	 i915->state.Ctx[I915_CTXREG_LIS6] |= S6_ALPHA_TEST_ENABLE;
+         i915->state.Ctx[I915_CTXREG_LIS6] |= S6_ALPHA_TEST_ENABLE;
       else
-	 i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_ALPHA_TEST_ENABLE;
+         i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_ALPHA_TEST_ENABLE;
       break;
 
    case GL_BLEND:
@@ -702,8 +742,8 @@ static void i915Enable(GLcontext *ctx, GLenum cap, GLboolean state)
 
       /* Logicop doesn't seem to work at 16bpp:
        */
-      if (i915->intel.intelScreen->cpp == 2)
-	 FALLBACK( &i915->intel, I915_FALLBACK_LOGICOP, state );
+      if (ctx->Visual.rgbBits == 16)
+         FALLBACK(&i915->intel, I915_FALLBACK_LOGICOP, state);
       break;
 
    case GL_FRAGMENT_PROGRAM_ARB:
@@ -712,37 +752,37 @@ static void i915Enable(GLcontext *ctx, GLenum cap, GLboolean state)
    case GL_DITHER:
       I915_STATECHANGE(i915, I915_UPLOAD_CTX);
       if (state)
-	 i915->state.Ctx[I915_CTXREG_LIS5] |= S5_COLOR_DITHER_ENABLE;
+         i915->state.Ctx[I915_CTXREG_LIS5] |= S5_COLOR_DITHER_ENABLE;
       else
-	 i915->state.Ctx[I915_CTXREG_LIS5] &= ~S5_COLOR_DITHER_ENABLE;
+         i915->state.Ctx[I915_CTXREG_LIS5] &= ~S5_COLOR_DITHER_ENABLE;
       break;
 
    case GL_DEPTH_TEST:
       I915_STATECHANGE(i915, I915_UPLOAD_CTX);
       if (state)
-	 i915->state.Ctx[I915_CTXREG_LIS6] |= S6_DEPTH_TEST_ENABLE;
+         i915->state.Ctx[I915_CTXREG_LIS6] |= S6_DEPTH_TEST_ENABLE;
       else
-	 i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_DEPTH_TEST_ENABLE;
+         i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_DEPTH_TEST_ENABLE;
 
-      i915DepthMask( ctx, ctx->Depth.Mask );
+      i915DepthMask(ctx, ctx->Depth.Mask);
       break;
 
    case GL_SCISSOR_TEST:
       I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS);
       if (state)
-	 i915->state.Buffer[I915_DESTREG_SENABLE] = (_3DSTATE_SCISSOR_ENABLE_CMD |
-						   ENABLE_SCISSOR_RECT);
+         i915->state.Buffer[I915_DESTREG_SENABLE] =
+            (_3DSTATE_SCISSOR_ENABLE_CMD | ENABLE_SCISSOR_RECT);
       else
-	 i915->state.Buffer[I915_DESTREG_SENABLE] = (_3DSTATE_SCISSOR_ENABLE_CMD |
-						   DISABLE_SCISSOR_RECT);
+         i915->state.Buffer[I915_DESTREG_SENABLE] =
+            (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
       break;
 
    case GL_LINE_SMOOTH:
       I915_STATECHANGE(i915, I915_UPLOAD_CTX);
       if (state)
-	 i915->state.Ctx[I915_CTXREG_LIS4] |= S4_LINE_ANTIALIAS_ENABLE;
+         i915->state.Ctx[I915_CTXREG_LIS4] |= S4_LINE_ANTIALIAS_ENABLE;
       else
-	 i915->state.Ctx[I915_CTXREG_LIS4] &= ~S4_LINE_ANTIALIAS_ENABLE;
+         i915->state.Ctx[I915_CTXREG_LIS4] &= ~S4_LINE_ANTIALIAS_ENABLE;
       break;
 
    case GL_FOG:
@@ -753,16 +793,25 @@ static void i915Enable(GLcontext *ctx, GLenum cap, GLboolean state)
       break;
 
    case GL_STENCIL_TEST:
-      if (i915->intel.hw_stencil) {
-	 I915_STATECHANGE(i915, I915_UPLOAD_CTX);
-	 if (state)
-	    i915->state.Ctx[I915_CTXREG_LIS5] |= (S5_STENCIL_TEST_ENABLE |
-						S5_STENCIL_WRITE_ENABLE);
-	 else
-	    i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_TEST_ENABLE | 
-						 S5_STENCIL_WRITE_ENABLE);
-      } else {
-	 FALLBACK( &i915->intel, I915_FALLBACK_STENCIL, state );
+      {
+         GLboolean hw_stencil = GL_FALSE;
+         if (ctx->DrawBuffer) {
+            struct intel_renderbuffer *irbStencil
+               = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
+            hw_stencil = (irbStencil && irbStencil->region);
+         }
+         if (hw_stencil) {
+            I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+            if (state)
+               i915->state.Ctx[I915_CTXREG_LIS5] |= (S5_STENCIL_TEST_ENABLE |
+                                                     S5_STENCIL_WRITE_ENABLE);
+            else
+               i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_TEST_ENABLE |
+                                                      S5_STENCIL_WRITE_ENABLE);
+         }
+         else {
+            FALLBACK(&i915->intel, I915_FALLBACK_STENCIL, state);
+         }
       }
       break;
 
@@ -771,23 +820,20 @@ static void i915Enable(GLcontext *ctx, GLenum cap, GLboolean state)
        * I'll do more testing later to find out exactly which hardware
        * supports it.  Disabled for now.
        */
-      if (i915->intel.hw_stipple && 
-	  i915->intel.reduced_primitive == GL_TRIANGLES)
-      {
-	 I915_STATECHANGE(i915, I915_UPLOAD_STIPPLE);
-	 if (state)
-	    i915->state.Stipple[I915_STPREG_ST1] |= ST1_ENABLE;
-	 else
-	    i915->state.Stipple[I915_STPREG_ST1] &= ~ST1_ENABLE;
+      if (i915->intel.hw_stipple &&
+          i915->intel.reduced_primitive == GL_TRIANGLES) {
+         I915_STATECHANGE(i915, I915_UPLOAD_STIPPLE);
+         if (state)
+            i915->state.Stipple[I915_STPREG_ST1] |= ST1_ENABLE;
+         else
+            i915->state.Stipple[I915_STPREG_ST1] &= ~ST1_ENABLE;
       }
       break;
 
    case GL_POLYGON_SMOOTH:
-      FALLBACK( &i915->intel, I915_FALLBACK_POLYGON_SMOOTH, state );
       break;
 
    case GL_POINT_SMOOTH:
-      FALLBACK( &i915->intel, I915_FALLBACK_POINT_SMOOTH, state );
       break;
 
    default:
@@ -796,10 +842,9 @@ static void i915Enable(GLcontext *ctx, GLenum cap, GLboolean state)
 }
 
 
-static void i915_init_packets( i915ContextPtr i915 )
+static void
+i915_init_packets(struct i915_context *i915)
 {
-   intelScreenPrivate *screen = i915->intel.intelScreen;
-
    /* Zero all state */
    memset(&i915->state, 0, sizeof(i915->state));
 
@@ -809,39 +854,35 @@ static void i915_init_packets( i915ContextPtr i915 )
       /* Probably don't want to upload all this stuff every time one 
        * piece changes.
        */
-      i915->state.Ctx[I915_CTXREG_LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 
-				       I1_LOAD_S(2) |
-				       I1_LOAD_S(4) |
-				       I1_LOAD_S(5) |
-				       I1_LOAD_S(6) | 
-				       (3));
+      i915->state.Ctx[I915_CTXREG_LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
+                                         I1_LOAD_S(2) |
+                                         I1_LOAD_S(4) |
+                                         I1_LOAD_S(5) | I1_LOAD_S(6) | (3));
       i915->state.Ctx[I915_CTXREG_LIS2] = 0;
       i915->state.Ctx[I915_CTXREG_LIS4] = 0;
       i915->state.Ctx[I915_CTXREG_LIS5] = 0;
 
-      if (screen->cpp == 2)
-	 i915->state.Ctx[I915_CTXREG_LIS5] |= S5_COLOR_DITHER_ENABLE;
+      if (i915->intel.ctx.Visual.rgbBits == 16)
+         i915->state.Ctx[I915_CTXREG_LIS5] |= S5_COLOR_DITHER_ENABLE;
 
 
       i915->state.Ctx[I915_CTXREG_LIS6] = (S6_COLOR_WRITE_ENABLE |
-					 (2 << S6_TRISTRIP_PV_SHIFT));
+                                           (2 << S6_TRISTRIP_PV_SHIFT));
 
       i915->state.Ctx[I915_CTXREG_STATE4] = (_3DSTATE_MODES_4_CMD |
-					   ENABLE_LOGIC_OP_FUNC |
-					   LOGIC_OP_FUNC(LOGICOP_COPY) |
-					   ENABLE_STENCIL_TEST_MASK |
-					   STENCIL_TEST_MASK(0xff) |
-					   ENABLE_STENCIL_WRITE_MASK |
-					   STENCIL_WRITE_MASK(0xff));
-
-
-      i915->state.Ctx[I915_CTXREG_IAB] = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD |
-					IAB_MODIFY_ENABLE |
-					IAB_MODIFY_FUNC |
-					IAB_MODIFY_SRC_FACTOR |
-					IAB_MODIFY_DST_FACTOR);
-
-      i915->state.Ctx[I915_CTXREG_BLENDCOLOR0] = _3DSTATE_CONST_BLEND_COLOR_CMD;
+                                             ENABLE_LOGIC_OP_FUNC |
+                                             LOGIC_OP_FUNC(LOGICOP_COPY) |
+                                             ENABLE_STENCIL_TEST_MASK |
+                                             STENCIL_TEST_MASK(0xff) |
+                                             ENABLE_STENCIL_WRITE_MASK |
+                                             STENCIL_WRITE_MASK(0xff));
+
+      i915->state.Ctx[I915_CTXREG_IAB] =
+         (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | IAB_MODIFY_ENABLE |
+          IAB_MODIFY_FUNC | IAB_MODIFY_SRC_FACTOR | IAB_MODIFY_DST_FACTOR);
+
+      i915->state.Ctx[I915_CTXREG_BLENDCOLOR0] =
+         _3DSTATE_CONST_BLEND_COLOR_CMD;
       i915->state.Ctx[I915_CTXREG_BLENDCOLOR1] = 0;
 
    }
@@ -856,79 +897,50 @@ static void i915_init_packets( i915ContextPtr i915 )
       I915_STATECHANGE(i915, I915_UPLOAD_FOG);
       i915->state.Fog[I915_FOGREG_MODE0] = _3DSTATE_FOG_MODE_CMD;
       i915->state.Fog[I915_FOGREG_MODE1] = (FMC1_FOGFUNC_MODIFY_ENABLE |
-					  FMC1_FOGFUNC_VERTEX |
-					  FMC1_FOGINDEX_MODIFY_ENABLE |
-					  FMC1_FOGINDEX_W |
-					  FMC1_C1_C2_MODIFY_ENABLE |
-					  FMC1_DENSITY_MODIFY_ENABLE);
+                                            FMC1_FOGFUNC_VERTEX |
+                                            FMC1_FOGINDEX_MODIFY_ENABLE |
+                                            FMC1_FOGINDEX_W |
+                                            FMC1_C1_C2_MODIFY_ENABLE |
+                                            FMC1_DENSITY_MODIFY_ENABLE);
       i915->state.Fog[I915_FOGREG_COLOR] = _3DSTATE_FOG_COLOR_CMD;
    }
 
-
    {
-      I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS);
-      /* color buffer offset/stride */
-      i915->state.Buffer[I915_DESTREG_CBUFADDR0] = _3DSTATE_BUF_INFO_CMD;
-      i915->state.Buffer[I915_DESTREG_CBUFADDR1] = 
-	 (BUF_3D_ID_COLOR_BACK | 
-	  BUF_3D_PITCH(screen->front.pitch) |  /* pitch in bytes */
-	  BUF_3D_USE_FENCE);
-      /*i915->state.Buffer[I915_DESTREG_CBUFADDR2] is the offset */
-
-
-      /* depth/Z buffer offset/stride */
-      i915->state.Buffer[I915_DESTREG_DBUFADDR0] = _3DSTATE_BUF_INFO_CMD;
-      i915->state.Buffer[I915_DESTREG_DBUFADDR1] = 
-	 (BUF_3D_ID_DEPTH |
-	  BUF_3D_PITCH(screen->depth.pitch) |  /* pitch in bytes */
-	  BUF_3D_USE_FENCE);
-      i915->state.Buffer[I915_DESTREG_DBUFADDR2] = screen->depth.offset;
-
-
       i915->state.Buffer[I915_DESTREG_DV0] = _3DSTATE_DST_BUF_VARS_CMD;
 
-      /* color/depth pixel format */
-      switch (screen->fbFormat) {
-      case DV_PF_555:
-      case DV_PF_565:
-	 i915->state.Buffer[I915_DESTREG_DV1] = (DSTORG_HORT_BIAS(0x8) | /* .5 */
-					       DSTORG_VERT_BIAS(0x8) | /* .5 */
-					       LOD_PRECLAMP_OGL |
-					       TEX_DEFAULT_COLOR_OGL |
-					       DITHER_FULL_ALWAYS |
-					       screen->fbFormat |
-					       DEPTH_FRMT_16_FIXED);
-	 break;
-      case DV_PF_8888:
-	 i915->state.Buffer[I915_DESTREG_DV1] = (DSTORG_HORT_BIAS(0x8) | /* .5 */
-					       DSTORG_VERT_BIAS(0x8) | /* .5 */
-					       LOD_PRECLAMP_OGL |
-					       TEX_DEFAULT_COLOR_OGL |
-					       screen->fbFormat |
-					       DEPTH_FRMT_24_FIXED_8_OTHER);
-	 break;
-      }
-
       /* scissor */
-      i915->state.Buffer[I915_DESTREG_SENABLE] = (_3DSTATE_SCISSOR_ENABLE_CMD |
-						DISABLE_SCISSOR_RECT);
+      i915->state.Buffer[I915_DESTREG_SENABLE] =
+         (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
       i915->state.Buffer[I915_DESTREG_SR0] = _3DSTATE_SCISSOR_RECT_0_CMD;
       i915->state.Buffer[I915_DESTREG_SR1] = 0;
       i915->state.Buffer[I915_DESTREG_SR2] = 0;
    }
 
 
+#if 0
+   {
+      I915_STATECHANGE(i915, I915_UPLOAD_DEFAULTS);
+      i915->state.Default[I915_DEFREG_C0] = _3DSTATE_DEFAULT_DIFFUSE;
+      i915->state.Default[I915_DEFREG_C1] = 0;
+      i915->state.Default[I915_DEFREG_S0] = _3DSTATE_DEFAULT_SPECULAR;
+      i915->state.Default[I915_DEFREG_S1] = 0;
+      i915->state.Default[I915_DEFREG_Z0] = _3DSTATE_DEFAULT_Z;
+      i915->state.Default[I915_DEFREG_Z1] = 0;
+   }
+#endif
+
+
    /* These will be emitted every at the head of every buffer, unless
     * we get hardware contexts working.
     */
-   i915->state.active = (I915_UPLOAD_PROGRAM | 
-			 I915_UPLOAD_STIPPLE | 
-			 I915_UPLOAD_CTX | 
-			 I915_UPLOAD_BUFFERS | 
-			 I915_UPLOAD_INVARIENT);
+   i915->state.active = (I915_UPLOAD_PROGRAM |
+                         I915_UPLOAD_STIPPLE |
+                         I915_UPLOAD_CTX |
+                         I915_UPLOAD_BUFFERS | I915_UPLOAD_INVARIENT);
 }
 
-void i915InitStateFunctions( struct dd_function_table *functions )
+void
+i915InitStateFunctions(struct dd_function_table *functions)
 {
    functions->AlphaFunc = i915AlphaFunc;
    functions->BlendColor = i915BlendColor;
@@ -955,21 +967,15 @@ void i915InitStateFunctions( struct dd_function_table *functions )
 }
 
 
-void i915InitState( i915ContextPtr i915 )
+void
+i915InitState(struct i915_context *i915)
 {
    GLcontext *ctx = &i915->intel.ctx;
 
-   i915_init_packets( i915 );
+   i915_init_packets(i915);
 
-   intelInitState( ctx );
+   _mesa_init_driver_state(ctx);
 
-   memcpy( &i915->initial, &i915->state, sizeof(i915->state) );
+   memcpy(&i915->initial, &i915->state, sizeof(i915->state));
    i915->current = &i915->state;
 }
-
-
-
-
-
-
-
diff --git a/i915/i915_tex.c b/i915/i915_tex.c
index d9609d3..386617a 100644
--- a/i915/i915_tex.c
+++ b/i915/i915_tex.c
@@ -45,109 +45,25 @@
 
 
 
-
-
-
-/**
- * Allocate space for and load the mesa images into the texture memory block.
- * This will happen before drawing with a new texture, or drawing with a
- * texture after it was swapped out or teximaged again.
- */
-
-intelTextureObjectPtr i915AllocTexObj( struct gl_texture_object *texObj )
+static void
+i915TexEnv(GLcontext * ctx, GLenum target,
+           GLenum pname, const GLfloat * param)
 {
-   i915TextureObjectPtr t = CALLOC_STRUCT( i915_texture_object );
-   if ( !t ) 
-      return NULL;
+   struct i915_context *i915 = I915_CONTEXT(ctx);
 
-   texObj->DriverData = t;
-   t->intel.base.tObj = texObj;
-   t->intel.dirty = I915_UPLOAD_TEX_ALL;
-   make_empty_list( &t->intel.base );
-   return &t->intel;
-}
-
-
-static void i915TexParameter( GLcontext *ctx, GLenum target,
-			     struct gl_texture_object *tObj,
-			     GLenum pname, const GLfloat *params )
-{
-   i915TextureObjectPtr t = (i915TextureObjectPtr) tObj->DriverData;
- 
    switch (pname) {
-   case GL_TEXTURE_MIN_FILTER:
-   case GL_TEXTURE_MAG_FILTER:
-   case GL_TEXTURE_MAX_ANISOTROPY_EXT:
-   case GL_TEXTURE_WRAP_S:
-   case GL_TEXTURE_WRAP_T:
-   case GL_TEXTURE_WRAP_R:
-   case GL_TEXTURE_BORDER_COLOR:
-      t->intel.dirty = I915_UPLOAD_TEX_ALL;
-      break;
-
-   case GL_TEXTURE_COMPARE_MODE:
-      t->intel.dirty = I915_UPLOAD_TEX_ALL;
-      break;
-   case GL_TEXTURE_COMPARE_FUNC:
-      t->intel.dirty = I915_UPLOAD_TEX_ALL;
-      break;
-
-   case GL_TEXTURE_BASE_LEVEL:
-   case GL_TEXTURE_MAX_LEVEL:
-   case GL_TEXTURE_MIN_LOD:
-   case GL_TEXTURE_MAX_LOD:
-      /* The i915 and its successors can do a lot of this without
-       * reloading the textures.  A project for someone?
-       */
-      intelFlush( ctx );
-      driSwapOutTextureObject( (driTextureObject *) t );
-      t->intel.dirty = I915_UPLOAD_TEX_ALL;
-      break;
-
-   default:
-      return;
-   }
-}
-
-
-static void i915TexEnv( GLcontext *ctx, GLenum target, 
-			GLenum pname, const GLfloat *param )
-{
-   i915ContextPtr i915 = I915_CONTEXT( ctx );
-   GLuint unit = ctx->Texture.CurrentUnit;
-
-   switch (pname) {
-   case GL_TEXTURE_ENV_COLOR: 	/* Should be a tracked param */
-   case GL_TEXTURE_ENV_MODE:
-   case GL_COMBINE_RGB:
-   case GL_COMBINE_ALPHA:
-   case GL_SOURCE0_RGB:
-   case GL_SOURCE1_RGB:
-   case GL_SOURCE2_RGB:
-   case GL_SOURCE0_ALPHA:
-   case GL_SOURCE1_ALPHA:
-   case GL_SOURCE2_ALPHA:
-   case GL_OPERAND0_RGB:
-   case GL_OPERAND1_RGB:
-   case GL_OPERAND2_RGB:
-   case GL_OPERAND0_ALPHA:
-   case GL_OPERAND1_ALPHA:
-   case GL_OPERAND2_ALPHA:
-   case GL_RGB_SCALE:
-   case GL_ALPHA_SCALE:
-      i915->tex_program.translated = 0; 
-      break;
-
-   case GL_TEXTURE_LOD_BIAS: {
-      int b = (int) ((*param) * 16.0);
-      if (b > 255) b = 255;
-      if (b < -256) b = -256;
-      I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit));
-      i915->state.Tex[unit][I915_TEXREG_SS2] &= ~SS2_LOD_BIAS_MASK;
-      i915->state.Tex[unit][I915_TEXREG_SS2] |= 
-	 ((b << SS2_LOD_BIAS_SHIFT) & SS2_LOD_BIAS_MASK);
-      break;
-   }
+   case GL_TEXTURE_LOD_BIAS:{
+         GLuint unit = ctx->Texture.CurrentUnit;
+         GLint b = (int) ((*param) * 16.0);
+         if (b > 255)
+            b = 255;
+         if (b < -256)
+            b = -256;
+         I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit));
+         i915->lodbias_ss2[unit] =
+            ((b << SS2_LOD_BIAS_SHIFT) & SS2_LOD_BIAS_MASK);
+         break;
+      }
 
    default:
       break;
@@ -155,33 +71,10 @@ static void i915TexEnv( GLcontext *ctx, GLenum target,
 }
 
 
-static void i915BindTexture( GLcontext *ctx, GLenum target,
-			    struct gl_texture_object *texObj )
-{
-   i915TextureObjectPtr tex;
-   
-   if (!texObj->DriverData)
-      i915AllocTexObj( texObj );
-   
-   tex = (i915TextureObjectPtr)texObj->DriverData;
-
-   if (tex->lastTarget != texObj->Target) {
-      tex->intel.dirty = I915_UPLOAD_TEX_ALL;
-      tex->lastTarget = texObj->Target;
-   }
-
-   /* Need this if image format changes between bound textures.
-    * Could try and shortcircuit by checking for differences in
-    * state between incoming and outgoing textures:
-    */
-   I915_CONTEXT(ctx)->tex_program.translated = 0; 
-}
-
-
-
-void i915InitTextureFuncs( struct dd_function_table *functions )
+void
+i915InitTextureFuncs(struct dd_function_table *functions)
 {
-   functions->BindTexture = i915BindTexture;
+/*
    functions->TexEnv = i915TexEnv;
-   functions->TexParameter = i915TexParameter;
+*/
 }
diff --git a/i915/i915_tex_layout.c b/i915/i915_tex_layout.c
new file mode 100644
index 0000000..b5085f4
--- /dev/null
+++ b/i915/i915_tex_layout.c
@@ -0,0 +1,477 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/** @file i915_tex_layout.c
+ * Code to layout images in a mipmap tree for i830M-GM915 and G945 and beyond.
+ */
+
+#include "intel_mipmap_tree.h"
+#include "intel_tex_layout.h"
+#include "macros.h"
+#include "intel_context.h"
+
+#define FILE_DEBUG_FLAG DEBUG_TEXTURE
+
+/*
+ * Starting position of each cube face's first image, as {x, y} multiples
+ * of the base face dimension (the layout code multiplies by that dimension).
+ * The table is only ever read, so it is const.
+ */
+static const GLint initial_offsets[6][2] = {
+   [FACE_POS_X] = {0, 0},
+   [FACE_POS_Y] = {1, 0},
+   [FACE_POS_Z] = {1, 1},
+   [FACE_NEG_X] = {0, 2},
+   [FACE_NEG_Y] = {1, 2},
+   [FACE_NEG_Z] = {1, 3},
+};
+
+
+/*
+ * Per-face {x, y} step from one mip level's image to the next.  The layout
+ * loops scale each entry by the dimension of the level being stepped to.
+ * The table is only ever read, so it is const.
+ */
+static const GLint step_offsets[6][2] = {
+   [FACE_POS_X] = {0, 2},
+   [FACE_POS_Y] = {-1, 2},
+   [FACE_POS_Z] = {-1, 1},
+   [FACE_NEG_X] = {0, 2},
+   [FACE_NEG_Y] = {-1, 2},
+   [FACE_NEG_Z] = {-1, 1},
+};
+
+/**
+ * Cube texture map layout for i830M-GM915.
+ *
+ * Hardware layout looks like:
+ *
+ * +-------+-------+
+ * |       |       |
+ * |       |       |
+ * |       |       |
+ * |  +x   |  +y   |
+ * |       |       |
+ * |       |       |
+ * |       |       |
+ * |       |       |
+ * +---+---+-------+
+ * |   |   |       |
+ * | +x| +y|       |
+ * |   |   |       |
+ * |   |   |       |
+ * +-+-+---+  +z   |
+ * | | |   |       |
+ * +-+-+ +z|       |
+ *   | |   |       |
+ * +-+-+---+-------+
+ * |       |       |
+ * |       |       |
+ * |       |       |
+ * |  -x   |  -y   |
+ * |       |       |
+ * |       |       |
+ * |       |       |
+ * |       |       |
+ * +---+---+-------+
+ * |   |   |       |
+ * | -x| -y|       |
+ * |   |   |       |
+ * |   |   |       |
+ * +-+-+---+  -z   |
+ * | | |   |       |
+ * +-+-+ -z|       |
+ *   | |   |       |
+ *   +-+---+-------+
+ *
+ */
+static void
+i915_miptree_layout_cube(struct intel_context *intel,
+                         struct intel_mipmap_tree * mt)
+{
+   const GLuint dim = mt->width0;
+   GLuint lvlWidth = mt->width0, lvlHeight = mt->height0;
+   GLuint face;
+   GLint level;
+
+   assert(lvlWidth == lvlHeight); /* cubemap images are square */
+
+   /* The pitch spans two base faces; the tree is four base faces tall. */
+   mt->pitch = intel_miptree_pitch_align (intel, mt, dim * 2);
+   mt->total_height = dim * 4;
+
+   /* Record the size of every level; each level carries six images and
+    * starts at the origin of the tree.
+    */
+   level = mt->first_level;
+   while (level <= mt->last_level) {
+      intel_miptree_set_level_info(mt, level, 6,
+                                   0, 0,
+                                   lvlWidth, lvlHeight,
+                                   1);
+      lvlWidth >>= 1;
+      lvlHeight >>= 1;
+      level++;
+   }
+
+   /* Walk each face's mip chain, starting at the face's initial offset and
+    * stepping by the per-face step scaled by the next level's size.
+    */
+   for (face = 0; face < 6; face++) {
+      const GLint step_x = step_offsets[face][0];
+      const GLint step_y = step_offsets[face][1];
+      GLuint x = initial_offsets[face][0] * dim;
+      GLuint y = initial_offsets[face][1] * dim;
+      GLuint size = dim;
+
+      for (level = mt->first_level; level <= mt->last_level; level++) {
+         intel_miptree_set_image_offset(mt, level, face, x, y);
+
+         /* Diagnostic only: the chain ran past the 1x1 level. */
+         if (size == 0)
+            _mesa_printf("cube mipmap %d/%d (%d..%d) is 0x0\n",
+                         face, level, mt->first_level, mt->last_level);
+
+         size /= 2;
+         x += step_x * size;
+         y += step_y * size;
+      }
+   }
+}
+
+/**
+ * 3D texture layout for i830M-GM915: every depth slice holds a full stack
+ * of mip levels, and the slices are placed one below another.
+ */
+static void
+i915_miptree_layout_3d(struct intel_context *intel,
+                       struct intel_mipmap_tree * mt)
+{
+   GLuint width = mt->width0;
+   GLuint height = mt->height0;
+   GLuint depth = mt->depth0;
+   GLuint slice_rows = 0;      /* height of one slice's stack of levels */
+   GLint level;
+
+   mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0);
+
+   /* First pass: record level sizes and add up the height of one slice.
+    * XXX: hardware expects/requires 9 levels at minimum, so this runs to
+    * level 8 even when the tree has fewer levels.
+    *
+    * NOTE(review): mt->total_height is not assigned in this function until
+    * the very end, so every level is given whatever y offset the caller
+    * left in it -- confirm this is intended rather than the running
+    * slice height.
+    */
+   level = mt->first_level;
+   while (level <= MAX2(8, mt->last_level)) {
+      intel_miptree_set_level_info(mt, level, depth, 0, mt->total_height,
+                                   width, height, depth);
+
+      /* Each level occupies at least two rows. */
+      slice_rows += MAX2(2, height);
+
+      width = minify(width);
+      height = minify(height);
+      depth = minify(depth);
+      level++;
+   }
+
+   /* Second pass: image i of every level sits i slices down. */
+   depth = mt->depth0;
+   for (level = mt->first_level; level <= mt->last_level; level++) {
+      GLuint slice;
+      GLuint y = 0;
+
+      for (slice = 0; slice < depth; slice++) {
+         intel_miptree_set_image_offset(mt, level, slice, 0, y);
+         y += slice_rows;
+      }
+
+      depth = minify(depth);
+   }
+
+   /* Multiply slice size by texture depth for total size.  It's
+    * remarkable how wasteful of memory the i915 texture layouts
+    * are.  They are largely fixed in the i945.
+    */
+   mt->total_height = slice_rows * mt->depth0;
+}
+
+/**
+ * 1D/2D/rectangle layout for i830M-GM915: mip levels are stacked
+ * vertically, each starting at x = 0.
+ */
+static void
+i915_miptree_layout_2d(struct intel_context *intel,
+                       struct intel_mipmap_tree * mt)
+{
+   GLuint width = mt->width0;
+   GLuint height = mt->height0;
+   GLint level;
+
+   mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0);
+   mt->total_height = 0;
+
+   for (level = mt->first_level; level <= mt->last_level; level++) {
+      GLuint rows;
+
+      /* The level begins where the previous ones ended. */
+      intel_miptree_set_level_info(mt, level, 1,
+                                   0, mt->total_height,
+                                   width, height, 1);
+
+      /* Compressed levels take height / 4 rows (at least one);
+       * uncompressed levels take at least two rows, rounded up to even.
+       */
+      rows = mt->compressed ? MAX2(1, height / 4)
+                            : ((MAX2(2, height) + 1) & ~1);
+
+      mt->total_height += rows;
+
+      width = minify(width);
+      height = minify(height);
+   }
+}
+
+/**
+ * Lay out a mipmap tree for i830M-GM915 by dispatching on the texture
+ * target.  Always returns GL_TRUE, including for an unexpected target
+ * (which is only reported through _mesa_problem()).
+ */
+GLboolean
+i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt)
+{
+   if (mt->target == GL_TEXTURE_CUBE_MAP) {
+      i915_miptree_layout_cube(intel, mt);
+   }
+   else if (mt->target == GL_TEXTURE_3D) {
+      i915_miptree_layout_3d(intel, mt);
+   }
+   else if (mt->target == GL_TEXTURE_1D ||
+            mt->target == GL_TEXTURE_2D ||
+            mt->target == GL_TEXTURE_RECTANGLE_ARB) {
+      i915_miptree_layout_2d(intel, mt);
+   }
+   else {
+      _mesa_problem(NULL, "Unexpected tex target in i915_miptree_layout()");
+   }
+
+   /* Debug trace: pitch, total height, cpp and the product of the three. */
+   DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
+       mt->pitch,
+       mt->total_height, mt->cpp, mt->pitch * mt->total_height * mt->cpp);
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Cube texture map layout for GM945 and later.
+ *
+ * The hardware layout looks like the 830-915 layout, except for the small
+ * sizes.  A zoomed in view of the layout for 945 is:
+ *
+ * +-------+-------+
+ * |  8x8  |  8x8  |
+ * |       |       |
+ * |       |       |
+ * |  +x   |  +y   |
+ * |       |       |
+ * |       |       |
+ * |       |       |
+ * |       |       |
+ * +---+---+-------+
+ * |4x4|   |  8x8  |
+ * | +x|   |       |
+ * |   |   |       |
+ * |   |   |       |
+ * +---+   |  +z   |
+ * |4x4|   |       |
+ * | +y|   |       |
+ * |   |   |       |
+ * +---+   +-------+
+ *
+ * ...
+ *
+ * +-------+-------+
+ * |  8x8  |  8x8  |
+ * |       |       |
+ * |       |       |
+ * |  -x   |  -y   |
+ * |       |       |
+ * |       |       |
+ * |       |       |
+ * |       |       |
+ * +---+---+-------+
+ * |4x4|   |  8x8  |
+ * | -x|   |       |
+ * |   |   |       |
+ * |   |   |       |
+ * +---+   |  -z   |
+ * |4x4|   |       |
+ * | -y|   |       |
+ * |   |   |       |
+ * +---+   +---+---+---+---+---+---+---+---+---+
+ * |4x4|   |4x4|   |2x2|   |2x2|   |2x2|   |2x2|
+ * | +z|   | -z|   | +x|   | +y|   | +z|   | -x| ...
+ * |   |   |   |   |   |   |   |   |   |   |   |
+ * +---+   +---+   +---+   +---+   +---+   +---+
+ *
+ * The bottom row continues with the remaining 2x2 then the 1x1 mip contents
+ * in order, with each of them aligned to a 4x4 block boundary.  Thus, for
+ * 32x32 cube maps and smaller, the bottom row layout is going to dictate the
+ * pitch of the tree.  For a tree with 4x4 images, the pitch is at least
+ * 14 * 8 = 112 texels, for 2x2 it is at least 12 * 8 texels, and for 1x1
+ * it is 6 * 8 texels.
+ */
+
+static void
+i945_miptree_layout_cube(struct intel_context *intel,
+                         struct intel_mipmap_tree * mt)
+{
+   const GLuint dim = mt->width0;
+   GLuint face;
+   GLuint lvlWidth = mt->width0, lvlHeight = mt->height0;
+   GLint level;
+
+   assert(lvlWidth == lvlHeight); /* cubemap images are square */
+
+   /* Depending on the size of the largest images, pitch can be
+    * determined either by the old-style packing of cubemap faces,
+    * or the final row of 4x4, 2x2 and 1x1 faces below this.
+    */
+   if (dim > 32)
+      mt->pitch = intel_miptree_pitch_align (intel, mt, dim * 2);
+   else
+      mt->pitch = intel_miptree_pitch_align (intel, mt, 14 * 8);
+
+   /* Four base faces tall plus a 4-row strip at the bottom for the small
+    * levels; trees whose base is smaller than 4x4 are just that strip.
+    */
+   if (dim >= 4)
+      mt->total_height = dim * 4 + 4;
+   else
+      mt->total_height = 4;
+
+   /* Set all the levels to effectively occupy the whole rectangular region. */
+   for (level = mt->first_level; level <= mt->last_level; level++) {
+      intel_miptree_set_level_info(mt, level, 6,
+                                   0, 0,
+                                   lvlWidth, lvlHeight, 1);
+      lvlWidth /= 2;
+      lvlHeight /= 2;
+   }
+
+   for (face = 0; face < 6; face++) {
+      GLuint x = initial_offsets[face][0] * dim;
+      GLuint y = initial_offsets[face][1] * dim;
+      GLuint d = dim;
+
+      /* Small base levels start directly in the bottom strip instead of
+       * at the regular per-face position.
+       */
+      if (dim == 4 && face >= 4) {
+         y = mt->total_height - 4;
+         x = (face - 4) * 8;
+      } else if (dim < 4 && (face > 0 || mt->first_level > 0)) {
+         y = mt->total_height - 4;
+         x = face * 8;
+      }
+
+      for (level = mt->first_level; level <= mt->last_level; level++) {
+         intel_miptree_set_image_offset(mt, level, face, x, y);
+
+         /* d becomes the size of the NEXT level; the switch below works
+          * out where that level goes.
+          */
+         d >>= 1;
+
+         switch (d) {
+         case 4:
+            /* 4x4 images: X faces keep stepping as usual, Y faces move
+             * below the X face's 4x4, Z faces go to the bottom strip.
+             */
+            switch (face) {
+            case FACE_POS_X:
+            case FACE_NEG_X:
+               x += step_offsets[face][0] * d;
+               y += step_offsets[face][1] * d;
+               break;
+            case FACE_POS_Y:
+            case FACE_NEG_Y:
+               y += 12;
+               x -= 8;
+               break;
+            case FACE_POS_Z:
+            case FACE_NEG_Z:
+               y = mt->total_height - 4;
+               x = (face - 4) * 8;
+               break;
+            }
+            /* Without this break control fell through into case 2 and the
+             * 4x4 position computed above was overwritten with the 2x2 one.
+             */
+            break;
+
+         case 2:
+            /* 2x2 images sit in the bottom strip, 8 texels apart. */
+            y = mt->total_height - 4;
+            x = 16 + face * 8;
+            break;
+
+         case 1:
+            /* 1x1 images are 48 texels right of the same face's 2x2. */
+            x += 48;
+            break;
+
+         default:
+            x += step_offsets[face][0] * d;
+            y += step_offsets[face][1] * d;
+            break;
+         }
+      }
+   }
+}
+
+/**
+ * 3D texture layout for GM945 and later: the depth slices of a level are
+ * packed left to right in rows, and the levels follow one another
+ * vertically.
+ */
+static void
+i945_miptree_layout_3d(struct intel_context *intel,
+                       struct intel_mipmap_tree * mt)
+{
+   GLuint width = mt->width0;
+   GLuint height = mt->height0;
+   GLuint depth = mt->depth0;
+   GLuint pack_x_pitch, pack_x_nr;
+   GLuint pack_y_pitch;
+   GLuint level;
+
+   mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0);
+   mt->total_height = 0;
+
+   /* Level 0: one slice per row, a full pitch wide, at least 2 rows high. */
+   pack_x_pitch = mt->pitch;
+   pack_x_nr = 1;
+   pack_y_pitch = MAX2(mt->height0, 2);
+
+   for (level = mt->first_level; level <= mt->last_level; level++) {
+      GLint y = 0;
+      GLint q = 0;
+
+      intel_miptree_set_level_info(mt, level, depth,
+                                   0, mt->total_height,
+                                   width, height, depth);
+
+      /* Emit the slices row by row, pack_x_nr slices per row. */
+      while (q < depth) {
+         GLint x = 0;
+         GLint j;
+
+         for (j = 0; j < pack_x_nr && q < depth; j++, q++) {
+            intel_miptree_set_image_offset(mt, level, q, x, y);
+            x += pack_x_pitch;
+         }
+
+         y += pack_y_pitch;
+      }
+
+      /* y is now the number of rows this level occupied. */
+      mt->total_height += y;
+
+      /* Next level: half the slot width and twice the slices per row
+       * (while the slot is wider than 4), half the row height (while it
+       * is taller than 2).
+       */
+      if (pack_x_pitch > 4) {
+         pack_x_pitch >>= 1;
+         pack_x_nr <<= 1;
+         assert(pack_x_pitch * pack_x_nr <= mt->pitch);
+      }
+
+      if (pack_y_pitch > 2)
+         pack_y_pitch >>= 1;
+
+      width = minify(width);
+      height = minify(height);
+      depth = minify(depth);
+   }
+}
+
+/**
+ * Lay out a mipmap tree for GM945 and later by dispatching on the texture
+ * target.  Always returns GL_TRUE, including for an unexpected target
+ * (which is only reported through _mesa_problem()).
+ */
+GLboolean
+i945_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt)
+{
+   if (mt->target == GL_TEXTURE_CUBE_MAP) {
+      i945_miptree_layout_cube(intel, mt);
+   }
+   else if (mt->target == GL_TEXTURE_3D) {
+      i945_miptree_layout_3d(intel, mt);
+   }
+   else if (mt->target == GL_TEXTURE_1D ||
+            mt->target == GL_TEXTURE_2D ||
+            mt->target == GL_TEXTURE_RECTANGLE_ARB) {
+      i945_miptree_layout_2d(intel, mt);
+   }
+   else {
+      _mesa_problem(NULL, "Unexpected tex target in i945_miptree_layout()");
+   }
+
+   /* Debug trace: pitch, total height, cpp and the product of the three. */
+   DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
+       mt->pitch,
+       mt->total_height, mt->cpp, mt->pitch * mt->total_height * mt->cpp);
+
+   return GL_TRUE;
+}
diff --git a/i915/i915_texprog.c b/i915/i915_texprog.c
deleted file mode 100644
index f6a8b02..0000000
--- a/i915/i915_texprog.c
+++ /dev/null
@@ -1,676 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include <strings.h>
-
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
-
-#include "tnl/t_context.h"
-#include "intel_batchbuffer.h"
-
-#include "i915_reg.h"
-#include "i915_context.h"
-#include "i915_program.h"
-
-static GLuint translate_tex_src_bit( struct i915_fragment_program *p,
-				     GLubyte bit )
-{
-   switch (bit) {
-   case TEXTURE_1D_BIT:   return D0_SAMPLE_TYPE_2D;
-   case TEXTURE_2D_BIT:   return D0_SAMPLE_TYPE_2D;
-   case TEXTURE_RECT_BIT: return D0_SAMPLE_TYPE_2D;
-   case TEXTURE_3D_BIT:   return D0_SAMPLE_TYPE_VOLUME;
-   case TEXTURE_CUBE_BIT: return D0_SAMPLE_TYPE_CUBE;
-   default: i915_program_error(p, "TexSrcBit"); return 0;
-   }
-}
-
-static GLuint get_source( struct i915_fragment_program *p, 
-			  GLenum src, GLuint unit )
-{
-   switch (src) {
-   case GL_TEXTURE: 
-      if (p->src_texture == UREG_BAD) {
-
-	 /* TODO: Use D0_CHANNEL_XY where possible.
-	  */
-	 GLuint dim = translate_tex_src_bit( p, p->ctx->Texture.Unit[unit]._ReallyEnabled);
-	 GLuint sampler = i915_emit_decl(p, REG_TYPE_S, unit, dim);
-	 GLuint texcoord = i915_emit_decl(p, REG_TYPE_T, unit, D0_CHANNEL_ALL);
-	 GLuint tmp = i915_get_temp( p );
-	 GLuint op = T0_TEXLD;
-
-	 if (p->VB->TexCoordPtr[unit]->size == 4)
-	    op = T0_TEXLDP;
-
-	 p->src_texture = i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, 
-					  sampler, texcoord, op );
-      }
-
-      return p->src_texture;
-
-      /* Crossbar: */
-   case GL_TEXTURE0:
-   case GL_TEXTURE1:
-   case GL_TEXTURE2:
-   case GL_TEXTURE3:
-   case GL_TEXTURE4:
-   case GL_TEXTURE5:
-   case GL_TEXTURE6:
-   case GL_TEXTURE7: {
-      return UREG_BAD;
-   }
-
-   case GL_CONSTANT:
-      return i915_emit_const4fv( p, p->ctx->Texture.Unit[unit].EnvColor );
-   case GL_PRIMARY_COLOR:
-      return i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
-   case GL_PREVIOUS:
-   default: 
-      i915_emit_decl(p, 
-		GET_UREG_TYPE(p->src_previous),
-		GET_UREG_NR(p->src_previous), D0_CHANNEL_ALL); 
-      return p->src_previous;
-   }
-}
-			
-
-static GLuint emit_combine_source( struct i915_fragment_program *p, 
-				   GLuint mask,
-				   GLuint unit,
-				   GLenum source, 
-				   GLenum operand )
-{
-   GLuint arg, src;
-
-   src = get_source(p, source, unit);
-
-   switch (operand) {
-   case GL_ONE_MINUS_SRC_COLOR: 
-      /* Get unused tmp,
-       * Emit tmp = 1.0 + arg.-x-y-z-w
-       */
-      arg = i915_get_temp( p );
-      return i915_emit_arith( p, A0_ADD, arg, mask, 0,
-		  swizzle(src, ONE, ONE, ONE, ONE ),
-		  negate(src, 1,1,1,1), 0);
-
-   case GL_SRC_ALPHA: 
-      if (mask == A0_DEST_CHANNEL_W)
-	 return src;
-      else
-	 return swizzle( src, W, W, W, W );
-   case GL_ONE_MINUS_SRC_ALPHA: 
-      /* Get unused tmp,
-       * Emit tmp = 1.0 + arg.-w-w-w-w
-       */
-      arg = i915_get_temp( p );
-      return i915_emit_arith( p, A0_ADD, arg, mask, 0,
-			 swizzle(src, ONE, ONE, ONE, ONE ),
-			 negate( swizzle(src,W,W,W,W), 1,1,1,1), 0);
-   case GL_SRC_COLOR: 
-   default:
-      return src;
-   }
-}
-
-
-
-static int nr_args( GLenum mode )
-{
-   switch (mode) {
-   case GL_REPLACE: return 1; 
-   case GL_MODULATE: return 2;
-   case GL_ADD: return 2;
-   case GL_ADD_SIGNED: return 2;
-   case GL_INTERPOLATE:	return 3;
-   case GL_SUBTRACT: return 2;
-   case GL_DOT3_RGB_EXT: return 2;
-   case GL_DOT3_RGBA_EXT: return 2;
-   case GL_DOT3_RGB: return 2;
-   case GL_DOT3_RGBA: return 2;
-   default: return 0;
-   }
-}
-
-
-static GLboolean args_match( struct gl_texture_unit *texUnit )
-{
-   int i, nr = nr_args(texUnit->Combine.ModeRGB);
-
-   for (i = 0 ; i < nr ; i++) {
-      if (texUnit->Combine.SourceA[i] != texUnit->Combine.SourceRGB[i]) 
-	 return GL_FALSE;
-
-      switch(texUnit->Combine.OperandA[i]) {
-      case GL_SRC_ALPHA: 
-	 switch(texUnit->Combine.OperandRGB[i]) {
-	 case GL_SRC_COLOR: 
-	 case GL_SRC_ALPHA: 
-	    break;
-	 default:
-	    return GL_FALSE;
-	 }
-	 break;
-      case GL_ONE_MINUS_SRC_ALPHA: 
-	 switch(texUnit->Combine.OperandRGB[i]) {
-	 case GL_ONE_MINUS_SRC_COLOR: 
-	 case GL_ONE_MINUS_SRC_ALPHA: 
-	    break;
-	 default:
-	    return GL_FALSE;
-	 }
-	 break;
-      default: 
-	 return GL_FALSE;	/* impossible */
-      }
-   }
-
-   return GL_TRUE;
-}
-
-
-static GLuint emit_combine( struct i915_fragment_program *p,
-			    GLuint dest,
-			    GLuint mask,
-			    GLuint saturate,
-			    GLuint unit,
-			    GLenum mode,
-			    const GLenum *source,
-			    const GLenum *operand)
-{
-   int tmp, src[3], nr = nr_args(mode);
-   int i;
-
-   for (i = 0; i < nr; i++)
-      src[i] = emit_combine_source( p, mask, unit, source[i], operand[i] );
-
-   switch (mode) {
-   case GL_REPLACE: 
-      if (mask == A0_DEST_CHANNEL_ALL && !saturate)
-	 return src[0];
-      else
-	 return i915_emit_arith( p, A0_MOV, dest, mask, saturate, src[0], 0, 0 );
-   case GL_MODULATE: 
-      return i915_emit_arith( p, A0_MUL, dest, mask, saturate,
-			     src[0], src[1], 0 );
-   case GL_ADD: 
-      return i915_emit_arith( p, A0_ADD, dest, mask, saturate, 
-			     src[0], src[1], 0 );
-   case GL_ADD_SIGNED:
-      /* tmp = arg0 + arg1
-       * result = tmp + -.5
-       */
-      tmp = i915_emit_const1f(p, .5);
-      tmp = negate(swizzle(tmp,X,X,X,X),1,1,1,1);
-      i915_emit_arith( p, A0_ADD, dest, mask, 0, src[0], src[1], 0 );
-      i915_emit_arith( p, A0_ADD, dest, mask, saturate, dest, tmp, 0 );
-      return dest;
-   case GL_INTERPOLATE:		/* TWO INSTRUCTIONS */
-      /* Arg0 * (Arg2) + Arg1 * (1-Arg2)
-       *
-       * Arg0*Arg2 + Arg1 - Arg1Arg2 
-       *
-       * tmp = Arg0*Arg2 + Arg1, 
-       * result = (-Arg1)Arg2 + tmp 
-       */
-      tmp = i915_get_temp( p );
-      i915_emit_arith( p, A0_MAD, tmp, mask, 0, src[0], src[2], src[1] );
-      i915_emit_arith( p, A0_MAD, dest, mask, saturate, 
-		      negate(src[1], 1,1,1,1), src[2], tmp );
-      return dest;
-   case GL_SUBTRACT: 
-      /* negate src[1] */
-      return i915_emit_arith( p, A0_ADD, dest, mask, saturate, src[0],
-			 negate(src[1],1,1,1,1), 0 );
-
-   case GL_DOT3_RGBA:
-   case GL_DOT3_RGBA_EXT: 
-   case GL_DOT3_RGB_EXT:
-   case GL_DOT3_RGB: {
-      GLuint tmp0 = i915_get_temp( p );
-      GLuint tmp1 = i915_get_temp( p );
-      GLuint neg1 = negate(swizzle(i915_emit_const1f(p, 1),X,X,X,X), 1,1,1,1);
-      GLuint two = swizzle(i915_emit_const1f(p, 2),X,X,X,X);
-      i915_emit_arith( p, A0_MAD, tmp0, A0_DEST_CHANNEL_ALL, 0, 
-		      two, src[0], neg1);
-      if (src[0] == src[1])
-	 tmp1 = tmp0;
-      else
-	 i915_emit_arith( p, A0_MAD, tmp1, A0_DEST_CHANNEL_ALL, 0, 
-			 two, src[1], neg1);
-      i915_emit_arith( p, A0_DP3, dest, mask, saturate, tmp0, tmp1, 0);
-      return dest;
-   }
-
-   default: 
-      return src[0];
-   }
-}
-
-static GLuint get_dest( struct i915_fragment_program *p, int unit )
-{
-   if (p->ctx->_TriangleCaps & DD_SEPARATE_SPECULAR)
-      return i915_get_temp( p );
-   else if (unit != p->last_tex_stage)
-      return i915_get_temp( p );
-   else
-      return UREG(REG_TYPE_OC, 0);
-}
-      
-
-
-static GLuint emit_texenv( struct i915_fragment_program *p, int unit )
-{
-   struct gl_texture_unit *texUnit = &p->ctx->Texture.Unit[unit];
-   GLenum envMode = texUnit->EnvMode;
-   struct gl_texture_object *tObj = texUnit->_Current;
-   GLenum format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat;
-   GLuint saturate = unit < p->last_tex_stage ? A0_DEST_SATURATE : 0;
-
-   switch(envMode) {
-   case GL_BLEND: {
-      const int cf = get_source(p, GL_PREVIOUS, unit);
-      const int cc = get_source(p, GL_CONSTANT, unit);
-      const int cs = get_source(p, GL_TEXTURE, unit);
-      const int out = get_dest(p, unit);
-
-      if (format == GL_INTENSITY) {
-	 /* cv = cf(1 - cs) + cc.cs
-	  * cv = cf - cf.cs + cc.cs
-	  */
-	 /* u[2] = MAD( -cf * cs + cf )
-	  * cv   = MAD( cc * cs + u[2] )
-	  */
-	 
-	 i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, 0, 
-			 negate(cf,1,1,1,1), cs, cf );
-
-	 i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, saturate, 
-			 cc, cs, out );
-
-	 return out;
-      } else {
-	 /* cv = cf(1 - cs) + cc.cs
-	  * cv = cf - cf.cs + cc.cs
-	  * av =      af.as
-	  */
-	 /* u[2] = MAD( cf.-x-y-zw * cs.xyzw + cf.xyz0 )
-	  * oC   = MAD( cc.xyz0 * cs.xyz0 + u[2].xyzw )
-	  */
-	 i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, 0,
-			 negate(cf,1,1,1,0),  
-			 cs,
-			 swizzle(cf,X,Y,Z,ZERO) );
-
-
-	 i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, saturate,
-			 swizzle(cc,X,Y,Z,ZERO),  
-			 swizzle(cs,X,Y,Z,ZERO),
-			 out );
-
-	 return out;
-      }
-   }
-
-   case GL_DECAL: {
-      if (format == GL_RGB ||
-	  format == GL_RGBA) {
-	 int cf = get_source( p, GL_PREVIOUS, unit );
-	 int cs = get_source( p, GL_TEXTURE, unit );
-	 int out = get_dest(p, unit);
-	 
-	 /* cv = cf(1-as) + cs.as
-	  * cv = cf.(-as) + cf + cs.as
-	  * av = af
-	  */ 
-	 
-	 /* u[2] = mad( cf.xyzw * cs.-w-w-w1 + cf.xyz0 )
-	  * oc = mad( cs.xyz0 * cs.www0 + u[2].xyzw )
-	  */
-	 i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, 0,
-			 cf,  
-			 negate(swizzle(cs,W,W,W,ONE),1,1,1,0),
-			 swizzle(cf,X,Y,Z,ZERO) );
-	 
-	 i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, saturate,
-			 swizzle(cs,X,Y,Z,ZERO),  
-			 swizzle(cs,W,W,W,ZERO),
-			 out );
-	 return out;
-      }
-      else {
-	 return get_source( p, GL_PREVIOUS, unit );
-      }
-   }
-
-   case GL_REPLACE: {
-      const int cs = get_source( p, GL_TEXTURE, unit );	/* saturated */
-      switch (format) {
-      case GL_ALPHA: {
-	 const int cf = get_source( p, GL_PREVIOUS, unit ); /* saturated */
-	 i915_emit_arith( p, A0_MOV, cs, A0_DEST_CHANNEL_XYZ, 0, cf, 0, 0 );
-	 return cs;
-      }
-      case GL_RGB:
-      case GL_LUMINANCE: {
-	 const int cf = get_source( p, GL_PREVIOUS, unit ); /* saturated */
-	 i915_emit_arith( p, A0_MOV, cs, A0_DEST_CHANNEL_W, 0, cf, 0, 0 );
-	 return cs;
-      }
-      default:
-	 return cs;
-      }
-   }
-
-   case GL_MODULATE: {
-      const int cf = get_source( p, GL_PREVIOUS, unit );
-      const int cs = get_source( p, GL_TEXTURE, unit );
-      const int out = get_dest(p, unit);
-      switch (format) {
-      case GL_ALPHA: 
-	 i915_emit_arith( p, A0_MUL, out, A0_DEST_CHANNEL_ALL, saturate,
-			 swizzle(cs, ONE, ONE, ONE, W), cf, 0 );
-	 break;
-      default:
-	 i915_emit_arith( p, A0_MUL, out, A0_DEST_CHANNEL_ALL, saturate, 
-			 cs, cf, 0 );
-	 break;
-      }
-      return out;
-   }
-   case GL_ADD: {
-      int cf = get_source( p, GL_PREVIOUS, unit );
-      int cs = get_source( p, GL_TEXTURE, unit );
-      const int out = get_dest( p, unit );
-
-      if (format == GL_INTENSITY) {
-	 /* output-color.rgba = add( incoming, u[1] )
-	  */
-	 i915_emit_arith( p, A0_ADD, out, A0_DEST_CHANNEL_ALL, saturate, 
-			 cs, cf, 0 );
-	 return out;
-      }
-      else {
-	 /* cv.xyz = cf.xyz + cs.xyz
-	  * cv.w   = cf.w * cs.w
-	  *
-	  * cv.xyzw = MAD( cf.111w * cs.xyzw + cf.xyz0 )
-	  */
- 	 i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, saturate,
-			 swizzle(cf,ONE,ONE,ONE,W), 
-			 cs,  
-			 swizzle(cf,X,Y,Z,ZERO) ); 
-	 return out;
-      }
-      break;
-   }
-   case GL_COMBINE: {
-      GLuint rgb_shift, alpha_shift, out, shift;
-      GLuint dest = get_dest(p, unit);
-
-      /* The EXT version of the DOT3 extension does not support the
-       * scale factor, but the ARB version (and the version in OpenGL
-       * 1.3) does.
-       */
-      switch (texUnit->Combine.ModeRGB) {
-      case GL_DOT3_RGB_EXT:
-	 alpha_shift = texUnit->Combine.ScaleShiftA;
-	 rgb_shift = 0;
-	 break;
-
-      case GL_DOT3_RGBA_EXT:
-	 alpha_shift = 0;
-	 rgb_shift = 0;
-	 break;
-
-      default:
-	 rgb_shift = texUnit->Combine.ScaleShiftRGB;
-	 alpha_shift = texUnit->Combine.ScaleShiftA;
-	 break;
-      }
-
-
-      /* Emit the RGB and A combine ops
-       */
-      if (texUnit->Combine.ModeRGB == texUnit->Combine.ModeA && 
-	  args_match( texUnit )) {
-	 out = emit_combine( p, dest, A0_DEST_CHANNEL_ALL, saturate,
-			     unit,
-			     texUnit->Combine.ModeRGB,
-			     texUnit->Combine.SourceRGB,
-			     texUnit->Combine.OperandRGB );
-      }
-      else if (texUnit->Combine.ModeRGB == GL_DOT3_RGBA_EXT ||
-	       texUnit->Combine.ModeRGB == GL_DOT3_RGBA) {
-
-	 out = emit_combine( p, dest, A0_DEST_CHANNEL_ALL, saturate,
-			     unit,
-			     texUnit->Combine.ModeRGB,
-			     texUnit->Combine.SourceRGB,
-			     texUnit->Combine.OperandRGB );
-      }
-      else {
-	 /* Need to do something to stop from re-emitting identical
-	  * argument calculations here:
-	  */
-	 out = emit_combine( p, dest, A0_DEST_CHANNEL_XYZ, saturate,
-			     unit,
-			     texUnit->Combine.ModeRGB,
-			     texUnit->Combine.SourceRGB,
-			     texUnit->Combine.OperandRGB );
-	 out = emit_combine( p, dest, A0_DEST_CHANNEL_W, saturate,
-			     unit,
-			     texUnit->Combine.ModeA,
-			     texUnit->Combine.SourceA,
-			     texUnit->Combine.OperandA );
-      }
-
-      /* Deal with the final shift:
-       */
-      if (alpha_shift || rgb_shift) {
-	 if (rgb_shift == alpha_shift) {
-	    shift = i915_emit_const1f(p, 1<<rgb_shift);
-	    shift = swizzle(shift,X,X,X,X);
-	 }
-	 else {
-	    shift = i915_emit_const2f(p, 1<<rgb_shift, 1<<alpha_shift);
-	    shift = swizzle(shift,X,X,X,Y);
-	 }
-	 return i915_emit_arith( p, A0_MUL, dest, A0_DEST_CHANNEL_ALL, 
-				saturate, out, shift, 0 );
-      }
-
-      return out;
-   }
-
-   default:
-      return get_source(p, GL_PREVIOUS, 0);
-   }
-}
-
-static void emit_program_fini( struct i915_fragment_program *p )
-{
-   int cf = get_source( p, GL_PREVIOUS, 0 );
-   int out = UREG( REG_TYPE_OC, 0 );
-
-   if (p->ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) {
-      /* Emit specular add.
-       */
-      GLuint s = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_ALL);
-      i915_emit_arith( p, A0_ADD, out, A0_DEST_CHANNEL_ALL, 0, cf, 
-		  swizzle(s, X,Y,Z,ZERO), 0 );
-   }
-   else if (cf != out) {
-      /* Will wind up in here if no texture enabled or a couple of
-       * other scenarios (GL_REPLACE for instance).
-       */
-      i915_emit_arith( p, A0_MOV, out, A0_DEST_CHANNEL_ALL, 0, cf, 0, 0 );
-   }
-}
-
-
-static void i915EmitTextureProgram( i915ContextPtr i915 )
-{
-   GLcontext *ctx = &i915->intel.ctx;
-   struct i915_fragment_program *p = &i915->tex_program;
-   GLuint unit;
-
-   if (0) fprintf(stderr, "%s\n", __FUNCTION__);
-
-   i915_init_program( i915, p );
-
-   if (ctx->Texture._EnabledUnits) {
-      for (unit = 0 ; unit < ctx->Const.MaxTextureUnits ; unit++)
-	 if (ctx->Texture.Unit[unit]._ReallyEnabled) {
-	    p->last_tex_stage = unit;
-	 }
-
-      for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++)
-	 if (ctx->Texture.Unit[unit]._ReallyEnabled) {
-	    p->src_previous = emit_texenv( p, unit );
-	    p->src_texture = UREG_BAD;
-	    p->temp_flag = 0xffff000;
-	    p->temp_flag |= 1 << GET_UREG_NR(p->src_previous);
-	 }
-   }
-
-   emit_program_fini( p );
-
-   i915_fini_program( p );
-   i915_upload_program( i915, p );
-
-   p->translated = 1;
-}
-
-
-void i915ValidateTextureProgram( i915ContextPtr i915 )
-{
-   intelContextPtr intel = &i915->intel;
-   GLcontext *ctx = &intel->ctx;
-   TNLcontext *tnl = TNL_CONTEXT(ctx);
-   struct vertex_buffer *VB = &tnl->vb;
-   DECLARE_RENDERINPUTS(index_bitset);
-   int i, offset;
-   GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
-   GLuint s2 = S2_TEXCOORD_NONE;
-
-   RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset );
-
-   /* Important:
-    */
-   VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
-   intel->vertex_attr_count = 0;
-   intel->coloroffset = 0;
-   intel->specoffset = 0;
-   offset = 0;
-
-   if (i915->current_program) {
-      i915->current_program->on_hardware = 0;
-      i915->current_program->params_uptodate = 0;
-   }
-
-   if (i915->vertex_fog == I915_FOG_PIXEL) {
-      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16 );
-      RENDERINPUTS_CLEAR( index_bitset, _TNL_ATTRIB_FOG );
-   }
-   else if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
-      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16 );
-   }
-   else {
-      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12 );
-   }
-
-   /* How undefined is undefined? */
-   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) {
-      EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F, S4_VFMT_POINT_WIDTH, 4 );
-   }
-      
-   intel->coloroffset = offset / 4;
-   EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4 );
-            
-   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 ) ||
-       RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) {
-      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
-	 intel->specoffset = offset / 4;
-	 EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, S4_VFMT_SPEC_FOG, 3 );
-      } else 
-	 EMIT_PAD( 3 );
-      
-      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG ))
-	 EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, S4_VFMT_SPEC_FOG, 1 );
-      else
-	 EMIT_PAD( 1 );
-   }
-
-   if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
-      for (i = 0; i < 8; i++) {
-	 if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) {
-	    int sz = VB->TexCoordPtr[i]->size;
-	    
-	    s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
-	    s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
-
-	    EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_SZ(sz), 0, sz * 4 );
-	 }
-      }
-   }
-
-   /* Only need to change the vertex emit code if there has been a
-    * statechange to a new hardware vertex format:
-    */
-   if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] ||
-       s4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
-    
-      I915_STATECHANGE( i915, I915_UPLOAD_CTX );
-
-      i915->tex_program.translated = 0;
-
-      /* Must do this *after* statechange, so as not to affect
-       * buffered vertices reliant on the old state:
-       */
-      intel->vertex_size = _tnl_install_attrs( ctx, 
-					       intel->vertex_attrs, 
-					       intel->vertex_attr_count,
-					       intel->ViewportMatrix.m, 0 ); 
-
-      intel->vertex_size >>= 2;
-
-      i915->state.Ctx[I915_CTXREG_LIS2] = s2;
-      i915->state.Ctx[I915_CTXREG_LIS4] = s4;
-
-      assert(intel->vtbl.check_vertex_size( intel, intel->vertex_size ));
-   }
-
-   if (!i915->tex_program.translated ||
-       i915->last_ReallyEnabled != ctx->Texture._EnabledUnits) {
-      i915EmitTextureProgram( i915 );      
-      i915->last_ReallyEnabled = ctx->Texture._EnabledUnits;
-   }
-}
diff --git a/i915/i915_texstate.c b/i915/i915_texstate.c
index a19d4b6..69c1e07 100644
--- a/i915/i915_texstate.c
+++ b/i915/i915_texstate.c
@@ -25,902 +25,351 @@
  * 
  **************************************************************************/
 
-#include "glheader.h"
-#include "macros.h"
 #include "mtypes.h"
-#include "simple_list.h"
 #include "enums.h"
 #include "texformat.h"
-#include "texstore.h"
+#include "dri_bufmgr.h"
 
-#include "mm.h"
-
-#include "intel_screen.h"
-#include "intel_ioctl.h"
+#include "intel_mipmap_tree.h"
 #include "intel_tex.h"
 
 #include "i915_context.h"
 #include "i915_reg.h"
 
-static GLint initial_offsets[6][2] = { {0,0},
-				       {0,2},
-				       {1,0},
-				       {1,2},
-				       {1,1},
-				       {1,3} };
-
-
-static GLint step_offsets[6][2] = { {0,2},
-				    {0,2},
-				    {-1,2},
-				    {-1,2},
-				    {-1,1},
-				    {-1,1} };
-
-
-#define I915_TEX_UNIT_ENABLED(unit)		(1<<unit)
-
-static void i915LayoutTextureImages( i915ContextPtr i915,
-				     struct gl_texture_object *tObj )
-{
-   const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
-   i915TextureObjectPtr t = (i915TextureObjectPtr) tObj->DriverData;
-   GLint firstLevel, lastLevel, numLevels;
-   GLint i, total_height, pitch;
-
-   /* Compute which mipmap levels we really want to send to the hardware.
-    */
-   driCalculateTextureFirstLastLevel( (driTextureObject *) t );
-
-   /* Figure out the amount of memory required to hold all the mipmap
-    * levels.  Choose the smallest pitch to accomodate the largest
-    * mipmap:
-    */
-   firstLevel = t->intel.base.firstLevel;
-   lastLevel = t->intel.base.lastLevel;
-   numLevels = lastLevel - firstLevel + 1;
-
-
-
-   /* All images must be loaded at this pitch.  Count the number of
-    * lines required:
-    */
-   switch (tObj->Target) {
-   case GL_TEXTURE_CUBE_MAP: {
-      const GLuint dim = tObj->Image[0][firstLevel]->Width;
-      GLuint face;
-
-      pitch = dim * t->intel.texelBytes;
-      pitch *= 2;		/* double pitch for cube layouts */
-      pitch = (pitch + 3) & ~3;
-      
-      total_height = dim * 4;
-
-      for ( face = 0 ; face < 6 ; face++) {
-	 GLuint x = initial_offsets[face][0] * dim;
-	 GLuint y = initial_offsets[face][1] * dim;
-	 GLuint d = dim;
-	 
-	 t->intel.base.dirty_images[face] = ~0;
-
-	 assert(tObj->Image[face][firstLevel]->Width == dim);
-	 assert(tObj->Image[face][firstLevel]->Height == dim);
-
-	 for (i = 0; i < numLevels; i++) {
-	    t->intel.image[face][i].image = tObj->Image[face][firstLevel + i];
-	    if (!t->intel.image[face][i].image) {
-	       fprintf(stderr, "no image %d %d\n", face, i);
-	       break;		/* can't happen */
-	    }
-	 
-	    t->intel.image[face][i].offset = 
-	       y * pitch + x * t->intel.texelBytes;
-	    t->intel.image[face][i].internalFormat = baseImage->_BaseFormat;
-
-	    d >>= 1;
-	    x += step_offsets[face][0] * d;
-	    y += step_offsets[face][1] * d;
-	 }
-      }
-      break;
-   }
-   case GL_TEXTURE_3D: {
-      GLuint virtual_height;
-      GLuint tmp_numLevels = numLevels;
-      pitch = tObj->Image[0][firstLevel]->Width * t->intel.texelBytes;
-      pitch = (pitch + 3) & ~3;
-      t->intel.base.dirty_images[0] = ~0;
-
-      /* Calculate the size of a single slice.  Hardware demands a
-       * minimum of 8 mipmaps, some of which might ultimately not be
-       * used:
-       */
-      if (tmp_numLevels < 9)
-	 tmp_numLevels = 9;
-
-      virtual_height = tObj->Image[0][firstLevel]->Height;
-
-      for ( total_height = i = 0 ; i < tmp_numLevels ; i++ ) {
-	 t->intel.image[0][i].image = tObj->Image[0][firstLevel + i];
-	 if (t->intel.image[0][i].image) {
-	    t->intel.image[0][i].offset = total_height * pitch;
-	    t->intel.image[0][i].internalFormat = baseImage->_BaseFormat;
-	 }
-
-	 total_height += MAX2(2, virtual_height);
-	 virtual_height >>= 1;
-      }
-
-      t->intel.depth_pitch = total_height * pitch;
-
-      /* Multiply slice size by texture depth for total size.  It's
-       * remarkable how wasteful of memory all the i8x0 texture
-       * layouts are.
-       */
-      total_height *= t->intel.image[0][0].image->Depth;
-      break;
-   }
-   default:
-      pitch = tObj->Image[0][firstLevel]->Width * t->intel.texelBytes;
-      pitch = (pitch + 3) & ~3;
-      t->intel.base.dirty_images[0] = ~0;
-
-      for ( total_height = i = 0 ; i < numLevels ; i++ ) {
-	 t->intel.image[0][i].image = tObj->Image[0][firstLevel + i];
-	 if (!t->intel.image[0][i].image) 
-	    break;
-	 
-	 t->intel.image[0][i].offset = total_height * pitch;
-	 t->intel.image[0][i].internalFormat = baseImage->_BaseFormat;
-	 if (t->intel.image[0][i].image->IsCompressed) {
-	    total_height += (t->intel.image[0][i].image->Height + 3) / 4;
-	 }
-	 else
-	   total_height += MAX2(2, t->intel.image[0][i].image->Height);
-      }
-      break;
-   }
-
-   t->intel.Pitch = pitch;
-   t->intel.base.totalSize = total_height*pitch;
-   t->intel.max_level = numLevels-1;
-}
-
 
-static void i945LayoutTextureImages( i915ContextPtr i915,
-				    struct gl_texture_object *tObj )
+static GLuint
+translate_texture_format(GLuint mesa_format, GLenum DepthMode)
 {
-   const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
-   i915TextureObjectPtr t = (i915TextureObjectPtr) tObj->DriverData;
-   GLint firstLevel, lastLevel, numLevels;
-   GLint i, total_height, pitch, sz, max_offset = 0, offset;
-
-
-   /* Compute which mipmap levels we really want to send to the hardware.
-    */
-   driCalculateTextureFirstLastLevel( (driTextureObject *) t );
-
-   /* Figure out the amount of memory required to hold all the mipmap
-    * levels.  Choose the smallest pitch to accomodate the largest
-    * mipmap:
-    */
-   firstLevel = t->intel.base.firstLevel;
-   lastLevel = t->intel.base.lastLevel;
-   numLevels = lastLevel - firstLevel + 1;
-
-
-
-   /* All images must be loaded at this pitch.  Count the number of
-    * lines required:
-    */
-   switch (tObj->Target) {
-   case GL_TEXTURE_CUBE_MAP: {
-      const GLuint dim = tObj->Image[0][firstLevel]->Width;
-      GLuint face;
-
-      /* Depending on the size of the largest images, pitch can be
-       * determined either by the old-style packing of cubemap faces,
-       * or the final row of 4x4, 2x2 and 1x1 faces below this. 
-       */
-      if (dim > 32) {
-	 pitch = dim * t->intel.texelBytes;
-	 pitch *= 2;		/* double pitch for cube layouts */
-	 pitch = (pitch + 3) & ~3;
-      }
-      else {
-	 pitch = 14 * 8 * t->intel.texelBytes; /* determined by row of
-						* little maps at
-						* bottom */
-      }
-      
-      total_height = dim * 4 + 4;
-
-      for ( face = 0 ; face < 6 ; face++) {
-	 GLuint x = initial_offsets[face][0] * dim;
-	 GLuint y = initial_offsets[face][1] * dim;
-	 GLuint d = dim;
-	 
-	 if (dim == 4 && face >= 4) {
-	    y = total_height - 4;
-	    x = (face - 4) * 8;
-	 }
-	 else if (dim < 4) {
-	    y = total_height - 4;
-	    x = face * 8;
-	 }
-
-	 t->intel.base.dirty_images[face] = ~0;
-
-	 assert(tObj->Image[face][firstLevel]->Width == dim);
-	 assert(tObj->Image[face][firstLevel]->Height == dim);
-
-	 for (i = 0; i < numLevels; i++) {
-
-
-	    t->intel.image[face][i].image = tObj->Image[face][firstLevel + i];
-	    assert(t->intel.image[face][i].image);
-	 
-	    t->intel.image[face][i].offset = 
-	       y * pitch + x * t->intel.texelBytes;
-	    t->intel.image[face][i].internalFormat = baseImage->_BaseFormat;
-
-	    d >>= 1;
-	    
-	    switch (d) {
-	    case 4:
-	       switch (face) {
-	       case FACE_POS_X:
-	       case FACE_NEG_X:
-		  x += step_offsets[face][0] * d;
-		  y += step_offsets[face][1] * d;
-		  break;
-	       case FACE_POS_Y:
-	       case FACE_NEG_Y:
-		  y += 12;
-		  x -= 8;
-		  break;
-	       case FACE_POS_Z:
-	       case FACE_NEG_Z:
-		  y = total_height - 4;
-		  x = (face - 4) * 8;
-		  break;
-	       }
-
-	    case 2:
-	       y = total_height - 4;
-	       x = 16 + face * 8;
-	       break;
-
-	    case 1:
-	       x += 48;
-	       break;
-	       
-	    default:
-	       x += step_offsets[face][0] * d;
-	       y += step_offsets[face][1] * d;
-	       break;
-	    }
-	 }
-      }
-      max_offset = total_height * pitch;
-      break;
-   }
-   case GL_TEXTURE_3D: {
-      GLuint depth_packing = 0, depth_pack_pitch;
-      GLuint tmp_numLevels = numLevels;
-      pitch = tObj->Image[0][firstLevel]->Width * t->intel.texelBytes;
-      pitch = (pitch + 3) & ~3;
-      depth_pack_pitch = pitch;
-      
-      t->intel.base.dirty_images[0] = ~0;
-
-
-      for ( total_height = i = 0 ; i < tmp_numLevels ; i++ ) {
-	 t->intel.image[0][i].image = tObj->Image[0][firstLevel + i];
-	 if (!t->intel.image[0][i].image) 
-	    break;
-
-	 
-	 t->intel.image[0][i].offset = total_height * pitch;
-	 t->intel.image[0][i].internalFormat = baseImage->_BaseFormat;
-	 
-
-
-	 total_height += MAX2(2, t->intel.image[0][i].image->Height) * 
-	    MAX2((t->intel.image[0][i].image->Depth >> depth_packing), 1);
-
-	 /* When alignment dominates, can't increase depth packing?
-	  * Or does pitch grow???  What are the alignment constraints,
-	  * anyway?
-	  */
-	 if (depth_pack_pitch > 4) {
-	    depth_packing++;
-	    depth_pack_pitch <<= 2;
-	 }
-      }
-
-      max_offset = total_height * pitch;
-      break;
-   }
-   default:
-      pitch = tObj->Image[0][firstLevel]->Width * t->intel.texelBytes;
-      pitch = (pitch + 3) & ~3;
-      t->intel.base.dirty_images[0] = ~0;
-      max_offset = 0;
-
-      for ( offset = i = 0 ; i < numLevels ; i++ ) {
-	 t->intel.image[0][i].image = tObj->Image[0][firstLevel + i];
-	 if (!t->intel.image[0][i].image) 
-	    break;
-	 
-	 t->intel.image[0][i].offset = offset;
-	 t->intel.image[0][i].internalFormat = baseImage->_BaseFormat;
-
-	 if (t->intel.image[0][i].image->IsCompressed)
-	    sz = MAX2(1, t->intel.image[0][i].image->Height/4) * pitch;
-	 else
-	    sz = MAX2(2, t->intel.image[0][i].image->Height) * pitch;
-	 
-	 /* Because the images are packed better, the final offset
-	  * might not be the maximal one:
-	  */
-	 max_offset = MAX2(max_offset, offset + sz);
-
-	 /* LPT change: step right after second mipmap.
-	  */
-	 if (i == 1) 
-	    offset += pitch / 2;
-	 else 
-	    offset += sz;
-
-      }
-      break;
-   }
-
-   t->intel.Pitch = pitch;
-   t->intel.base.totalSize = max_offset;
-   t->intel.max_level = numLevels-1;
-}
-
-
-
-
-static void i915SetTexImages( i915ContextPtr i915, 
-			     struct gl_texture_object *tObj )
-{
-   GLuint textureFormat;
-   i915TextureObjectPtr t = (i915TextureObjectPtr) tObj->DriverData;
-   const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
-   GLint ss2 = 0;
-
-   switch( baseImage->TexFormat->MesaFormat ) {
+   switch (mesa_format) {
    case MESA_FORMAT_L8:
-      t->intel.texelBytes = 1;
-      textureFormat = MAPSURF_8BIT | MT_8BIT_L8;
-      break;
-
+      return MAPSURF_8BIT | MT_8BIT_L8;
    case MESA_FORMAT_I8:
-      t->intel.texelBytes = 1;
-      textureFormat = MAPSURF_8BIT | MT_8BIT_I8;
-      break;
-
+      return MAPSURF_8BIT | MT_8BIT_I8;
    case MESA_FORMAT_A8:
-      t->intel.texelBytes = 1;
-      textureFormat = MAPSURF_8BIT | MT_8BIT_A8; 
-      break;
-
+      return MAPSURF_8BIT | MT_8BIT_A8;
    case MESA_FORMAT_AL88:
-      t->intel.texelBytes = 2;
-      textureFormat = MAPSURF_16BIT | MT_16BIT_AY88;
-      break;
-
+      return MAPSURF_16BIT | MT_16BIT_AY88;
    case MESA_FORMAT_RGB565:
-      t->intel.texelBytes = 2;
-      textureFormat = MAPSURF_16BIT | MT_16BIT_RGB565;
-      break;
-
+      return MAPSURF_16BIT | MT_16BIT_RGB565;
    case MESA_FORMAT_ARGB1555:
-      t->intel.texelBytes = 2;
-      textureFormat = MAPSURF_16BIT | MT_16BIT_ARGB1555;
-      break;
-
+      return MAPSURF_16BIT | MT_16BIT_ARGB1555;
    case MESA_FORMAT_ARGB4444:
-      t->intel.texelBytes = 2;
-      textureFormat = MAPSURF_16BIT | MT_16BIT_ARGB4444;
-      break;
-
+      return MAPSURF_16BIT | MT_16BIT_ARGB4444;
    case MESA_FORMAT_ARGB8888:
-      t->intel.texelBytes = 4;
-      textureFormat = MAPSURF_32BIT | MT_32BIT_ARGB8888;
-      break;
-
+      return MAPSURF_32BIT | MT_32BIT_ARGB8888;
    case MESA_FORMAT_YCBCR_REV:
-      t->intel.texelBytes = 2;
-      textureFormat = (MAPSURF_422 | MT_422_YCRCB_NORMAL);
-      ss2 |= SS2_COLORSPACE_CONVERSION;
-      break;
-
+      return (MAPSURF_422 | MT_422_YCRCB_NORMAL);
    case MESA_FORMAT_YCBCR:
-      t->intel.texelBytes = 2;
-      textureFormat = (MAPSURF_422 | MT_422_YCRCB_SWAPY);
-      ss2 |= SS2_COLORSPACE_CONVERSION;
-      break;
-
+      return (MAPSURF_422 | MT_422_YCRCB_SWAPY);
    case MESA_FORMAT_RGB_FXT1:
    case MESA_FORMAT_RGBA_FXT1:
-     t->intel.texelBytes = 2;
-     textureFormat = (MAPSURF_COMPRESSED | MT_COMPRESS_FXT1);
-     break;
-
+      return (MAPSURF_COMPRESSED | MT_COMPRESS_FXT1);
    case MESA_FORMAT_Z16:
-      t->intel.texelBytes = 2;
-      textureFormat = (MAPSURF_16BIT | MT_16BIT_L16);
-      break;
-
+      if (DepthMode == GL_ALPHA)
+          return (MAPSURF_16BIT | MT_16BIT_A16);
+      else if (DepthMode == GL_INTENSITY)
+          return (MAPSURF_16BIT | MT_16BIT_I16);
+      else
+          return (MAPSURF_16BIT | MT_16BIT_L16);
    case MESA_FORMAT_RGBA_DXT1:
    case MESA_FORMAT_RGB_DXT1:
-     /* 
-      * DXTn pitches are Width/4 * blocksize in bytes 
-      * for DXT1: blocksize=8 so Width/4*8 = Width * 2 
-      * for DXT3/5: blocksize=16 so Width/4*16 = Width * 4
-      */
-     t->intel.texelBytes = 2;
-     textureFormat = (MAPSURF_COMPRESSED | MT_COMPRESS_DXT1);
-     break;
-
+      return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT1);
    case MESA_FORMAT_RGBA_DXT3:
-     t->intel.texelBytes = 4;
-     textureFormat = (MAPSURF_COMPRESSED | MT_COMPRESS_DXT2_3);
-     break;
-
+      return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT2_3);
    case MESA_FORMAT_RGBA_DXT5:
-     t->intel.texelBytes = 4;
-     textureFormat = (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5);
-     break;
-
-#if 0
+      return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5);
    case MESA_FORMAT_Z24_S8:
-      t->intel.texelBytes = 4;
-      textureFormat = (MAPSURF_32BIT | MT_32BIT_xL824);
-      break;
-#endif
-
+      return (MAPSURF_32BIT | MT_32BIT_xI824);
    default:
-      fprintf(stderr, "%s: bad image format %x\n", __FUNCTION__,
-	      baseImage->TexFormat->MesaFormat);
+      fprintf(stderr, "%s: bad image format %x\n", __FUNCTION__, mesa_format);
       abort();
+      return 0;
    }
+}
 
-   switch (i915->intel.intelScreen->deviceID) {
-   case PCI_CHIP_I945_G:
-   case PCI_CHIP_I945_GM:
-   case PCI_CHIP_I945_GME:
-   case PCI_CHIP_G33_G:
-   case PCI_CHIP_Q33_G:
-   case PCI_CHIP_Q35_G:
-       i945LayoutTextureImages( i915, tObj );
-       break;
-   default:
-       i915LayoutTextureImages( i915, tObj );
-       break;
-   }
-
-   t->Setup[I915_TEXREG_MS3] = 
-      (((tObj->Image[0][t->intel.base.firstLevel]->Height - 1) << MS3_HEIGHT_SHIFT) |
-       ((tObj->Image[0][t->intel.base.firstLevel]->Width - 1) << MS3_WIDTH_SHIFT) |
-       textureFormat |
-       MS3_USE_FENCE_REGS);
-
-   t->Setup[I915_TEXREG_MS4] = 
-      ((((t->intel.Pitch / 4) - 1) << MS4_PITCH_SHIFT) | 
-       MS4_CUBE_FACE_ENA_MASK |
-       (((t->intel.max_level * 4)) << MS4_MAX_LOD_SHIFT) |
-       ((tObj->Image[0][t->intel.base.firstLevel]->Depth - 1) << MS4_VOLUME_DEPTH_SHIFT));
-
-   t->Setup[I915_TEXREG_SS2] &= ~(SS2_COLORSPACE_CONVERSION);
-   t->Setup[I915_TEXREG_SS2] |= ss2;
-
-   t->intel.dirty = I915_UPLOAD_TEX_ALL;
 
-}
 
 
 /* The i915 (and related graphics cores) do not support GL_CLAMP.  The
  * Intel drivers for "other operating systems" implement GL_CLAMP as
  * GL_CLAMP_TO_EDGE, so the same is done here.
  */
-static GLuint translate_wrap_mode( GLenum wrap )
+static GLuint
+translate_wrap_mode(GLenum wrap)
 {
-   switch( wrap ) {
-   case GL_REPEAT: return TEXCOORDMODE_WRAP;
-   case GL_CLAMP:  return TEXCOORDMODE_CLAMP_EDGE; /* not quite correct */
-   case GL_CLAMP_TO_EDGE: return TEXCOORDMODE_CLAMP_EDGE;
-   case GL_CLAMP_TO_BORDER: return TEXCOORDMODE_CLAMP_BORDER;
-   case GL_MIRRORED_REPEAT: return TEXCOORDMODE_MIRROR;
-   default: return TEXCOORDMODE_WRAP;
-   }
-}
-
-
-/**
- */
-static void i915ImportTexObjState( struct gl_texture_object *texObj )
-{   
-   i915TextureObjectPtr t = (i915TextureObjectPtr)texObj->DriverData;
-   int minFilt = 0, mipFilt = 0, magFilt = 0, shadow = 0;
-
-   if(INTEL_DEBUG&DEBUG_DRI)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   switch (texObj->MinFilter) {
-   case GL_NEAREST:
-      minFilt = FILTER_NEAREST;
-      mipFilt = MIPFILTER_NONE;
-      break;
-   case GL_LINEAR:
-      minFilt = FILTER_LINEAR;
-      mipFilt = MIPFILTER_NONE;
-      break;
-   case GL_NEAREST_MIPMAP_NEAREST:
-      minFilt = FILTER_NEAREST;
-      mipFilt = MIPFILTER_NEAREST;
-      break;
-   case GL_LINEAR_MIPMAP_NEAREST:
-      minFilt = FILTER_LINEAR;
-      mipFilt = MIPFILTER_NEAREST;
-      break;
-   case GL_NEAREST_MIPMAP_LINEAR:
-      minFilt = FILTER_NEAREST;
-      mipFilt = MIPFILTER_LINEAR;
-      break;
-   case GL_LINEAR_MIPMAP_LINEAR:
-      minFilt = FILTER_LINEAR;
-      mipFilt = MIPFILTER_LINEAR;
-      break;
+   switch (wrap) {
+   case GL_REPEAT:
+      return TEXCOORDMODE_WRAP;
+   case GL_CLAMP:
+      return TEXCOORDMODE_CLAMP_EDGE;   /* not quite correct */
+   case GL_CLAMP_TO_EDGE:
+      return TEXCOORDMODE_CLAMP_EDGE;
+   case GL_CLAMP_TO_BORDER:
+      return TEXCOORDMODE_CLAMP_BORDER;
+   case GL_MIRRORED_REPEAT:
+      return TEXCOORDMODE_MIRROR;
    default:
-      break;
-   }
-
-   if ( texObj->MaxAnisotropy > 1.0 ) {
-      minFilt = FILTER_ANISOTROPIC; 
-      magFilt = FILTER_ANISOTROPIC;
-   }
-   else {
-      switch (texObj->MagFilter) {
-      case GL_NEAREST:
-	 magFilt = FILTER_NEAREST;
-	 break;
-      case GL_LINEAR:
-	 magFilt = FILTER_LINEAR;
-	 break;
-      default:
-	 break;
-      }  
-   }
-
-   if (texObj->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB && 
-       texObj->Target != GL_TEXTURE_3D) {
-
-      shadow = SS2_SHADOW_ENABLE;
-      shadow |= intel_translate_compare_func( texObj->CompareFunc );
-      
-      minFilt = FILTER_4X4_FLAT;
-      magFilt = FILTER_4X4_FLAT;
-   }
-
-
-   t->Setup[I915_TEXREG_SS2] &= ~(SS2_MIN_FILTER_MASK |
-				 SS2_MIP_FILTER_MASK |
-				 SS2_MAG_FILTER_MASK |
-				 SS2_SHADOW_ENABLE |
-				 SS2_SHADOW_FUNC_MASK);
-   t->Setup[I915_TEXREG_SS2] |= ((minFilt << SS2_MIN_FILTER_SHIFT) |
-				(mipFilt << SS2_MIP_FILTER_SHIFT) |
-				(magFilt << SS2_MAG_FILTER_SHIFT) |
-				shadow);
-
-   {
-      GLuint ss3 = t->Setup[I915_TEXREG_SS3] & ~(SS3_TCX_ADDR_MODE_MASK |
-						SS3_TCY_ADDR_MODE_MASK |
-						SS3_TCZ_ADDR_MODE_MASK);
-      GLenum ws = texObj->WrapS;
-      GLenum wt = texObj->WrapT;
-      GLenum wr = texObj->WrapR;
-      
-      t->refs_border_color = 0;
-
-      if (texObj->Target == GL_TEXTURE_3D &&
-	  (texObj->MinFilter != GL_NEAREST ||
-	   texObj->MagFilter != GL_NEAREST)) {
-	 
-	 /* Try to mimic GL_CLAMP functionality a little better -
-	  * switch to CLAMP_TO_BORDER whenever a non-NEAREST filter is
-	  * in use.  Only do this for 3D textures at the moment --
-	  * doing it universally would fix the conform texbc.c
-	  * failure, though.
-	  */
-	 if (ws == GL_CLAMP) ws = GL_CLAMP_TO_BORDER;
-	 if (wt == GL_CLAMP) wt = GL_CLAMP_TO_BORDER;
-	 if (wr == GL_CLAMP) wr = GL_CLAMP_TO_BORDER;
-
-	 /* 3D textures don't seem to respect the border color.
-	  * Fallback if there's ever a danger that they might refer to
-	  * it.
-	  */
-	 if (ws == GL_CLAMP_TO_BORDER) t->refs_border_color = 1;
-	 if (wt == GL_CLAMP_TO_BORDER) t->refs_border_color = 1;
-	 if (wr == GL_CLAMP_TO_BORDER) t->refs_border_color = 1;
-      }
-
-      ss3 |= translate_wrap_mode(ws) << SS3_TCX_ADDR_MODE_SHIFT;
-      ss3 |= translate_wrap_mode(wt) << SS3_TCY_ADDR_MODE_SHIFT;
-      ss3 |= translate_wrap_mode(wr) << SS3_TCZ_ADDR_MODE_SHIFT;
-   
-      if (ss3 != t->Setup[I915_TEXREG_SS3]) {
-	 t->intel.dirty = I915_UPLOAD_TEX_ALL;
-	 t->Setup[I915_TEXREG_SS3] = ss3;
-      }
-   }
-
-   {   
-      const GLubyte *color = texObj->_BorderChan;
-
-      t->Setup[I915_TEXREG_SS4] = INTEL_PACKCOLOR8888(color[0],color[1],
-						     color[2],color[3]);
+      return TEXCOORDMODE_WRAP;
    }
 }
 
 
 
-static void i915_import_tex_unit( i915ContextPtr i915, 
-				 i915TextureObjectPtr t,
-				 GLuint unit )
+/* Recalculate all state from scratch.  Perhaps not the most
+ * efficient, but this has gotten complex enough that we need
+ * something which is understandable and reliable.
+ */
+static GLboolean
+i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
 {
-   GLuint state[I915_TEX_SETUP_SIZE];
+   GLcontext *ctx = &intel->ctx;
+   struct i915_context *i915 = i915_context(ctx);
+   struct gl_texture_unit *tUnit = &ctx->Texture.Unit[unit];
+   struct gl_texture_object *tObj = tUnit->_Current;
+   struct intel_texture_object *intelObj = intel_texture_object(tObj);
+   struct gl_texture_image *firstImage;
+   GLuint *state = i915->state.Tex[unit], format, pitch;
+   GLint lodbias;
 
-   if(INTEL_DEBUG&DEBUG_TEXTURE)
-      fprintf(stderr, "%s unit(%d)\n", __FUNCTION__, unit);
-   
-   if (i915->intel.CurrentTexObj[unit]) 
-      i915->intel.CurrentTexObj[unit]->base.bound &= ~(1U << unit);
+   memset(state, 0, sizeof(i915->state.Tex[unit])); /* whole unit, not ptr */
 
-   i915->intel.CurrentTexObj[unit] = (intelTextureObjectPtr)t;
-   t->intel.base.bound |= (1 << unit);
+   /* We need to refcount these. */
 
-   if (t->intel.dirty & I915_UPLOAD_TEX(unit)) {
-      i915ImportTexObjState( t->intel.base.tObj );
-      t->intel.dirty &= ~I915_UPLOAD_TEX(unit);
+   if (i915->state.tex_buffer[unit] != NULL) {
+       dri_bo_unreference(i915->state.tex_buffer[unit]);
+       i915->state.tex_buffer[unit] = NULL;
    }
 
-   state[I915_TEXREG_MS2] = t->intel.TextureOffset;
-   state[I915_TEXREG_MS3] = t->Setup[I915_TEXREG_MS3];
-   state[I915_TEXREG_MS4] = t->Setup[I915_TEXREG_MS4];
-
-   state[I915_TEXREG_SS2] = (i915->state.Tex[unit][I915_TEXREG_SS2] &
-			    SS2_LOD_BIAS_MASK);
-   state[I915_TEXREG_SS2] |= (t->Setup[I915_TEXREG_SS2] & ~SS2_LOD_BIAS_MASK);
-
-   state[I915_TEXREG_SS3] = (i915->state.Tex[unit][I915_TEXREG_SS3] &
-			    SS3_NORMALIZED_COORDS);
-   state[I915_TEXREG_SS3] |= (t->Setup[I915_TEXREG_SS3] &
-			     ~(SS3_NORMALIZED_COORDS|
-			       SS3_TEXTUREMAP_INDEX_MASK));
+   if (!intelObj->imageOverride && !intel_finalize_mipmap_tree(intel, unit))
+      return GL_FALSE;
 
-   state[I915_TEXREG_SS3] |= (unit<<SS3_TEXTUREMAP_INDEX_SHIFT);
+   /* Get first image here, since intelObj->firstLevel will get set in
+    * the intel_finalize_mipmap_tree() call above.
+    */
+   firstImage = tObj->Image[0][intelObj->firstLevel];
 
-   state[I915_TEXREG_SS4] = t->Setup[I915_TEXREG_SS4];
+   if (intelObj->imageOverride) {
+      i915->state.tex_buffer[unit] = NULL;
+      i915->state.tex_offset[unit] = intelObj->textureOffset;
 
+      switch (intelObj->depthOverride) {
+      case 32:
+	 format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
+	 break;
+      case 24:
+      default:
+	 format = MAPSURF_32BIT | MT_32BIT_XRGB8888;
+	 break;
+      case 16:
+	 format = MAPSURF_16BIT | MT_16BIT_RGB565;
+	 break;
+      }
 
-   if (memcmp(state, i915->state.Tex[unit], sizeof(state)) != 0) {
-      I915_STATECHANGE( i915, I915_UPLOAD_TEX(unit) );
-      memcpy(i915->state.Tex[unit], state, sizeof(state));
+      pitch = intelObj->pitchOverride;
+   } else {
+      dri_bo_reference(intelObj->mt->region->buffer);
+      i915->state.tex_buffer[unit] = intelObj->mt->region->buffer;
+      i915->state.tex_offset[unit] =  intel_miptree_image_offset(intelObj->mt,
+								 0, intelObj->
+								 firstLevel);
+
+      format = translate_texture_format(firstImage->TexFormat->MesaFormat, 
+		tObj->DepthMode);
+      pitch = intelObj->mt->pitch * intelObj->mt->cpp;
    }
-}
-
-
 
-static GLboolean enable_tex_common( GLcontext *ctx, GLuint unit )
-{
-   i915ContextPtr i915 = I915_CONTEXT(ctx);
-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-   struct gl_texture_object *tObj = texUnit->_Current;
-   i915TextureObjectPtr t = (i915TextureObjectPtr)tObj->DriverData;
+   state[I915_TEXREG_MS3] =
+      (((firstImage->Height - 1) << MS3_HEIGHT_SHIFT) |
+       ((firstImage->Width - 1) << MS3_WIDTH_SHIFT) | format |
+       MS3_USE_FENCE_REGS);
 
-   if (0) fprintf(stderr, "%s %d\n", __FUNCTION__, unit);
+   state[I915_TEXREG_MS4] =
+     ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) | MS4_CUBE_FACE_ENA_MASK |
+       ((((intelObj->lastLevel - intelObj->firstLevel) * 4)) <<
+	MS4_MAX_LOD_SHIFT) | ((firstImage->Depth - 1) <<
+			      MS4_VOLUME_DEPTH_SHIFT));
 
-   if (!(i915->state.active & I915_UPLOAD_TEX(unit))) {
-      I915_ACTIVESTATE(i915, I915_UPLOAD_TEX(unit), GL_TRUE);
-   }
 
-   /* Fallback if there's a texture border */
-   if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) {
-      return GL_FALSE;
-   }
+   {
+      GLuint minFilt, mipFilt, magFilt;
 
+      switch (tObj->MinFilter) {
+      case GL_NEAREST:
+         minFilt = FILTER_NEAREST;
+         mipFilt = MIPFILTER_NONE;
+         break;
+      case GL_LINEAR:
+         minFilt = FILTER_LINEAR;
+         mipFilt = MIPFILTER_NONE;
+         break;
+      case GL_NEAREST_MIPMAP_NEAREST:
+         minFilt = FILTER_NEAREST;
+         mipFilt = MIPFILTER_NEAREST;
+         break;
+      case GL_LINEAR_MIPMAP_NEAREST:
+         minFilt = FILTER_LINEAR;
+         mipFilt = MIPFILTER_NEAREST;
+         break;
+      case GL_NEAREST_MIPMAP_LINEAR:
+         minFilt = FILTER_NEAREST;
+         mipFilt = MIPFILTER_LINEAR;
+         break;
+      case GL_LINEAR_MIPMAP_LINEAR:
+         minFilt = FILTER_LINEAR;
+         mipFilt = MIPFILTER_LINEAR;
+         break;
+      default:
+         return GL_FALSE;
+      }
 
-   /* Update state if this is a different texture object to last
-    * time.
-    */
-   if (i915->intel.CurrentTexObj[unit] != &t->intel || 
-       (t->intel.dirty & I915_UPLOAD_TEX(unit))) {
-      i915_import_tex_unit( i915, t, unit);
-      i915->tex_program.translated = 0;
-   }
+      if (tObj->MaxAnisotropy > 1.0) {
+         minFilt = FILTER_ANISOTROPIC;
+         magFilt = FILTER_ANISOTROPIC;
+      }
+      else {
+         switch (tObj->MagFilter) {
+         case GL_NEAREST:
+            magFilt = FILTER_NEAREST;
+            break;
+         case GL_LINEAR:
+            magFilt = FILTER_LINEAR;
+            break;
+         default:
+            return GL_FALSE;
+         }
+      }
 
-   return GL_TRUE;
-}
+      lodbias = (int) ((tUnit->LodBias + tObj->LodBias) * 16.0);
+      if (lodbias < -256)
+          lodbias = -256;
+      if (lodbias > 255)
+          lodbias = 255;
+      state[I915_TEXREG_SS2] = (((GLuint) lodbias << SS2_LOD_BIAS_SHIFT) &
+                                SS2_LOD_BIAS_MASK); /* cast: lodbias may be < 0 */
 
-static GLboolean enable_tex_rect( GLcontext *ctx, GLuint unit )
-{
-   i915ContextPtr i915 = I915_CONTEXT(ctx);
-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-   struct gl_texture_object *tObj = texUnit->_Current;
-   i915TextureObjectPtr t = (i915TextureObjectPtr)tObj->DriverData;
-   GLuint ss3 = i915->state.Tex[unit][I915_TEXREG_SS3];
+      /* YUV conversion:
+       */
+      if (firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR ||
+          firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR_REV)
+         state[I915_TEXREG_SS2] |= SS2_COLORSPACE_CONVERSION;
 
-   ss3 &= ~SS3_NORMALIZED_COORDS;
+      /* Shadow:
+       */
+      if (tObj->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB &&
+          tObj->Target != GL_TEXTURE_3D) {
+         if (tObj->Target == GL_TEXTURE_1D) 
+            return GL_FALSE;
 
-   if (ss3 != i915->state.Tex[unit][I915_TEXREG_SS3]) {
-      I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit));
-      i915->state.Tex[unit][I915_TEXREG_SS3] = ss3;
-   }
+         state[I915_TEXREG_SS2] |=
+            (SS2_SHADOW_ENABLE |
+             intel_translate_shadow_compare_func(tObj->CompareFunc));
 
-   /* Upload teximages (not pipelined)
-    */
-   if (t->intel.base.dirty_images[0]) {
-      i915SetTexImages( i915, tObj );
-      if (!intelUploadTexImages( &i915->intel, &t->intel, 0 )) {
-	 return GL_FALSE;
+         minFilt = FILTER_4X4_FLAT;
+         magFilt = FILTER_4X4_FLAT;
       }
-   }
-
-   return GL_TRUE;
-}
 
+      state[I915_TEXREG_SS2] |= ((minFilt << SS2_MIN_FILTER_SHIFT) |
+                                 (mipFilt << SS2_MIP_FILTER_SHIFT) |
+                                 (magFilt << SS2_MAG_FILTER_SHIFT));
+   }
 
-static GLboolean enable_tex_2d( GLcontext *ctx, GLuint unit )
-{
-   i915ContextPtr i915 = I915_CONTEXT(ctx);
-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-   struct gl_texture_object *tObj = texUnit->_Current;
-   i915TextureObjectPtr t = (i915TextureObjectPtr)tObj->DriverData;
-   GLuint ss3 = i915->state.Tex[unit][I915_TEXREG_SS3];
-
-   ss3 |= SS3_NORMALIZED_COORDS;
+   {
+      GLenum ws = tObj->WrapS;
+      GLenum wt = tObj->WrapT;
+      GLenum wr = tObj->WrapR;
 
-   if (ss3 != i915->state.Tex[unit][I915_TEXREG_SS3]) {
-      I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit));
-      i915->state.Tex[unit][I915_TEXREG_SS3] = ss3;
-   }
 
-   /* Upload teximages (not pipelined)
-    */
-   if (t->intel.base.dirty_images[0]) {
-      i915SetTexImages( i915, tObj );
-      if (!intelUploadTexImages( &i915->intel, &t->intel, 0 )) {
-	 return GL_FALSE;
-      }
-   }
+      /* 3D textures don't seem to respect the border color.
+       * Fallback if there's ever a danger that they might refer to
+       * it.  
+       * 
+       * Effectively this means fallback on 3D clamp or
+       * clamp_to_border.
+       */
+      if (tObj->Target == GL_TEXTURE_3D &&
+          (tObj->MinFilter != GL_NEAREST ||
+           tObj->MagFilter != GL_NEAREST) &&
+          (ws == GL_CLAMP ||
+           wt == GL_CLAMP ||
+           wr == GL_CLAMP ||
+           ws == GL_CLAMP_TO_BORDER ||
+           wt == GL_CLAMP_TO_BORDER || wr == GL_CLAMP_TO_BORDER))
+         return GL_FALSE;
 
-   return GL_TRUE;
-}
 
-static GLboolean enable_tex_cube( GLcontext *ctx, GLuint unit )
-{
-   i915ContextPtr i915 = I915_CONTEXT(ctx);
-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-   struct gl_texture_object *tObj = texUnit->_Current;
-   i915TextureObjectPtr t = (i915TextureObjectPtr)tObj->DriverData;
-   GLuint ss3 = i915->state.Tex[unit][I915_TEXREG_SS3];
-   GLuint face;
-
-   ss3 |= SS3_NORMALIZED_COORDS;
-
-   if (ss3 != i915->state.Tex[unit][I915_TEXREG_SS3]) {
-      I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit));
-      i915->state.Tex[unit][I915_TEXREG_SS3] = ss3;
-   }
+      state[I915_TEXREG_SS3] = ss3;     /* SS3_NORMALIZED_COORDS, or 0 (RECT) */
 
-   /* Upload teximages (not pipelined)
-    */
-   if ( t->intel.base.dirty_images[0] || t->intel.base.dirty_images[1] ||
-        t->intel.base.dirty_images[2] || t->intel.base.dirty_images[3] ||
-        t->intel.base.dirty_images[4] || t->intel.base.dirty_images[5] ) {
-      i915SetTexImages( i915, tObj );
-   }
+      state[I915_TEXREG_SS3] |=
+         ((translate_wrap_mode(ws) << SS3_TCX_ADDR_MODE_SHIFT) |
+          (translate_wrap_mode(wt) << SS3_TCY_ADDR_MODE_SHIFT) |
+          (translate_wrap_mode(wr) << SS3_TCZ_ADDR_MODE_SHIFT));
 
-   /* upload (per face) */
-   for (face = 0; face < 6; face++) {
-      if (t->intel.base.dirty_images[face]) {
-	 if (!intelUploadTexImages( &i915->intel, &t->intel, face )) {
-	    return GL_FALSE;
-	 }
-      }
+      state[I915_TEXREG_SS3] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT);
    }
 
 
-   return GL_TRUE;
-}
+   state[I915_TEXREG_SS4] = INTEL_PACKCOLOR8888(tObj->_BorderChan[0],
+                                                tObj->_BorderChan[1],
+                                                tObj->_BorderChan[2],
+                                                tObj->_BorderChan[3]);
 
-static GLboolean enable_tex_3d( GLcontext *ctx, GLuint unit )
-{
-   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
-   i915TextureObjectPtr t = (i915TextureObjectPtr)tObj->DriverData;
 
-   /* 3D textures on I915 seem to get bogus border colors, hence this
-    * fallback:
+   I915_ACTIVESTATE(i915, I915_UPLOAD_TEX(unit), GL_TRUE);
+   /* memcmp was already disabled, but definitely won't work as the
+    * region might now change and that wouldn't be detected:
     */
-   if (t->refs_border_color)
-      return GL_FALSE;
+   I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit));
 
-   return GL_TRUE;
-}
 
-
-
- 
-static GLboolean disable_tex( GLcontext *ctx, GLuint unit )
-{
-   i915ContextPtr i915 = I915_CONTEXT(ctx);
-
-   if (i915->state.active & I915_UPLOAD_TEX(unit)) {
-      I915_ACTIVESTATE(i915, I915_UPLOAD_TEX(unit), GL_FALSE);
-   }
-
-   /* The old texture is no longer bound to this texture unit.
-    * Mark it as such.
-    */
-   if ( i915->intel.CurrentTexObj[unit] != NULL ) {
-      i915->intel.CurrentTexObj[unit]->base.bound &= ~(1U << 0);
-      i915->intel.CurrentTexObj[unit] = NULL;
-   }
+#if 0
+   DBG(TEXTURE, "state[I915_TEXREG_SS2] = 0x%x\n", state[I915_TEXREG_SS2]);
+   DBG(TEXTURE, "state[I915_TEXREG_SS3] = 0x%x\n", state[I915_TEXREG_SS3]);
+   DBG(TEXTURE, "state[I915_TEXREG_SS4] = 0x%x\n", state[I915_TEXREG_SS4]);
+   DBG(TEXTURE, "state[I915_TEXREG_MS2] = 0x%x\n", state[I915_TEXREG_MS2]);
+   DBG(TEXTURE, "state[I915_TEXREG_MS3] = 0x%x\n", state[I915_TEXREG_MS3]);
+   DBG(TEXTURE, "state[I915_TEXREG_MS4] = 0x%x\n", state[I915_TEXREG_MS4]);
+#endif
 
    return GL_TRUE;
 }
 
-static GLboolean i915UpdateTexUnit( GLcontext *ctx, GLuint unit )
-{
-   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
 
-   if (texUnit->_ReallyEnabled &&
-       INTEL_CONTEXT(ctx)->intelScreen->tex.size < 2048 * 1024)
-      return GL_FALSE;
 
-   switch (texUnit->_ReallyEnabled) {
-   case TEXTURE_1D_BIT:
-   case TEXTURE_2D_BIT:
-      return (enable_tex_2d( ctx, unit ) &&
-	      enable_tex_common( ctx, unit ));
-   case TEXTURE_RECT_BIT:
-      return (enable_tex_rect( ctx, unit ) &&
-	      enable_tex_common( ctx, unit ));
-   case TEXTURE_CUBE_BIT:
-      return (enable_tex_cube( ctx, unit ) &&
-	      enable_tex_common( ctx, unit ));
-   case TEXTURE_3D_BIT:
-       return (enable_tex_2d( ctx, unit ) && 
-	       enable_tex_common( ctx, unit ) &&
-	       enable_tex_3d( ctx, unit)); 
-   case 0:
-      return disable_tex( ctx, unit );
-   default:
-      return GL_FALSE;
-   }
-}
 
-
-void i915UpdateTextureState( intelContextPtr intel )
+void
+i915UpdateTextureState(struct intel_context *intel)
 {
-   GLcontext *ctx = &intel->ctx;
    GLboolean ok = GL_TRUE;
    GLuint i;
 
-   for (i = 0 ; i < I915_TEX_UNITS && ok ; i++) {
-      ok = i915UpdateTexUnit( ctx, i );
+   for (i = 0; i < I915_TEX_UNITS && ok; i++) {
+      switch (intel->ctx.Texture.Unit[i]._ReallyEnabled) {
+      case TEXTURE_1D_BIT:
+      case TEXTURE_2D_BIT:
+      case TEXTURE_CUBE_BIT:
+      case TEXTURE_3D_BIT:
+         ok = i915_update_tex_unit(intel, i, SS3_NORMALIZED_COORDS);
+         break;
+      case TEXTURE_RECT_BIT:
+         ok = i915_update_tex_unit(intel, i, 0);
+         break;
+      case 0:{
+            struct i915_context *i915 = i915_context(&intel->ctx);
+            if (i915->state.active & I915_UPLOAD_TEX(i))
+               I915_ACTIVESTATE(i915, I915_UPLOAD_TEX(i), GL_FALSE);
+
+	    if (i915->state.tex_buffer[i] != NULL) {
+	       dri_bo_unreference(i915->state.tex_buffer[i]);
+	       i915->state.tex_buffer[i] = NULL;
+	    }
+
+            break;
+         }
+      default:
+         ok = GL_FALSE;
+         break;
+      }
    }
 
-   FALLBACK( intel, I915_FALLBACK_TEXTURE, !ok );
+   FALLBACK(intel, I915_FALLBACK_TEXTURE, !ok);
 }
-
-
-
diff --git a/i915/i915_vtbl.c b/i915/i915_vtbl.c
index cc8a605..135bfaa 100644
--- a/i915/i915_vtbl.c
+++ b/i915/i915_vtbl.c
@@ -37,115 +37,147 @@
 #include "tnl/t_vertex.h"
 
 #include "intel_batchbuffer.h"
+#include "intel_tex.h"
+#include "intel_regions.h"
 
 #include "i915_reg.h"
 #include "i915_context.h"
 
-static void i915_render_start( intelContextPtr intel )
+#include "glapi.h"
+
+static void
+i915_render_prevalidate(struct intel_context *intel)
 {
-   GLcontext *ctx = &intel->ctx;
-   i915ContextPtr i915 = I915_CONTEXT(intel);
+   struct i915_context *i915 = i915_context(&intel->ctx);
 
-   if (ctx->FragmentProgram._Active) 
-      i915ValidateFragmentProgram( i915 );
-   else {
-      assert(!ctx->FragmentProgram._MaintainTexEnvProgram);
-      i915ValidateTextureProgram( i915 );
-   }
+   if (!intel->Fallback)
+       i915ValidateFragmentProgram(i915);
+}
+
+static void
+i915_render_start(struct intel_context *intel)
+{
 }
 
 
-static void i915_reduced_primitive_state( intelContextPtr intel,
-					  GLenum rprim )
+static void
+i915_reduced_primitive_state(struct intel_context *intel, GLenum rprim)
 {
-    i915ContextPtr i915 = I915_CONTEXT(intel);
-    GLuint st1 = i915->state.Stipple[I915_STPREG_ST1];
-
-    st1 &= ~ST1_ENABLE;
-
-    switch (rprim) {
-    case GL_QUADS: /* from RASTERIZE(GL_QUADS) in t_dd_tritemp.h */
-    case GL_TRIANGLES:
-       if (intel->ctx.Polygon.StippleFlag &&
-	   intel->hw_stipple)
-	  st1 |= ST1_ENABLE;
-       break;
-    case GL_LINES:
-    case GL_POINTS:
-    default:
-       break;
-    }
-
-    i915->intel.reduced_primitive = rprim;
-
-    if (st1 != i915->state.Stipple[I915_STPREG_ST1]) {
-       I915_STATECHANGE(i915, I915_UPLOAD_STIPPLE);
-       i915->state.Stipple[I915_STPREG_ST1] = st1;
-    }
+   struct i915_context *i915 = i915_context(&intel->ctx);
+   GLuint st1 = i915->state.Stipple[I915_STPREG_ST1];
+
+   st1 &= ~ST1_ENABLE;
+
+   switch (rprim) {
+   case GL_QUADS: /* from RASTERIZE(GL_QUADS) in t_dd_tritemp.h */
+   case GL_TRIANGLES:
+      if (intel->ctx.Polygon.StippleFlag && intel->hw_stipple)
+         st1 |= ST1_ENABLE;
+      break;
+   case GL_LINES:
+   case GL_POINTS:
+   default:
+      break;
+   }
+
+   i915->intel.reduced_primitive = rprim;
+
+   if (st1 != i915->state.Stipple[I915_STPREG_ST1]) {
+      INTEL_FIREVERTICES(intel);
+
+      I915_STATECHANGE(i915, I915_UPLOAD_STIPPLE);
+      i915->state.Stipple[I915_STPREG_ST1] = st1;
+   }
 }
 
 
 /* Pull apart the vertex format registers and figure out how large a
  * vertex is supposed to be. 
  */
-static GLboolean i915_check_vertex_size( intelContextPtr intel,
-					 GLuint expected )
+static GLboolean
+i915_check_vertex_size(struct intel_context *intel, GLuint expected)
 {
-   i915ContextPtr i915 = I915_CONTEXT(intel);
+   struct i915_context *i915 = i915_context(&intel->ctx);
    int lis2 = i915->current->Ctx[I915_CTXREG_LIS2];
    int lis4 = i915->current->Ctx[I915_CTXREG_LIS4];
    int i, sz = 0;
 
    switch (lis4 & S4_VFMT_XYZW_MASK) {
-   case S4_VFMT_XY: sz = 2; break;
-   case S4_VFMT_XYZ: sz = 3; break;
-   case S4_VFMT_XYW: sz = 3; break;
-   case S4_VFMT_XYZW: sz = 4; break;
-   default: 
+   case S4_VFMT_XY:
+      sz = 2;
+      break;
+   case S4_VFMT_XYZ:
+      sz = 3;
+      break;
+   case S4_VFMT_XYW:
+      sz = 3;
+      break;
+   case S4_VFMT_XYZW:
+      sz = 4;
+      break;
+   default:
       fprintf(stderr, "no xyzw specified\n");
       return 0;
    }
 
-   if (lis4 & S4_VFMT_SPEC_FOG) sz++;
-   if (lis4 & S4_VFMT_COLOR) sz++;
-   if (lis4 & S4_VFMT_DEPTH_OFFSET) sz++;
-   if (lis4 & S4_VFMT_POINT_WIDTH) sz++;
-   if (lis4 & S4_VFMT_FOG_PARAM) sz++;
-	
-   for (i = 0 ; i < 8 ; i++) { 
+   if (lis4 & S4_VFMT_SPEC_FOG)
+      sz++;
+   if (lis4 & S4_VFMT_COLOR)
+      sz++;
+   if (lis4 & S4_VFMT_DEPTH_OFFSET)
+      sz++;
+   if (lis4 & S4_VFMT_POINT_WIDTH)
+      sz++;
+   if (lis4 & S4_VFMT_FOG_PARAM)
+      sz++;
+
+   for (i = 0; i < 8; i++) {
       switch (lis2 & S2_TEXCOORD_FMT0_MASK) {
-      case TEXCOORDFMT_2D: sz += 2; break;
-      case TEXCOORDFMT_3D: sz += 3; break;
-      case TEXCOORDFMT_4D: sz += 4; break;
-      case TEXCOORDFMT_1D: sz += 1; break;
-      case TEXCOORDFMT_2D_16: sz += 1; break;
-      case TEXCOORDFMT_4D_16: sz += 2; break;
-      case TEXCOORDFMT_NOT_PRESENT: break;
+      case TEXCOORDFMT_2D:
+         sz += 2;
+         break;
+      case TEXCOORDFMT_3D:
+         sz += 3;
+         break;
+      case TEXCOORDFMT_4D:
+         sz += 4;
+         break;
+      case TEXCOORDFMT_1D:
+         sz += 1;
+         break;
+      case TEXCOORDFMT_2D_16:
+         sz += 1;
+         break;
+      case TEXCOORDFMT_4D_16:
+         sz += 2;
+         break;
+      case TEXCOORDFMT_NOT_PRESENT:
+         break;
       default:
-	 fprintf(stderr, "bad texcoord fmt %d\n", i);
-	 return GL_FALSE;
+         fprintf(stderr, "bad texcoord fmt %d\n", i);
+         return GL_FALSE;
       }
       lis2 >>= S2_TEXCOORD_FMT1_SHIFT;
    }
-	
-   if (sz != expected) 
+
+   if (sz != expected)
       fprintf(stderr, "vertex size mismatch %d/%d\n", sz, expected);
-   
+
    return sz == expected;
 }
 
 
-static void i915_emit_invarient_state( intelContextPtr intel )
+static void
+i915_emit_invarient_state(struct intel_context *intel)
 {
    BATCH_LOCALS;
 
-   BEGIN_BATCH( 20 );
+   BEGIN_BATCH(200, IGNORE_CLIPRECTS);
 
    OUT_BATCH(_3DSTATE_AA_CMD |
-	     AA_LINE_ECAAR_WIDTH_ENABLE |
-	     AA_LINE_ECAAR_WIDTH_1_0 |
-	     AA_LINE_REGION_WIDTH_ENABLE |
-	     AA_LINE_REGION_WIDTH_1_0);
+             AA_LINE_ECAAR_WIDTH_ENABLE |
+             AA_LINE_ECAAR_WIDTH_1_0 |
+             AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0);
 
    OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
    OUT_BATCH(0);
@@ -158,35 +190,27 @@ static void i915_emit_invarient_state( intelContextPtr intel )
 
    /* Don't support texture crossbar yet */
    OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
-	     CSB_TCB(0, 0) |
-	     CSB_TCB(1, 1) |
-	     CSB_TCB(2, 2) |
-	     CSB_TCB(3, 3) |
-	     CSB_TCB(4, 4) |
-	     CSB_TCB(5, 5) |
-	     CSB_TCB(6, 6) |
-	     CSB_TCB(7, 7));
+             CSB_TCB(0, 0) |
+             CSB_TCB(1, 1) |
+             CSB_TCB(2, 2) |
+             CSB_TCB(3, 3) |
+             CSB_TCB(4, 4) | CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7));
 
    OUT_BATCH(_3DSTATE_RASTER_RULES_CMD |
-	     ENABLE_POINT_RASTER_RULE |
-	     OGL_POINT_RASTER_RULE |
-	     ENABLE_LINE_STRIP_PROVOKE_VRTX |
-	     ENABLE_TRI_FAN_PROVOKE_VRTX |
-	     LINE_STRIP_PROVOKE_VRTX(1) |
-	     TRI_FAN_PROVOKE_VRTX(2) | 
-	     ENABLE_TEXKILL_3D_4D |
-	     TEXKILL_4D);
+             ENABLE_POINT_RASTER_RULE |
+             OGL_POINT_RASTER_RULE |
+             ENABLE_LINE_STRIP_PROVOKE_VRTX |
+             ENABLE_TRI_FAN_PROVOKE_VRTX |
+             LINE_STRIP_PROVOKE_VRTX(1) |
+             TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D);
 
    /* Need to initialize this to zero.
     */
-   OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 
-	     I1_LOAD_S(3) |
-	     (0));
+   OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0));
    OUT_BATCH(0);
- 
+
    /* XXX: Use this */
-   OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | 
-	     DISABLE_SCISSOR_RECT);
+   OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
 
    OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD);
    OUT_BATCH(0);
@@ -194,29 +218,23 @@ static void i915_emit_invarient_state( intelContextPtr intel )
 
    OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
 
-   OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */
+   OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0);       /* disable indirect state */
    OUT_BATCH(0);
 
 
    /* Don't support twosided stencil yet */
-   OUT_BATCH(_3DSTATE_BACKFACE_STENCIL_OPS |
-	     BFO_ENABLE_STENCIL_TWO_SIDE |
-	     0 );
-   
+   OUT_BATCH(_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0);
+   OUT_BATCH(0);
+
    ADVANCE_BATCH();
 }
 
 
-#define emit( intel, state, size )			\
-do {							\
-   int k;						\
-   BEGIN_BATCH( (size) / sizeof(GLuint));		\
-   for (k = 0 ; k < (size) / sizeof(GLuint) ; k++)	\
-      OUT_BATCH((state)[k]);				\
-   ADVANCE_BATCH();					\
-} while (0);
+#define emit(intel, state, size )		     \
+   intel_batchbuffer_data(intel->batch, state, size, IGNORE_CLIPRECTS )
 
-static GLuint get_dirty( struct i915_hw_state *state )
+static GLuint
+get_dirty(struct i915_hw_state *state)
 {
    GLuint dirty;
 
@@ -227,94 +245,168 @@ static GLuint get_dirty( struct i915_hw_state *state )
    if (dirty & I915_UPLOAD_TEX_ALL)
       state->emitted &= ~I915_UPLOAD_TEX_ALL;
    dirty = state->active & ~state->emitted;
-
    return dirty;
 }
 
 
-static GLuint get_state_size( struct i915_hw_state *state )
+static GLuint
+get_state_size(struct i915_hw_state *state)
 {
    GLuint dirty = get_dirty(state);
    GLuint i;
    GLuint sz = 0;
 
    if (dirty & I915_UPLOAD_INVARIENT)
-      sz += 20 * sizeof(int);
+      sz += 30 * 4;
 
    if (dirty & I915_UPLOAD_CTX)
       sz += sizeof(state->Ctx);
 
-   if (dirty & I915_UPLOAD_BUFFERS) 
+   if (dirty & I915_UPLOAD_BUFFERS)
       sz += sizeof(state->Buffer);
 
    if (dirty & I915_UPLOAD_STIPPLE)
       sz += sizeof(state->Stipple);
 
-   if (dirty & I915_UPLOAD_FOG) 
+   if (dirty & I915_UPLOAD_FOG)
       sz += sizeof(state->Fog);
 
    if (dirty & I915_UPLOAD_TEX_ALL) {
       int nr = 0;
-      for (i = 0; i < I915_TEX_UNITS; i++) 
-	 if (dirty & I915_UPLOAD_TEX(i)) 
-	    nr++;
+      for (i = 0; i < I915_TEX_UNITS; i++)
+         if (dirty & I915_UPLOAD_TEX(i))
+            nr++;
 
-      sz += (2+nr*3) * sizeof(GLuint) * 2;
+      sz += (2 + nr * 3) * sizeof(GLuint) * 2;
    }
 
-   if (dirty & I915_UPLOAD_CONSTANTS) 
+   if (dirty & I915_UPLOAD_CONSTANTS)
       sz += state->ConstantSize * sizeof(GLuint);
 
-   if (dirty & I915_UPLOAD_PROGRAM) 
+   if (dirty & I915_UPLOAD_PROGRAM)
       sz += state->ProgramSize * sizeof(GLuint);
 
    return sz;
 }
 
-
 /* Push the state into the sarea and/or texture memory.
  */
-static void i915_emit_state( intelContextPtr intel )
+static void
+i915_emit_state(struct intel_context *intel)
 {
-   i915ContextPtr i915 = I915_CONTEXT(intel);
+   struct i915_context *i915 = i915_context(&intel->ctx);
    struct i915_hw_state *state = i915->current;
    int i;
-   GLuint dirty = get_dirty(state);
-   GLuint counter = intel->batch.counter;
+   int ret, count;
+   GLuint dirty;
+   GET_CURRENT_CONTEXT(ctx);
    BATCH_LOCALS;
 
-   if (intel->batch.space < get_state_size(state)) {
-      intelFlushBatch(intel, GL_TRUE);
-      dirty = get_dirty(state);
-      counter = intel->batch.counter;
+   /* We don't hold the lock at this point, so want to make sure that
+    * there won't be a buffer wrap between the state emits and the primitive
+    * emit header.
+    *
+    * It might be better to talk about explicit places where
+    * scheduling is allowed, rather than assume that it is whenever a
+    * batchbuffer fills up.
+    *
+    * Set the space as LOOP_CLIPRECTS now, since that's what our primitives
+    * will be emitted under.
+    */
+   intel_batchbuffer_require_space(intel->batch, get_state_size(state) + 8,
+				   LOOP_CLIPRECTS);
+   count = 0;
+ again:
+   dirty = get_dirty(state);
+
+   ret = 0;
+   if (dirty & I915_UPLOAD_BUFFERS) {
+     ret |= dri_bufmgr_check_aperture_space(state->draw_region->buffer);
+     if (state->depth_region)
+        ret |= dri_bufmgr_check_aperture_space(state->depth_region->buffer);
    }
 
-   if (VERBOSE) 
+   if (dirty & I915_UPLOAD_TEX_ALL) {
+     for (i = 0; i < I915_TEX_UNITS; i++)
+       if (dirty & I915_UPLOAD_TEX(i)) {
+	   if (state->tex_buffer[i]) {
+	       ret |= dri_bufmgr_check_aperture_space(state->tex_buffer[i]);
+	   }
+       }
+   }
+   if (ret) {
+       if (count == 0) {
+	   count++;
+	   intel_batchbuffer_flush(intel->batch);
+	   goto again;
+       } else {
+	   _mesa_error(ctx, GL_OUT_OF_MEMORY, "i915 emit state");
+	   assert(0);
+       }
+   }
+
+   /* work out list of buffers to emit */
+   
+   /* Do this here as we may have flushed the batchbuffer above,
+    * causing more state to be dirty!
+    */
+   dirty = get_dirty(state);
+   state->emitted |= dirty;
+   assert(get_dirty(state) == 0);
+
+   if (INTEL_DEBUG & DEBUG_STATE)
       fprintf(stderr, "%s dirty: %x\n", __FUNCTION__, dirty);
 
    if (dirty & I915_UPLOAD_INVARIENT) {
-      if (VERBOSE) fprintf(stderr, "I915_UPLOAD_INVARIENT:\n"); 
-      i915_emit_invarient_state( intel );
+      if (INTEL_DEBUG & DEBUG_STATE)
+         fprintf(stderr, "I915_UPLOAD_INVARIENT:\n");
+      i915_emit_invarient_state(intel);
    }
 
    if (dirty & I915_UPLOAD_CTX) {
-      if (VERBOSE) fprintf(stderr, "I915_UPLOAD_CTX:\n"); 
-      emit( i915, state->Ctx, sizeof(state->Ctx) );
+      if (INTEL_DEBUG & DEBUG_STATE)
+         fprintf(stderr, "I915_UPLOAD_CTX:\n");
+
+      emit(intel, state->Ctx, sizeof(state->Ctx));
    }
 
    if (dirty & I915_UPLOAD_BUFFERS) {
-      if (VERBOSE) fprintf(stderr, "I915_UPLOAD_BUFFERS:\n"); 
-      emit( i915, state->Buffer, sizeof(state->Buffer) );
+      if (INTEL_DEBUG & DEBUG_STATE)
+         fprintf(stderr, "I915_UPLOAD_BUFFERS:\n");
+      BEGIN_BATCH(I915_DEST_SETUP_SIZE + 2, IGNORE_CLIPRECTS);
+      OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]);
+      OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR1]);
+      OUT_RELOC(state->draw_region->buffer,
+                DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+                state->draw_region->draw_offset);
+
+      if (state->depth_region) {
+         OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR0]);
+         OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR1]);
+         OUT_RELOC(state->depth_region->buffer,
+                   DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+                   state->depth_region->draw_offset);
+      }
+
+      OUT_BATCH(state->Buffer[I915_DESTREG_DV0]);
+      OUT_BATCH(state->Buffer[I915_DESTREG_DV1]);
+      OUT_BATCH(state->Buffer[I915_DESTREG_SENABLE]);
+      OUT_BATCH(state->Buffer[I915_DESTREG_SR0]);
+      OUT_BATCH(state->Buffer[I915_DESTREG_SR1]);
+      OUT_BATCH(state->Buffer[I915_DESTREG_SR2]);
+      ADVANCE_BATCH();
    }
 
    if (dirty & I915_UPLOAD_STIPPLE) {
-      if (VERBOSE) fprintf(stderr, "I915_UPLOAD_STIPPLE:\n"); 
-      emit( i915, state->Stipple, sizeof(state->Stipple) );
+      if (INTEL_DEBUG & DEBUG_STATE)
+         fprintf(stderr, "I915_UPLOAD_STIPPLE:\n");
+      emit(intel, state->Stipple, sizeof(state->Stipple));
    }
 
    if (dirty & I915_UPLOAD_FOG) {
-      if (VERBOSE) fprintf(stderr, "I915_UPLOAD_FOG:\n"); 
-      emit( i915, state->Fog, sizeof(state->Fog) );
+      if (INTEL_DEBUG & DEBUG_STATE)
+         fprintf(stderr, "I915_UPLOAD_FOG:\n");
+      emit(intel, state->Fog, sizeof(state->Fog));
    }
 
    /* Combine all the dirty texture state into a single command to
@@ -323,141 +415,218 @@ static void i915_emit_state( intelContextPtr intel )
    if (dirty & I915_UPLOAD_TEX_ALL) {
       int nr = 0;
 
-      for (i = 0; i < I915_TEX_UNITS; i++) 
-	 if (dirty & I915_UPLOAD_TEX(i)) 
-	    nr++;
+      for (i = 0; i < I915_TEX_UNITS; i++)
+         if (dirty & I915_UPLOAD_TEX(i))
+            nr++;
 
-      BEGIN_BATCH(2+nr*3);
-      OUT_BATCH(_3DSTATE_MAP_STATE | (3*nr));
+      BEGIN_BATCH(2 + nr * 3, IGNORE_CLIPRECTS);
+      OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
       OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
-      for (i = 0 ; i < I915_TEX_UNITS ; i++)
-	 if (dirty & I915_UPLOAD_TEX(i)) {
-	    OUT_BATCH(state->Tex[i][I915_TEXREG_MS2]);
-	    OUT_BATCH(state->Tex[i][I915_TEXREG_MS3]);
-	    OUT_BATCH(state->Tex[i][I915_TEXREG_MS4]);
-	 }
+      for (i = 0; i < I915_TEX_UNITS; i++)
+         if (dirty & I915_UPLOAD_TEX(i)) {
+
+            if (state->tex_buffer[i]) {
+               OUT_RELOC(state->tex_buffer[i],
+                         DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+                         state->tex_offset[i]);
+            }
+            else if (state == &i915->meta) {
+               assert(i == 0);
+               OUT_BATCH(0);
+            }
+            else {
+               OUT_BATCH(state->tex_offset[i]);
+            }
+
+            OUT_BATCH(state->Tex[i][I915_TEXREG_MS3]);
+            OUT_BATCH(state->Tex[i][I915_TEXREG_MS4]);
+         }
       ADVANCE_BATCH();
 
-      BEGIN_BATCH(2+nr*3);
-      OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3*nr));
+      BEGIN_BATCH(2 + nr * 3, IGNORE_CLIPRECTS);
+      OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * nr));
       OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
-      for (i = 0 ; i < I915_TEX_UNITS ; i++)
-	 if (dirty & I915_UPLOAD_TEX(i)) {
-	    OUT_BATCH(state->Tex[i][I915_TEXREG_SS2]);
-	    OUT_BATCH(state->Tex[i][I915_TEXREG_SS3]);
-	    OUT_BATCH(state->Tex[i][I915_TEXREG_SS4]);
-	 }
+      for (i = 0; i < I915_TEX_UNITS; i++)
+         if (dirty & I915_UPLOAD_TEX(i)) {
+            OUT_BATCH(state->Tex[i][I915_TEXREG_SS2]);
+            OUT_BATCH(state->Tex[i][I915_TEXREG_SS3]);
+            OUT_BATCH(state->Tex[i][I915_TEXREG_SS4]);
+         }
       ADVANCE_BATCH();
    }
 
    if (dirty & I915_UPLOAD_CONSTANTS) {
-      if (VERBOSE) fprintf(stderr, "I915_UPLOAD_CONSTANTS:\n"); 
-      emit( i915, state->Constant, state->ConstantSize * sizeof(GLuint) );
+      if (INTEL_DEBUG & DEBUG_STATE)
+         fprintf(stderr, "I915_UPLOAD_CONSTANTS:\n");
+      emit(intel, state->Constant, state->ConstantSize * sizeof(GLuint));
    }
 
    if (dirty & I915_UPLOAD_PROGRAM) {
-      if (VERBOSE) fprintf(stderr, "I915_UPLOAD_PROGRAM:\n"); 
+      if (state->ProgramSize) {
+         if (INTEL_DEBUG & DEBUG_STATE)
+            fprintf(stderr, "I915_UPLOAD_PROGRAM:\n");
+
+         assert((state->Program[0] & 0x1ff) + 2 == state->ProgramSize);
 
-      assert((state->Program[0] & 0x1ff)+2 == state->ProgramSize);
-      
-      emit( i915, state->Program, state->ProgramSize * sizeof(GLuint) );
-      if (VERBOSE)
-	 i915_disassemble_program( state->Program, state->ProgramSize );
+         emit(intel, state->Program, state->ProgramSize * sizeof(GLuint));
+         if (INTEL_DEBUG & DEBUG_STATE)
+            i915_disassemble_program(state->Program, state->ProgramSize);
+      }
    }
 
-   state->emitted |= dirty;
-   intel->batch.last_emit_state = counter;
-   assert(counter == intel->batch.counter);
+   intel->batch->dirty_state &= ~dirty;
+   assert(get_dirty(state) == 0);
+   assert((intel->batch->dirty_state & (1<<1)) == 0);
 }
 
-static void i915_destroy_context( intelContextPtr intel )
+static void
+i915_destroy_context(struct intel_context *intel)
 {
+   GLuint i;
+   struct i915_context *i915 = i915_context(&intel->ctx);
+
+   for (i = 0; i < I915_TEX_UNITS; i++) {
+      if (i915->state.tex_buffer[i] != NULL) {
+	 dri_bo_unreference(i915->state.tex_buffer[i]);
+	 i915->state.tex_buffer[i] = NULL;
+      }
+   }
+
    _tnl_free_vertices(&intel->ctx);
 }
 
 
 /**
- * Set the color buffer drawing region.
+ * Set the drawing regions for the color and depth/stencil buffers.
+ * This involves setting the pitch, cpp and buffer ID/location.
+ * Also set pixel format for color and Z rendering
+ * Used for setting both regular and meta state.
  */
-static void
-i915_set_color_region( intelContextPtr intel, const intelRegion *region)
+void
+i915_state_draw_region(struct intel_context *intel,
+                       struct i915_hw_state *state,
+                       struct intel_region *color_region,
+                       struct intel_region *depth_region)
 {
-   i915ContextPtr i915 = I915_CONTEXT(intel);
-   I915_STATECHANGE( i915, I915_UPLOAD_BUFFERS );
-   i915->state.Buffer[I915_DESTREG_CBUFADDR1] =
-      (BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(region->pitch) | BUF_3D_USE_FENCE);
-   i915->state.Buffer[I915_DESTREG_CBUFADDR2] = region->offset;
+   struct i915_context *i915 = i915_context(&intel->ctx);
+   GLuint value;
+
+   ASSERT(state == &i915->state || state == &i915->meta);
+
+   if (state->draw_region != color_region) {
+      intel_region_release(&state->draw_region);
+      intel_region_reference(&state->draw_region, color_region);
+   }
+   if (state->depth_region != depth_region) {
+      intel_region_release(&state->depth_region);
+      intel_region_reference(&state->depth_region, depth_region);
+   }
+
+   /*
+    * Set stride/cpp values
+    */
+   if (color_region) {
+      state->Buffer[I915_DESTREG_CBUFADDR0] = _3DSTATE_BUF_INFO_CMD;
+      state->Buffer[I915_DESTREG_CBUFADDR1] =
+         (BUF_3D_ID_COLOR_BACK |
+          BUF_3D_PITCH(color_region->pitch * color_region->cpp) |
+          BUF_3D_USE_FENCE);
+   }
+
+   if (depth_region) {
+      state->Buffer[I915_DESTREG_DBUFADDR0] = _3DSTATE_BUF_INFO_CMD;
+      state->Buffer[I915_DESTREG_DBUFADDR1] =
+         (BUF_3D_ID_DEPTH |
+          BUF_3D_PITCH(depth_region->pitch * depth_region->cpp) |
+          BUF_3D_USE_FENCE);
+   }
+
+   /*
+    * Compute/set I915_DESTREG_DV1 value
+    */
+   value = (DSTORG_HORT_BIAS(0x8) |     /* .5 */
+            DSTORG_VERT_BIAS(0x8) |     /* .5 */
+            LOD_PRECLAMP_OGL | TEX_DEFAULT_COLOR_OGL);
+   if (color_region && color_region->cpp == 4) {
+      value |= DV_PF_8888;
+   }
+   else {
+      value |= (DITHER_FULL_ALWAYS | DV_PF_565);
+   }
+   if (depth_region && depth_region->cpp == 4) {
+      value |= DEPTH_FRMT_24_FIXED_8_OTHER;
+   }
+   else {
+      value |= DEPTH_FRMT_16_FIXED;
+   }
+   state->Buffer[I915_DESTREG_DV1] = value;
+
+   I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS);
 }
 
 
-/**
- * specify the z-buffer/stencil region
- */
 static void
-i915_set_z_region( intelContextPtr intel, const intelRegion *region)
+i915_set_draw_region(struct intel_context *intel,
+                     struct intel_region *color_regions[],
+                     struct intel_region *depth_region,
+		     GLuint num_regions)
 {
-   i915ContextPtr i915 = I915_CONTEXT(intel);
-   I915_STATECHANGE( i915, I915_UPLOAD_BUFFERS );
-   i915->state.Buffer[I915_DESTREG_DBUFADDR1] =
-      (BUF_3D_ID_DEPTH | BUF_3D_PITCH(region->pitch) | BUF_3D_USE_FENCE);
-   i915->state.Buffer[I915_DESTREG_DBUFADDR2] = region->offset;
+   struct i915_context *i915 = i915_context(&intel->ctx);
+   i915_state_draw_region(intel, &i915->state, color_regions[0], depth_region);
 }
 
 
-/**
- * Set both the color and Z/stencil drawing regions.
- * Similar to two previous functions, but don't use I915_STATECHANGE()
- */
+
 static void
-i915_update_color_z_regions(intelContextPtr intel,
-                            const intelRegion *colorRegion,
-                            const intelRegion *depthRegion)
+i915_new_batch(struct intel_context *intel)
 {
-   i915ContextPtr i915 = I915_CONTEXT(intel);
+   struct i915_context *i915 = i915_context(&intel->ctx);
 
-   i915->state.Buffer[I915_DESTREG_CBUFADDR1] =
-      (BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(colorRegion->pitch) | BUF_3D_USE_FENCE);
-   i915->state.Buffer[I915_DESTREG_CBUFADDR2] = colorRegion->offset;
+   /* Mark all state as needing to be emitted when starting a new batchbuffer.
+    * Using hardware contexts would be an alternative, but they have some
+    * difficulties associated with them (physical address requirements).
+    */
+   i915->state.emitted = 0;
 
-   i915->state.Buffer[I915_DESTREG_DBUFADDR1] =
-      (BUF_3D_ID_DEPTH |
-       BUF_3D_PITCH(depthRegion->pitch) |  /* pitch in bytes */
-       BUF_3D_USE_FENCE);
-   i915->state.Buffer[I915_DESTREG_DBUFADDR2] = depthRegion->offset;
+   /* Check that we didn't just wrap our batchbuffer at a bad time. */
+   assert(!intel->no_batch_wrap);
 }
 
-
-static void i915_lost_hardware( intelContextPtr intel )
+static GLuint
+i915_flush_cmd(void)
 {
-   I915_CONTEXT(intel)->state.emitted = 0;
+   return MI_FLUSH | FLUSH_MAP_CACHE;
 }
 
-static void i915_emit_flush( intelContextPtr intel )
+static void 
+i915_assert_not_dirty( struct intel_context *intel )
 {
-   BATCH_LOCALS;
+   struct i915_context *i915 = i915_context(&intel->ctx);
+   struct i915_hw_state *state = i915->current;
+   GLuint dirty = get_dirty(state);
+   assert(!dirty);
+}
 
-   BEGIN_BATCH(2);
-   OUT_BATCH( MI_FLUSH | FLUSH_MAP_CACHE | FLUSH_RENDER_CACHE ); 
-   OUT_BATCH( 0 );
-   ADVANCE_BATCH();
+static void
+i915_note_unlock( struct intel_context *intel )
+{
+    /* nothing */
 }
 
 
-void i915InitVtbl( i915ContextPtr i915 )
+void
+i915InitVtbl(struct i915_context *i915)
 {
-   i915->intel.vtbl.alloc_tex_obj = i915AllocTexObj;
    i915->intel.vtbl.check_vertex_size = i915_check_vertex_size;
-   i915->intel.vtbl.clear_with_tris = i915ClearWithTris;
-   i915->intel.vtbl.rotate_window = i915RotateWindow;
    i915->intel.vtbl.destroy = i915_destroy_context;
    i915->intel.vtbl.emit_state = i915_emit_state;
-   i915->intel.vtbl.lost_hardware = i915_lost_hardware;
+   i915->intel.vtbl.new_batch = i915_new_batch;
    i915->intel.vtbl.reduced_primitive_state = i915_reduced_primitive_state;
    i915->intel.vtbl.render_start = i915_render_start;
-   i915->intel.vtbl.set_color_region = i915_set_color_region;
-   i915->intel.vtbl.set_z_region = i915_set_z_region;
-   i915->intel.vtbl.update_color_z_regions = i915_update_color_z_regions;
+   i915->intel.vtbl.render_prevalidate = i915_render_prevalidate;
+   i915->intel.vtbl.set_draw_region = i915_set_draw_region;
    i915->intel.vtbl.update_texture_state = i915UpdateTextureState;
-   i915->intel.vtbl.emit_flush = i915_emit_flush;
+   i915->intel.vtbl.flush_cmd = i915_flush_cmd;
+   i915->intel.vtbl.assert_not_dirty = i915_assert_not_dirty;
+   i915->intel.vtbl.note_unlock = i915_note_unlock; 
 }
-
diff --git a/i915/intel_batchbuffer.c b/i915/intel_batchbuffer.c
deleted file mode 100644
index 803b41b..0000000
--- a/i915/intel_batchbuffer.c
+++ /dev/null
@@ -1,829 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#include <stdio.h>
-#include <errno.h>
-
-#include "mtypes.h"
-#include "context.h"
-#include "enums.h"
-#include "vblank.h"
-
-#include "intel_reg.h"
-#include "intel_batchbuffer.h"
-#include "intel_context.h"
-
-
-
-
-/* ================================================================
- * Performance monitoring functions
- */
-
-static void intel_fill_box( intelContextPtr intel,
-			    GLshort x, GLshort y,
-			    GLshort w, GLshort h,
-			    GLubyte r, GLubyte g, GLubyte b )
-{
-   x += intel->drawX;
-   y += intel->drawY;
-
-   if (x >= 0 && y >= 0 &&
-       x+w < intel->intelScreen->width &&
-       y+h < intel->intelScreen->height)
-      intelEmitFillBlitLocked( intel, 
-			       intel->intelScreen->cpp,
-			       intel->intelScreen->back.pitch,
-			       intel->intelScreen->back.offset,
-			       x, y, w, h,
-			       INTEL_PACKCOLOR(intel->intelScreen->fbFormat,
-					       r,g,b,0xff));
-}
-
-static void intel_draw_performance_boxes( intelContextPtr intel )
-{
-   /* Purple box for page flipping
-    */
-   if ( intel->perf_boxes & I830_BOX_FLIP ) 
-      intel_fill_box( intel, 4, 4, 8, 8, 255, 0, 255 );
-
-   /* Red box if we have to wait for idle at any point
-    */
-   if ( intel->perf_boxes & I830_BOX_WAIT ) 
-      intel_fill_box( intel, 16, 4, 8, 8, 255, 0, 0 );
-
-   /* Blue box: lost context?
-    */
-   if ( intel->perf_boxes & I830_BOX_LOST_CONTEXT ) 
-      intel_fill_box( intel, 28, 4, 8, 8, 0, 0, 255 );
-
-   /* Yellow box for texture swaps
-    */
-   if ( intel->perf_boxes & I830_BOX_TEXTURE_LOAD ) 
-      intel_fill_box( intel, 40, 4, 8, 8, 255, 255, 0 );
-
-   /* Green box if hardware never idles (as far as we can tell)
-    */
-   if ( !(intel->perf_boxes & I830_BOX_RING_EMPTY) ) 
-      intel_fill_box( intel, 64, 4, 8, 8, 0, 255, 0 );
-
-
-   /* Draw bars indicating number of buffers allocated 
-    * (not a great measure, easily confused)
-    */
-#if 0
-   if (intel->dma_used) {
-      int bar = intel->dma_used / 10240;
-      if (bar > 100) bar = 100;
-      if (bar < 1) bar = 1;
-      intel_fill_box( intel, 4, 16, bar, 4, 196, 128, 128 );
-      intel->dma_used = 0;
-   }
-#endif
-
-   intel->perf_boxes = 0;
-}
-
-
-
-
-
-
-static int bad_prim_vertex_nr( int primitive, int nr )
-{
-   switch (primitive & PRIM3D_MASK) {
-   case PRIM3D_POINTLIST:
-      return nr < 1;
-   case PRIM3D_LINELIST:
-      return (nr & 1) || nr == 0;
-   case PRIM3D_LINESTRIP:
-      return nr < 2;
-   case PRIM3D_TRILIST:
-   case PRIM3D_RECTLIST:
-      return nr % 3 || nr == 0;
-   case PRIM3D_POLY:
-   case PRIM3D_TRIFAN:
-   case PRIM3D_TRISTRIP:
-   case PRIM3D_TRISTRIP_RVRSE:
-      return nr < 3;
-   default:
-      return 1;
-   }	
-}
-
-static void intel_flush_inline_primitive( GLcontext *ctx )
-{
-   intelContextPtr intel = INTEL_CONTEXT( ctx );
-   GLuint used = intel->batch.ptr - intel->prim.start_ptr;
-   GLuint vertcount;
-
-   assert(intel->prim.primitive != ~0);
-
-   if (1) {
-      /* Check vertex size against the vertex we're specifying to
-       * hardware.  If it's wrong, ditch the primitive.
-       */ 
-      if (!intel->vtbl.check_vertex_size( intel, intel->vertex_size )) 
-	 goto do_discard;
-
-      vertcount = (used - 4)/ (intel->vertex_size * 4);
-
-      if (!vertcount)
-	 goto do_discard;
-      
-      if (vertcount * intel->vertex_size * 4 != used - 4) {
-	 fprintf(stderr, "vertex size confusion %d %d\n", used, 
-		 intel->vertex_size * vertcount * 4);
-	 goto do_discard;
-      }
-
-      if (bad_prim_vertex_nr( intel->prim.primitive, vertcount )) {
-	 fprintf(stderr, "bad_prim_vertex_nr %x %d\n", intel->prim.primitive,
-		 vertcount);
-	 goto do_discard;
-      }
-   }
-
-   if (used < 8)
-      goto do_discard;
-
-   *(int *)intel->prim.start_ptr = (_3DPRIMITIVE | 
-				    intel->prim.primitive |
-				    (used/4-2));
-
-   goto finished;
-   
- do_discard:
-   intel->batch.ptr -= used;
-   intel->batch.space += used;
-   assert(intel->batch.space >= 0);
-
- finished:
-   intel->prim.primitive = ~0;
-   intel->prim.start_ptr = 0;
-   intel->prim.flush = 0;
-}
-
-
-/* Emit a primitive referencing vertices in a vertex buffer.
- */
-void intelStartInlinePrimitive( intelContextPtr intel, GLuint prim )
-{
-   BATCH_LOCALS;
-
-   if (0)
-      fprintf(stderr, "%s %x\n", __FUNCTION__, prim);
-
-
-   /* Finish any in-progress primitive:
-    */
-   INTEL_FIREVERTICES( intel );
-   
-   /* Emit outstanding state:
-    */
-   intel->vtbl.emit_state( intel );
-   
-   /* Make sure there is some space in this buffer:
-    */
-   if (intel->vertex_size * 10 * sizeof(GLuint) >= intel->batch.space) {
-      intelFlushBatch(intel, GL_TRUE); 
-      intel->vtbl.emit_state( intel );
-   }
-
-#if 1
-   if (((unsigned long)intel->batch.ptr) & 0x4) {
-      BEGIN_BATCH(1);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-#endif
-
-   /* Emit a slot which will be filled with the inline primitive
-    * command later.
-    */
-   BEGIN_BATCH(2);
-   OUT_BATCH( 0 );
-
-   intel->prim.start_ptr = batch_ptr;
-   intel->prim.primitive = prim;
-   intel->prim.flush = intel_flush_inline_primitive;
-   intel->batch.contains_geometry = 1;
-
-   OUT_BATCH( 0 );
-   ADVANCE_BATCH();
-}
-
-
-void intelRestartInlinePrimitive( intelContextPtr intel )
-{
-   GLuint prim = intel->prim.primitive;
-
-   intel_flush_inline_primitive( &intel->ctx );
-   if (1) intelFlushBatch(intel, GL_TRUE); /* GL_TRUE - is critical */
-   intelStartInlinePrimitive( intel, prim );
-}
-
-
-
-void intelWrapInlinePrimitive( intelContextPtr intel )
-{
-   GLuint prim = intel->prim.primitive;
-
-   if (0)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-   intel_flush_inline_primitive( &intel->ctx );
-   intelFlushBatch(intel, GL_TRUE);
-   intelStartInlinePrimitive( intel, prim );
-}
-
-
-/* Emit a primitive with space for inline vertices.
- */
-GLuint *intelEmitInlinePrimitiveLocked(intelContextPtr intel, 
-				       int primitive,
-				       int dwords,
-				       int vertex_size )
-{
-   GLuint *tmp = 0;
-   BATCH_LOCALS;
-
-   if (0)
-      fprintf(stderr, "%s 0x%x %d\n", __FUNCTION__, primitive, dwords);
-
-   /* Emit outstanding state:
-    */
-   intel->vtbl.emit_state( intel );
-
-   if ((1+dwords)*4 >= intel->batch.space) {
-      intelFlushBatch(intel, GL_TRUE); 
-      intel->vtbl.emit_state( intel );
-   }
-
-
-   if (1) {
-      int used = dwords * 4;
-      int vertcount;
-
-      /* Check vertex size against the vertex we're specifying to
-       * hardware.  If it's wrong, ditch the primitive.
-       */ 
-      if (!intel->vtbl.check_vertex_size( intel, vertex_size )) 
-	 goto do_discard;
-
-      vertcount = dwords / vertex_size;
-      
-      if (dwords % vertex_size) {
-	 fprintf(stderr, "did not request a whole number of vertices\n");
-	 goto do_discard;
-      }
-
-      if (bad_prim_vertex_nr( primitive, vertcount )) {
-	 fprintf(stderr, "bad_prim_vertex_nr %x %d\n", primitive, vertcount);
-	 goto do_discard;
-      }
-
-      if (used < 8)
-	 goto do_discard;
-   }
-
-   /* Emit 3D_PRIMITIVE commands:
-    */
-   BEGIN_BATCH(1 + dwords);
-   OUT_BATCH( _3DPRIMITIVE | 
-	      primitive |
-	      (dwords-1) );
-
-   tmp = (GLuint *)batch_ptr;
-   batch_ptr += dwords * 4;
-
-   ADVANCE_BATCH();
-
-   intel->batch.contains_geometry = 1;
-
- do_discard:
-   return tmp;
-}
-
-
-static void intelWaitForFrameCompletion( intelContextPtr intel )
-{
-  drm_i915_sarea_t *sarea = (drm_i915_sarea_t *)intel->sarea;
-
-   if (intel->do_irqs) {
-      if (intelGetLastFrame(intel) < sarea->last_dispatch) {
-	 if (!intel->irqsEmitted) {
-	    while (intelGetLastFrame (intel) < sarea->last_dispatch)
-	       ;
-	 }
-	 else {
-	    intelWaitIrq( intel, intel->alloc.irq_emitted );	
-	 }
-	 intel->irqsEmitted = 10;
-      }
-
-      if (intel->irqsEmitted) {
-	 LOCK_HARDWARE( intel ); 
-	 intelEmitIrqLocked( intel );
-	 intel->irqsEmitted--;
-	 UNLOCK_HARDWARE( intel ); 
-      }
-   } 
-   else {
-      while (intelGetLastFrame (intel) < sarea->last_dispatch) {
-	 if (intel->do_usleeps) 
-	    DO_USLEEP( 1 );
-      }
-   }
-}
-
-/*
- * Copy the back buffer to the front buffer. 
- */
-void intelCopyBuffer( const __DRIdrawablePrivate *dPriv,
-		      const drm_clip_rect_t	 *rect)
-{
-   intelContextPtr intel;
-   const intelScreenPrivate *intelScreen;
-   GLboolean   missed_target;
-   int64_t ust;
-
-   if (0)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   assert(dPriv);
-   assert(dPriv->driContextPriv);
-   assert(dPriv->driContextPriv->driverPrivate);
-
-   intel = (intelContextPtr) dPriv->driContextPriv->driverPrivate;
-
-   intelFlush( &intel->ctx );
-   
-   intelScreen = intel->intelScreen;
-
-   if (!rect && !intel->swap_scheduled && intelScreen->drmMinor >= 6 &&
-       !(intel->vblank_flags & VBLANK_FLAG_NO_IRQ) &&
-       intelScreen->current_rotation == 0) {
-      unsigned int interval = driGetVBlankInterval(dPriv, intel->vblank_flags);
-      unsigned int target;
-      drm_i915_vblank_swap_t swap;
-
-      swap.drawable = dPriv->hHWDrawable;
-      swap.seqtype = DRM_VBLANK_ABSOLUTE;
-      target = swap.sequence = intel->vbl_seq + interval;
-
-      if (intel->vblank_flags & VBLANK_FLAG_SYNC) {
-	 swap.seqtype |= DRM_VBLANK_NEXTONMISS;
-      } else if (interval == 0) {
-	 goto noschedule;
-      }
-
-      if ( intel->vblank_flags & VBLANK_FLAG_SECONDARY ) {
-	 swap.seqtype |= DRM_VBLANK_SECONDARY;
-      }
-
-      if (!drmCommandWriteRead(intel->driFd, DRM_I915_VBLANK_SWAP, &swap,
-                              sizeof(swap))) {
-        intel->swap_scheduled = 1;
-        intel->vbl_seq = swap.sequence;
-        swap.sequence -= target;
-        missed_target = swap.sequence > 0 && swap.sequence <= (1 << 23);
-      }
-   } else {
-      intel->swap_scheduled = 0;
-   }
-noschedule:
-
-   if (!intel->swap_scheduled) {
-      intelWaitForFrameCompletion( intel );
-      LOCK_HARDWARE( intel );
-
-      if (!rect)
-      {
-	 UNLOCK_HARDWARE( intel );
-	 driWaitForVBlank( dPriv, &intel->vbl_seq, intel->vblank_flags, & missed_target );
-	 LOCK_HARDWARE( intel );
-      }
-      {
-	 const intelScreenPrivate *intelScreen = intel->intelScreen;
-	 const __DRIdrawablePrivate *dPriv = intel->driDrawable;
-	 const int nbox = dPriv->numClipRects;
-	 const drm_clip_rect_t *pbox = dPriv->pClipRects;
-	 drm_clip_rect_t box;
-	 const int cpp = intelScreen->cpp;
-	 const int pitch = intelScreen->front.pitch; /* in bytes */
-	 int i;
-	 GLuint CMD, BR13;
-	 BATCH_LOCALS;
-
-	 switch(cpp) {
-	 case 2: 
-	    BR13 = (pitch) | (0xCC << 16) | (1<<24);
-	    CMD = XY_SRC_COPY_BLT_CMD;
-	    break;
-	 case 4:
-	    BR13 = (pitch) | (0xCC << 16) | (1<<24) | (1<<25);
-	    CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA |
-		   XY_SRC_COPY_BLT_WRITE_RGB);
-	    break;
-	 default:
-	    BR13 = (pitch) | (0xCC << 16) | (1<<24);
-	    CMD = XY_SRC_COPY_BLT_CMD;
-	    break;
-	 }
-   
-	 if (0) 
-	    intel_draw_performance_boxes( intel );
-
-	 for (i = 0 ; i < nbox; i++, pbox++) 
-	 {
-	    if (pbox->x1 > pbox->x2 ||
-		pbox->y1 > pbox->y2 ||
-		pbox->x2 > intelScreen->width ||
-		pbox->y2 > intelScreen->height) {
-	       _mesa_warning(&intel->ctx, "Bad cliprect in intelCopyBuffer()");
-	       continue;
-	    }
-
-	    box = *pbox;
-
-	    if (rect)
-	    {
-	       if (rect->x1 > box.x1)
-		  box.x1 = rect->x1;
-	       if (rect->y1 > box.y1)
-		  box.y1 = rect->y1;
-	       if (rect->x2 < box.x2)
-		  box.x2 = rect->x2;
-	       if (rect->y2 < box.y2)
-		  box.y2 = rect->y2;
-
-	       if (box.x1 > box.x2 || box.y1 > box.y2)
-		  continue;
-	    }
-
-	    BEGIN_BATCH( 8);
-	    OUT_BATCH( CMD );
-	    OUT_BATCH( BR13 );
-	    OUT_BATCH( (box.y1 << 16) | box.x1 );
-	    OUT_BATCH( (box.y2 << 16) | box.x2 );
-
-	    if (intel->sarea->pf_current_page == 0) 
-	       OUT_BATCH( intelScreen->front.offset );
-	    else
-	       OUT_BATCH( intelScreen->back.offset );			
-
-	    OUT_BATCH( (box.y1 << 16) | box.x1 );
-	    OUT_BATCH( BR13 & 0xffff );
-
-	    if (intel->sarea->pf_current_page == 0) 
-	       OUT_BATCH( intelScreen->back.offset );			
-	    else
-	       OUT_BATCH( intelScreen->front.offset );
-
-	    ADVANCE_BATCH();
-	 }
-      }
-      intelFlushBatchLocked( intel, GL_TRUE, GL_TRUE, GL_TRUE );
-      UNLOCK_HARDWARE( intel );
-   }
-
-   if (!rect)
-   {
-       intel->swap_count++;
-       (*dri_interface->getUST)(&ust);
-       if (missed_target) {
-	   intel->swap_missed_count++;
-	   intel->swap_missed_ust = ust -  intel->swap_ust;
-       }
-   
-       intel->swap_ust = ust;
-   }
-}
-
-
-
-
-void intelEmitFillBlitLocked( intelContextPtr intel,
-			      GLuint cpp,
-			      GLshort dst_pitch,  /* in bytes */
-			      GLuint dst_offset,
-			      GLshort x, GLshort y, 
-			      GLshort w, GLshort h,
-			      GLuint color )
-{
-   GLuint BR13, CMD;
-   BATCH_LOCALS;
-
-   switch(cpp) {
-   case 1: 
-   case 2: 
-   case 3: 
-      BR13 = dst_pitch | (0xF0 << 16) | (1<<24);
-      CMD = XY_COLOR_BLT_CMD;
-      break;
-   case 4:
-      BR13 = dst_pitch | (0xF0 << 16) | (1<<24) | (1<<25);
-      CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA |
-	     XY_COLOR_BLT_WRITE_RGB);
-      break;
-   default:
-      return;
-   }
-
-   BEGIN_BATCH( 6);
-   OUT_BATCH( CMD );
-   OUT_BATCH( BR13 );
-   OUT_BATCH( (y << 16) | x );
-   OUT_BATCH( ((y+h) << 16) | (x+w) );
-   OUT_BATCH( dst_offset );
-   OUT_BATCH( color );
-   ADVANCE_BATCH();
-}
-
-
-/* Copy BitBlt
- */
-void intelEmitCopyBlitLocked( intelContextPtr intel,
-			      GLuint cpp,
-			      GLshort src_pitch,
-			      GLuint  src_offset,
-			      GLshort dst_pitch,
-			      GLuint  dst_offset,
-			      GLshort src_x, GLshort src_y,
-			      GLshort dst_x, GLshort dst_y,
-			      GLshort w, GLshort h )
-{
-   GLuint CMD, BR13;
-   int dst_y2 = dst_y + h;
-   int dst_x2 = dst_x + w;
-   BATCH_LOCALS;
-
-   src_pitch *= cpp;
-   dst_pitch *= cpp;
-
-   switch(cpp) {
-   case 1: 
-   case 2: 
-   case 3: 
-      BR13 = dst_pitch | (0xCC << 16) | (1<<24);
-      CMD = XY_SRC_COPY_BLT_CMD;
-      break;
-   case 4:
-      BR13 = dst_pitch | (0xCC << 16) | (1<<24) | (1<<25);
-      CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA |
-	     XY_SRC_COPY_BLT_WRITE_RGB);
-      break;
-   default:
-      return;
-   }
-
-   if (dst_y2 < dst_y ||
-       dst_x2 < dst_x) {
-      return;
-   }
-
-   BEGIN_BATCH( 12);
-   OUT_BATCH( CMD );
-   OUT_BATCH( BR13 );
-   OUT_BATCH( (dst_y << 16) | dst_x );
-   OUT_BATCH( (dst_y2 << 16) | dst_x2 );
-   OUT_BATCH( dst_offset );	
-   OUT_BATCH( (src_y << 16) | src_x );
-   OUT_BATCH( src_pitch );
-   OUT_BATCH( src_offset ); 
-   ADVANCE_BATCH();
-}
-
-
-
-void intelClearWithBlit(GLcontext *ctx, GLbitfield buffers, GLboolean allFoo,
-                        GLint cx1Foo, GLint cy1Foo, GLint cwFoo, GLint chFoo)
-{
-   intelContextPtr intel = INTEL_CONTEXT( ctx );
-   intelScreenPrivate *intelScreen = intel->intelScreen;
-   GLuint clear_depth, clear_color;
-   GLint cx, cy, cw, ch;
-   GLboolean all;
-   GLint pitch;
-   GLint cpp = intelScreen->cpp;
-   GLint i;
-   GLuint BR13, CMD, D_CMD;
-   BATCH_LOCALS;
-
-   intelFlush( &intel->ctx );
-   LOCK_HARDWARE( intel );
-
-   /* get clear bounds after locking */
-   cx = intel->ctx.DrawBuffer->_Xmin;
-   cy = intel->ctx.DrawBuffer->_Ymin;
-   cw = intel->ctx.DrawBuffer->_Xmax - cx;
-   ch = intel->ctx.DrawBuffer->_Ymax - cy;
-   all = (cw == intel->ctx.DrawBuffer->Width &&
-          ch == intel->ctx.DrawBuffer->Height);
-
-   pitch = intelScreen->front.pitch;
-
-   clear_color = intel->ClearColor;
-   clear_depth = 0;
-
-   if (buffers & BUFFER_BIT_DEPTH) {
-      clear_depth = (GLuint)(ctx->Depth.Clear * intel->ClearDepth);
-   }
-
-   if (buffers & BUFFER_BIT_STENCIL) {
-      clear_depth |= (ctx->Stencil.Clear & 0xff) << 24;
-   }
-
-   switch(cpp) {
-   case 2: 
-      BR13 = (0xF0 << 16) | (pitch) | (1<<24);
-      D_CMD = CMD = XY_COLOR_BLT_CMD;
-      break;
-   case 4:
-      BR13 = (0xF0 << 16) | (pitch) | (1<<24) | (1<<25);
-      CMD = (XY_COLOR_BLT_CMD |
-	     XY_COLOR_BLT_WRITE_ALPHA | 
-	     XY_COLOR_BLT_WRITE_RGB);
-      D_CMD = XY_COLOR_BLT_CMD;
-      if (buffers & BUFFER_BIT_DEPTH) D_CMD |= XY_COLOR_BLT_WRITE_RGB;
-      if (buffers & BUFFER_BIT_STENCIL) D_CMD |= XY_COLOR_BLT_WRITE_ALPHA;
-      break;
-   default:
-      BR13 = (0xF0 << 16) | (pitch) | (1<<24);
-      D_CMD = CMD = XY_COLOR_BLT_CMD;
-      break;
-   }
-
-   {
-      /* flip top to bottom */
-      cy = intel->driDrawable->h - cy - ch;
-      cx = cx + intel->drawX;
-      cy += intel->drawY;
-
-      /* adjust for page flipping */
-      if ( intel->sarea->pf_current_page == 1 ) {
-	 GLuint tmp = buffers;
-
-	 buffers &= ~(BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT);
-	 if ( tmp & BUFFER_BIT_FRONT_LEFT ) buffers |= BUFFER_BIT_BACK_LEFT;
-	 if ( tmp & BUFFER_BIT_BACK_LEFT )  buffers |= BUFFER_BIT_FRONT_LEFT;
-      }
-
-      for (i = 0 ; i < intel->numClipRects ; i++) 
-      { 	 
-	 drm_clip_rect_t *box = &intel->pClipRects[i];	 
-	 drm_clip_rect_t b;
-
-	 if (!all) {
-	    GLint x = box->x1;
-	    GLint y = box->y1;
-	    GLint w = box->x2 - x;
-	    GLint h = box->y2 - y;
-
-	    if (x < cx) w -= cx - x, x = cx; 
-	    if (y < cy) h -= cy - y, y = cy;
-	    if (x + w > cx + cw) w = cx + cw - x;
-	    if (y + h > cy + ch) h = cy + ch - y;
-	    if (w <= 0) continue;
-	    if (h <= 0) continue;
-
-	    b.x1 = x;
-	    b.y1 = y;
-	    b.x2 = x + w;
-	    b.y2 = y + h;      
-	 } else {
-	    b = *box;
-	 }
-
-
-	 if (b.x1 > b.x2 ||
-	     b.y1 > b.y2 ||
-	     b.x2 > intelScreen->width ||
-	     b.y2 > intelScreen->height)
-	    continue;
-
-	 if ( buffers & BUFFER_BIT_FRONT_LEFT ) {	    
-	    BEGIN_BATCH( 6);	    
-	    OUT_BATCH( CMD );
-	    OUT_BATCH( BR13 );
-	    OUT_BATCH( (b.y1 << 16) | b.x1 );
-	    OUT_BATCH( (b.y2 << 16) | b.x2 );
-	    OUT_BATCH( intelScreen->front.offset );
-	    OUT_BATCH( clear_color );
-	    ADVANCE_BATCH();
-	 }
-
-	 if ( buffers & BUFFER_BIT_BACK_LEFT ) {
-	    BEGIN_BATCH( 6); 
-	    OUT_BATCH( CMD );
-	    OUT_BATCH( BR13 );
-	    OUT_BATCH( (b.y1 << 16) | b.x1 );
-	    OUT_BATCH( (b.y2 << 16) | b.x2 );
-	    OUT_BATCH( intelScreen->back.offset );
-	    OUT_BATCH( clear_color );
-	    ADVANCE_BATCH();
-	 }
-
-	 if ( buffers & (BUFFER_BIT_STENCIL | BUFFER_BIT_DEPTH) ) {
-	    BEGIN_BATCH( 6);
-	    OUT_BATCH( D_CMD );
-	    OUT_BATCH( BR13 );
-	    OUT_BATCH( (b.y1 << 16) | b.x1 );
-	    OUT_BATCH( (b.y2 << 16) | b.x2 );
-	    OUT_BATCH( intelScreen->depth.offset );
-	    OUT_BATCH( clear_depth );
-	    ADVANCE_BATCH();
-	 }      
-      }
-   }
-   intelFlushBatchLocked( intel, GL_TRUE, GL_FALSE, GL_TRUE );
-   UNLOCK_HARDWARE( intel );
-}
-
-
-
-
-void intelDestroyBatchBuffer( GLcontext *ctx )
-{
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
-
-   if (intel->alloc.offset) {
-      intelFreeAGP( intel, intel->alloc.ptr );
-      intel->alloc.ptr = NULL;
-      intel->alloc.offset = 0;
-   }
-   else if (intel->alloc.ptr) {
-      free(intel->alloc.ptr);
-      intel->alloc.ptr = NULL;
-   }
-
-   memset(&intel->batch, 0, sizeof(intel->batch));
-}
-
-
-void intelInitBatchBuffer( GLcontext *ctx )
-{
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
-
-   /* This path isn't really safe with rotate:
-    */
-   if (getenv("INTEL_BATCH") && intel->intelScreen->allow_batchbuffer) {      
-      switch (intel->intelScreen->deviceID) {
-      case PCI_CHIP_I865_G:
-	 /* HW bug?  Seems to crash if batchbuffer crosses 4k boundary.
-	  */
-	 intel->alloc.size = 8 * 1024; 
-	 break;
-      default:
-	 /* This is the smallest amount of memory the kernel deals with.
-	  * We'd ideally like to make this smaller.
-	  */
-	 intel->alloc.size = 1 << intel->intelScreen->logTextureGranularity;
-	 break;
-      }
-
-      intel->alloc.ptr = intelAllocateAGP( intel, intel->alloc.size );
-      if (intel->alloc.ptr)
-	 intel->alloc.offset = 
-	    intelAgpOffsetFromVirtual( intel, intel->alloc.ptr );
-      else
-         intel->alloc.offset = 0; /* OK? */
-   }
-
-   /* The default is now to use a local buffer and pass that to the
-    * kernel.  This is also a fallback if allocation fails on the
-    * above path:
-    */
-   if (!intel->alloc.ptr) {
-      intel->alloc.size = 8 * 1024;
-      intel->alloc.ptr = malloc( intel->alloc.size );
-      intel->alloc.offset = 0;
-   }
-
-   assert(intel->alloc.ptr);
-}
diff --git a/i915/intel_batchbuffer.h b/i915/intel_batchbuffer.h
deleted file mode 100644
index 577d071..0000000
--- a/i915/intel_batchbuffer.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef INTEL_BATCHBUFFER_H
-#define INTEL_BATCHBUFFER_H
-
-#include "intel_context.h"
-#include "intel_ioctl.h"
-
-
-#define BATCH_LOCALS	GLubyte *batch_ptr;
-
-/* #define VERBOSE 0 */
-#ifndef VERBOSE
-extern int VERBOSE;
-#endif
-
-
-#define BEGIN_BATCH(n)							\
-do {									\
-   if (VERBOSE) fprintf(stderr, 					\
-			"BEGIN_BATCH(%ld) in %s, %d dwords free\n",	\
-			((unsigned long)n), __FUNCTION__,		\
-			intel->batch.space/4);				\
-   if (intel->batch.space < (n)*4)					\
-      intelFlushBatch(intel, GL_TRUE);					\
-   if (intel->batch.space == intel->batch.size)	intel->batch.func = __FUNCTION__;			\
-   batch_ptr = intel->batch.ptr;					\
-} while (0)
-
-#define OUT_BATCH(n)					\
-do {							\
-   *(GLuint *)batch_ptr = (n);				\
-   if (VERBOSE) fprintf(stderr, " -- %08x at %s/%d\n", (n), __FILE__, __LINE__);	\
-   batch_ptr += 4;					\
-} while (0)
-
-#define ADVANCE_BATCH()						\
-do {								\
-   if (VERBOSE) fprintf(stderr, "ADVANCE_BATCH()\n");		\
-   intel->batch.space -= (batch_ptr - intel->batch.ptr);	\
-   intel->batch.ptr = batch_ptr;				\
-   assert(intel->batch.space >= 0);				\
-} while(0)
-
-extern void intelInitBatchBuffer( GLcontext *ctx );
-extern void intelDestroyBatchBuffer( GLcontext *ctx );
-
-extern void intelStartInlinePrimitive( intelContextPtr intel, GLuint prim );
-extern void intelWrapInlinePrimitive( intelContextPtr intel );
-extern void intelRestartInlinePrimitive( intelContextPtr intel );
-extern GLuint *intelEmitInlinePrimitiveLocked(intelContextPtr intel, 
-					      int primitive, int dwords,
-					      int vertex_size);
-extern void intelCopyBuffer( const __DRIdrawablePrivate *dpriv,
-			     const drm_clip_rect_t	*rect);
-extern void intelClearWithBlit(GLcontext *ctx, GLbitfield mask, GLboolean all,
-			     GLint cx1, GLint cy1, GLint cw, GLint ch);
-
-extern void intelEmitCopyBlitLocked( intelContextPtr intel,
-				     GLuint cpp,
-				     GLshort src_pitch,
-				     GLuint  src_offset,
-				     GLshort dst_pitch,
-				     GLuint  dst_offset,
-				     GLshort srcx, GLshort srcy,
-				     GLshort dstx, GLshort dsty,
-				     GLshort w, GLshort h );
-
-extern void intelEmitFillBlitLocked( intelContextPtr intel,
-				     GLuint cpp,
-				     GLshort dst_pitch,
-				     GLuint dst_offset,
-				     GLshort x, GLshort y, 
-				     GLshort w, GLshort h,
-				     GLuint color );
-
-
-
-
-static __inline GLuint *intelExtendInlinePrimitive( intelContextPtr intel, 
-						GLuint dwords )
-{
-   GLuint sz = dwords * sizeof(GLuint);
-   GLuint *ptr;
-
-   if (intel->batch.space < sz) {
-      intelWrapInlinePrimitive( intel );
-/*       assert(intel->batch.space >= sz); */
-   }
-
-/*    assert(intel->prim.primitive != ~0); */
-   ptr = (GLuint *)intel->batch.ptr;
-   intel->batch.ptr += sz;
-   intel->batch.space -= sz;
-
-   return ptr;
-}
-
-
-
-#endif
diff --git a/i915/intel_context.c b/i915/intel_context.c
deleted file mode 100644
index bb5ce64..0000000
--- a/i915/intel_context.c
+++ /dev/null
@@ -1,871 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#include "glheader.h"
-#include "context.h"
-#include "matrix.h"
-#include "simple_list.h"
-#include "extensions.h"
-#include "framebuffer.h"
-#include "imports.h"
-#include "points.h"
-
-#include "swrast/swrast.h"
-#include "swrast_setup/swrast_setup.h"
-#include "tnl/tnl.h"
-#include "vbo/vbo.h"
-
-#include "tnl/t_pipeline.h"
-#include "tnl/t_vertex.h"
-
-#include "drivers/common/driverfuncs.h"
-
-#include "intel_screen.h"
-
-#include "i830_dri.h"
-#include "i830_common.h"
-
-#include "intel_tex.h"
-#include "intel_span.h"
-#include "intel_tris.h"
-#include "intel_ioctl.h"
-#include "intel_batchbuffer.h"
-
-#include "vblank.h"
-#include "utils.h"
-#include "xmlpool.h" /* for symbolic values of enum-type options */
-#ifndef INTEL_DEBUG
-int INTEL_DEBUG = (0);
-#endif
-
-#define need_GL_ARB_multisample
-#define need_GL_ARB_point_parameters
-#define need_GL_ARB_texture_compression
-#define need_GL_ARB_vertex_buffer_object
-#define need_GL_ARB_vertex_program
-#define need_GL_ARB_window_pos
-#define need_GL_EXT_blend_color
-#define need_GL_EXT_blend_equation_separate
-#define need_GL_EXT_blend_func_separate
-#define need_GL_EXT_blend_minmax
-#define need_GL_EXT_cull_vertex
-#define need_GL_EXT_fog_coord
-#define need_GL_EXT_multi_draw_arrays
-#define need_GL_EXT_secondary_color
-#define need_GL_NV_vertex_program
-#include "extension_helper.h"
-
-#ifndef VERBOSE
-int VERBOSE = 0;
-#endif
-
-#if DEBUG_LOCKING
-char *prevLockFile;
-int prevLockLine;
-#endif
-
-/***************************************
- * Mesa's Driver Functions
- ***************************************/
-
-#define DRIVER_DATE "20061017"
-
-const GLubyte *intelGetString( GLcontext *ctx, GLenum name )
-{
-   const char * chipset;
-   static char buffer[128];
-
-   switch (name) {
-   case GL_VENDOR:
-      return (GLubyte *)"Tungsten Graphics, Inc";
-      break;
-      
-   case GL_RENDERER:
-      switch (INTEL_CONTEXT(ctx)->intelScreen->deviceID) {
-      case PCI_CHIP_845_G:
-	 chipset = "Intel(R) 845G"; break;
-      case PCI_CHIP_I830_M:
-	 chipset = "Intel(R) 830M"; break;
-      case PCI_CHIP_I855_GM:
-	 chipset = "Intel(R) 852GM/855GM"; break;
-      case PCI_CHIP_I865_G:
-	 chipset = "Intel(R) 865G"; break;
-      case PCI_CHIP_I915_G:
-	 chipset = "Intel(R) 915G"; break;
-      case PCI_CHIP_I915_GM:
-	 chipset = "Intel(R) 915GM"; break;
-      case PCI_CHIP_I945_G:
-	 chipset = "Intel(R) 945G"; break;
-      case PCI_CHIP_I945_GM:
-	 chipset = "Intel(R) 945GM"; break;
-      case PCI_CHIP_I945_GME:
-	 chipset = "Intel(R) 945GME"; break;
-      case PCI_CHIP_G33_G:
-	 chipset = "Intel(R) G33"; break;
-      case PCI_CHIP_Q35_G:
-	 chipset = "Intel(R) Q35"; break;
-      case PCI_CHIP_Q33_G:
-	 chipset = "Intel(R) Q33"; break;
-      default:
-	 chipset = "Unknown Intel Chipset"; break;
-      }
-
-      (void) driGetRendererString( buffer, chipset, DRIVER_DATE, 0 );
-      return (GLubyte *) buffer;
-
-   default:
-      return NULL;
-   }
-}
-
-
-/**
- * Extension strings exported by the intel driver.
- *
- * \note
- * It appears that ARB_texture_env_crossbar has "disappeared" compared to the
- * old i830-specific driver.
- */
-const struct dri_extension card_extensions[] =
-{
-    { "GL_ARB_multisample",                GL_ARB_multisample_functions },
-    { "GL_ARB_multitexture",               NULL },
-    { "GL_ARB_point_parameters",           GL_ARB_point_parameters_functions },
-    { "GL_ARB_texture_border_clamp",       NULL },
-    { "GL_ARB_texture_compression",        GL_ARB_texture_compression_functions },
-    { "GL_ARB_texture_cube_map",           NULL },
-    { "GL_ARB_texture_env_add",            NULL },
-    { "GL_ARB_texture_env_combine",        NULL },
-    { "GL_ARB_texture_env_dot3",           NULL },
-    { "GL_ARB_texture_mirrored_repeat",    NULL },
-    { "GL_ARB_texture_rectangle",          NULL },
-    { "GL_ARB_vertex_buffer_object",       GL_ARB_vertex_buffer_object_functions },
-    { "GL_ARB_vertex_program",             GL_ARB_vertex_program_functions },
-    { "GL_ARB_window_pos",                 GL_ARB_window_pos_functions },
-    { "GL_EXT_blend_color",                GL_EXT_blend_color_functions },
-    { "GL_EXT_blend_equation_separate",    GL_EXT_blend_equation_separate_functions },
-    { "GL_EXT_blend_func_separate",        GL_EXT_blend_func_separate_functions },
-    { "GL_EXT_blend_minmax",               GL_EXT_blend_minmax_functions },
-    { "GL_EXT_blend_subtract",             NULL },
-    { "GL_EXT_cull_vertex",                GL_EXT_cull_vertex_functions },
-    { "GL_EXT_fog_coord",                  GL_EXT_fog_coord_functions },
-    { "GL_EXT_multi_draw_arrays",          GL_EXT_multi_draw_arrays_functions },
-    { "GL_EXT_secondary_color",            GL_EXT_secondary_color_functions },
-    { "GL_EXT_stencil_wrap",               NULL },
-    { "GL_EXT_texture_edge_clamp",         NULL },
-    { "GL_EXT_texture_env_combine",        NULL },
-    { "GL_EXT_texture_env_dot3",           NULL },
-    { "GL_EXT_texture_filter_anisotropic", NULL },
-    { "GL_EXT_texture_lod_bias",           NULL },
-    { "GL_3DFX_texture_compression_FXT1",  NULL },
-    { "GL_APPLE_client_storage",           NULL },
-    { "GL_MESA_pack_invert",               NULL },
-    { "GL_MESA_ycbcr_texture",             NULL },
-    { "GL_NV_blend_square",                NULL },
-    { "GL_NV_vertex_program",              GL_NV_vertex_program_functions },
-    { "GL_NV_vertex_program1_1",           NULL },
-    { "GL_SGIS_generate_mipmap",           NULL },
-    { NULL,                                NULL }
-};
-
-extern const struct tnl_pipeline_stage _intel_render_stage;
-
-static const struct tnl_pipeline_stage *intel_pipeline[] = {
-   &_tnl_vertex_transform_stage,
-   &_tnl_vertex_cull_stage,
-   &_tnl_normal_transform_stage,
-   &_tnl_lighting_stage,
-   &_tnl_fog_coordinate_stage,
-   &_tnl_texgen_stage,
-   &_tnl_texture_transform_stage,
-   &_tnl_point_attenuation_stage,
-   &_tnl_vertex_program_stage,
-#if 1
-   &_intel_render_stage,     /* ADD: unclipped rastersetup-to-dma */
-#endif
-   &_tnl_render_stage,
-   0,
-};
-
-
-static const struct dri_debug_control debug_control[] =
-{
-    { "fall",  DEBUG_FALLBACKS },
-    { "tex",   DEBUG_TEXTURE },
-    { "ioctl", DEBUG_IOCTL },
-    { "prim",  DEBUG_PRIMS },
-    { "vert",  DEBUG_VERTS },
-    { "state", DEBUG_STATE },
-    { "verb",  DEBUG_VERBOSE },
-    { "dri",   DEBUG_DRI },
-    { "dma",   DEBUG_DMA },
-    { "san",   DEBUG_SANITY },
-    { "sync",  DEBUG_SYNC },
-    { "sleep", DEBUG_SLEEP },
-    { "pix",   DEBUG_PIXEL },
-    { NULL,    0 }
-};
-
-
-static void intelInvalidateState( GLcontext *ctx, GLuint new_state )
-{
-   _swrast_InvalidateState( ctx, new_state );
-   _swsetup_InvalidateState( ctx, new_state );
-   _vbo_InvalidateState( ctx, new_state );
-   _tnl_InvalidateState( ctx, new_state );
-   _tnl_invalidate_vertex_state( ctx, new_state );
-   INTEL_CONTEXT(ctx)->NewGLState |= new_state;
-}
-
-
-void intelInitDriverFunctions( struct dd_function_table *functions )
-{
-   _mesa_init_driver_functions( functions );
-
-   functions->Clear = intelClear;
-   functions->Flush = intelglFlush;
-   functions->Finish = intelFinish;
-   functions->GetString = intelGetString;
-   functions->UpdateState = intelInvalidateState;
-
-   intelInitTextureFuncs( functions );
-   intelInitPixelFuncs( functions );
-   intelInitStateFuncs( functions );
-}
-
-static void intel_emit_invarient_state( GLcontext *ctx )
-{
-}
-
-
-
-GLboolean intelInitContext( intelContextPtr intel,
-			    const __GLcontextModes *mesaVis,
-			    __DRIcontextPrivate *driContextPriv,
-			    void *sharedContextPrivate,
-			    struct dd_function_table *functions )
-{
-   GLcontext *ctx = &intel->ctx;
-   GLcontext *shareCtx = (GLcontext *) sharedContextPrivate;
-   __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
-   intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private;
-   drmI830Sarea *saPriv = (drmI830Sarea *)
-      (((GLubyte *)sPriv->pSAREA)+intelScreen->sarea_priv_offset);
-   int fthrottle_mode;
-
-   if (!_mesa_initialize_context(&intel->ctx,
-				 mesaVis, shareCtx, 
-				 functions,
-				 (void*) intel))
-      return GL_FALSE;
-
-   driContextPriv->driverPrivate = intel;
-   intel->intelScreen = intelScreen;
-   intel->driScreen = sPriv;
-   intel->sarea = saPriv;
-
-
-   (void) memset( intel->texture_heaps, 0, sizeof( intel->texture_heaps ) );
-   make_empty_list( & intel->swapped );
-
-   driParseConfigFiles (&intel->optionCache, &intelScreen->optionCache,
-			intel->driScreen->myNum, "i915");
-
-   ctx->Const.MaxTextureMaxAnisotropy = 2.0;
-
-   ctx->Const.MinLineWidth = 1.0;
-   ctx->Const.MinLineWidthAA = 1.0;
-   ctx->Const.MaxLineWidth = 3.0;
-   ctx->Const.MaxLineWidthAA = 3.0;
-   ctx->Const.LineWidthGranularity = 1.0;
-
-   ctx->Const.MinPointSize = 1.0;
-   ctx->Const.MinPointSizeAA = 1.0;
-   ctx->Const.MaxPointSize = 255.0;
-   ctx->Const.MaxPointSizeAA = 3.0;
-   ctx->Const.PointSizeGranularity = 1.0;
-
-   /* reinitialize the context point state.
-    * It depend on constants in __GLcontextRec::Const
-    */
-   _mesa_init_point(ctx);
-
-   /* Initialize the software rasterizer and helper modules. */
-   _swrast_CreateContext( ctx );
-   _vbo_CreateContext( ctx );
-   _tnl_CreateContext( ctx );
-   _swsetup_CreateContext( ctx );
-
-   /* Install the customized pipeline: */
-   _tnl_destroy_pipeline( ctx );
-   _tnl_install_pipeline( ctx, intel_pipeline );
-
-   /* Configure swrast to match hardware characteristics: */
-   _swrast_allow_pixel_fog( ctx, GL_FALSE );
-   _swrast_allow_vertex_fog( ctx, GL_TRUE );
-
-   /* Dri stuff */
-   intel->hHWContext = driContextPriv->hHWContext;
-   intel->driFd = sPriv->fd;
-   intel->driHwLock = (drmLock *) &sPriv->pSAREA->lock;
-
-   intel->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24;
-   intel->hw_stipple = 1;
-
-   switch(mesaVis->depthBits) {
-   case 0:			/* what to do in this case? */
-   case 16:
-      intel->depth_scale = 1.0/0xffff;
-      intel->polygon_offset_scale = 1.0/0xffff;
-      intel->depth_clear_mask = ~0;
-      intel->ClearDepth = 0xffff;
-      break;
-   case 24:
-      intel->depth_scale = 1.0/0xffffff;
-      intel->polygon_offset_scale = 2.0/0xffffff; /* req'd to pass glean */
-      intel->depth_clear_mask = 0x00ffffff;
-      intel->stencil_clear_mask = 0xff000000;
-      intel->ClearDepth = 0x00ffffff;
-      break;
-   default:
-      assert(0); 
-      break;
-   }
-
-   /* Initialize swrast, tnl driver tables: */
-   intelInitSpanFuncs( ctx );
-   intelInitTriFuncs( ctx );
-
-
-   intel->RenderIndex = ~0;
-
-   fthrottle_mode = driQueryOptioni(&intel->optionCache, "fthrottle_mode");
-   intel->iw.irq_seq = -1;
-   intel->irqsEmitted = 0;
-
-   intel->do_irqs = (intel->intelScreen->irq_active &&
-		     fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS);
-
-   intel->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
-
-   intel->vblank_flags = (intel->intelScreen->irq_active != 0)
-       ? driGetDefaultVBlankFlags(&intel->optionCache) : VBLANK_FLAG_NO_IRQ;
-
-   (*dri_interface->getUST)(&intel->swap_ust);
-   _math_matrix_ctr (&intel->ViewportMatrix);
-
-   driInitExtensions( ctx, card_extensions, GL_TRUE );
-
-   if (intel->ctx.Mesa_DXTn) {
-     _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
-     _mesa_enable_extension( ctx, "GL_S3_s3tc" );
-   }
-   else if (driQueryOptionb (&intel->optionCache, "force_s3tc_enable")) {
-     _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
-   }
-
-/*    driInitTextureObjects( ctx, & intel->swapped, */
-/* 			  DRI_TEXMGR_DO_TEXTURE_1D | */
-/* 			  DRI_TEXMGR_DO_TEXTURE_2D |  */
-/* 			  DRI_TEXMGR_DO_TEXTURE_RECT ); */
-
-
-   intelInitBatchBuffer(&intel->ctx);
-   intel->prim.flush = intel_emit_invarient_state;
-   intel->prim.primitive = ~0;
-
-
-#if DO_DEBUG
-   INTEL_DEBUG  = driParseDebugString( getenv( "INTEL_DEBUG" ),
-				       debug_control );
-   INTEL_DEBUG |= driParseDebugString( getenv( "INTEL_DEBUG" ),
-				       debug_control );
-#endif
-
-#ifndef VERBOSE
-   if (getenv("INTEL_VERBOSE"))
-      VERBOSE=1;
-#endif
-
-   if (getenv("INTEL_NO_RAST") || 
-       getenv("INTEL_NO_RAST")) {
-      fprintf(stderr, "disabling 3D rasterization\n");
-      FALLBACK(intel, INTEL_FALLBACK_USER, 1); 
-   }
-
-   return GL_TRUE;
-}
-
-void intelDestroyContext(__DRIcontextPrivate *driContextPriv)
-{
-   intelContextPtr intel = (intelContextPtr) driContextPriv->driverPrivate;
-
-   assert(intel); /* should never be null */
-   if (intel) {
-      GLboolean   release_texture_heaps;
-
-      INTEL_FIREVERTICES( intel );
-
-      intel->vtbl.destroy( intel );
-
-      release_texture_heaps = (intel->ctx.Shared->RefCount == 1);
-      _swsetup_DestroyContext (&intel->ctx);
-      _tnl_DestroyContext (&intel->ctx);
-      _vbo_DestroyContext (&intel->ctx);
-
-      _swrast_DestroyContext (&intel->ctx);
-      intel->Fallback = 0;	/* don't call _swrast_Flush later */
-
-      intelDestroyBatchBuffer(&intel->ctx);
-      
-
-      if ( release_texture_heaps ) {
-         /* This share group is about to go away, free our private
-          * texture object data.
-          */
-         int i;
-
-         for ( i = 0 ; i < intel->nr_heaps ; i++ ) {
-	    driDestroyTextureHeap( intel->texture_heaps[ i ] );
-	    intel->texture_heaps[ i ] = NULL;
-         }
-
-	 assert( is_empty_list( & intel->swapped ) );
-      }
-
-      /* free the Mesa context */
-      _mesa_destroy_context(&intel->ctx);
-   }
-}
-
-void intelSetFrontClipRects( intelContextPtr intel )
-{
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;
-
-   if (!dPriv) return;
-
-   intel->numClipRects = dPriv->numClipRects;
-   intel->pClipRects = dPriv->pClipRects;
-   intel->drawX = dPriv->x;
-   intel->drawY = dPriv->y;
-}
-
-
-void intelSetBackClipRects( intelContextPtr intel )
-{
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;
-
-   if (!dPriv) return;
-
-   if (intel->sarea->pf_enabled == 0 && dPriv->numBackClipRects == 0) {
-      intel->numClipRects = dPriv->numClipRects;
-      intel->pClipRects = dPriv->pClipRects;
-      intel->drawX = dPriv->x;
-      intel->drawY = dPriv->y;
-   } else {
-      intel->numClipRects = dPriv->numBackClipRects;
-      intel->pClipRects = dPriv->pBackClipRects;
-      intel->drawX = dPriv->backX;
-      intel->drawY = dPriv->backY;
-      
-      if (dPriv->numBackClipRects == 1 &&
-	  dPriv->x == dPriv->backX &&
-	  dPriv->y == dPriv->backY) {
-      
-	 /* Repeat the calculation of the back cliprect dimensions here
-	  * as early versions of dri.a in the Xserver are incorrect.  Try
-	  * very hard not to restrict future versions of dri.a which
-	  * might eg. allocate truly private back buffers.
-	  */
-	 int x1, y1;
-	 int x2, y2;
-	 
-	 x1 = dPriv->x;
-	 y1 = dPriv->y;      
-	 x2 = dPriv->x + dPriv->w;
-	 y2 = dPriv->y + dPriv->h;
-	 
-	 if (x1 < 0) x1 = 0;
-	 if (y1 < 0) y1 = 0;
-	 if (x2 > intel->intelScreen->width) x2 = intel->intelScreen->width;
-	 if (y2 > intel->intelScreen->height) y2 = intel->intelScreen->height;
-
-	 if (x1 == dPriv->pBackClipRects[0].x1 &&
-	     y1 == dPriv->pBackClipRects[0].y1) {
-
-	    dPriv->pBackClipRects[0].x2 = x2;
-	    dPriv->pBackClipRects[0].y2 = y2;
-	 }
-      }
-   }
-}
-
-
-void intelWindowMoved( intelContextPtr intel )
-{
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;
-   GLframebuffer *drawFb = (GLframebuffer *) dPriv->driverPrivate;
-
-   if (!intel->ctx.DrawBuffer) {
-      intelSetFrontClipRects( intel );
-   }
-   else {
-      driUpdateFramebufferSize(&intel->ctx, dPriv);
-      switch (drawFb->_ColorDrawBufferMask[0]) {
-      case BUFFER_BIT_FRONT_LEFT:
-	 intelSetFrontClipRects( intel );
-	 break;
-      case BUFFER_BIT_BACK_LEFT:
-	 intelSetBackClipRects( intel );
-	 break;
-      default:
-	 /* glDrawBuffer(GL_NONE or GL_FRONT_AND_BACK): software fallback */
-	 intelSetFrontClipRects( intel );
-      }
-   }
-
-   if (drawFb->Width != dPriv->w || drawFb->Height != dPriv->h) {
-      /* update Mesa's notion of framebuffer/window size */
-      _mesa_resize_framebuffer(&intel->ctx, drawFb, dPriv->w, dPriv->h);
-      drawFb->Initialized = GL_TRUE; /* XXX remove someday */
-   }
-
-   /* Set state we know depends on drawable parameters:
-    */
-   {
-      GLcontext *ctx = &intel->ctx;
-
-      if (intel->intelScreen->driScrnPriv->ddxMinor >= 7) {
-	 drmI830Sarea *sarea = intel->sarea;
-	 drm_clip_rect_t drw_rect = { .x1 = dPriv->x, .x2 = dPriv->x + dPriv->w,
-				      .y1 = dPriv->y, .y2 = dPriv->y + dPriv->h };
-	 drm_clip_rect_t pipeA_rect = { .x1 = sarea->pipeA_x,
-					.x2 = sarea->pipeA_x + sarea->pipeA_w,
-					.y1 = sarea->pipeA_y,
-					.y2 = sarea->pipeA_y + sarea->pipeA_h };
-	 drm_clip_rect_t pipeB_rect = { .x1 = sarea->pipeB_x,
-					.x2 = sarea->pipeB_x + sarea->pipeB_w,
-					.y1 = sarea->pipeB_y,
-					.y2 = sarea->pipeB_y + sarea->pipeB_h };
-	 GLint areaA = driIntersectArea( drw_rect, pipeA_rect );
-	 GLint areaB = driIntersectArea( drw_rect, pipeB_rect );
-	 GLuint flags = intel->vblank_flags;
-
-	 if (areaB > areaA || (areaA == areaB && areaB > 0)) {
-	    flags = intel->vblank_flags | VBLANK_FLAG_SECONDARY;
-	 } else {
-	    flags = intel->vblank_flags & ~VBLANK_FLAG_SECONDARY;
-	 }
-
-	 if (flags != intel->vblank_flags) {
-	    intel->vblank_flags = flags;
-	    driGetCurrentVBlank(dPriv, intel->vblank_flags, &intel->vbl_seq);
-	 }
-      } else {
-	 intel->vblank_flags &= ~VBLANK_FLAG_SECONDARY;
-      }
-
-      ctx->Driver.Scissor( ctx, ctx->Scissor.X, ctx->Scissor.Y,
-			   ctx->Scissor.Width, ctx->Scissor.Height );
-      
-      ctx->Driver.DepthRange( ctx, 
-			      ctx->Viewport.Near,
-			      ctx->Viewport.Far );
-   }
-}
-
-GLboolean intelUnbindContext(__DRIcontextPrivate *driContextPriv)
-{
-   return GL_TRUE;
-}
-
-GLboolean intelMakeCurrent(__DRIcontextPrivate *driContextPriv,
-			  __DRIdrawablePrivate *driDrawPriv,
-			  __DRIdrawablePrivate *driReadPriv)
-{
-
-   if (driContextPriv) {
-      intelContextPtr intel = (intelContextPtr) driContextPriv->driverPrivate;
-
-      if ( intel->driDrawable != driDrawPriv ) {
-	 /* Shouldn't the readbuffer be stored also? */
-	 driDrawableInitVBlank( driDrawPriv, intel->vblank_flags,
-				&intel->vbl_seq );
-
-	 intel->driDrawable = driDrawPriv;
-	 intelWindowMoved( intel );
-      }
-
-      _mesa_make_current(&intel->ctx,
-			 (GLframebuffer *) driDrawPriv->driverPrivate,
-			 (GLframebuffer *) driReadPriv->driverPrivate);
-
-      intel->ctx.Driver.DrawBuffer( &intel->ctx, intel->ctx.Color.DrawBuffer[0] );
-   } else {
-      _mesa_make_current(NULL, NULL, NULL);
-   }
-
-   return GL_TRUE;
-}
-
-/**
- * Use the information in the sarea to update the screen parameters
- * related to screen rotation.
- */
-static void
-intelUpdateScreenRotation(intelContextPtr intel,
-                          __DRIscreenPrivate *sPriv,
-                          drmI830Sarea *sarea)
-{
-   intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private;
-   intelRegion *colorBuf;
-
-   intelUnmapScreenRegions(intelScreen);
-
-   intelUpdateScreenFromSAREA(intelScreen, sarea);
-
-   /* update the current hw offsets for the color and depth buffers */
-   if (intel->ctx.DrawBuffer->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT)
-      colorBuf = &intelScreen->back;
-   else
-      colorBuf = &intelScreen->front;
-   intel->vtbl.update_color_z_regions(intel, colorBuf, &intelScreen->depth);
-
-   if (!intelMapScreenRegions(sPriv)) {
-      fprintf(stderr, "ERROR Remapping screen regions!!!\n");
-   }
-}
-
-void intelGetLock( intelContextPtr intel, GLuint flags )
-{
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;
-   __DRIscreenPrivate *sPriv = intel->driScreen;
-   intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private;
-   drmI830Sarea * sarea = intel->sarea;
-   unsigned   i;
-
-   drmGetLock(intel->driFd, intel->hHWContext, flags);
-
-   /* If the window moved, may need to set a new cliprect now.
-    *
-    * NOTE: This releases and regains the hw lock, so all state
-    * checking must be done *after* this call:
-    */
-   if (dPriv)
-      DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv);
-
-   if (dPriv && intel->lastStamp != dPriv->lastStamp) {
-      intelWindowMoved( intel );
-      intel->lastStamp = dPriv->lastStamp;
-   }
-
-   /* If we lost context, need to dump all registers to hardware.
-    * Note that we don't care about 2d contexts, even if they perform
-    * accelerated commands, so the DRI locking in the X server is even
-    * more broken than usual.
-    */
-
-   if (sarea->width != intelScreen->width ||
-       sarea->height != intelScreen->height ||
-       sarea->rotation != intelScreen->current_rotation) {
-      intelUpdateScreenRotation(intel, sPriv, sarea);
-
-      /* This will drop the outstanding batchbuffer on the floor */
-      intel->batch.ptr -= (intel->batch.size - intel->batch.space);
-      intel->batch.space = intel->batch.size;
-      /* lose all primitives */
-      intel->prim.primitive = ~0;
-      intel->prim.start_ptr = 0;
-      intel->prim.flush = 0;
-      intel->vtbl.lost_hardware( intel ); 
-
-      intel->lastStamp = 0; /* force window update */
-
-      /* Release batch buffer
-       */
-      intelDestroyBatchBuffer(&intel->ctx);
-      intelInitBatchBuffer(&intel->ctx);
-      intel->prim.flush = intel_emit_invarient_state;
-
-      /* Still need to reset the global LRU?
-       */
-      intel_driReinitTextureHeap( intel->texture_heaps[0], intel->intelScreen->tex.size );
-   }
-
-   /* Shared texture managment - if another client has played with
-    * texture space, figure out which if any of our textures have been
-    * ejected, and update our global LRU.
-    */
-   for ( i = 0 ; i < intel->nr_heaps ; i++ ) {
-      DRI_AGE_TEXTURES( intel->texture_heaps[ i ] );
-   }
-}
-
-
-void intelSwapBuffers( __DRIdrawablePrivate *dPriv )
-{
-   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
-      intelContextPtr intel;
-      GLcontext *ctx;
-      intel = (intelContextPtr) dPriv->driContextPriv->driverPrivate;
-      ctx = &intel->ctx;
-      if (ctx->Visual.doubleBufferMode) {
-         intelScreenPrivate *screen = intel->intelScreen;
-	 _mesa_notifySwapBuffers( ctx );  /* flush pending rendering comands */
-	 if ( 0 /*intel->doPageFlip*/ ) { /* doPageFlip is never set !!! */
-	    intelPageFlip( dPriv );
-	 } else {
-	     intelCopyBuffer( dPriv, NULL );
-	 }
-         if (screen->current_rotation != 0) {
-            intelRotateWindow(intel, dPriv, BUFFER_BIT_FRONT_LEFT);
-         }
-      }
-   } else {
-      /* XXX this shouldn't be an error but we can't handle it for now */
-      fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__);
-   }
-}
-
-void intelCopySubBuffer( __DRIdrawablePrivate *dPriv,
-			 int x, int y, int w, int h )
-{
-   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
-      intelContextPtr intel;
-      GLcontext *ctx;
-      intel = (intelContextPtr) dPriv->driContextPriv->driverPrivate;
-      ctx = &intel->ctx;
-      if (ctx->Visual.doubleBufferMode) {
-	 drm_clip_rect_t rect;
-	 rect.x1 = x + dPriv->x;
-	 rect.y1 = (dPriv->h - y - h) + dPriv->y;
-	 rect.x2 = rect.x1 + w;
-	 rect.y2 = rect.y1 + h;
-	 _mesa_notifySwapBuffers( ctx );  /* flush pending rendering comands */
-	 intelCopyBuffer( dPriv, &rect );
-      }
-   } else {
-      /* XXX this shouldn't be an error but we can't handle it for now */
-      fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__);
-   }
-}
-
-void intelInitState( GLcontext *ctx )
-{
-   /* Mesa should do this for us:
-    */
-   ctx->Driver.AlphaFunc( ctx, 
-			  ctx->Color.AlphaFunc,
-			  ctx->Color.AlphaRef);
-
-   ctx->Driver.BlendColor( ctx,
-			   ctx->Color.BlendColor );
-
-   ctx->Driver.BlendEquationSeparate( ctx, 
-				      ctx->Color.BlendEquationRGB,
-				      ctx->Color.BlendEquationA);
-
-   ctx->Driver.BlendFuncSeparate( ctx,
-				  ctx->Color.BlendSrcRGB,
-				  ctx->Color.BlendDstRGB,
-				  ctx->Color.BlendSrcA,
-				  ctx->Color.BlendDstA);
-
-   ctx->Driver.ColorMask( ctx, 
-			  ctx->Color.ColorMask[RCOMP],
-			  ctx->Color.ColorMask[GCOMP],
-			  ctx->Color.ColorMask[BCOMP],
-			  ctx->Color.ColorMask[ACOMP]);
-
-   ctx->Driver.CullFace( ctx, ctx->Polygon.CullFaceMode );
-   ctx->Driver.DepthFunc( ctx, ctx->Depth.Func );
-   ctx->Driver.DepthMask( ctx, ctx->Depth.Mask );
-
-   ctx->Driver.Enable( ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled );
-   ctx->Driver.Enable( ctx, GL_BLEND, ctx->Color.BlendEnabled );
-   ctx->Driver.Enable( ctx, GL_COLOR_LOGIC_OP, ctx->Color.ColorLogicOpEnabled );
-   ctx->Driver.Enable( ctx, GL_COLOR_SUM, ctx->Fog.ColorSumEnabled );
-   ctx->Driver.Enable( ctx, GL_CULL_FACE, ctx->Polygon.CullFlag );
-   ctx->Driver.Enable( ctx, GL_DEPTH_TEST, ctx->Depth.Test );
-   ctx->Driver.Enable( ctx, GL_DITHER, ctx->Color.DitherFlag );
-   ctx->Driver.Enable( ctx, GL_FOG, ctx->Fog.Enabled );
-   ctx->Driver.Enable( ctx, GL_LIGHTING, ctx->Light.Enabled );
-   ctx->Driver.Enable( ctx, GL_LINE_SMOOTH, ctx->Line.SmoothFlag );
-   ctx->Driver.Enable( ctx, GL_POLYGON_STIPPLE, ctx->Polygon.StippleFlag );
-   ctx->Driver.Enable( ctx, GL_SCISSOR_TEST, ctx->Scissor.Enabled );
-   ctx->Driver.Enable( ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled );
-   ctx->Driver.Enable( ctx, GL_TEXTURE_1D, GL_FALSE );
-   ctx->Driver.Enable( ctx, GL_TEXTURE_2D, GL_FALSE );
-   ctx->Driver.Enable( ctx, GL_TEXTURE_RECTANGLE_NV, GL_FALSE );
-   ctx->Driver.Enable( ctx, GL_TEXTURE_3D, GL_FALSE );
-   ctx->Driver.Enable( ctx, GL_TEXTURE_CUBE_MAP, GL_FALSE );
-
-   ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color );
-   ctx->Driver.Fogfv( ctx, GL_FOG_MODE, 0 );
-   ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density );
-   ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start );
-   ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End );
-
-   ctx->Driver.FrontFace( ctx, ctx->Polygon.FrontFace );
-
-   {
-      GLfloat f = (GLfloat)ctx->Light.Model.ColorControl;
-      ctx->Driver.LightModelfv( ctx, GL_LIGHT_MODEL_COLOR_CONTROL, &f );
-   }
-
-   ctx->Driver.LineWidth( ctx, ctx->Line.Width );
-   ctx->Driver.LogicOpcode( ctx, ctx->Color.LogicOp );
-   ctx->Driver.PointSize( ctx, ctx->Point.Size );
-   ctx->Driver.PolygonStipple( ctx, (const GLubyte *)ctx->PolygonStipple );
-   ctx->Driver.Scissor( ctx, ctx->Scissor.X, ctx->Scissor.Y,
-			ctx->Scissor.Width, ctx->Scissor.Height );
-   ctx->Driver.ShadeModel( ctx, ctx->Light.ShadeModel );
-   ctx->Driver.StencilFuncSeparate( ctx, GL_FRONT,
-                                    ctx->Stencil.Function[0],
-                                    ctx->Stencil.Ref[0],
-                                    ctx->Stencil.ValueMask[0] );
-   ctx->Driver.StencilFuncSeparate( ctx, GL_BACK,
-                                    ctx->Stencil.Function[1],
-                                    ctx->Stencil.Ref[1],
-                                    ctx->Stencil.ValueMask[1] );
-   ctx->Driver.StencilMaskSeparate( ctx, GL_FRONT, ctx->Stencil.WriteMask[0] );
-   ctx->Driver.StencilMaskSeparate( ctx, GL_BACK, ctx->Stencil.WriteMask[1] );
-   ctx->Driver.StencilOpSeparate( ctx, GL_FRONT,
-                                  ctx->Stencil.FailFunc[0],
-                                  ctx->Stencil.ZFailFunc[0],
-                                  ctx->Stencil.ZPassFunc[0]);
-   ctx->Driver.StencilOpSeparate( ctx, GL_BACK,
-                                  ctx->Stencil.FailFunc[1],
-                                  ctx->Stencil.ZFailFunc[1],
-                                  ctx->Stencil.ZPassFunc[1]);
-
-
-   ctx->Driver.DrawBuffer( ctx, ctx->Color.DrawBuffer[0] );
-}
-
-
diff --git a/i915/intel_context.h b/i915/intel_context.h
deleted file mode 100644
index 50e6178..0000000
--- a/i915/intel_context.h
+++ /dev/null
@@ -1,564 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef INTELCONTEXT_INC
-#define INTELCONTEXT_INC
-
-
-
-#include "mtypes.h"
-#include "drm.h"
-#include "mm.h"
-#include "texmem.h"
-#include "vblank.h"
-
-#include "intel_screen.h"
-#include "i915_drm.h"
-#include "i830_common.h"
-#include "tnl/t_vertex.h"
-
-#define TAG(x) intel##x
-#include "tnl_dd/t_dd_vertex.h"
-#undef TAG
-
-#define DV_PF_555  (1<<8)
-#define DV_PF_565  (2<<8)
-#define DV_PF_8888 (3<<8)
-
-#define INTEL_CONTEXT(ctx)	((intelContextPtr)(ctx))
-
-typedef struct intel_context intelContext;
-typedef struct intel_context *intelContextPtr;
-typedef struct intel_texture_object *intelTextureObjectPtr;
-
-typedef void (*intel_tri_func)(intelContextPtr, intelVertex *, intelVertex *,
-							  intelVertex *);
-typedef void (*intel_line_func)(intelContextPtr, intelVertex *, intelVertex *);
-typedef void (*intel_point_func)(intelContextPtr, intelVertex *);
-
-#define INTEL_FALLBACK_DRAW_BUFFER	 0x1
-#define INTEL_FALLBACK_READ_BUFFER	 0x2
-#define INTEL_FALLBACK_USER		 0x4
-#define INTEL_FALLBACK_NO_BATCHBUFFER	 0x8
-#define INTEL_FALLBACK_NO_TEXMEM	 0x10
-#define INTEL_FALLBACK_RENDERMODE	 0x20
-
-extern void intelFallback( intelContextPtr intel, GLuint bit, GLboolean mode );
-#define FALLBACK( intel, bit, mode ) intelFallback( intel, bit, mode )
-
-
-#define INTEL_TEX_MAXLEVELS 10
-
-
-struct intel_texture_object
-{
-   driTextureObject    base;	/* the parent class */
-
-   GLuint texelBytes;
-   GLuint age;
-   GLuint Pitch;
-   GLuint Height;
-   GLuint TextureOffset;
-   GLubyte *BufAddr;   
-
-   GLuint min_level;
-   GLuint max_level;
-   GLuint depth_pitch;
-
-   struct {
-      const struct gl_texture_image *image;
-      GLuint offset;       /* into BufAddr */
-      GLuint height;
-      GLuint internalFormat;
-   } image[6][INTEL_TEX_MAXLEVELS];
-
-   GLuint dirty;
-   GLuint firstLevel,lastLevel;
-};
-
-
-struct intel_context
-{
-   GLcontext ctx;		/* the parent class */
-
-   struct {
-      void (*destroy)( intelContextPtr intel ); 
-      void (*emit_state)( intelContextPtr intel );
-      void (*lost_hardware)( intelContextPtr intel );
-      void (*update_texture_state)( intelContextPtr intel );
-
-      void (*render_start)( intelContextPtr intel );
-      void (*set_color_region)( intelContextPtr intel, const intelRegion *reg );
-      void (*set_z_region)( intelContextPtr intel, const intelRegion *reg );
-      void (*update_color_z_regions)(intelContextPtr intel,
-                                     const intelRegion *colorRegion,
-                                     const intelRegion *depthRegion);
-      void (*emit_flush)( intelContextPtr intel );
-      void (*reduced_primitive_state)( intelContextPtr intel, GLenum rprim );
-
-      GLboolean (*check_vertex_size)( intelContextPtr intel, GLuint expected );
-
-      void (*clear_with_tris)( intelContextPtr intel, GLbitfield mask,
-			       GLboolean all, 
-			       GLint cx, GLint cy, GLint cw, GLint ch);
-
-      void (*rotate_window)( intelContextPtr intel,
-                             __DRIdrawablePrivate *dPriv, GLuint srcBuf);
-
-      intelTextureObjectPtr (*alloc_tex_obj)( struct gl_texture_object *tObj );
-
-   } vtbl;
-
-   GLint refcount;   
-   GLuint Fallback;
-   GLuint NewGLState;
-   
-   struct {
-      GLuint start_offset;
-      GLint size;
-      GLint space;
-      GLubyte *ptr;
-      GLuint counter;
-      GLuint last_emit_state;
-      GLboolean contains_geometry;
-      const char *func;
-      GLuint last_swap;
-   } batch;
-      
-   struct {
-      void *ptr;
-      GLint size;
-      GLuint offset;
-      GLuint active_buf;
-      GLuint irq_emitted;
-   } alloc;
-
-   struct {
-      GLuint primitive;
-      GLubyte *start_ptr;      
-      void (*flush)( GLcontext * );
-   } prim;
-
-   GLboolean locked;
-
-   GLubyte clear_red;
-   GLubyte clear_green;
-   GLubyte clear_blue;
-   GLubyte clear_alpha;
-   GLuint ClearColor;
-   GLuint ClearDepth;
-
-   GLuint coloroffset;
-   GLuint specoffset;
-
-   /* Support for duplicating XYZW as WPOS parameter (crutch for I915).
-    */
-   GLuint wpos_offset;
-   GLuint wpos_size;
-
-   struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
-   GLuint vertex_attr_count;
-
-   GLfloat depth_scale;
-   GLfloat polygon_offset_scale; /* dependent on depth_scale, bpp */
-   GLuint depth_clear_mask;
-   GLuint stencil_clear_mask;
-
-   GLboolean hw_stencil;
-   GLboolean hw_stipple;
-   
-   /* Texture object bookkeeping
-    */
-   GLuint                nr_heaps;
-   driTexHeap          * texture_heaps[1];
-   driTextureObject      swapped;
-   GLuint                lastStamp;
-
-   struct intel_texture_object *CurrentTexObj[MAX_TEXTURE_UNITS];
-
-   /* State for intelvb.c and inteltris.c.
-    */
-   GLuint RenderIndex;
-   GLmatrix ViewportMatrix;
-   GLenum render_primitive;
-   GLenum reduced_primitive;
-   GLuint vertex_size;
-   unsigned char *verts;	   /* points to tnl->clipspace.vertex_buf */
-
-
-   /* Fallback rasterization functions 
-    */
-   intel_point_func draw_point;
-   intel_line_func draw_line;
-   intel_tri_func draw_tri;
-
-   /* Drawing buffer state
-    */
-   intelRegion *drawRegion;  /* current drawing buffer */
-   intelRegion *readRegion;  /* current reading buffer */
-
-   int drawX;			/* origin of drawable in draw buffer */
-   int drawY;
-   GLuint numClipRects;		/* cliprects for that buffer */
-   drm_clip_rect_t *pClipRects;
-
-   int dirtyAge;
-   int perf_boxes;
-
-   GLuint do_usleeps;
-   int do_irqs;
-   GLuint irqsEmitted;
-   drm_i915_irq_wait_t iw;
-
-   GLboolean scissor;
-   drm_clip_rect_t draw_rect;
-   drm_clip_rect_t scissor_rect;
-
-   drm_context_t hHWContext;
-   drmLock *driHwLock;
-   int driFd;
-
-   __DRIdrawablePrivate *driDrawable;
-   __DRIscreenPrivate *driScreen;
-   intelScreenPrivate *intelScreen; 
-   drmI830Sarea *sarea; 
-
-   /**
-    * Configuration cache
-    */
-   driOptionCache optionCache;
-
-   /* VBI
-    */
-   GLuint vbl_seq;
-   GLuint vblank_flags;
-
-   int64_t swap_ust;
-   int64_t swap_missed_ust;
-
-   GLuint swap_count;
-   GLuint swap_missed_count;
-
-   GLuint swap_scheduled;
-};
-
-
-#define DEBUG_LOCKING	1
-
-#if DEBUG_LOCKING
-extern char *prevLockFile;
-extern int prevLockLine;
-
-#define DEBUG_LOCK()							\
-   do {									\
-      prevLockFile = (__FILE__);					\
-      prevLockLine = (__LINE__);					\
-   } while (0)
-
-#define DEBUG_RESET()							\
-   do {									\
-      prevLockFile = 0;							\
-      prevLockLine = 0;							\
-   } while (0)
-
-/* Slightly less broken way of detecting recursive locking in a
- * threaded environment.  The right way to do this would be to make
- * prevLockFile, prevLockLine thread-local.
- *
- * This technique instead checks to see if the same context is
- * requesting the lock twice -- this will not catch application
- * breakages where the same context is active in two different threads
- * at once, but it will catch driver breakages (recursive locking) in
- * threaded apps.
- */
-#define DEBUG_CHECK_LOCK()						\
-   do {									\
-      if ( *((volatile int *)intel->driHwLock) == 			\
-	   (DRM_LOCK_HELD | intel->hHWContext) ) {			\
-	 fprintf( stderr,						\
-		  "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n",	\
-		  prevLockFile, prevLockLine, __FILE__, __LINE__ );	\
-	 abort();							\
-      }									\
-   } while (0)
-
-#else
-
-#define DEBUG_LOCK()
-#define DEBUG_RESET()
-#define DEBUG_CHECK_LOCK()
-
-#endif
-
-
-
-
-/* Lock the hardware and validate our state.  
- */
-#define LOCK_HARDWARE( intel )				\
-do {							\
-    char __ret=0;					\
-    DEBUG_CHECK_LOCK();					\
-    assert(!(intel)->locked);				\
-    if ((intel)->swap_scheduled) {			\
-        drmVBlank vbl;					\
-        vbl.request.type = DRM_VBLANK_ABSOLUTE;		\
-        if ((intel)->vblank_flags &			\
-            VBLANK_FLAG_SECONDARY) {			\
-            vbl.request.type |= DRM_VBLANK_SECONDARY;	\
-        }						\
-        vbl.request.sequence = (intel)->vbl_seq;	\
-        drmWaitVBlank((intel)->driFd, &vbl);		\
-        (intel)->swap_scheduled = 0;			\
-    }							\
-    DRM_CAS((intel)->driHwLock, (intel)->hHWContext,	\
-        (DRM_LOCK_HELD|(intel)->hHWContext), __ret);	\
-    if (__ret)						\
-        intelGetLock( (intel), 0 );			\
-      DEBUG_LOCK();					\
-    (intel)->locked = 1;				\
-}while (0)
- 
-  
-  /* Unlock the hardware using the global current context 
-   */
-#define UNLOCK_HARDWARE(intel)						\
-do {									\
-   intel->locked = 0;							\
-   if (0) { 								\
-      intel->perf_boxes |= intel->sarea->perf_boxes;  			\
-      intel->sarea->perf_boxes = 0;					\
-   }									\
-   DRM_UNLOCK((intel)->driFd, (intel)->driHwLock, (intel)->hHWContext);	\
-   DEBUG_RESET();							\
-} while (0)
-
-
-#define SUBPIXEL_X 0.125
-#define SUBPIXEL_Y 0.125
-
-#define INTEL_FIREVERTICES(intel)		\
-do {						\
-   if ((intel)->prim.flush)			\
-      (intel)->prim.flush(&(intel)->ctx);		\
-} while (0)
-
-/* ================================================================
- * Color packing:
- */
-
-#define INTEL_PACKCOLOR4444(r,g,b,a) \
-  ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4))
-
-#define INTEL_PACKCOLOR1555(r,g,b,a) \
-  ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \
-    ((a) ? 0x8000 : 0))
-
-#define INTEL_PACKCOLOR565(r,g,b) \
-  ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3))
-
-#define INTEL_PACKCOLOR8888(r,g,b,a) \
-  ((a<<24) | (r<<16) | (g<<8) | b)
-
-
-#define INTEL_PACKCOLOR(format, r,  g,  b, a)		\
-(format == DV_PF_555 ? INTEL_PACKCOLOR1555(r,g,b,a) :	\
- (format == DV_PF_565 ? INTEL_PACKCOLOR565(r,g,b) :	\
-  (format == DV_PF_8888 ? INTEL_PACKCOLOR8888(r,g,b,a) :	\
-   0)))
-
-
-
-/* ================================================================
- * From linux kernel i386 header files, copes with odd sizes better
- * than COPY_DWORDS would:
- */
-#if defined(i386) || defined(__i386__)
-static __inline__ void * __memcpy(void * to, const void * from, size_t n)
-{
-   int d0, d1, d2;
-   __asm__ __volatile__(
-      "rep ; movsl\n\t"
-      "testb $2,%b4\n\t"
-      "je 1f\n\t"
-      "movsw\n"
-      "1:\ttestb $1,%b4\n\t"
-      "je 2f\n\t"
-      "movsb\n"
-      "2:"
-      : "=&c" (d0), "=&D" (d1), "=&S" (d2)
-      :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
-      : "memory");
-   return (to);
-}
-#else
-#define __memcpy(a,b,c) memcpy(a,b,c)
-#endif
-
-
-
-/* ================================================================
- * Debugging:
- */
-#define DO_DEBUG		1
-#if DO_DEBUG
-extern int INTEL_DEBUG;
-#else
-#define INTEL_DEBUG		0
-#endif
-
-#define DEBUG_TEXTURE	0x1
-#define DEBUG_STATE	0x2
-#define DEBUG_IOCTL	0x4
-#define DEBUG_PRIMS	0x8
-#define DEBUG_VERTS	0x10
-#define DEBUG_FALLBACKS	0x20
-#define DEBUG_VERBOSE	0x40
-#define DEBUG_DRI       0x80
-#define DEBUG_DMA       0x100
-#define DEBUG_SANITY    0x200
-#define DEBUG_SYNC      0x400
-#define DEBUG_SLEEP     0x800
-#define DEBUG_PIXEL     0x1000
-
-
-#define PCI_CHIP_845_G			0x2562
-#define PCI_CHIP_I830_M			0x3577
-#define PCI_CHIP_I855_GM		0x3582
-#define PCI_CHIP_I865_G			0x2572
-#define PCI_CHIP_I915_G			0x2582
-#define PCI_CHIP_I915_GM		0x2592
-#define PCI_CHIP_I945_G			0x2772
-#define PCI_CHIP_I945_GM		0x27A2
-#define PCI_CHIP_I945_GME		0x27AE
-#define PCI_CHIP_G33_G			0x29C2
-#define PCI_CHIP_Q35_G			0x29B2
-#define PCI_CHIP_Q33_G			0x29D2
-
-
-/* ================================================================
- * intel_context.c:
- */
-
-extern void intelInitDriverFunctions( struct dd_function_table *functions );
-
-extern GLboolean intelInitContext( intelContextPtr intel, 
-				   const __GLcontextModes *mesaVis,
-				   __DRIcontextPrivate *driContextPriv,
-				   void *sharedContextPrivate,
-				   struct dd_function_table *functions );
-
-extern void intelGetLock(intelContextPtr intel, GLuint flags);
-extern void intelSetBackClipRects(intelContextPtr intel);
-extern void intelSetFrontClipRects(intelContextPtr intel);
-extern void intelWindowMoved( intelContextPtr intel );
-
-extern void intelInitState( GLcontext *ctx );
-extern const GLubyte *intelGetString( GLcontext *ctx, GLenum name );
-
-
-/* ================================================================
- * intel_state.c:
- */
-extern void intelInitStateFuncs( struct dd_function_table *functions );
-
-#define COMPAREFUNC_ALWAYS		0
-#define COMPAREFUNC_NEVER		0x1
-#define COMPAREFUNC_LESS		0x2
-#define COMPAREFUNC_EQUAL		0x3
-#define COMPAREFUNC_LEQUAL		0x4
-#define COMPAREFUNC_GREATER		0x5
-#define COMPAREFUNC_NOTEQUAL		0x6
-#define COMPAREFUNC_GEQUAL		0x7
-
-#define STENCILOP_KEEP			0
-#define STENCILOP_ZERO			0x1
-#define STENCILOP_REPLACE		0x2
-#define STENCILOP_INCRSAT		0x3
-#define STENCILOP_DECRSAT		0x4
-#define STENCILOP_INCR			0x5
-#define STENCILOP_DECR			0x6
-#define STENCILOP_INVERT		0x7
-
-#define LOGICOP_CLEAR			0
-#define LOGICOP_NOR			0x1
-#define LOGICOP_AND_INV 		0x2
-#define LOGICOP_COPY_INV		0x3
-#define LOGICOP_AND_RVRSE		0x4
-#define LOGICOP_INV			0x5
-#define LOGICOP_XOR			0x6
-#define LOGICOP_NAND			0x7
-#define LOGICOP_AND			0x8
-#define LOGICOP_EQUIV			0x9
-#define LOGICOP_NOOP			0xa
-#define LOGICOP_OR_INV			0xb
-#define LOGICOP_COPY			0xc
-#define LOGICOP_OR_RVRSE		0xd
-#define LOGICOP_OR			0xe
-#define LOGICOP_SET			0xf
-
-#define BLENDFACT_ZERO			0x01
-#define BLENDFACT_ONE			0x02
-#define BLENDFACT_SRC_COLR		0x03
-#define BLENDFACT_INV_SRC_COLR 		0x04
-#define BLENDFACT_SRC_ALPHA		0x05
-#define BLENDFACT_INV_SRC_ALPHA 	0x06
-#define BLENDFACT_DST_ALPHA		0x07
-#define BLENDFACT_INV_DST_ALPHA 	0x08
-#define BLENDFACT_DST_COLR		0x09
-#define BLENDFACT_INV_DST_COLR		0x0a
-#define BLENDFACT_SRC_ALPHA_SATURATE	0x0b
-#define BLENDFACT_CONST_COLOR		0x0c
-#define BLENDFACT_INV_CONST_COLOR	0x0d
-#define BLENDFACT_CONST_ALPHA		0x0e
-#define BLENDFACT_INV_CONST_ALPHA	0x0f
-#define BLENDFACT_MASK          	0x0f
-
-
-extern int intel_translate_compare_func( GLenum func );
-extern int intel_translate_stencil_op( GLenum op );
-extern int intel_translate_blend_factor( GLenum factor );
-extern int intel_translate_logic_op( GLenum opcode );
-
-
-/* ================================================================
- * intel_ioctl.c:
- */
-extern void intel_dump_batchbuffer( long offset,
-				    int *ptr,
-				    int count );
-
-
-/* ================================================================
- * intel_pixel.c:
- */	
-extern void intelInitPixelFuncs( struct dd_function_table *functions );
-
-
-
-#endif
-
diff --git a/i915/intel_ioctl.c b/i915/intel_ioctl.c
deleted file mode 100644
index ede3b63..0000000
--- a/i915/intel_ioctl.c
+++ /dev/null
@@ -1,659 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#include <stdio.h>
-#include <unistd.h>
-#include <errno.h>
-#include <sched.h>
-
-#include "mtypes.h"
-#include "context.h"
-#include "swrast/swrast.h"
-
-#include "intel_context.h"
-#include "intel_ioctl.h"
-#include "intel_batchbuffer.h"
-#include "drm.h"
-
-u_int32_t intelGetLastFrame (intelContextPtr intel) 
-{
-   int ret;
-   u_int32_t frame;
-   drm_i915_getparam_t gp;
-   
-   gp.param = I915_PARAM_LAST_DISPATCH;
-   gp.value = (int *)&frame;
-   ret = drmCommandWriteRead( intel->driFd, DRM_I915_GETPARAM,
-			      &gp, sizeof(gp) );
-   return frame;
-}
-
-int intelEmitIrqLocked( intelContextPtr intel )
-{
-   drmI830IrqEmit ie;
-   int ret, seq;
-      
-   assert(((*(int *)intel->driHwLock) & ~DRM_LOCK_CONT) == 
-	  (DRM_LOCK_HELD|intel->hHWContext));
-
-   ie.irq_seq = &seq;
-	 
-   ret = drmCommandWriteRead( intel->driFd, DRM_I830_IRQ_EMIT, 
-			      &ie, sizeof(ie) );
-   if ( ret ) {
-      fprintf( stderr, "%s: drmI830IrqEmit: %d\n", __FUNCTION__, ret );
-      exit(1);
-   }
-   
-   if (0)
-      fprintf(stderr, "%s -->  %d\n", __FUNCTION__, seq );
-
-   return seq;
-}
-
-void intelWaitIrq( intelContextPtr intel, int seq )
-{
-   int ret;
-      
-   if (0)
-      fprintf(stderr, "%s %d\n", __FUNCTION__, seq );
-
-   intel->iw.irq_seq = seq;
-	 
-   do {
-     ret = drmCommandWrite( intel->driFd, DRM_I830_IRQ_WAIT, &intel->iw, sizeof(intel->iw) );
-   } while (ret == -EAGAIN || ret == -EINTR);
-
-   if ( ret ) {
-      fprintf( stderr, "%s: drmI830IrqWait: %d\n", __FUNCTION__, ret );
-      if (0)
-	 intel_dump_batchbuffer( intel->alloc.offset,
-				 intel->alloc.ptr,
-				 intel->alloc.size );
-      exit(1);
-   }
-}
-
-
-
-static void age_intel( intelContextPtr intel, int age )
-{
-   GLuint i;
-
-   for (i = 0 ; i < MAX_TEXTURE_UNITS ; i++)
-      if (intel->CurrentTexObj[i]) 
-	 intel->CurrentTexObj[i]->age = age;
-}
-
-void intel_dump_batchbuffer( long offset,
-			     int *ptr,
-			     int count )
-{
-   int i;
-   fprintf(stderr, "\n\n\nSTART BATCH (%d dwords):\n", count);
-   for (i = 0; i < count/4; i += 4) 
-      fprintf(stderr, "\t0x%x: 0x%08x 0x%08x 0x%08x 0x%08x\n", 
-	      (unsigned int)offset + i*4, ptr[i], ptr[i+1], ptr[i+2], ptr[i+3]);
-   fprintf(stderr, "END BATCH\n\n\n");
-}
-
-void intelRefillBatchLocked( intelContextPtr intel, GLboolean allow_unlock )
-{
-   GLuint last_irq = intel->alloc.irq_emitted;
-   GLuint half = intel->alloc.size / 2;
-   GLuint buf = (intel->alloc.active_buf ^= 1);
-
-   intel->alloc.irq_emitted = intelEmitIrqLocked( intel );
-
-   if (last_irq) {
-      if (allow_unlock) UNLOCK_HARDWARE( intel ); 
-      intelWaitIrq( intel, last_irq );
-      if (allow_unlock) LOCK_HARDWARE( intel ); 
-   }
-
-   if (0)
-      fprintf(stderr, "%s: now using half %d\n", __FUNCTION__, buf);
-
-   intel->batch.start_offset = intel->alloc.offset + buf * half;
-   intel->batch.ptr = (unsigned char *)intel->alloc.ptr + buf * half;
-   intel->batch.size = half - 8;
-   intel->batch.space = half - 8;
-   assert(intel->batch.space >= 0);
-}
-
-#define MI_BATCH_BUFFER_END 	(0xA<<23)
-
-
-void intelFlushBatchLocked( intelContextPtr intel, 
-			    GLboolean ignore_cliprects,
-			    GLboolean refill,
-			    GLboolean allow_unlock)
-{
-   drmI830BatchBuffer batch;
-
-   assert(intel->locked);
-
-   if (0)
-      fprintf(stderr, "%s used %d of %d offset %x..%x refill %d (started in %s)\n",
-	      __FUNCTION__, 
-	      (intel->batch.size - intel->batch.space), 
-	      intel->batch.size,
-	      intel->batch.start_offset,
-	      intel->batch.start_offset + 
-	      (intel->batch.size - intel->batch.space), 
-	      refill,
-	      intel->batch.func);
-
-   /* Throw away non-effective packets.  Won't work once we have
-    * hardware contexts which would preserve statechanges beyond a
-    * single buffer.
-    */
-   if (intel->numClipRects == 0 && !ignore_cliprects) {
-      
-      /* Without this yeild, an application with no cliprects can hog
-       * the hardware.  Without unlocking, the effect is much worse -
-       * effectively a lock-out of other contexts.
-       */
-      if (allow_unlock) {
-	 UNLOCK_HARDWARE( intel );
-	 sched_yield();
-	 LOCK_HARDWARE( intel );
-      }
-
-      /* Note that any state thought to have been emitted actually
-       * hasn't:
-       */
-      intel->batch.ptr -= (intel->batch.size - intel->batch.space);
-      intel->batch.space = intel->batch.size;
-      intel->vtbl.lost_hardware( intel ); 
-   }
-
-   if (intel->batch.space != intel->batch.size) {
-
-      if (intel->sarea->ctxOwner != intel->hHWContext) {
-	 intel->perf_boxes |= I830_BOX_LOST_CONTEXT;
-	 intel->sarea->ctxOwner = intel->hHWContext;
-      }
-
-      batch.start = intel->batch.start_offset;
-      batch.used = intel->batch.size - intel->batch.space;
-      batch.cliprects = intel->pClipRects;
-      batch.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects;
-      batch.DR1 = 0;
-      batch.DR4 = ((((GLuint)intel->drawX) & 0xffff) | 
-		   (((GLuint)intel->drawY) << 16));
-      
-      if (intel->alloc.offset) {
-	 if ((batch.used & 0x4) == 0) {
-	    ((int *)intel->batch.ptr)[0] = 0;
-	    ((int *)intel->batch.ptr)[1] = MI_BATCH_BUFFER_END;
-	    batch.used += 0x8;
-	    intel->batch.ptr += 0x8;
-	 }
-	 else {
-	    ((int *)intel->batch.ptr)[0] = MI_BATCH_BUFFER_END;
-	    batch.used += 0x4;
-	    intel->batch.ptr += 0x4;
-	 }      
-      }
-
-      if (0)
- 	 intel_dump_batchbuffer( batch.start,
-				 (int *)(intel->batch.ptr - batch.used),
-				 batch.used );
-
-      intel->batch.start_offset += batch.used;
-      intel->batch.size -= batch.used;
-
-      if (intel->batch.size < 8) {
-	 refill = GL_TRUE;
-	 intel->batch.space = intel->batch.size = 0;
-      }
-      else {
-	 intel->batch.size -= 8;
-	 intel->batch.space = intel->batch.size;
-      }
-
-
-      assert(intel->batch.space >= 0);
-      assert(batch.start >= intel->alloc.offset);
-      assert(batch.start < intel->alloc.offset + intel->alloc.size);
-      assert(batch.start + batch.used > intel->alloc.offset);
-      assert(batch.start + batch.used <= 
-	     intel->alloc.offset + intel->alloc.size);
-
-
-      if (intel->alloc.offset) {
-	 if (drmCommandWrite (intel->driFd, DRM_I830_BATCHBUFFER, &batch, 
-			      sizeof(batch))) {
-	    fprintf(stderr, "DRM_I830_BATCHBUFFER: %d\n",  -errno);
-	    UNLOCK_HARDWARE(intel);
-	    exit(1);
-	 }
-      } else {
-	 drmI830CmdBuffer cmd;
-	 cmd.buf = (char *)intel->alloc.ptr + batch.start;
-	 cmd.sz = batch.used;
-	 cmd.DR1 = batch.DR1;
-	 cmd.DR4 = batch.DR4;
-	 cmd.num_cliprects = batch.num_cliprects;
-	 cmd.cliprects = batch.cliprects;
-	 
-	 if (drmCommandWrite (intel->driFd, DRM_I830_CMDBUFFER, &cmd, 
-			      sizeof(cmd))) {
-	    fprintf(stderr, "DRM_I830_CMDBUFFER: %d\n",  -errno);
-	    UNLOCK_HARDWARE(intel);
-	    exit(1);
-	 }
-      }	 
-
-      
-      age_intel(intel, intel->sarea->last_enqueue);
-
-      /* FIXME: use hardware contexts to avoid 'losing' hardware after
-       * each buffer flush.
-       */
-      if (intel->batch.contains_geometry) 
-	 assert(intel->batch.last_emit_state == intel->batch.counter);
-
-      intel->batch.counter++;
-      intel->batch.contains_geometry = 0;
-      intel->batch.func = 0;
-      intel->vtbl.lost_hardware( intel );
-   }
-
-   if (refill)
-      intelRefillBatchLocked( intel, allow_unlock );
-}
-
-void intelFlushBatch( intelContextPtr intel, GLboolean refill )
-{
-   if (intel->locked) {
-      intelFlushBatchLocked( intel, GL_FALSE, refill, GL_FALSE );
-   } 
-   else {
-      LOCK_HARDWARE(intel);
-      intelFlushBatchLocked( intel, GL_FALSE, refill, GL_TRUE );
-      UNLOCK_HARDWARE(intel);
-   }
-}
-
-
-void intelWaitForIdle( intelContextPtr intel )
-{   
-   if (0)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   intel->vtbl.emit_flush( intel );
-   intelFlushBatch( intel, GL_TRUE );
-
-   /* Use an irq to wait for dma idle -- Need to track lost contexts
-    * to shortcircuit consecutive calls to this function:
-    */
-   intelWaitIrq( intel, intel->alloc.irq_emitted );
-   intel->alloc.irq_emitted = 0;
-}
-
-
-/**
- * Check if we need to rotate/warp the front color buffer to the
- * rotated screen.  We generally need to do this when we get a glFlush
- * or glFinish after drawing to the front color buffer.
- */
-static void
-intelCheckFrontRotate(GLcontext *ctx)
-{
-   intelContextPtr intel = INTEL_CONTEXT( ctx );
-   if (intel->ctx.DrawBuffer->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT) {
-      intelScreenPrivate *screen = intel->intelScreen;
-      if (screen->current_rotation != 0) {
-         __DRIdrawablePrivate *dPriv = intel->driDrawable;
-         intelRotateWindow(intel, dPriv, BUFFER_BIT_FRONT_LEFT);
-      }
-   }
-}
-
-
-/**
- * NOT directly called via glFlush.
- */
-void intelFlush( GLcontext *ctx )
-{
-   intelContextPtr intel = INTEL_CONTEXT( ctx );
-
-   if (intel->Fallback)
-      _swrast_flush( ctx );
-
-   INTEL_FIREVERTICES( intel );
-
-   if (intel->batch.size != intel->batch.space)
-      intelFlushBatch( intel, GL_FALSE );
-}
-
-
-/**
- * Called via glFlush.
- */
-void intelglFlush( GLcontext *ctx )
-{
-   intelFlush(ctx);
-   intelCheckFrontRotate(ctx);
-}
-
-
-void intelFinish( GLcontext *ctx  ) 
-{
-   intelContextPtr intel = INTEL_CONTEXT( ctx );
-   intelFlush( ctx );
-   intelWaitForIdle( intel );
-   intelCheckFrontRotate(ctx);
-}
-
-
-void intelClear(GLcontext *ctx, GLbitfield mask)
-{
-   intelContextPtr intel = INTEL_CONTEXT( ctx );
-   const GLuint colorMask = *((GLuint *) &ctx->Color.ColorMask);
-   GLbitfield tri_mask = 0;
-   GLbitfield blit_mask = 0;
-   GLbitfield swrast_mask = 0;
-
-   if (0)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   /* Take care of cliprects, which are handled differently for
-    * clears, etc.
-    */
-   intelFlush( &intel->ctx );
-
-   if (mask & BUFFER_BIT_FRONT_LEFT) {
-      if (colorMask == ~0) {
-	 blit_mask |= BUFFER_BIT_FRONT_LEFT;
-      } 
-      else {
-	 tri_mask |= BUFFER_BIT_FRONT_LEFT;
-      }
-   }
-
-   if (mask & BUFFER_BIT_BACK_LEFT) {
-      if (colorMask == ~0) {
-	 blit_mask |= BUFFER_BIT_BACK_LEFT;
-      } 
-      else {
-	 tri_mask |= BUFFER_BIT_BACK_LEFT;
-      }
-   }
-
-   if (mask & BUFFER_BIT_DEPTH) {
-      blit_mask |= BUFFER_BIT_DEPTH;
-   }
-
-   if (mask & BUFFER_BIT_STENCIL) {
-      if (!intel->hw_stencil) {
-	 swrast_mask |= BUFFER_BIT_STENCIL;
-      }
-      else if ((ctx->Stencil.WriteMask[0] & 0xff) != 0xff) {
-	 tri_mask |= BUFFER_BIT_STENCIL;
-      } 
-      else {
-	 blit_mask |= BUFFER_BIT_STENCIL;
-      }
-   }
-
-   swrast_mask |= (mask & BUFFER_BIT_ACCUM);
-
-   if (blit_mask) 
-      intelClearWithBlit( ctx, blit_mask, 0, 0, 0, 0, 0);
-
-   if (tri_mask) 
-      intel->vtbl.clear_with_tris( intel, tri_mask, 0, 0, 0, 0, 0);
-
-   if (swrast_mask)
-      _swrast_Clear( ctx, swrast_mask );
-}
-
-
-void
-intelRotateWindow(intelContextPtr intel, __DRIdrawablePrivate *dPriv,
-                  GLuint srcBuffer)
-{
-   if (intel->vtbl.rotate_window) {
-      intel->vtbl.rotate_window(intel, dPriv, srcBuffer);
-   }
-}
-
-
-void *intelAllocateAGP( intelContextPtr intel, GLsizei size )
-{
-   int region_offset;
-   drmI830MemAlloc alloc;
-   int ret;
-
-   if (0)
-      fprintf(stderr, "%s: %d bytes\n", __FUNCTION__, size);
-
-   alloc.region = I830_MEM_REGION_AGP;
-   alloc.alignment = 0;
-   alloc.size = size;
-   alloc.region_offset = &region_offset;
-
-   LOCK_HARDWARE(intel);
-
-   /* Make sure the global heap is initialized
-    */
-   if (intel->texture_heaps[0])
-      driAgeTextures( intel->texture_heaps[0] );
-
-
-   ret = drmCommandWriteRead( intel->driFd,
-			      DRM_I830_ALLOC,
-			      &alloc, sizeof(alloc));
-   
-   if (ret) {
-      fprintf(stderr, "%s: DRM_I830_ALLOC ret %d\n", __FUNCTION__, ret);
-      UNLOCK_HARDWARE(intel);
-      return NULL;
-   }
-   
-   if (0)
-      fprintf(stderr, "%s: allocated %d bytes\n", __FUNCTION__, size);
-
-   /* Need to propogate this information (agp memory in use) to our
-    * local texture lru.  The kernel has already updated the global
-    * lru.  An alternative would have been to allocate memory the
-    * usual way and then notify the kernel to pin the allocation.
-    */
-   if (intel->texture_heaps[0])
-      driAgeTextures( intel->texture_heaps[0] );
-
-   UNLOCK_HARDWARE(intel);   
-
-   return (void *)((char *)intel->intelScreen->tex.map + region_offset);
-}
-
-void intelFreeAGP( intelContextPtr intel, void *pointer )
-{
-   int region_offset;
-   drmI830MemFree memfree;
-   int ret;
-
-   region_offset = (char *)pointer - (char *)intel->intelScreen->tex.map;
-
-   if (region_offset < 0 || 
-       region_offset > intel->intelScreen->tex.size) {
-      fprintf(stderr, "offset %d outside range 0..%d\n", region_offset,
-	      intel->intelScreen->tex.size);
-      return;
-   }
-
-   memfree.region = I830_MEM_REGION_AGP;
-   memfree.region_offset = region_offset;
-   
-   ret = drmCommandWrite( intel->driFd,
-			  DRM_I830_FREE,
-			  &memfree, sizeof(memfree));
-   
-   if (ret) 
-      fprintf(stderr, "%s: DRM_I830_FREE ret %d\n", __FUNCTION__, ret);
-}
-
-/* This version of AllocateMemoryMESA allocates only agp memory, and
- * only does so after the point at which the driver has been
- * initialized.
- *
- * Theoretically a valid context isn't required.  However, in this
- * implementation, it is, as I'm using the hardware lock to protect
- * the kernel data structures, and the current context to get the
- * device fd.
- */
-void *intelAllocateMemoryMESA(__DRInativeDisplay *dpy, int scrn,
-			      GLsizei size, GLfloat readfreq,
-			      GLfloat writefreq, GLfloat priority)
-{
-   GET_CURRENT_CONTEXT(ctx);
-
-   if (INTEL_DEBUG & DEBUG_IOCTL)
-      fprintf(stderr, "%s sz %d %f/%f/%f\n", __FUNCTION__, size, readfreq, 
-	      writefreq, priority);
-
-   if (getenv("INTEL_NO_ALLOC"))
-      return NULL;
-   
-   if (!ctx || INTEL_CONTEXT(ctx) == 0) 
-      return NULL;
-   
-   return intelAllocateAGP( INTEL_CONTEXT(ctx), size );
-}
-
-
-/* Called via glXFreeMemoryMESA() */
-void intelFreeMemoryMESA(__DRInativeDisplay *dpy, int scrn, GLvoid *pointer)
-{
-   GET_CURRENT_CONTEXT(ctx);
-   if (INTEL_DEBUG & DEBUG_IOCTL) 
-      fprintf(stderr, "%s %p\n", __FUNCTION__, pointer);
-
-   if (!ctx || INTEL_CONTEXT(ctx) == 0) {
-      fprintf(stderr, "%s: no context\n", __FUNCTION__);
-      return;
-   }
-
-   intelFreeAGP( INTEL_CONTEXT(ctx), pointer );
-}
-
-/* Called via glXGetMemoryOffsetMESA() 
- *
- * Returns offset of pointer from the start of agp aperture.
- */
-GLuint intelGetMemoryOffsetMESA(__DRInativeDisplay *dpy, int scrn, 
-				const GLvoid *pointer)
-{
-   GET_CURRENT_CONTEXT(ctx);
-   intelContextPtr intel;
-
-   if (!ctx || !(intel = INTEL_CONTEXT(ctx)) ) {
-      fprintf(stderr, "%s: no context\n", __FUNCTION__);
-      return ~0;
-   }
-
-   if (!intelIsAgpMemory( intel, pointer, 0 ))
-      return ~0;
-
-   return intelAgpOffsetFromVirtual( intel, pointer );
-}
-
-
-GLboolean intelIsAgpMemory( intelContextPtr intel, const GLvoid *pointer,
-			   GLint size )
-{
-   int offset = (char *)pointer - (char *)intel->intelScreen->tex.map;
-   int valid = (size >= 0 &&
-		offset >= 0 &&
-		offset + size < intel->intelScreen->tex.size);
-
-   if (INTEL_DEBUG & DEBUG_IOCTL)
-      fprintf(stderr, "intelIsAgpMemory( %p ) : %d\n", pointer, valid );
-   
-   return valid;
-}
-
-
-GLuint intelAgpOffsetFromVirtual( intelContextPtr intel, const GLvoid *pointer )
-{
-   int offset = (char *)pointer - (char *)intel->intelScreen->tex.map;
-
-   if (offset < 0 || offset > intel->intelScreen->tex.size)
-      return ~0;
-   else
-      return intel->intelScreen->tex.offset + offset;
-}
-
-
-
-
-
-/* Flip the front & back buffes
- */
-void intelPageFlip( const __DRIdrawablePrivate *dPriv )
-{
-#if 0
-   intelContextPtr intel;
-   int tmp, ret;
-
-   if (INTEL_DEBUG & DEBUG_IOCTL)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   assert(dPriv);
-   assert(dPriv->driContextPriv);
-   assert(dPriv->driContextPriv->driverPrivate);
-
-   intel = (intelContextPtr) dPriv->driContextPriv->driverPrivate;
-
-   intelFlush( &intel->ctx );
-   LOCK_HARDWARE( intel );
-
-   if (dPriv->pClipRects) {
-      *(drm_clip_rect_t *)intel->sarea->boxes = dPriv->pClipRects[0];
-      intel->sarea->nbox = 1;
-   }
-
-   ret = drmCommandNone(intel->driFd, DRM_I830_FLIP); 
-   if (ret) {
-      fprintf(stderr, "%s: %d\n", __FUNCTION__, ret);
-      UNLOCK_HARDWARE( intel );
-      exit(1);
-   }
-
-   tmp = intel->sarea->last_enqueue;
-   intelRefillBatchLocked( intel );
-   UNLOCK_HARDWARE( intel );
-
-
-   intelSetDrawBuffer( &intel->ctx, intel->ctx.Color.DriverDrawBuffer );
-#endif
-}
diff --git a/i915/intel_ioctl.h b/i915/intel_ioctl.h
deleted file mode 100644
index 6ea47e4..0000000
--- a/i915/intel_ioctl.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef INTEL_IOCTL_H
-#define INTEL_IOCTL_H
-
-#include "intel_context.h"
-
-extern void intelWaitAgeLocked( intelContextPtr intel, int age, GLboolean unlock );
-
-extern void intelClear(GLcontext *ctx, GLbitfield mask);
-
-extern void intelPageFlip( const __DRIdrawablePrivate *dpriv );
-
-extern void intelRotateWindow(intelContextPtr intel,
-                              __DRIdrawablePrivate *dPriv, GLuint srcBuffer);
-
-extern void intelWaitForIdle( intelContextPtr intel );
-extern void intelFlushBatch( intelContextPtr intel, GLboolean refill );
-extern void intelFlushBatchLocked( intelContextPtr intel,
-				   GLboolean ignore_cliprects,
-				   GLboolean refill,
-				   GLboolean allow_unlock);
-extern void intelRefillBatchLocked( intelContextPtr intel, GLboolean allow_unlock );
-extern void intelFinish( GLcontext *ctx );
-extern void intelFlush( GLcontext *ctx );
-extern void intelglFlush( GLcontext *ctx );
-
-extern void *intelAllocateAGP( intelContextPtr intel, GLsizei size );
-extern void intelFreeAGP( intelContextPtr intel, void *pointer );
-
-extern void *intelAllocateMemoryMESA( __DRInativeDisplay *dpy, int scrn, 
-				      GLsizei size, GLfloat readfreq,
-				      GLfloat writefreq, GLfloat priority );
-
-extern void intelFreeMemoryMESA( __DRInativeDisplay *dpy, int scrn, 
-				 GLvoid *pointer );
-
-extern GLuint intelGetMemoryOffsetMESA( __DRInativeDisplay *dpy, int scrn, const GLvoid *pointer );
-extern GLboolean intelIsAgpMemory( intelContextPtr intel, const GLvoid *pointer,
-				  GLint size );
-
-extern GLuint intelAgpOffsetFromVirtual( intelContextPtr intel, const GLvoid *p );
-
-extern void intelWaitIrq( intelContextPtr intel, int seq );
-extern u_int32_t intelGetLastFrame (intelContextPtr intel);
-extern int intelEmitIrqLocked( intelContextPtr intel );
-#endif
diff --git a/i915/intel_pixel.c b/i915/intel_pixel.c
deleted file mode 100644
index 535cbfc..0000000
--- a/i915/intel_pixel.c
+++ /dev/null
@@ -1,502 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "glheader.h"
-#include "enums.h"
-#include "mtypes.h"
-#include "macros.h"
-#include "swrast/swrast.h"
-
-#include "intel_screen.h"
-#include "intel_context.h"
-#include "intel_ioctl.h"
-#include "intel_batchbuffer.h"
-
-
-
-static GLboolean
-check_color( const GLcontext *ctx, GLenum type, GLenum format,
-	     const struct gl_pixelstore_attrib *packing,
-	     const void *pixels, GLint sz, GLint pitch )
-{
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
-   GLuint cpp = intel->intelScreen->cpp;
-
-   if (INTEL_DEBUG & DEBUG_PIXEL)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   if (	(pitch & 63) ||
-	ctx->_ImageTransferState ||
-	packing->SwapBytes ||
-	packing->LsbFirst) {
-      if (INTEL_DEBUG & DEBUG_PIXEL)
-	 fprintf(stderr, "%s: failed 1\n", __FUNCTION__);
-      return GL_FALSE;
-   }
-
-   if ( type == GL_UNSIGNED_INT_8_8_8_8_REV && 
-	cpp == 4 && 
-	format == GL_BGRA ) {
-      if (INTEL_DEBUG & DEBUG_PIXEL)
-	 fprintf(stderr, "%s: passed 2\n", __FUNCTION__);
-      return GL_TRUE;
-   }
-
-   if (INTEL_DEBUG & DEBUG_PIXEL)
-      fprintf(stderr, "%s: failed\n", __FUNCTION__);
-
-   return GL_FALSE;
-}
-
-static GLboolean
-check_color_per_fragment_ops( const GLcontext *ctx )
-{
-   int result;
-   result = (!(     ctx->Color.AlphaEnabled || 
-		    ctx->Depth.Test ||
-		    ctx->Fog.Enabled ||
-		    ctx->Scissor.Enabled ||
-		    ctx->Stencil.Enabled ||
-		    !ctx->Color.ColorMask[0] ||
-		    !ctx->Color.ColorMask[1] ||
-		    !ctx->Color.ColorMask[2] ||
-		    !ctx->Color.ColorMask[3] ||
-		    ctx->Color.ColorLogicOpEnabled ||
-		    ctx->Texture._EnabledUnits
-           ) &&
-	   ctx->Current.RasterPosValid);
-   
-   return result;
-}
-
-
-/**
- * Clip the given rectangle against the buffer's bounds (including scissor).
- * \param size returns the 
- * \return GL_TRUE if any pixels remain, GL_FALSE if totally clipped.
- *
- * XXX Replace this with _mesa_clip_drawpixels() and _mesa_clip_readpixels()
- * from Mesa 6.4.  We shouldn't apply scissor for ReadPixels.
- */
-static GLboolean
-clip_pixelrect( const GLcontext *ctx,
-		const GLframebuffer *buffer,
-		GLint *x, GLint *y,
-		GLsizei *width, GLsizei *height)
-{
-   /* left clipping */
-   if (*x < buffer->_Xmin) {
-      *width -= (buffer->_Xmin - *x);
-      *x = buffer->_Xmin;
-   }
-
-   /* right clipping */
-   if (*x + *width > buffer->_Xmax)
-      *width -= (*x + *width - buffer->_Xmax - 1);
-
-   if (*width <= 0)
-      return GL_FALSE;
-
-   /* bottom clipping */
-   if (*y < buffer->_Ymin) {
-      *height -= (buffer->_Ymin - *y);
-      *y = buffer->_Ymin;
-   }
-
-   /* top clipping */
-   if (*y + *height > buffer->_Ymax)
-      *height -= (*y + *height - buffer->_Ymax - 1);
-
-   if (*height <= 0)
-      return GL_FALSE;
-
-   return GL_TRUE;
-}
-
-
-/**
- * Compute intersection of a clipping rectangle and pixel rectangle,
- * returning results in x/y/w/hOut vars.
- * \return GL_TRUE if there's intersection, GL_FALSE if disjoint.
- */
-static INLINE GLboolean
-intersect_region(const drm_clip_rect_t *box,
-		 GLint x, GLint y, GLsizei width, GLsizei height,
-		 GLint *xOut, GLint *yOut, GLint *wOut, GLint *hOut)
-{
-   GLint bx = box->x1;
-   GLint by = box->y1;
-   GLint bw = box->x2 - bx;
-   GLint bh = box->y2 - by;
-
-   if (bx < x) bw -= x - bx, bx = x;
-   if (by < y) bh -= y - by, by = y;
-   if (bx + bw > x + width) bw = x + width - bx;
-   if (by + bh > y + height) bh = y + height - by;
-
-   *xOut = bx;
-   *yOut = by;
-   *wOut = bw;
-   *hOut = bh;
-
-   if (bw <= 0) return GL_FALSE;
-   if (bh <= 0) return GL_FALSE;
-
-   return GL_TRUE;
-}
-
-
-
-static GLboolean
-intelTryReadPixels( GLcontext *ctx,
-		  GLint x, GLint y, GLsizei width, GLsizei height,
-		  GLenum format, GLenum type,
-		  const struct gl_pixelstore_attrib *pack,
-		  GLvoid *pixels )
-{
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
-   GLint size = 0; /* not really used */
-   GLint pitch = pack->RowLength ? pack->RowLength : width;
-
-   if (INTEL_DEBUG & DEBUG_PIXEL)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   /* Only accelerate reading to agp buffers.
-    */
-   if ( !intelIsAgpMemory(intel, pixels, 
-			pitch * height * intel->intelScreen->cpp ) ) {
-      if (INTEL_DEBUG & DEBUG_PIXEL)
-	 fprintf(stderr, "%s: dest not agp\n", __FUNCTION__);
-      return GL_FALSE;
-   }
-
-   /* Need GL_PACK_INVERT_MESA to cope with upsidedown results from
-    * blitter:
-    */
-   if (!pack->Invert) {
-      if (INTEL_DEBUG & DEBUG_PIXEL)
-	 fprintf(stderr, "%s: MESA_PACK_INVERT not set\n", __FUNCTION__);
-      return GL_FALSE;
-   }
-
-   if (!check_color(ctx, type, format, pack, pixels, size, pitch))
-      return GL_FALSE;
-
-   switch ( intel->intelScreen->cpp ) {
-   case 4:
-      break;
-   default:
-      return GL_FALSE;
-   }
-
-
-   /* Although the blits go on the command buffer, need to do this and
-    * fire with lock held to guarentee cliprects and drawing offset are
-    * correct.
-    *
-    * This is an unusual situation however, as the code which flushes
-    * a full command buffer expects to be called unlocked.  As a
-    * workaround, immediately flush the buffer on aquiring the lock.
-    */
-   intelFlush( &intel->ctx );
-   LOCK_HARDWARE( intel );
-   {
-      __DRIdrawablePrivate *dPriv = intel->driDrawable;
-      int nbox = dPriv->numClipRects;
-      int src_offset = intel->readRegion->offset;
-      int src_pitch = intel->intelScreen->front.pitch;
-      int dst_offset = intelAgpOffsetFromVirtual( intel, pixels);
-      drm_clip_rect_t *box = dPriv->pClipRects;
-      int i;
-
-      assert(dst_offset != ~0);  /* should have been caught above */
-
-      if (!clip_pixelrect(ctx, ctx->ReadBuffer, &x, &y, &width, &height)) {
-	 UNLOCK_HARDWARE( intel );
-	 if (INTEL_DEBUG & DEBUG_PIXEL)
-	    fprintf(stderr, "%s totally clipped -- nothing to do\n",
-		    __FUNCTION__);
-	 return GL_TRUE;
-      }
-
-      /* convert to screen coords (y=0=top) */
-      y = dPriv->h - y - height;
-      x += dPriv->x;
-      y += dPriv->y;
-
-      if (INTEL_DEBUG & DEBUG_PIXEL)
-	 fprintf(stderr, "readpixel blit src_pitch %d dst_pitch %d\n",
-		 src_pitch, pitch);
-
-      /* We don't really have to do window clipping for readpixels.
-       * The OpenGL spec says that pixels read from outside the
-       * visible window region (pixel ownership) have undefined value.
-       */
-      for (i = 0 ; i < nbox ; i++)
-      {
-         GLint bx, by, bw, bh;
-         if (intersect_region(box+i, x, y, width, height,
-                              &bx, &by, &bw, &bh)) {
-            intelEmitCopyBlitLocked( intel,
-                                     intel->intelScreen->cpp,
-                                     src_pitch, src_offset,
-                                     pitch, dst_offset,
-                                     bx, by,
-                                     bx - x, by - y,
-                                     bw, bh );
-         }
-      }
-   }
-   UNLOCK_HARDWARE( intel );
-   intelFinish( &intel->ctx );
-
-   return GL_TRUE;
-}
-
-static void
-intelReadPixels( GLcontext *ctx,
-		 GLint x, GLint y, GLsizei width, GLsizei height,
-		 GLenum format, GLenum type,
-		 const struct gl_pixelstore_attrib *pack,
-		 GLvoid *pixels )
-{
-   if (INTEL_DEBUG & DEBUG_PIXEL)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   if (!intelTryReadPixels( ctx, x, y, width, height, format, type, pack, 
-                            pixels))
-      _swrast_ReadPixels( ctx, x, y, width, height, format, type, pack, 
-			  pixels);
-}
-
-
-
-
-static void do_draw_pix( GLcontext *ctx,
-			 GLint x, GLint y, GLsizei width, GLsizei height,
-			 GLint pitch,
-			 const void *pixels,
-			 GLuint dest )
-{
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;
-   drm_clip_rect_t *box = dPriv->pClipRects;
-   int nbox = dPriv->numClipRects;
-   int i;
-   int src_offset = intelAgpOffsetFromVirtual( intel, pixels);
-   int src_pitch = pitch;
-
-   assert(src_offset != ~0);  /* should be caught earlier */
-
-   if (INTEL_DEBUG & DEBUG_PIXEL)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   intelFlush( &intel->ctx );
-   LOCK_HARDWARE( intel );
-   if (ctx->DrawBuffer)
-   {
-      y -= height;			/* cope with pixel zoom */
-   
-      if (!clip_pixelrect(ctx, ctx->DrawBuffer,
-			  &x, &y, &width, &height)) {
-	 UNLOCK_HARDWARE( intel );
-	 return;
-      }
-
-      y = dPriv->h - y - height; 	/* convert from gl to hardware coords */
-      x += dPriv->x;
-      y += dPriv->y;
-
-      for (i = 0 ; i < nbox ; i++ )
-      {
-	 GLint bx, by, bw, bh;
-	 if (intersect_region(box + i, x, y, width, height,
-			      &bx, &by, &bw, &bh)) {
-            intelEmitCopyBlitLocked( intel,
-                                     intel->intelScreen->cpp,
-                                     src_pitch, src_offset,
-                                     intel->intelScreen->front.pitch,
-                                     intel->drawRegion->offset,
-                                     bx - x, by - y,
-                                     bx, by,
-                                     bw, bh );
-         }
-      }
-   }
-   UNLOCK_HARDWARE( intel );
-   intelFinish( &intel->ctx );
-}
-
-
-
-static GLboolean
-intelTryDrawPixels( GLcontext *ctx,
-		  GLint x, GLint y, GLsizei width, GLsizei height,
-		  GLenum format, GLenum type,
-		  const struct gl_pixelstore_attrib *unpack,
-		  const GLvoid *pixels )
-{
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
-   GLint pitch = unpack->RowLength ? unpack->RowLength : width;
-   GLuint dest;
-   GLuint cpp = intel->intelScreen->cpp;
-   GLint size = width * pitch * cpp;
-
-   if (INTEL_DEBUG & DEBUG_PIXEL)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   switch (format) {
-   case GL_RGB:
-   case GL_RGBA:
-   case GL_BGRA:
-      dest = intel->drawRegion->offset;
-
-      /* Planemask doesn't have full support in blits.
-       */
-      if (!ctx->Color.ColorMask[RCOMP] ||
-	  !ctx->Color.ColorMask[GCOMP] ||
-	  !ctx->Color.ColorMask[BCOMP] ||
-	  !ctx->Color.ColorMask[ACOMP]) {
-	 if (INTEL_DEBUG & DEBUG_PIXEL)
-	    fprintf(stderr, "%s: planemask\n", __FUNCTION__);
-	 return GL_FALSE;	
-      }
-
-      /* Can't do conversions on agp reads/draws. 
-       */
-      if ( !intelIsAgpMemory( intel, pixels, size ) ) {
-	 if (INTEL_DEBUG & DEBUG_PIXEL)
-	    fprintf(stderr, "%s: not agp memory\n", __FUNCTION__);
-	 return GL_FALSE;
-      }
-
-      if (!check_color(ctx, type, format, unpack, pixels, size, pitch)) {
-	 return GL_FALSE;
-      }
-      if (!check_color_per_fragment_ops(ctx)) {
-	 return GL_FALSE;
-      }
-
-      if (ctx->Pixel.ZoomX != 1.0F ||
-	  ctx->Pixel.ZoomY != -1.0F)
-	 return GL_FALSE;
-      break;
-
-   default:
-      return GL_FALSE;
-   }
-
-   if ( intelIsAgpMemory(intel, pixels, size) )
-   {
-      do_draw_pix( ctx, x, y, width, height, pitch, pixels, dest );
-      return GL_TRUE;
-   }
-   else if (0)
-   {
-      /* Pixels is in regular memory -- get dma buffers and perform
-       * upload through them.  No point doing this for regular uploads
-       * but once we remove some of the restrictions above (colormask,
-       * pixelformat conversion, zoom?, etc), this could be a win.
-       */
-   }
-   else
-      return GL_FALSE;
-
-   return GL_FALSE;
-}
-
-static void
-intelDrawPixels( GLcontext *ctx,
-		 GLint x, GLint y, GLsizei width, GLsizei height,
-		 GLenum format, GLenum type,
-		 const struct gl_pixelstore_attrib *unpack,
-		 const GLvoid *pixels )
-{
-   if (INTEL_DEBUG & DEBUG_PIXEL)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   if (!intelTryDrawPixels( ctx, x, y, width, height, format, type,
-                            unpack, pixels ))
-      _swrast_DrawPixels( ctx, x, y, width, height, format, type,
-			  unpack, pixels );
-}
-
-
-
-
-/**
- * Implement glCopyPixels for the front color buffer (or back buffer Pixmap)
- * for the color buffer.  Don't support zooming, pixel transfer, etc.
- * We do support copying from one window to another, ala glXMakeCurrentRead.
- */
-static void
-intelCopyPixels( GLcontext *ctx,
-		 GLint srcx, GLint srcy, GLsizei width, GLsizei height,
-		 GLint destx, GLint desty, GLenum type )
-{
-#if 0
-   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
-   const SWcontext *swrast = SWRAST_CONTEXT( ctx );
-   XMesaDisplay *dpy = xmesa->xm_visual->display;
-   const XMesaDrawable drawBuffer = xmesa->xm_draw_buffer->buffer;
-   const XMesaDrawable readBuffer = xmesa->xm_read_buffer->buffer;
-   const XMesaGC gc = xmesa->xm_draw_buffer->gc;
-
-   ASSERT(dpy);
-   ASSERT(gc);
-
-   if (drawBuffer &&  /* buffer != 0 means it's a Window or Pixmap */
-       readBuffer &&
-       type == GL_COLOR &&
-       (swrast->_RasterMask & ~CLIP_BIT) == 0 && /* no blend, z-test, etc */
-       ctx->_ImageTransferState == 0 &&  /* no color tables, scale/bias, etc */
-       ctx->Pixel.ZoomX == 1.0 &&        /* no zooming */
-       ctx->Pixel.ZoomY == 1.0) {
-      /* Note: we don't do any special clipping work here.  We could,
-       * but X will do it for us.
-       */
-      srcy = FLIP(xmesa->xm_read_buffer, srcy) - height + 1;
-      desty = FLIP(xmesa->xm_draw_buffer, desty) - height + 1;
-      XCopyArea(dpy, readBuffer, drawBuffer, gc,
-                srcx, srcy, width, height, destx, desty);
-   }
-#else
-   _swrast_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type );
-#endif
-}
-
-
-
-
-void intelInitPixelFuncs( struct dd_function_table *functions )
-{
-   functions->CopyPixels = intelCopyPixels;
-   if (!getenv("INTEL_NO_BLITS")) {
-      functions->ReadPixels = intelReadPixels;  
-      functions->DrawPixels = intelDrawPixels; 
-   }
-}
diff --git a/i915/intel_pixel_read.c b/i915/intel_pixel_read.c
new file mode 100644
index 0000000..2e31656
--- /dev/null
+++ b/i915/intel_pixel_read.c
@@ -0,0 +1,318 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "enums.h"
+#include "mtypes.h"
+#include "macros.h"
+#include "image.h"
+#include "bufferobj.h"
+#include "swrast/swrast.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_ioctl.h"
+#include "intel_batchbuffer.h"
+#include "intel_blit.h"
+#include "intel_buffers.h"
+#include "intel_regions.h"
+#include "intel_pixel.h"
+#include "intel_buffer_objects.h"
+
+/* For many applications, the new ability to pull the source buffers
+ * back out of the GTT and then do the packing/conversion operations
+ * in software will be as much of an improvement as trying to get the
+ * blitter and/or texture engine to do the work. 
+ *
+ * This step is gated on private backbuffers.
+ * 
+ * Obviously the frontbuffer can't be pulled back, so that is either
+ * an argument for blit/texture readpixels, or for blitting to a
+ * temporary and then pulling that back.
+ *
+ * When the destination is a pbo, however, it's not clear if it is
+ * ever going to be pulled to main memory (though the access param
+ * will be a good hint).  So it sounds like we do want to be able to
+ * choose between blit/texture implementation on the gpu and pullback
+ * and cpu-based copying.
+ *
+ * Unless you can magically turn client memory into a PBO for the
+ * duration of this call, there will be a cpu-based copying step in
+ * any case.
+ */
+
+/* Texture-engine readpixels path: compiled out, always returns GL_FALSE. */
+static GLboolean
+do_texture_readpixels(GLcontext * ctx,
+                      GLint x, GLint y, GLsizei width, GLsizei height,
+                      GLenum format, GLenum type,
+                      const struct gl_pixelstore_attrib *pack,
+                      struct intel_region *dest_region)
+{
+#if 0
+   struct intel_context *intel = intel_context(ctx);
+   intelScreenPrivate *screen = intel->intelScreen;
+   GLint pitch = pack->RowLength ? pack->RowLength : width;
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;
+   int textureFormat;
+   GLenum glTextureFormat;
+   int destFormat, depthFormat, destPitch;
+   drm_clip_rect_t tmp;
+   /* NOTE(review): i830, state, src_region below are not declared here. */
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   /* Bail on transfer ops, byte swapping, LSB-first or a non-inverted pack. */
+   if (ctx->_ImageTransferState ||
+       pack->SwapBytes || pack->LsbFirst || !pack->Invert) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         fprintf(stderr, "%s: check_color failed\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   intel->vtbl.meta_texrect_source(intel, intel_readbuf_region(intel));
+
+   if (!intel->vtbl.meta_render_dest(intel, dest_region, type, format)) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         fprintf(stderr, "%s: couldn't set dest %s/%s\n",
+                 __FUNCTION__,
+                 _mesa_lookup_enum_by_nr(type),
+                 _mesa_lookup_enum_by_nr(format));
+      return GL_FALSE;
+   }
+
+   LOCK_HARDWARE(intel);
+
+   if (intel->driDrawable->numClipRects) {
+      intel->vtbl.install_meta_state(intel);
+      intel->vtbl.meta_no_depth_write(intel);
+      intel->vtbl.meta_no_stencil_write(intel);
+
+      if (!driClipRectToFramebuffer(ctx->ReadBuffer, &x, &y, &width, &height)) {
+         UNLOCK_HARDWARE(intel);
+         SET_STATE(i830, state);
+         if (INTEL_DEBUG & DEBUG_PIXEL)
+            fprintf(stderr, "%s: cliprect failed\n", __FUNCTION__);
+         return GL_TRUE;
+      }
+
+      y = dPriv->h - y - height;
+      x += dPriv->x;
+      y += dPriv->y;
+
+
+      /* Set the frontbuffer up as a large rectangular texture.
+       */
+      intel->vtbl.meta_tex_rect_source(intel, src_region, textureFormat);
+
+
+      intel->vtbl.meta_texture_blend_replace(i830, glTextureFormat);
+
+
+      /* Set the 3d engine to draw into the destination region:
+       */
+
+      intel->vtbl.meta_draw_region(intel, dest_region);
+      intel->vtbl.meta_draw_format(intel, destFormat, depthFormat);     /* ?? */
+
+
+      /* Draw a single quad, no cliprects:
+       */
+      intel->vtbl.meta_disable_cliprects(intel);
+
+      intel->vtbl.draw_quad(intel,
+                            0, width, 0, height,
+                            0x00ff00ff, x, x + width, y, y + height);
+
+      intel->vtbl.leave_meta_state(intel);
+   }
+   UNLOCK_HARDWARE(intel);
+
+   intel_region_wait_fence(ctx, dest_region);   /* required by GL */
+   return GL_TRUE;
+#endif
+   /* Only reachable return while the block above stays disabled. */
+   return GL_FALSE;
+}
+
+/* Blit the current read buffer into the PBO bound for packing.  Returns
+ * GL_TRUE when the request was consumed (copied, or rejected with a GL error)
+ * and GL_FALSE when the caller has to try another readpixels path. */
+static GLboolean
+do_blit_readpixels(GLcontext * ctx,
+                   GLint x, GLint y, GLsizei width, GLsizei height,
+                   GLenum format, GLenum type,
+                   const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_region *src = intel_readbuf_region(intel);
+   struct intel_buffer_object *dst = intel_buffer_object(pack->BufferObj);
+   GLuint dst_offset;
+   GLuint rowLength;
+   dri_fence *fence = NULL;
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      _mesa_printf("%s\n", __FUNCTION__);
+
+   if (!src)
+      return GL_FALSE;
+
+   if (dst) {
+      /* XXX This validation should be done by core mesa.  The error is
+       * reported against glReadPixels, the entry point being serviced. */
+      if (!_mesa_validate_pbo_access(2, pack, width, height, 1,
+                                     format, type, pixels)) {
+         _mesa_error(ctx, GL_INVALID_OPERATION, "glReadPixels");
+         return GL_TRUE;
+      }
+   }
+   else {
+      /* PBO only for now:
+       */
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - not PBO\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   /* Fall back when pixel-transfer state is set or the format check fails. */
+   if (ctx->_ImageTransferState ||
+       !intel_check_blit_format(src, format, type)) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - bad format for blit\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   if (pack->Alignment != 1 || pack->SwapBytes || pack->LsbFirst) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s: bad packing params\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   if (pack->RowLength > 0)
+      rowLength = pack->RowLength;
+   else
+      rowLength = width;
+
+   if (pack->Invert) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s: MESA_PACK_INVERT not done yet\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+   else {
+      rowLength = -rowLength;   /* presumably flips row order; TODO confirm */
+   }
+
+   /* XXX 64-bit cast? */
+   dst_offset = (GLuint) _mesa_image_address(2, pack, pixels, width, height,
+                                             format, type, 0, 0, 0);
+
+
+   /* Although the blits go on the command buffer, need to do this and
+    * fire with lock held to guarantee cliprects are correct.
+    */
+   intelFlush(&intel->ctx);
+   LOCK_HARDWARE(intel);
+
+   if (intel->driDrawable->numClipRects) {
+      GLboolean all = (width * height * src->cpp == dst->Base.Size &&
+                       x == 0 && dst_offset == 0);
+
+      dri_bo *dst_buffer = intel_bufferobj_buffer(intel, dst,
+						  all ? INTEL_WRITE_FULL :
+						  INTEL_WRITE_PART);
+      __DRIdrawablePrivate *dPriv = intel->driDrawable;
+      int nbox = dPriv->numClipRects;
+      drm_clip_rect_t *box = dPriv->pClipRects;
+      drm_clip_rect_t rect;
+      drm_clip_rect_t src_rect;
+      int i;
+
+      src_rect.x1 = dPriv->x + x;
+      src_rect.y1 = dPriv->y + dPriv->h - (y + height);
+      src_rect.x2 = src_rect.x1 + width;
+      src_rect.y2 = src_rect.y1 + height;
+
+
+      /* One blit per cliprect that overlaps the requested rectangle. */
+      for (i = 0; i < nbox; i++) {
+         if (!intel_intersect_cliprects(&rect, &src_rect, &box[i]))
+            continue;
+
+         intelEmitCopyBlit(intel,
+                           src->cpp,
+                           src->pitch, src->buffer, 0, src->tiled,
+                           rowLength, dst_buffer, dst_offset, GL_FALSE,
+                           rect.x1,
+                           rect.y1,
+                           rect.x1 - src_rect.x1,
+                           rect.y2 - src_rect.y2,
+                           rect.x2 - rect.x1, rect.y2 - rect.y1,
+			   GL_COPY);
+      }
+
+      intel_batchbuffer_flush(intel->batch);
+      fence = intel->batch->last_fence;
+      dri_fence_reference(fence);
+
+   }
+   UNLOCK_HARDWARE(intel);
+   /* With the lock dropped, wait on the fence taken after the flush. */
+   if (fence) {
+      dri_fence_wait(fence);
+      dri_fence_unreference(fence);
+   }
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      _mesa_printf("%s - DONE\n", __FUNCTION__);
+
+   return GL_TRUE;
+}
+
+void
+intelReadPixels(GLcontext * ctx,
+                GLint x, GLint y, GLsizei width, GLsizei height,
+                GLenum format, GLenum type,
+                const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
+{
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   /* Flush, then try the blit path, the texture path, and finally swrast. */
+   intelFlush(ctx);
+
+   if (do_blit_readpixels
+       (ctx, x, y, width, height, format, type, pack, pixels))
+      return;
+
+   if (do_texture_readpixels
+       (ctx, x, y, width, height, format, type, pack, pixels))
+      return;
+   /* Neither accelerated path consumed the request. */
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      _mesa_printf("%s: fallback to swrast\n", __FUNCTION__);
+
+   _swrast_ReadPixels(ctx, x, y, width, height, format, type, pack, pixels);
+}
diff --git a/i915/intel_render.c b/i915/intel_render.c
index 773779a..5e6500c 100644
--- a/i915/intel_render.c
+++ b/i915/intel_render.c
@@ -39,6 +39,7 @@
 
 #include "tnl/t_context.h"
 #include "tnl/t_vertex.h"
+#include "tnl/t_pipeline.h"
 
 #include "intel_screen.h"
 #include "intel_context.h"
@@ -51,14 +52,14 @@
  * dma buffers.  Use strip/fan hardware primitives where possible.
  * Try to simulate missing primitives with indexed vertices.
  */
-#define HAVE_POINTS      0  /* Has it, but can't use because subpixel has to
-			     * be adjusted for points on the INTEL/I845G
-			     */
+#define HAVE_POINTS      0      /* Has it, but can't use because subpixel has to
+                                 * be adjusted for points on the INTEL/I845G
+                                 */
 #define HAVE_LINES       1
 #define HAVE_LINE_STRIPS 1
 #define HAVE_TRIANGLES   1
 #define HAVE_TRI_STRIPS  1
-#define HAVE_TRI_STRIP_1 0  /* has it, template can't use it yet */
+#define HAVE_TRI_STRIP_1 0      /* has it, template can't use it yet */
 #define HAVE_TRI_FANS    1
 #define HAVE_POLYGONS    1
 #define HAVE_QUADS       0
@@ -66,7 +67,7 @@
 
 #define HAVE_ELTS        0
 
-static GLuint hw_prim[GL_POLYGON+1] = {
+static GLuint hw_prim[GL_POLYGON + 1] = {
    0,
    PRIM3D_LINELIST,
    PRIM3D_LINESTRIP,
@@ -79,7 +80,7 @@ static GLuint hw_prim[GL_POLYGON+1] = {
    PRIM3D_POLY
 };
 
-static const GLenum reduced_prim[GL_POLYGON+1] = {  
+static const GLenum reduced_prim[GL_POLYGON + 1] = {
    GL_POINTS,
    GL_LINES,
    GL_LINES,
@@ -92,58 +93,61 @@ static const GLenum reduced_prim[GL_POLYGON+1] = {
    GL_TRIANGLES
 };
 
-static const int scale_prim[GL_POLYGON+1] = {  
-   0,				/* fallback case */
+static const int scale_prim[GL_POLYGON + 1] = {
+   0,                           /* fallback case */
    1,
    2,
    2,
    1,
    3,
    3,
-   0,				/* fallback case */
-   0,				/* fallback case */
+   0,                           /* fallback case */
+   0,                           /* fallback case */
    3
 };
 
 
-static void intelDmaPrimitive( intelContextPtr intel, GLenum prim )
+static void
+intelDmaPrimitive(struct intel_context *intel, GLenum prim)
 {
-   if (0) fprintf(stderr, "%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(prim));
+   if (0)
+      fprintf(stderr, "%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(prim));
    INTEL_FIREVERTICES(intel);
-   intel->vtbl.reduced_primitive_state( intel, reduced_prim[prim] );
-   intelStartInlinePrimitive( intel, hw_prim[prim] );
+   intel->vtbl.reduced_primitive_state(intel, reduced_prim[prim]);
+   intelStartInlinePrimitive(intel, hw_prim[prim], LOOP_CLIPRECTS);
 }
 
 
-#define LOCAL_VARS intelContextPtr intel = INTEL_CONTEXT(ctx)
+#define LOCAL_VARS struct intel_context *intel = intel_context(ctx)
 #define INIT( prim ) 				\
 do {						\
    intelDmaPrimitive( intel, prim );		\
 } while (0)
-#define FLUSH()  INTEL_FIREVERTICES( intel )
+
+#define FLUSH() INTEL_FIREVERTICES(intel)
 
 #define GET_SUBSEQUENT_VB_MAX_VERTS() \
-  (((intel->alloc.size / 2) - 1500) / (intel->vertex_size*4))
+  ((intel->batch->size - 1500) / (intel->vertex_size*4))
 #define GET_CURRENT_VB_MAX_VERTS() GET_SUBSEQUENT_VB_MAX_VERTS()
 
 #define ALLOC_VERTS( nr ) \
    intelExtendInlinePrimitive( intel, (nr) * intel->vertex_size )
-  
+
 #define EMIT_VERTS( ctx, j, nr, buf ) \
-  _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf )  
+  _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf )
 
 #define TAG(x) intel_##x
 #include "tnl_dd/t_dd_dmatmp.h"
-  
-  
+
+
 /**********************************************************************/
 /*                          Render pipeline stage                     */
 /**********************************************************************/
 
 /* Heuristic to choose between the two render paths:  
  */
-static GLboolean choose_render( intelContextPtr intel,
-				struct vertex_buffer *VB )
+static GLboolean
+choose_render(struct intel_context *intel, struct vertex_buffer *VB)
 {
    int vertsz = intel->vertex_size;
    int cost_render = 0;
@@ -153,20 +157,20 @@ static GLboolean choose_render( intelContextPtr intel,
    int nr_rverts = 0;
    int rprim = intel->reduced_primitive;
    int i = 0;
-   
-   for (i = 0 ; i < VB->PrimitiveCount ; i++) {
+
+   for (i = 0; i < VB->PrimitiveCount; i++) {
       GLuint prim = VB->Primitive[i].mode;
       GLuint length = VB->Primitive[i].count;
 
       if (!length)
-	 continue;
+         continue;
 
       nr_prims++;
       nr_rverts += length * scale_prim[prim & PRIM_MODE_MASK];
 
       if (reduced_prim[prim & PRIM_MODE_MASK] != rprim) {
-	 nr_rprims++;
-	 rprim = reduced_prim[prim & PRIM_MODE_MASK];
+         nr_rprims++;
+         rprim = reduced_prim[prim & PRIM_MODE_MASK];
       }
    }
 
@@ -177,64 +181,82 @@ static GLboolean choose_render( intelContextPtr intel,
 
    /* One point for every 1024 dwords (4k) of dma:
     */
-   cost_render += (vertsz * i) / 1024; 
-   cost_fallback += (vertsz * nr_rverts) / 1024; 
+   cost_render += (vertsz * i) / 1024;
+   cost_fallback += (vertsz * nr_rverts) / 1024;
 
    if (0)
       fprintf(stderr, "cost render: %d fallback: %d\n",
-	      cost_render, cost_fallback);
+              cost_render, cost_fallback);
 
-   if (cost_render > cost_fallback) 
+   if (cost_render > cost_fallback)
       return GL_FALSE;
 
    return GL_TRUE;
 }
 
 
-static GLboolean intel_run_render( GLcontext *ctx, 
-				 struct tnl_pipeline_stage *stage )
+static GLboolean
+intel_run_render(GLcontext * ctx, struct tnl_pipeline_stage *stage)
 {
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   struct intel_context *intel = intel_context(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *VB = &tnl->vb;
    GLuint i;
 
+   intel->vtbl.render_prevalidate( intel );
+
    /* Don't handle clipping or indexed vertices.
     */
-   if (intel->RenderIndex != 0 || 
-       !intel_validate_render( ctx, VB ) || 
-       !choose_render( intel, VB )) {
+   if (intel->RenderIndex != 0 ||
+       !intel_validate_render(ctx, VB) || !choose_render(intel, VB)) {
       return GL_TRUE;
    }
 
    tnl->clipspace.new_inputs |= VERT_BIT_POS;
 
-   tnl->Driver.Render.Start( ctx );
-   
-   for (i = 0 ; i < VB->PrimitiveCount ; i++)
-   {
+   tnl->Driver.Render.Start(ctx);
+
+   for (i = 0; i < VB->PrimitiveCount; i++) {
       GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
       GLuint start = VB->Primitive[i].start;
       GLuint length = VB->Primitive[i].count;
 
       if (!length)
-	 continue;
+         continue;
 
-      intel_render_tab_verts[prim & PRIM_MODE_MASK]( ctx, start, start + length,
-						     prim );
+      intel_render_tab_verts[prim & PRIM_MODE_MASK] (ctx, start,
+                                                     start + length, prim);
    }
-      
-   tnl->Driver.Render.Finish( ctx );
 
-   return GL_FALSE;     /* finished the pipe */
+   tnl->Driver.Render.Finish(ctx);
+
+   INTEL_FIREVERTICES(intel);
+
+   return GL_FALSE;             /* finished the pipe */
 }
 
-const struct tnl_pipeline_stage _intel_render_stage =
-{
+static const struct tnl_pipeline_stage _intel_render_stage = {
    "intel render",
    NULL,
    NULL,
    NULL,
    NULL,
-   intel_run_render	/* run */
+   intel_run_render             /* run */
+};
+
+const struct tnl_pipeline_stage *intel_pipeline[] = {
+   &_tnl_vertex_transform_stage,
+   &_tnl_vertex_cull_stage,
+   &_tnl_normal_transform_stage,
+   &_tnl_lighting_stage,
+   &_tnl_fog_coordinate_stage,
+   &_tnl_texgen_stage,
+   &_tnl_texture_transform_stage,
+   &_tnl_point_attenuation_stage,
+   &_tnl_vertex_program_stage,
+#if 1
+   &_intel_render_stage,        /* ADD: unclipped rastersetup-to-dma */
+#endif
+   &_tnl_render_stage,
+   0,
 };
diff --git a/i915/intel_rotate.c b/i915/intel_rotate.c
deleted file mode 100644
index a77640e..0000000
--- a/i915/intel_rotate.c
+++ /dev/null
@@ -1,221 +0,0 @@
-
-/**
- * Routines for simple 2D->2D transformations for rotated, flipped screens.
- *
- * XXX This code is not intel-specific.  Move it into a common/utility
- * someday.
- */
-
-#include "intel_rotate.h"
-
-#define MIN2(A, B)   ( ((A) < (B)) ? (A) : (B) )
-
-#define ABS(A)  ( ((A) < 0) ? -(A) : (A) )
-
-
-void
-matrix23Set(struct matrix23 *m,
-            int m00, int m01, int m02,
-            int m10, int m11, int m12)
-{
-   m->m00 = m00;   m->m01 = m01;   m->m02 = m02;
-   m->m10 = m10;   m->m11 = m11;   m->m12 = m12;
-}
-
-
-/*
- * Transform (x,y) coordinate by the given matrix.
- */
-void
-matrix23TransformCoordf(const struct matrix23 *m, float *x, float *y)
-{
-   const float x0 = *x;
-   const float y0 = *y;
-
-   *x = m->m00 * x0 + m->m01 * y0 + m->m02;
-   *y = m->m10 * x0 + m->m11 * y0 + m->m12;
-}
-
-
-void
-matrix23TransformCoordi(const struct matrix23 *m, int *x, int *y)
-{
-   const int x0 = *x;
-   const int y0 = *y;
-
-   *x = m->m00 * x0 + m->m01 * y0 + m->m02;
-   *y = m->m10 * x0 + m->m11 * y0 + m->m12;
-}
-
-
-/*
- * Transform a width and height by the given matrix.
- * XXX this could be optimized quite a bit.
- */
-void
-matrix23TransformDistance(const struct matrix23 *m, int *xDist, int *yDist)
-{
-   int x0 = 0, y0 = 0;
-   int x1 = *xDist, y1 = 0;
-   int x2 = 0, y2 = *yDist;
-   matrix23TransformCoordi(m, &x0, &y0);
-   matrix23TransformCoordi(m, &x1, &y1);
-   matrix23TransformCoordi(m, &x2, &y2);
-
-   *xDist = (x1 - x0) + (x2 - x0);
-   *yDist = (y1 - y0) + (y2 - y0);
-
-   if (*xDist < 0)
-       *xDist = -*xDist;
-   if (*yDist < 0)
-       *yDist = -*yDist;
-}
-
-
-/**
- * Transform the rect defined by (x, y, w, h) by m.
- */
-void
-matrix23TransformRect(const struct matrix23 *m, int *x, int *y, int *w, int *h)
-{
-   int x0 = *x, y0 = *y;
-   int x1 = *x + *w, y1 = *y;
-   int x2 = *x + *w, y2 = *y + *h;
-   int x3 = *x, y3 = *y + *h;
-   matrix23TransformCoordi(m, &x0, &y0);
-   matrix23TransformCoordi(m, &x1, &y1);
-   matrix23TransformCoordi(m, &x2, &y2);
-   matrix23TransformCoordi(m, &x3, &y3);
-   *w = ABS(x1 - x0) + ABS(x2 - x1);
-   /**w = ABS(*w);*/
-   *h = ABS(y1 - y0) + ABS(y2 - y1);
-   /**h = ABS(*h);*/
-   *x = MIN2(x0, x1);
-   *x = MIN2(*x, x2);
-   *y = MIN2(y0, y1);
-   *y = MIN2(*y, y2);
-}
-
-
-/*
- * Make rotation matrix for width X height screen.
- */
-void
-matrix23Rotate(struct matrix23 *m, int width, int height, int angle)
-{
-   switch (angle) {
-   case 0:
-      matrix23Set(m, 1, 0, 0, 0, 1, 0);
-      break;
-   case 90:
-      matrix23Set(m, 0, 1, 0,  -1, 0, width);
-      break;
-   case 180:
-      matrix23Set(m, -1, 0, width,  0, -1, height);
-      break;
-   case 270:
-      matrix23Set(m, 0, -1, height,  1, 0, 0);
-      break;
-   default:
-      /*abort()*/;
-   }
-}
-
-
-/*
- * Make flip/reflection matrix for width X height screen.
- */
-void
-matrix23Flip(struct matrix23 *m, int width, int height, int xflip, int yflip)
-{
-   if (xflip) {
-      m->m00 = -1;  m->m01 = 0;   m->m02 = width - 1;
-   }
-   else {
-      m->m00 = 1;   m->m01 = 0;   m->m02 = 0;
-   }
-   if (yflip) {
-      m->m10 = 0;   m->m11 = -1;  m->m12 = height - 1;
-   }
-   else {
-      m->m10 = 0;   m->m11 = 1;   m->m12 = 0;
-   }
-}
-
-
-/*
- * result = a * b
- */
-void
-matrix23Multiply(struct matrix23 *result,
-                 const struct matrix23 *a, const struct matrix23 *b)
-{
-   result->m00 = a->m00 * b->m00 + a->m01 * b->m10;
-   result->m01 = a->m00 * b->m01 + a->m01 * b->m11;
-   result->m02 = a->m00 * b->m02 + a->m01 * b->m12 + a->m02;
-
-   result->m10 = a->m10 * b->m00 + a->m11 * b->m10;
-   result->m11 = a->m10 * b->m01 + a->m11 * b->m11;
-   result->m12 = a->m10 * b->m02 + a->m11 * b->m12 + a->m12;
-}
-
-
-#if 000
-
-#include <stdio.h>
-
-int
-main(int argc, char *argv[])
-{
-   int width = 500, height = 400;
-   int rot;
-   int fx = 0, fy = 0;  /* flip x and/or y ? */
-   int coords[4][2];
-
-   /* four corner coords to test with */
-   coords[0][0] = 0;  coords[0][1] = 0;
-   coords[1][0] = width-1;  coords[1][1] = 0;
-   coords[2][0] = width-1;  coords[2][1] = height-1;
-   coords[3][0] = 0;  coords[3][1] = height-1;
-
-
-   for (rot = 0; rot < 360; rot += 90) {
-      struct matrix23 rotate, flip, m;
-      int i;
-
-      printf("Rot %d, xFlip %d, yFlip %d:\n", rot, fx, fy);
-
-      /* make transformation matrix 'm' */
-      matrix23Rotate(&rotate, width, height, rot);
-      matrix23Flip(&flip, width, height, fx, fy);
-      matrix23Multiply(&m, &rotate, &flip);
-
-      /* xform four coords */
-      for (i = 0; i < 4; i++) {
-         int x = coords[i][0];
-         int y = coords[i][1];
-         matrix23TransformCoordi(&m, &x, &y);
-         printf("  %d, %d  -> %d %d\n", coords[i][0], coords[i][1], x, y);
-      }
-
-      /* xform width, height */
-      {
-         int x = width;
-         int y = height;
-         matrix23TransformDistance(&m, &x, &y);
-         printf("  %d x %d -> %d x %d\n", width, height, x, y);
-      }
-
-      /* xform rect */
-      {
-         int x = 50, y = 10, w = 200, h = 100;
-         matrix23TransformRect(&m, &x, &y, &w, &h);
-         printf("  %d,%d %d x %d -> %d, %d %d x %d\n", 50, 10, 200, 100,
-                x, y, w, h);
-      }
-
-   }
-
-   return 0;
-}
-#endif
diff --git a/i915/intel_rotate.h b/i915/intel_rotate.h
deleted file mode 100644
index 0da45d2..0000000
--- a/i915/intel_rotate.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef INTEL_ROTATE_H
-#define INTEL_ROTATE_H 1
-
-struct matrix23
-{
-   int m00, m01, m02;
-   int m10, m11, m12;
-};
-
-
-
-extern void
-matrix23Set(struct matrix23 *m,
-            int m00, int m01, int m02,
-            int m10, int m11, int m12);
-
-extern void
-matrix23TransformCoordi(const struct matrix23 *m, int *x, int *y);
-
-extern void
-matrix23TransformCoordf(const struct matrix23 *m, float *x, float *y);
-
-extern void
-matrix23TransformDistance(const struct matrix23 *m, int *xDist, int *yDist);
-
-extern void
-matrix23TransformRect(const struct matrix23 *m,
-                      int *x, int *y, int *w, int *h);
-
-extern void
-matrix23Rotate(struct matrix23 *m, int width, int height, int angle);
-
-extern void
-matrix23Flip(struct matrix23 *m, int width, int height, int xflip, int yflip);
-
-extern void
-matrix23Multiply(struct matrix23 *result,
-                 const struct matrix23 *a, const struct matrix23 *b);
-
-
-#endif /* INTEL_ROTATE_H */
diff --git a/i915/intel_screen.c b/i915/intel_screen.c
deleted file mode 100644
index ca8610b..0000000
--- a/i915/intel_screen.c
+++ /dev/null
@@ -1,690 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "glheader.h"
-#include "context.h"
-#include "framebuffer.h"
-#include "matrix.h"
-#include "renderbuffer.h"
-#include "simple_list.h"
-#include "utils.h"
-#include "vblank.h"
-#include "xmlpool.h"
-
-
-#include "intel_screen.h"
-
-#include "intel_tex.h"
-#include "intel_span.h"
-#include "intel_tris.h"
-#include "intel_ioctl.h"
-
-#include "i830_dri.h"
-
-PUBLIC const char __driConfigOptions[] =
-DRI_CONF_BEGIN
-    DRI_CONF_SECTION_PERFORMANCE
-       DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) 
-       DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
-    DRI_CONF_SECTION_END
-    DRI_CONF_SECTION_QUALITY
-       DRI_CONF_FORCE_S3TC_ENABLE(false)
-       DRI_CONF_ALLOW_LARGE_TEXTURES(1)
-      DRI_CONF_SECTION_END
-DRI_CONF_END;
-const GLuint __driNConfigOptions = 4;
-
-#ifdef USE_NEW_INTERFACE
-static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;
-#endif /*USE_NEW_INTERFACE*/
-
-extern const struct dri_extension card_extensions[];
-
-/**
- * Map all the memory regions described by the screen.
- * \return GL_TRUE if success, GL_FALSE if error.
- */
-GLboolean
-intelMapScreenRegions(__DRIscreenPrivate *sPriv)
-{
-   intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private;
-
-   if (intelScreen->front.handle) {
-      if (drmMap(sPriv->fd,
-                 intelScreen->front.handle,
-                 intelScreen->front.size,
-                 (drmAddress *)&intelScreen->front.map) != 0) {
-         _mesa_problem(NULL, "drmMap(frontbuffer) failed!");
-         return GL_FALSE;
-      }
-   }
-   else {
-      _mesa_warning(NULL, "no front buffer handle in intelMapScreenRegions!");
-   }
-
-   if (drmMap(sPriv->fd,
-              intelScreen->back.handle,
-              intelScreen->back.size,
-              (drmAddress *)&intelScreen->back.map) != 0) {
-      intelUnmapScreenRegions(intelScreen);
-      return GL_FALSE;
-   }
-
-   if (drmMap(sPriv->fd,
-              intelScreen->depth.handle,
-              intelScreen->depth.size,
-              (drmAddress *)&intelScreen->depth.map) != 0) {
-      intelUnmapScreenRegions(intelScreen);
-      return GL_FALSE;
-   }
-
-   if (drmMap(sPriv->fd,
-              intelScreen->tex.handle,
-              intelScreen->tex.size,
-              (drmAddress *)&intelScreen->tex.map) != 0) {
-      intelUnmapScreenRegions(intelScreen);
-      return GL_FALSE;
-   }
-
-   if (0)
-      printf("Mappings:  front: %p  back: %p  depth: %p  tex: %p\n",
-          intelScreen->front.map,
-          intelScreen->back.map,
-          intelScreen->depth.map,
-          intelScreen->tex.map);
-   return GL_TRUE;
-}
-
-
-void
-intelUnmapScreenRegions(intelScreenPrivate *intelScreen)
-{
-#define REALLY_UNMAP 1
-   if (intelScreen->front.map) {
-#if REALLY_UNMAP
-      if (drmUnmap(intelScreen->front.map, intelScreen->front.size) != 0)
-         printf("drmUnmap front failed!\n");
-#endif
-      intelScreen->front.map = NULL;
-   }
-   if (intelScreen->back.map) {
-#if REALLY_UNMAP
-      if (drmUnmap(intelScreen->back.map, intelScreen->back.size) != 0)
-         printf("drmUnmap back failed!\n");
-#endif
-      intelScreen->back.map = NULL;
-   }
-   if (intelScreen->depth.map) {
-#if REALLY_UNMAP
-      drmUnmap(intelScreen->depth.map, intelScreen->depth.size);
-      intelScreen->depth.map = NULL;
-#endif
-   }
-   if (intelScreen->tex.map) {
-#if REALLY_UNMAP
-      drmUnmap(intelScreen->tex.map, intelScreen->tex.size);
-      intelScreen->tex.map = NULL;
-#endif
-   }
-}
-
-
-static void
-intelPrintDRIInfo(intelScreenPrivate *intelScreen,
-                  __DRIscreenPrivate *sPriv,
-                  I830DRIPtr gDRIPriv)
-{
-   fprintf(stderr, "*** Front size:   0x%x  offset: 0x%x  pitch: %d\n",
-           intelScreen->front.size, intelScreen->front.offset,
-           intelScreen->front.pitch);
-   fprintf(stderr, "*** Back size:    0x%x  offset: 0x%x  pitch: %d\n",
-           intelScreen->back.size, intelScreen->back.offset,
-           intelScreen->back.pitch);
-   fprintf(stderr, "*** Depth size:   0x%x  offset: 0x%x  pitch: %d\n",
-           intelScreen->depth.size, intelScreen->depth.offset,
-           intelScreen->depth.pitch);
-   fprintf(stderr, "*** Rotated size: 0x%x  offset: 0x%x  pitch: %d\n",
-           intelScreen->rotated.size, intelScreen->rotated.offset,
-           intelScreen->rotated.pitch);
-   fprintf(stderr, "*** Texture size: 0x%x  offset: 0x%x\n",
-           intelScreen->tex.size, intelScreen->tex.offset);
-   fprintf(stderr, "*** Memory : 0x%x\n", gDRIPriv->mem);
-}
-
-
-static void
-intelPrintSAREA(const drmI830Sarea *sarea)
-{
-   fprintf(stderr, "SAREA: sarea width %d  height %d\n", sarea->width, sarea->height);
-   fprintf(stderr, "SAREA: pitch: %d\n", sarea->pitch);
-   fprintf(stderr,
-           "SAREA: front offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
-           sarea->front_offset, sarea->front_size,
-           (unsigned) sarea->front_handle);
-   fprintf(stderr,
-           "SAREA: back  offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
-           sarea->back_offset, sarea->back_size,
-           (unsigned) sarea->back_handle);
-   fprintf(stderr, "SAREA: depth offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
-           sarea->depth_offset, sarea->depth_size,
-           (unsigned) sarea->depth_handle);
-   fprintf(stderr, "SAREA: tex   offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
-           sarea->tex_offset, sarea->tex_size,
-           (unsigned) sarea->tex_handle);
-   fprintf(stderr, "SAREA: rotation: %d\n", sarea->rotation);
-   fprintf(stderr,
-           "SAREA: rotated offset: 0x%08x  size: 0x%x\n",
-           sarea->rotated_offset, sarea->rotated_size);
-   fprintf(stderr, "SAREA: rotated pitch: %d\n", sarea->rotated_pitch);
-}
-
-
-/**
- * A number of the screen parameters are obtained/computed from
- * information in the SAREA.  This function updates those parameters.
- */
-void
-intelUpdateScreenFromSAREA(intelScreenPrivate *intelScreen,
-                           drmI830Sarea *sarea)
-{
-   intelScreen->width = sarea->width;
-   intelScreen->height = sarea->height;
-
-   intelScreen->front.offset = sarea->front_offset;
-   intelScreen->front.pitch = sarea->pitch * intelScreen->cpp;
-   intelScreen->front.handle = sarea->front_handle;
-   intelScreen->front.size = sarea->front_size;
-
-   intelScreen->back.offset = sarea->back_offset;
-   intelScreen->back.pitch = sarea->pitch * intelScreen->cpp;
-   intelScreen->back.handle = sarea->back_handle;
-   intelScreen->back.size = sarea->back_size;
-			 
-   intelScreen->depth.offset = sarea->depth_offset;
-   intelScreen->depth.pitch = sarea->pitch * intelScreen->cpp;
-   intelScreen->depth.handle = sarea->depth_handle;
-   intelScreen->depth.size = sarea->depth_size;
-
-   intelScreen->tex.offset = sarea->tex_offset;
-   intelScreen->logTextureGranularity = sarea->log_tex_granularity;
-   intelScreen->tex.handle = sarea->tex_handle;
-   intelScreen->tex.size = sarea->tex_size;
-
-   intelScreen->rotated.offset = sarea->rotated_offset;
-   intelScreen->rotated.pitch = sarea->rotated_pitch * intelScreen->cpp;
-   intelScreen->rotated.size = sarea->rotated_size;
-   intelScreen->current_rotation = sarea->rotation;
-   matrix23Rotate(&intelScreen->rotMatrix,
-                  sarea->width, sarea->height, sarea->rotation);
-   intelScreen->rotatedWidth = sarea->virtualX;
-   intelScreen->rotatedHeight = sarea->virtualY;
-
-   if (0)
-      intelPrintSAREA(sarea);
-}
-
-
-static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv)
-{
-   intelScreenPrivate *intelScreen;
-   I830DRIPtr         gDRIPriv = (I830DRIPtr)sPriv->pDevPriv;
-   drmI830Sarea *sarea;
-   PFNGLXSCRENABLEEXTENSIONPROC glx_enable_extension =
-     (PFNGLXSCRENABLEEXTENSIONPROC) (*dri_interface->getProcAddress("glxEnableExtension"));
-   void * const psc = sPriv->psc->screenConfigs;
-
-   if (sPriv->devPrivSize != sizeof(I830DRIRec)) {
-      fprintf(stderr,"\nERROR!  sizeof(I830DRIRec) does not match passed size from device driver\n");
-      return GL_FALSE;
-   }
-
-   /* Allocate the private area */
-   intelScreen = (intelScreenPrivate *)CALLOC(sizeof(intelScreenPrivate));
-   if (!intelScreen) {
-      fprintf(stderr,"\nERROR!  Allocating private area failed\n");
-      return GL_FALSE;
-   }
-   /* parse information in __driConfigOptions */
-   driParseOptionInfo (&intelScreen->optionCache,
-		       __driConfigOptions, __driNConfigOptions);
-
-   intelScreen->driScrnPriv = sPriv;
-   sPriv->private = (void *)intelScreen;
-   intelScreen->sarea_priv_offset = gDRIPriv->sarea_priv_offset;
-   sarea = (drmI830Sarea *)
-         (((GLubyte *)sPriv->pSAREA)+intelScreen->sarea_priv_offset);
-
-   intelScreen->deviceID = gDRIPriv->deviceID;
-   intelScreen->mem = gDRIPriv->mem;
-   intelScreen->cpp = gDRIPriv->cpp;
-
-   switch (gDRIPriv->bitsPerPixel) {
-   case 15: intelScreen->fbFormat = DV_PF_555; break;
-   case 16: intelScreen->fbFormat = DV_PF_565; break;
-   case 32: intelScreen->fbFormat = DV_PF_8888; break;
-   }
-			 
-   intelUpdateScreenFromSAREA(intelScreen, sarea);
-
-   if (0)
-      intelPrintDRIInfo(intelScreen, sPriv, gDRIPriv);
-
-   if (!intelMapScreenRegions(sPriv)) {
-      fprintf(stderr,"\nERROR!  mapping regions\n");
-      _mesa_free(intelScreen);
-      sPriv->private = NULL;
-      return GL_FALSE;
-   }
-
-   intelScreen->drmMinor = sPriv->drmMinor;
-
-   /* Determine if IRQs are active? */
-   {
-      int ret;
-      drmI830GetParam gp;
-
-      gp.param = I830_PARAM_IRQ_ACTIVE;
-      gp.value = &intelScreen->irq_active;
-
-      ret = drmCommandWriteRead( sPriv->fd, DRM_I830_GETPARAM,
-				 &gp, sizeof(gp));
-      if (ret) {
-	 fprintf(stderr, "drmI830GetParam: %d\n", ret);
-	 return GL_FALSE;
-      }
-   }
-
-   /* Determine if batchbuffers are allowed */
-   {
-      int ret;
-      drmI830GetParam gp;
-
-      gp.param = I830_PARAM_ALLOW_BATCHBUFFER;
-      gp.value = &intelScreen->allow_batchbuffer;
-
-      ret = drmCommandWriteRead( sPriv->fd, DRM_I830_GETPARAM,
-				 &gp, sizeof(gp));
-      if (ret) {
-	 fprintf(stderr, "drmI830GetParam: (%d) %d\n", gp.param, ret);
-	 return GL_FALSE;
-      }
-   }
-
-   if (glx_enable_extension != NULL) {
-      (*glx_enable_extension)( psc, "GLX_SGI_swap_control" );
-      (*glx_enable_extension)( psc, "GLX_SGI_video_sync" );
-      (*glx_enable_extension)( psc, "GLX_MESA_swap_control" );
-      (*glx_enable_extension)( psc, "GLX_MESA_swap_frame_usage" );
-      (*glx_enable_extension)( psc, "GLX_SGI_make_current_read" );
-      (*glx_enable_extension)( psc, "GLX_MESA_allocate_memory" );
-      (*glx_enable_extension)( psc, "GLX_MESA_copy_sub_buffer" );
-   }
-   
-   sPriv->psc->allocateMemory = (void *) intelAllocateMemoryMESA;
-   sPriv->psc->freeMemory     = (void *) intelFreeMemoryMESA;
-   sPriv->psc->memoryOffset   = (void *) intelGetMemoryOffsetMESA;
-
-   return GL_TRUE;
-}
-		
-		
-static void intelDestroyScreen(__DRIscreenPrivate *sPriv)
-{
-   intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private;
-
-   intelUnmapScreenRegions(intelScreen);
-
-   driDestroyOptionInfo (&intelScreen->optionCache);
-
-   FREE(intelScreen);
-   sPriv->private = NULL;
-}
-
-
-static GLboolean intelCreateBuffer( __DRIscreenPrivate *driScrnPriv,
-				    __DRIdrawablePrivate *driDrawPriv,
-				    const __GLcontextModes *mesaVis,
-				    GLboolean isPixmap )
-{
-   intelScreenPrivate *screen = (intelScreenPrivate *) driScrnPriv->private;
-
-   if (isPixmap) {
-      return GL_FALSE; /* not implemented */
-   } else {
-      GLboolean swStencil = (mesaVis->stencilBits > 0 && 
-			     mesaVis->depthBits != 24);
-
-      struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis);
-
-      {
-         driRenderbuffer *frontRb
-            = driNewRenderbuffer(GL_RGBA,
-                                 screen->front.map,
-                                 screen->cpp,
-                                 screen->front.offset, screen->front.pitch,
-                                 driDrawPriv);
-         intelSetSpanFunctions(frontRb, mesaVis);
-         _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontRb->Base);
-      }
-
-      if (mesaVis->doubleBufferMode) {
-         driRenderbuffer *backRb
-            = driNewRenderbuffer(GL_RGBA,
-                                 screen->back.map,
-                                 screen->cpp,
-                                 screen->back.offset, screen->back.pitch,
-                                 driDrawPriv);
-         intelSetSpanFunctions(backRb, mesaVis);
-         _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backRb->Base);
-      }
-
-      if (mesaVis->depthBits == 16) {
-         driRenderbuffer *depthRb
-            = driNewRenderbuffer(GL_DEPTH_COMPONENT16,
-                                 screen->depth.map,
-                                 screen->cpp,
-                                 screen->depth.offset, screen->depth.pitch,
-                                 driDrawPriv);
-         intelSetSpanFunctions(depthRb, mesaVis);
-         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
-      }
-      else if (mesaVis->depthBits == 24) {
-         driRenderbuffer *depthRb
-            = driNewRenderbuffer(GL_DEPTH_COMPONENT24,
-                                 screen->depth.map,
-                                 screen->cpp,
-                                 screen->depth.offset, screen->depth.pitch,
-                                 driDrawPriv);
-         intelSetSpanFunctions(depthRb, mesaVis);
-         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
-      }
-
-      if (mesaVis->stencilBits > 0 && !swStencil) {
-         driRenderbuffer *stencilRb
-            = driNewRenderbuffer(GL_STENCIL_INDEX8_EXT,
-                                 screen->depth.map,
-                                 screen->cpp,
-                                 screen->depth.offset, screen->depth.pitch,
-                                 driDrawPriv);
-         intelSetSpanFunctions(stencilRb, mesaVis);
-         _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencilRb->Base);
-      }
-
-      _mesa_add_soft_renderbuffers(fb,
-                                   GL_FALSE, /* color */
-                                   GL_FALSE, /* depth */
-                                   swStencil,
-                                   mesaVis->accumRedBits > 0,
-                                   GL_FALSE, /* alpha */
-                                   GL_FALSE /* aux */);
-      driDrawPriv->driverPrivate = (void *) fb;
-
-      return (driDrawPriv->driverPrivate != NULL);
-   }
-}
-
-static void intelDestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
-{
-   _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)));
-}
-
-
-/**
- * Get information about previous buffer swaps.
- */
-static int
-intelGetSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo )
-{
-   intelContextPtr intel;
-
-   if ( (dPriv == NULL) || (dPriv->driContextPriv == NULL)
-	|| (dPriv->driContextPriv->driverPrivate == NULL)
-	|| (sInfo == NULL) ) {
-      return -1;
-   }
-
-   intel = dPriv->driContextPriv->driverPrivate;
-   sInfo->swap_count = intel->swap_count;
-   sInfo->swap_ust = intel->swap_ust;
-   sInfo->swap_missed_count = intel->swap_missed_count;
-
-   sInfo->swap_missed_usage = (sInfo->swap_missed_count != 0)
-       ? driCalculateSwapUsage( dPriv, 0, intel->swap_missed_ust )
-       : 0.0;
-
-   return 0;
-}
-
-
-/* There are probably better ways to do this, such as an
- * init-designated function to register chipids and createcontext
- * functions.
- */
-extern GLboolean i830CreateContext( const __GLcontextModes *mesaVis,
-				    __DRIcontextPrivate *driContextPriv,
-				    void *sharedContextPrivate);
-
-extern GLboolean i915CreateContext( const __GLcontextModes *mesaVis,
-				   __DRIcontextPrivate *driContextPriv,
-				   void *sharedContextPrivate);
-
-
-
-
-static GLboolean intelCreateContext( const __GLcontextModes *mesaVis,
-				   __DRIcontextPrivate *driContextPriv,
-				   void *sharedContextPrivate)
-{
-   __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
-   intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private;
-
-   switch (intelScreen->deviceID) {
-   case PCI_CHIP_845_G:
-   case PCI_CHIP_I830_M:
-   case PCI_CHIP_I855_GM:
-   case PCI_CHIP_I865_G:
-      return i830CreateContext( mesaVis, driContextPriv, 
-				sharedContextPrivate );
-
-   case PCI_CHIP_I915_G:
-   case PCI_CHIP_I915_GM:
-   case PCI_CHIP_I945_G:
-   case PCI_CHIP_I945_GM:
-   case PCI_CHIP_I945_GME:
-   case PCI_CHIP_G33_G:
-   case PCI_CHIP_Q35_G:
-   case PCI_CHIP_Q33_G:
-      return i915CreateContext( mesaVis, driContextPriv, 
-			       sharedContextPrivate );
- 
-   default:
-      fprintf(stderr, "Unrecognized deviceID %x\n", intelScreen->deviceID);
-      return GL_FALSE;
-   }
-}
-
-
-static const struct __DriverAPIRec intelAPI = {
-   .InitDriver      = intelInitDriver,
-   .DestroyScreen   = intelDestroyScreen,
-   .CreateContext   = intelCreateContext,
-   .DestroyContext  = intelDestroyContext,
-   .CreateBuffer    = intelCreateBuffer,
-   .DestroyBuffer   = intelDestroyBuffer,
-   .SwapBuffers     = intelSwapBuffers,
-   .MakeCurrent     = intelMakeCurrent,
-   .UnbindContext   = intelUnbindContext,
-   .GetSwapInfo     = intelGetSwapInfo,
-   .GetMSC          = driGetMSC32,
-   .WaitForMSC      = driWaitForMSC32,
-   .WaitForSBC      = NULL,
-   .SwapBuffersMSC  = NULL,
-   .CopySubBuffer   = intelCopySubBuffer
-};
-
-
-static __GLcontextModes *
-intelFillInModes( unsigned pixel_bits, unsigned depth_bits,
-		 unsigned stencil_bits, GLboolean have_back_buffer )
-{
-   __GLcontextModes * modes;
-   __GLcontextModes * m;
-   unsigned num_modes;
-   unsigned depth_buffer_factor;
-   unsigned back_buffer_factor;
-   GLenum fb_format;
-   GLenum fb_type;
-
-   /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't
-    * support pageflipping at all.
-    */
-   static const GLenum back_buffer_modes[] = {
-      GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML
-   };
-
-   u_int8_t depth_bits_array[3];
-   u_int8_t stencil_bits_array[3];
-
-
-   depth_bits_array[0] = 0;
-   depth_bits_array[1] = depth_bits;
-   depth_bits_array[2] = depth_bits;
-
-   /* Just like with the accumulation buffer, always provide some modes
-    * with a stencil buffer.  It will be a sw fallback, but some apps won't
-    * care about that.
-    */
-   stencil_bits_array[0] = 0;
-   stencil_bits_array[1] = 0;
-   stencil_bits_array[2] = (stencil_bits == 0) ? 8 : stencil_bits;
-
-   depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 3 : 1;
-   back_buffer_factor  = (have_back_buffer) ? 3 : 1;
-
-   num_modes = depth_buffer_factor * back_buffer_factor * 4;
-
-    if ( pixel_bits == 16 ) {
-        fb_format = GL_RGB;
-        fb_type = GL_UNSIGNED_SHORT_5_6_5;
-    }
-    else {
-        fb_format = GL_BGRA;
-        fb_type = GL_UNSIGNED_INT_8_8_8_8_REV;
-    }
-
-   modes = (*dri_interface->createContextModes)( num_modes, sizeof( __GLcontextModes ) );
-   m = modes;
-   if ( ! driFillInModes( & m, fb_format, fb_type,
-			  depth_bits_array, stencil_bits_array, depth_buffer_factor,
-			  back_buffer_modes, back_buffer_factor,
-			  GLX_TRUE_COLOR ) ) {
-	fprintf( stderr, "[%s:%u] Error creating FBConfig!\n",
-		 __func__, __LINE__ );
-	return NULL;
-   }
-   if ( ! driFillInModes( & m, fb_format, fb_type,
-			  depth_bits_array, stencil_bits_array, depth_buffer_factor,
-			  back_buffer_modes, back_buffer_factor,
-			  GLX_DIRECT_COLOR ) ) {
-	fprintf( stderr, "[%s:%u] Error creating FBConfig!\n",
-		 __func__, __LINE__ );
-	return NULL;
-   }
-
-   /* Mark the visual as slow if there are "fake" stencil bits.
-    */
-   for ( m = modes ; m != NULL ; m = m->next ) {
-      if ( (m->stencilBits != 0) && (m->stencilBits != stencil_bits) ) {
-	 m->visualRating = GLX_SLOW_CONFIG;
-      }
-   }
-
-   return modes;
-}
-
-
-/**
- * This is the bootstrap function for the driver.  libGL supplies all of the
- * requisite information about the system, and the driver initializes itself.
- * This routine also fills in the linked list pointed to by \c driver_modes
- * with the \c __GLcontextModes that the driver can support for windows or
- * pbuffers.
- * 
- * \return A pointer to a \c __DRIscreenPrivate on success, or \c NULL on 
- *         failure.
- */
-PUBLIC
-void * __driCreateNewScreen_20050727( __DRInativeDisplay *dpy, int scrn, __DRIscreen *psc,
-			     const __GLcontextModes * modes,
-			     const __DRIversion * ddx_version,
-			     const __DRIversion * dri_version,
-			     const __DRIversion * drm_version,
-			     const __DRIframebuffer * frame_buffer,
-			     drmAddress pSAREA, int fd, 
-			     int internal_api_version,
-			     const __DRIinterfaceMethods * interface,
-			     __GLcontextModes ** driver_modes )
-			     
-{
-   __DRIscreenPrivate *psp;
-   static const __DRIversion ddx_expected = { 1, 5, 0 };
-   static const __DRIversion dri_expected = { 4, 0, 0 };
-   static const __DRIversion drm_expected = { 1, 4, 0 };
-
-   dri_interface = interface;
-
-   if ( ! driCheckDriDdxDrmVersions2( "i915",
-				      dri_version, & dri_expected,
-				      ddx_version, & ddx_expected,
-				      drm_version, & drm_expected ) ) {
-      return NULL;
-   }
-
-   psp = __driUtilCreateNewScreen(dpy, scrn, psc, NULL,
-				  ddx_version, dri_version, drm_version,
-				  frame_buffer, pSAREA, fd,
-				  internal_api_version, &intelAPI);
-   if ( psp != NULL ) {
-      I830DRIPtr dri_priv = (I830DRIPtr) psp->pDevPriv;
-      *driver_modes = intelFillInModes( dri_priv->cpp * 8,
-					(dri_priv->cpp == 2) ? 16 : 24,
-					(dri_priv->cpp == 2) ? 0  : 8,
-					1 );
-
-      /* Calling driInitExtensions here, with a NULL context pointer, does not actually
-       * enable the extensions.  It just makes sure that all the dispatch offsets for all
-       * the extensions that *might* be enables are known.  This is needed because the
-       * dispatch offsets need to be known when _mesa_context_create is called, but we can't
-       * enable the extensions until we have a context pointer.
-       *
-       * Hello chicken.  Hello egg.  How are you two today?
-       */
-      driInitExtensions( NULL, card_extensions, GL_FALSE );
-   }
-
-   return (void *) psp;
-}
diff --git a/i915/intel_screen.h b/i915/intel_screen.h
deleted file mode 100644
index 24cfd9b..0000000
--- a/i915/intel_screen.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef _INTEL_INIT_H_
-#define _INTEL_INIT_H_
-
-#include <sys/time.h>
-#include "xmlconfig.h"
-#include "dri_util.h"
-#include "intel_rotate.h"
-#include "i830_common.h"
-
-
-/* This roughly corresponds to a gl_renderbuffer (Mesa 6.4) */
-typedef struct {
-   drm_handle_t handle;
-   drmSize size;        /* region size in bytes */
-   char *map;           /* memory map */
-   int offset;          /* from start of video mem, in bytes */
-   int pitch;           /* row stride, in bytes */
-} intelRegion;
-
-typedef struct 
-{
-   intelRegion front;
-   intelRegion back;
-   intelRegion rotated;
-   intelRegion depth;
-   intelRegion tex;
-   
-   int deviceID;
-   int width;
-   int height;
-   int mem;         /* unused */
-   
-   int cpp;         /* for front and back buffers */
-   int fbFormat;
-
-   int logTextureGranularity;
-   
-   __DRIscreenPrivate *driScrnPriv;
-   unsigned int sarea_priv_offset;
-
-   int drmMinor;
-
-   int irq_active;
-   int allow_batchbuffer;
-
-   struct matrix23 rotMatrix;
-
-   int current_rotation;  /* 0, 90, 180 or 270 */
-   int rotatedWidth, rotatedHeight;
-
-   /**
-   * Configuration cache with default values for all contexts
-   */
-   driOptionCache optionCache;
-} intelScreenPrivate;
-
-
-extern GLboolean
-intelMapScreenRegions(__DRIscreenPrivate *sPriv);
-
-extern void
-intelUnmapScreenRegions(intelScreenPrivate *intelScreen);
-
-extern void
-intelUpdateScreenFromSAREA(intelScreenPrivate *intelScreen,
-                           drmI830Sarea *sarea);
-
-extern void
-intelDestroyContext(__DRIcontextPrivate *driContextPriv);
-
-extern GLboolean
-intelUnbindContext(__DRIcontextPrivate *driContextPriv);
-
-extern GLboolean
-intelMakeCurrent(__DRIcontextPrivate *driContextPriv,
-                 __DRIdrawablePrivate *driDrawPriv,
-                 __DRIdrawablePrivate *driReadPriv);
-
-extern void
-intelSwapBuffers(__DRIdrawablePrivate *dPriv);
-
-extern void
-intelCopySubBuffer( __DRIdrawablePrivate *dPriv, int x, int y, int w, int h );
-
-#endif
diff --git a/i915/intel_span.c b/i915/intel_span.c
deleted file mode 100644
index c3ffc4b..0000000
--- a/i915/intel_span.c
+++ /dev/null
@@ -1,258 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "glheader.h"
-#include "macros.h"
-#include "mtypes.h"
-#include "colormac.h"
-
-#include "intel_screen.h"
-
-#include "intel_span.h"
-#include "intel_ioctl.h"
-#include "swrast/swrast.h"
-
-
-#define DBG 0
-
-#define LOCAL_VARS						\
-   intelContextPtr intel = INTEL_CONTEXT(ctx);			\
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;		\
-   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
-   GLuint pitch = drb->pitch;					\
-   GLuint height = dPriv->h;					\
-   char *buf = (char *) drb->Base.Data +			\
-			dPriv->x * drb->cpp +			\
-			dPriv->y * pitch;			\
-   GLushort p;							\
-   (void) buf; (void) p
-
-#define LOCAL_DEPTH_VARS					\
-   intelContextPtr intel = INTEL_CONTEXT(ctx);			\
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;		\
-   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
-   GLuint pitch = drb->pitch;					\
-   GLuint height = dPriv->h;					\
-   char *buf = (char *) drb->Base.Data +			\
-			dPriv->x * drb->cpp +			\
-			dPriv->y * pitch
-
-#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS 
-
-#define INIT_MONO_PIXEL(p,color)\
-	 p = INTEL_PACKCOLOR565(color[0],color[1],color[2])
-
-#define Y_FLIP(_y) (height - _y - 1)
-
-#define HW_LOCK()
-
-#define HW_UNLOCK()
-
-/* 16 bit, 565 rgb color spanline and pixel functions
- */
-#define WRITE_RGBA( _x, _y, r, g, b, a )				\
-   *(GLushort *)(buf + _x*2 + _y*pitch)  = ( (((int)r & 0xf8) << 8) |	\
-		                             (((int)g & 0xfc) << 3) |	\
-		                             (((int)b & 0xf8) >> 3))
-#define WRITE_PIXEL( _x, _y, p )  \
-   *(GLushort *)(buf + _x*2 + _y*pitch) = p
-
-#define READ_RGBA( rgba, _x, _y )				\
-do {								\
-   GLushort p = *(GLushort *)(buf + _x*2 + _y*pitch);		\
-   rgba[0] = (((p >> 11) & 0x1f) * 255) / 31;			\
-   rgba[1] = (((p >>  5) & 0x3f) * 255) / 63;			\
-   rgba[2] = (((p >>  0) & 0x1f) * 255) / 31;			\
-   rgba[3] = 255;						\
-} while(0)
-
-#define TAG(x) intel##x##_565
-#include "spantmp.h"
-
-/* 15 bit, 555 rgb color spanline and pixel functions
- */
-#define WRITE_RGBA( _x, _y, r, g, b, a )			\
-   *(GLushort *)(buf + _x*2 + _y*pitch)  = (((r & 0xf8) << 7) |	\
-		                            ((g & 0xf8) << 3) |	\
-                         		    ((b & 0xf8) >> 3))
-
-#define WRITE_PIXEL( _x, _y, p )  \
-   *(GLushort *)(buf + _x*2 + _y*pitch)  = p
-
-#define READ_RGBA( rgba, _x, _y )				\
-do {								\
-   GLushort p = *(GLushort *)(buf + _x*2 + _y*pitch);		\
-   rgba[0] = (p >> 7) & 0xf8;					\
-   rgba[1] = (p >> 3) & 0xf8;					\
-   rgba[2] = (p << 3) & 0xf8;					\
-   rgba[3] = 255;						\
-} while(0)
-
-#define TAG(x) intel##x##_555
-#include "spantmp.h"
-
-/* 16 bit depthbuffer functions.
- */
-#define WRITE_DEPTH( _x, _y, d ) \
-   *(GLushort *)(buf + (_x)*2 + (_y)*pitch)  = d;
-
-#define READ_DEPTH( d, _x, _y )	\
-   d = *(GLushort *)(buf + (_x)*2 + (_y)*pitch);	 
-
-
-#define TAG(x) intel##x##_z16
-#include "depthtmp.h"
-
-
-#undef LOCAL_VARS
-#define LOCAL_VARS						\
-   intelContextPtr intel = INTEL_CONTEXT(ctx);			\
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;		\
-   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
-   GLuint pitch = drb->pitch;					\
-   GLuint height = dPriv->h;					\
-   char *buf = (char *)drb->Base.Data +				\
-			dPriv->x * drb->cpp +			\
-			dPriv->y * pitch;			\
-   GLuint p;							\
-   (void) buf; (void) p
-
-#undef INIT_MONO_PIXEL
-#define INIT_MONO_PIXEL(p,color)\
-	 p = INTEL_PACKCOLOR8888(color[0],color[1],color[2],color[3])
-
-/* 32 bit, 8888 argb color spanline and pixel functions
- */
-#define WRITE_RGBA(_x, _y, r, g, b, a)			\
-    *(GLuint *)(buf + _x*4 + _y*pitch) = ((r << 16) |	\
-					  (g << 8)  |	\
-					  (b << 0)  |	\
-					  (a << 24) )
-
-#define WRITE_PIXEL(_x, _y, p)			\
-    *(GLuint *)(buf + _x*4 + _y*pitch) = p
-
-
-#define READ_RGBA(rgba, _x, _y)					\
-    do {							\
-	GLuint p = *(GLuint *)(buf + _x*4 + _y*pitch);		\
-	rgba[0] = (p >> 16) & 0xff;				\
-	rgba[1] = (p >> 8)  & 0xff;				\
-	rgba[2] = (p >> 0)  & 0xff;				\
-	rgba[3] = (p >> 24) & 0xff;				\
-    } while (0)
-
-#define TAG(x) intel##x##_8888
-#include "spantmp.h"
-
-
-/* 24/8 bit interleaved depth/stencil functions
- */
-#define WRITE_DEPTH( _x, _y, d ) {			\
-   GLuint tmp = *(GLuint *)(buf + (_x)*4 + (_y)*pitch);	\
-   tmp &= 0xff000000;					\
-   tmp |= (d) & 0xffffff;				\
-   *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = tmp;		\
-}
-
-#define READ_DEPTH( d, _x, _y )		\
-   d = *(GLuint *)(buf + (_x)*4 + (_y)*pitch) & 0xffffff;
-
-
-#define TAG(x) intel##x##_z24_s8
-#include "depthtmp.h"
-
-#define WRITE_STENCIL( _x, _y, d ) {			\
-   GLuint tmp = *(GLuint *)(buf + (_x)*4 + (_y)*pitch);	\
-   tmp &= 0xffffff;					\
-   tmp |= ((d)<<24);					\
-   *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = tmp;		\
-}
-
-#define READ_STENCIL( d, _x, _y )			\
-   d = *(GLuint *)(buf + (_x)*4 + (_y)*pitch) >> 24;
-
-#define TAG(x) intel##x##_z24_s8
-#include "stenciltmp.h"
-
-
-/* Move locking out to get reasonable span performance.
- */
-void intelSpanRenderStart( GLcontext *ctx )
-{
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
-
-   intelFlush(&intel->ctx);
-   LOCK_HARDWARE(intel);
-   intelWaitForIdle(intel);
-}
-
-void intelSpanRenderFinish( GLcontext *ctx )
-{
-   intelContextPtr intel = INTEL_CONTEXT( ctx );
-   _swrast_flush( ctx );
-   UNLOCK_HARDWARE( intel );
-}
-
-void intelInitSpanFuncs( GLcontext *ctx )
-{
-   struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx);
-   swdd->SpanRenderStart = intelSpanRenderStart;
-   swdd->SpanRenderFinish = intelSpanRenderFinish; 
-}
-
-
-/**
- * Plug in the Get/Put routines for the given driRenderbuffer.
- */
-void
-intelSetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis)
-{
-   if (drb->Base.InternalFormat == GL_RGBA) {
-      if (vis->redBits == 5 && vis->greenBits == 5 && vis->blueBits == 5) {
-         intelInitPointers_555(&drb->Base);
-      }
-      else if (vis->redBits == 5 && vis->greenBits == 6 && vis->blueBits == 5) {
-         intelInitPointers_565(&drb->Base);
-      }
-      else {
-         assert(vis->redBits == 8);
-         assert(vis->greenBits == 8);
-         assert(vis->blueBits == 8);
-         intelInitPointers_8888(&drb->Base);
-      }
-   }
-   else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
-      intelInitDepthPointers_z16(&drb->Base);
-   }
-   else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
-      intelInitDepthPointers_z24_s8(&drb->Base);
-   }
-   else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
-      intelInitStencilPointers_z24_s8(&drb->Base);
-   }
-}
diff --git a/i915/intel_state.c b/i915/intel_state.c
index e5988a5..d1ca11d 100644
--- a/i915/intel_state.c
+++ b/i915/intel_state.c
@@ -30,252 +30,266 @@
 #include "context.h"
 #include "macros.h"
 #include "enums.h"
+#include "colormac.h"
 #include "dd.h"
 
 #include "intel_screen.h"
 #include "intel_context.h"
+#include "intel_fbo.h"
+#include "intel_regions.h"
 #include "swrast/swrast.h"
 
-int intel_translate_compare_func( GLenum func )
+int 
+intel_translate_shadow_compare_func( GLenum func )
 {
    switch(func) {
    case GL_NEVER: 
-      return COMPAREFUNC_NEVER; 
+       return COMPAREFUNC_ALWAYS; 
    case GL_LESS: 
-      return COMPAREFUNC_LESS; 
+       return COMPAREFUNC_LEQUAL; 
    case GL_LEQUAL: 
-      return COMPAREFUNC_LEQUAL; 
+       return COMPAREFUNC_LESS;
    case GL_GREATER: 
-      return COMPAREFUNC_GREATER; 
+       return COMPAREFUNC_GEQUAL; 
    case GL_GEQUAL: 
-      return COMPAREFUNC_GEQUAL; 
+      return COMPAREFUNC_GREATER; 
    case GL_NOTEQUAL: 
-      return COMPAREFUNC_NOTEQUAL; 
-   case GL_EQUAL: 
       return COMPAREFUNC_EQUAL; 
+   case GL_EQUAL: 
+      return COMPAREFUNC_NOTEQUAL; 
    case GL_ALWAYS: 
-      return COMPAREFUNC_ALWAYS; 
+       return COMPAREFUNC_NEVER; 
+   }
+
+   fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func);
+   return COMPAREFUNC_NEVER; 
+}
+
+int
+intel_translate_compare_func(GLenum func)
+{
+   switch (func) {
+   case GL_NEVER:
+      return COMPAREFUNC_NEVER;
+   case GL_LESS:
+      return COMPAREFUNC_LESS;
+   case GL_LEQUAL:
+      return COMPAREFUNC_LEQUAL;
+   case GL_GREATER:
+      return COMPAREFUNC_GREATER;
+   case GL_GEQUAL:
+      return COMPAREFUNC_GEQUAL;
+   case GL_NOTEQUAL:
+      return COMPAREFUNC_NOTEQUAL;
+   case GL_EQUAL:
+      return COMPAREFUNC_EQUAL;
+   case GL_ALWAYS:
+      return COMPAREFUNC_ALWAYS;
    }
 
    fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func);
-   return COMPAREFUNC_ALWAYS; 
+   return COMPAREFUNC_ALWAYS;
 }
 
-int intel_translate_stencil_op( GLenum op )
+int
+intel_translate_stencil_op(GLenum op)
 {
-   switch(op) {
-   case GL_KEEP: 
-      return STENCILOP_KEEP; 
-   case GL_ZERO: 
-      return STENCILOP_ZERO; 
-   case GL_REPLACE: 
-      return STENCILOP_REPLACE; 
-   case GL_INCR: 
+   switch (op) {
+   case GL_KEEP:
+      return STENCILOP_KEEP;
+   case GL_ZERO:
+      return STENCILOP_ZERO;
+   case GL_REPLACE:
+      return STENCILOP_REPLACE;
+   case GL_INCR:
       return STENCILOP_INCRSAT;
-   case GL_DECR: 
+   case GL_DECR:
       return STENCILOP_DECRSAT;
    case GL_INCR_WRAP:
-      return STENCILOP_INCR; 
+      return STENCILOP_INCR;
    case GL_DECR_WRAP:
-      return STENCILOP_DECR; 
-   case GL_INVERT: 
-      return STENCILOP_INVERT; 
-   default: 
+      return STENCILOP_DECR;
+   case GL_INVERT:
+      return STENCILOP_INVERT;
+   default:
       return STENCILOP_ZERO;
    }
 }
 
-int intel_translate_blend_factor( GLenum factor )
+int
+intel_translate_blend_factor(GLenum factor)
 {
-   switch(factor) {
-   case GL_ZERO: 
-      return BLENDFACT_ZERO; 
-   case GL_SRC_ALPHA: 
-      return BLENDFACT_SRC_ALPHA; 
-   case GL_ONE: 
-      return BLENDFACT_ONE; 
-   case GL_SRC_COLOR: 
-      return BLENDFACT_SRC_COLR; 
-   case GL_ONE_MINUS_SRC_COLOR: 
-      return BLENDFACT_INV_SRC_COLR; 
-   case GL_DST_COLOR: 
-      return BLENDFACT_DST_COLR; 
-   case GL_ONE_MINUS_DST_COLOR: 
-      return BLENDFACT_INV_DST_COLR; 
+   switch (factor) {
+   case GL_ZERO:
+      return BLENDFACT_ZERO;
+   case GL_SRC_ALPHA:
+      return BLENDFACT_SRC_ALPHA;
+   case GL_ONE:
+      return BLENDFACT_ONE;
+   case GL_SRC_COLOR:
+      return BLENDFACT_SRC_COLR;
+   case GL_ONE_MINUS_SRC_COLOR:
+      return BLENDFACT_INV_SRC_COLR;
+   case GL_DST_COLOR:
+      return BLENDFACT_DST_COLR;
+   case GL_ONE_MINUS_DST_COLOR:
+      return BLENDFACT_INV_DST_COLR;
    case GL_ONE_MINUS_SRC_ALPHA:
-      return BLENDFACT_INV_SRC_ALPHA; 
-   case GL_DST_ALPHA: 
-      return BLENDFACT_DST_ALPHA; 
+      return BLENDFACT_INV_SRC_ALPHA;
+   case GL_DST_ALPHA:
+      return BLENDFACT_DST_ALPHA;
    case GL_ONE_MINUS_DST_ALPHA:
-      return BLENDFACT_INV_DST_ALPHA; 
-   case GL_SRC_ALPHA_SATURATE: 
+      return BLENDFACT_INV_DST_ALPHA;
+   case GL_SRC_ALPHA_SATURATE:
       return BLENDFACT_SRC_ALPHA_SATURATE;
    case GL_CONSTANT_COLOR:
-      return BLENDFACT_CONST_COLOR; 
+      return BLENDFACT_CONST_COLOR;
    case GL_ONE_MINUS_CONSTANT_COLOR:
       return BLENDFACT_INV_CONST_COLOR;
    case GL_CONSTANT_ALPHA:
-      return BLENDFACT_CONST_ALPHA; 
+      return BLENDFACT_CONST_ALPHA;
    case GL_ONE_MINUS_CONSTANT_ALPHA:
       return BLENDFACT_INV_CONST_ALPHA;
    }
-   
+
    fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, factor);
    return BLENDFACT_ZERO;
 }
 
-int intel_translate_logic_op( GLenum opcode )
+int
+intel_translate_logic_op(GLenum opcode)
 {
-   switch(opcode) {
-   case GL_CLEAR: 
-      return LOGICOP_CLEAR; 
-   case GL_AND: 
-      return LOGICOP_AND; 
-   case GL_AND_REVERSE: 
-      return LOGICOP_AND_RVRSE; 
-   case GL_COPY: 
-      return LOGICOP_COPY; 
-   case GL_COPY_INVERTED: 
-      return LOGICOP_COPY_INV; 
-   case GL_AND_INVERTED: 
-      return LOGICOP_AND_INV; 
-   case GL_NOOP: 
-      return LOGICOP_NOOP; 
-   case GL_XOR: 
-      return LOGICOP_XOR; 
-   case GL_OR: 
-      return LOGICOP_OR; 
-   case GL_OR_INVERTED: 
-      return LOGICOP_OR_INV; 
-   case GL_NOR: 
-      return LOGICOP_NOR; 
-   case GL_EQUIV: 
-      return LOGICOP_EQUIV; 
-   case GL_INVERT: 
-      return LOGICOP_INV; 
-   case GL_OR_REVERSE: 
-      return LOGICOP_OR_RVRSE; 
-   case GL_NAND: 
-      return LOGICOP_NAND; 
-   case GL_SET: 
-      return LOGICOP_SET; 
-   default:
+   switch (opcode) {
+   case GL_CLEAR:
+      return LOGICOP_CLEAR;
+   case GL_AND:
+      return LOGICOP_AND;
+   case GL_AND_REVERSE:
+      return LOGICOP_AND_RVRSE;
+   case GL_COPY:
+      return LOGICOP_COPY;
+   case GL_COPY_INVERTED:
+      return LOGICOP_COPY_INV;
+   case GL_AND_INVERTED:
+      return LOGICOP_AND_INV;
+   case GL_NOOP:
+      return LOGICOP_NOOP;
+   case GL_XOR:
+      return LOGICOP_XOR;
+   case GL_OR:
+      return LOGICOP_OR;
+   case GL_OR_INVERTED:
+      return LOGICOP_OR_INV;
+   case GL_NOR:
+      return LOGICOP_NOR;
+   case GL_EQUIV:
+      return LOGICOP_EQUIV;
+   case GL_INVERT:
+      return LOGICOP_INV;
+   case GL_OR_REVERSE:
+      return LOGICOP_OR_RVRSE;
+   case GL_NAND:
+      return LOGICOP_NAND;
+   case GL_SET:
       return LOGICOP_SET;
-   }
-}
-
-static void intelDrawBuffer(GLcontext *ctx, GLenum mode )
-{
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
-   int front = 0;
- 
-   if (!ctx->DrawBuffer)
-      return;
-
-   switch ( ctx->DrawBuffer->_ColorDrawBufferMask[0] ) {
-   case BUFFER_BIT_FRONT_LEFT:
-      front = 1;
-      FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      break;
-   case BUFFER_BIT_BACK_LEFT:
-      front = 0;
-      FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      break;
    default:
-      FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_TRUE );
-      return;
-   }
-
-   if ( intel->sarea->pf_current_page == 1 ) 
-      front ^= 1;
-   
-   intelSetFrontClipRects( intel );
-
-   if (front) {
-      intel->drawRegion = &intel->intelScreen->front;
-      intel->readRegion = &intel->intelScreen->front;
-   } else {
-      intel->drawRegion = &intel->intelScreen->back;
-      intel->readRegion = &intel->intelScreen->back;
+      return LOGICOP_SET;
    }
-
-   intel->vtbl.set_color_region( intel, intel->drawRegion );
-}
-
-static void intelReadBuffer( GLcontext *ctx, GLenum mode )
-{
-   /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */
 }
 
 
-static void intelClearColor(GLcontext *ctx, const GLfloat color[4])
+static void
+intelClearColor(GLcontext * ctx, const GLfloat color[4])
 {
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
-   intelScreenPrivate *screen = intel->intelScreen;
+   struct intel_context *intel = intel_context(ctx);
+   GLubyte clear[4];
 
-   CLAMPED_FLOAT_TO_UBYTE(intel->clear_red, color[0]);
-   CLAMPED_FLOAT_TO_UBYTE(intel->clear_green, color[1]);
-   CLAMPED_FLOAT_TO_UBYTE(intel->clear_blue, color[2]);
-   CLAMPED_FLOAT_TO_UBYTE(intel->clear_alpha, color[3]);
+   CLAMPED_FLOAT_TO_UBYTE(clear[0], color[0]);
+   CLAMPED_FLOAT_TO_UBYTE(clear[1], color[1]);
+   CLAMPED_FLOAT_TO_UBYTE(clear[2], color[2]);
+   CLAMPED_FLOAT_TO_UBYTE(clear[3], color[3]);
 
-   intel->ClearColor = INTEL_PACKCOLOR(screen->fbFormat,
-				       intel->clear_red, 
-				       intel->clear_green, 
-				       intel->clear_blue, 
-				       intel->clear_alpha);
+   /* compute both 32 and 16-bit clear values */
+   intel->ClearColor8888 = INTEL_PACKCOLOR8888(clear[0], clear[1],
+                                               clear[2], clear[3]);
+   intel->ClearColor565 = INTEL_PACKCOLOR565(clear[0], clear[1], clear[2]);
 }
 
 
-static void intelCalcViewport( GLcontext *ctx )
+/**
+ * Update the viewport transformation matrix.  Depends on:
+ *  - viewport pos/size
+ *  - depthrange
+ *  - window pos/size or FBO size
+ */
+static void
+intelCalcViewport(GLcontext * ctx)
 {
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   struct intel_context *intel = intel_context(ctx);
    const GLfloat *v = ctx->Viewport._WindowMap.m;
+   const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
    GLfloat *m = intel->ViewportMatrix.m;
-   GLint h = 0;
+   GLfloat yScale, yBias;
+
+   if (ctx->DrawBuffer->Name) {
+      /* User created FBO */
+      struct intel_renderbuffer *irb
+         = intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]);
+      if (irb && !irb->RenderToTexture) {
+         /* y=0=top */
+         yScale = -1.0;
+         yBias = irb->Base.Height;
+      }
+      else {
+         /* y=0=bottom */
+         yScale = 1.0;
+         yBias = 0.0;
+      }
+   }
+   else {
+      /* window buffer, y=0=top */
+      yScale = -1.0;
+      yBias = (intel->driDrawable) ? intel->driDrawable->h : 0.0F;
+   }
 
-   if (intel->driDrawable) 
-      h = intel->driDrawable->h + SUBPIXEL_Y;
+   m[MAT_SX] = v[MAT_SX];
+   m[MAT_TX] = v[MAT_TX];
 
-   /* See also intel_translate_vertex.  SUBPIXEL adjustments can be done
-    * via state vars, too.
-    */
-   m[MAT_SX] =   v[MAT_SX];
-   m[MAT_TX] =   v[MAT_TX] + SUBPIXEL_X;
-   m[MAT_SY] = - v[MAT_SY];
-   m[MAT_TY] = - v[MAT_TY] + h;
-   m[MAT_SZ] =   v[MAT_SZ] * intel->depth_scale;
-   m[MAT_TZ] =   v[MAT_TZ] * intel->depth_scale;
+   m[MAT_SY] = v[MAT_SY] * yScale;
+   m[MAT_TY] = v[MAT_TY] * yScale + yBias;
+
+   m[MAT_SZ] = v[MAT_SZ] * depthScale;
+   m[MAT_TZ] = v[MAT_TZ] * depthScale;
 }
 
-static void intelViewport( GLcontext *ctx,
-			  GLint x, GLint y,
-			  GLsizei width, GLsizei height )
+static void
+intelViewport(GLcontext * ctx,
+              GLint x, GLint y, GLsizei width, GLsizei height)
 {
-   intelCalcViewport( ctx );
+   intelCalcViewport(ctx);
 }
 
-static void intelDepthRange( GLcontext *ctx,
-			    GLclampd nearval, GLclampd farval )
+static void
+intelDepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval)
 {
-   intelCalcViewport( ctx );
+   intelCalcViewport(ctx);
 }
 
 /* Fallback to swrast for select and feedback.
  */
-static void intelRenderMode( GLcontext *ctx, GLenum mode )
+static void
+intelRenderMode(GLcontext * ctx, GLenum mode)
 {
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
-   FALLBACK( intel, INTEL_FALLBACK_RENDERMODE, (mode != GL_RENDER) );
+   struct intel_context *intel = intel_context(ctx);
+   FALLBACK(intel, INTEL_FALLBACK_RENDERMODE, (mode != GL_RENDER));
 }
 
 
-void intelInitStateFuncs( struct dd_function_table *functions )
+void
+intelInitStateFuncs(struct dd_function_table *functions)
 {
-   functions->DrawBuffer = intelDrawBuffer;
-   functions->ReadBuffer = intelReadBuffer;
    functions->RenderMode = intelRenderMode;
    functions->Viewport = intelViewport;
    functions->DepthRange = intelDepthRange;
    functions->ClearColor = intelClearColor;
 }
-
diff --git a/i915/intel_structs.h b/i915/intel_structs.h
new file mode 100644
index 0000000..522e3bd
--- /dev/null
+++ b/i915/intel_structs.h
@@ -0,0 +1,132 @@
+#ifndef INTEL_STRUCTS_H
+#define INTEL_STRUCTS_H
+
+struct br0 {
+   GLuint length:8;
+   GLuint pad0:3;
+   GLuint dst_tiled:1;
+   GLuint pad1:8;
+   GLuint write_rgb:1;
+   GLuint write_alpha:1;
+   GLuint opcode:7;
+   GLuint client:3;
+};
+
+   
+struct br13 {
+   GLint dest_pitch:16;
+   GLuint rop:8;
+   GLuint color_depth:2;
+   GLuint pad1:3;
+   GLuint mono_source_transparency:1;
+   GLuint clipping_enable:1;
+   GLuint pad0:1;
+};
+
+
+
+/* This is an attempt to move some of the 2D interaction in this
+ * driver to using structs for packets rather than a bunch of #defines
+ * and dwords.
+ */
+struct xy_color_blit {
+   struct br0 br0;
+   struct br13 br13;
+
+   struct {
+      GLuint dest_x1:16;
+      GLuint dest_y1:16;
+   } dw2;
+
+   struct {
+      GLuint dest_x2:16;
+      GLuint dest_y2:16;
+   } dw3;
+   
+   GLuint dest_base_addr;
+   GLuint color;
+};
+
+struct xy_src_copy_blit {
+   struct br0 br0;
+   struct br13 br13;
+
+   struct {
+      GLuint dest_x1:16;
+      GLuint dest_y1:16;
+   } dw2;
+
+   struct {
+      GLuint dest_x2:16;
+      GLuint dest_y2:16;
+   } dw3;
+   
+   GLuint dest_base_addr;
+
+   struct {
+      GLuint src_x1:16;
+      GLuint src_y1:16;
+   } dw5;
+
+   struct {
+      GLint src_pitch:16;
+      GLuint pad:16;
+   } dw6;
+   
+   GLuint src_base_addr;
+};
+
+struct xy_setup_blit {
+   struct br0 br0;
+   struct br13 br13;
+
+   struct {
+      GLuint clip_x1:16;
+      GLuint clip_y1:16;
+   } dw2;
+
+   struct {
+      GLuint clip_x2:16;
+      GLuint clip_y2:16;
+   } dw3;
+      
+   GLuint dest_base_addr;
+   GLuint background_color;
+   GLuint foreground_color;
+   GLuint pattern_base_addr;
+};
+
+
+struct xy_text_immediate_blit {
+   struct {
+      GLuint length:8;
+      GLuint pad2:3;
+      GLuint dst_tiled:1;
+      GLuint pad1:4;
+      GLuint byte_packed:1;
+      GLuint pad0:5;
+      GLuint opcode:7;
+      GLuint client:3;
+   } dw0;
+
+   struct {
+      GLuint dest_x1:16;
+      GLuint dest_y1:16;
+   } dw1;
+
+   struct {
+      GLuint dest_x2:16;
+      GLuint dest_y2:16;
+   } dw2;   
+
+   /* Src bitmap data follows as inline dwords.
+    */
+};
+
+
+#define CLIENT_2D 0x2
+#define OPCODE_XY_SETUP_BLT 0x1
+#define OPCODE_XY_COLOR_BLT 0x50
+#define OPCODE_XY_TEXT_IMMEDIATE_BLT 0x31
+
+#endif
diff --git a/i915/intel_tex.c b/i915/intel_tex.c
deleted file mode 100644
index 5bd2806..0000000
--- a/i915/intel_tex.c
+++ /dev/null
@@ -1,877 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "glheader.h"
-#include "mtypes.h"
-#include "imports.h"
-#include "macros.h"
-#include "simple_list.h"
-#include "enums.h"
-#include "image.h"
-#include "texstore.h"
-#include "texformat.h"
-#include "teximage.h"
-#include "texmem.h"
-#include "texobj.h"
-#include "swrast/swrast.h"
-
-#include "mm.h"
-
-#include "intel_screen.h"
-#include "intel_batchbuffer.h"
-#include "intel_context.h"
-#include "intel_tex.h"
-#include "intel_ioctl.h"
-
-
-
-static GLboolean
-intelValidateClientStorage( intelContextPtr intel, GLenum target,
-			    GLint internalFormat,
-			    GLint srcWidth, GLint srcHeight, 
-			    GLenum format, GLenum type,  const void *pixels,
-			    const struct gl_pixelstore_attrib *packing,
-			    struct gl_texture_object *texObj,
-			    struct gl_texture_image *texImage)
-
-{
-   GLcontext *ctx = &intel->ctx;
-   int texelBytes;
-
-   if (0)
-      fprintf(stderr, "intformat %s format %s type %s\n",
-	      _mesa_lookup_enum_by_nr( internalFormat ),
-	      _mesa_lookup_enum_by_nr( format ),
-	      _mesa_lookup_enum_by_nr( type ));
-
-   if (!ctx->Unpack.ClientStorage)
-      return 0;
-
-   if (ctx->_ImageTransferState ||
-       texImage->IsCompressed ||
-       texObj->GenerateMipmap)
-      return 0;
-
-
-   /* This list is incomplete
-    */
-   switch ( internalFormat ) {
-   case GL_RGBA:
-      if ( format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV ) {
-	 texImage->TexFormat = &_mesa_texformat_argb8888;
-	 texelBytes = 4;
-      }
-      else
-	 return 0;
-      break;
-
-   case GL_RGB:
-      if ( format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 ) {
-	 texImage->TexFormat = &_mesa_texformat_rgb565;
-	 texelBytes = 2;
-      }
-      else
-	 return 0;
-      break;
-
-   case GL_YCBCR_MESA:
-      if ( format == GL_YCBCR_MESA && 
-	   type == GL_UNSIGNED_SHORT_8_8_REV_APPLE ) {
-	 texImage->TexFormat = &_mesa_texformat_ycbcr_rev;
-	 texelBytes = 2;
-      }
-      else if ( format == GL_YCBCR_MESA && 
-		(type == GL_UNSIGNED_SHORT_8_8_APPLE || 
-		 type == GL_UNSIGNED_BYTE)) {
-	 texImage->TexFormat = &_mesa_texformat_ycbcr;
-	 texelBytes = 2;
-      }
-      else
-	 return 0;
-      break;
-      
-	 
-   default:
-      return 0;
-   }
-
-   /* Could deal with these packing issues, but currently don't:
-    */
-   if (packing->SkipPixels || 
-       packing->SkipRows || 
-       packing->SwapBytes ||
-       packing->LsbFirst) {
-      return 0;
-   }
-
-   {      
-      GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth,
-						  format, type);
-
-      
-      if (0)
-	 fprintf(stderr, "%s: srcRowStride %d/%x\n", 
-		 __FUNCTION__, srcRowStride, srcRowStride);
-
-      /* Could check this later in upload, pitch restrictions could be
-       * relaxed, but would need to store the image pitch somewhere,
-       * as packing details might change before image is uploaded:
-       */
-      if (!intelIsAgpMemory( intel, pixels, srcHeight * srcRowStride ) ||
-	  (srcRowStride & 63))
-	 return 0;
-
-
-      /* Have validated that _mesa_transfer_teximage would be a straight
-       * memcpy at this point.  NOTE: future calls to TexSubImage will
-       * overwrite the client data.  This is explicitly mentioned in the
-       * extension spec.
-       */
-      texImage->Data = (void *)pixels;
-      texImage->IsClientData = GL_TRUE;
-      texImage->RowStride = srcRowStride / texelBytes;
-      return 1;
-   }
-}
-
- 
-
-static void intelTexImage1D( GLcontext *ctx, GLenum target, GLint level,
-			    GLint internalFormat,
-			    GLint width, GLint border,
-			    GLenum format, GLenum type, const GLvoid *pixels,
-			    const struct gl_pixelstore_attrib *packing,
-			    struct gl_texture_object *texObj,
-			    struct gl_texture_image *texImage )
-{
-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-
-   assert(t);
-   intelFlush( ctx );
-   driSwapOutTextureObject( t );
-
-   texImage->IsClientData = GL_FALSE;
-
-   _mesa_store_teximage1d( ctx, target, level, internalFormat,
-			   width, border, format, type,
-			   pixels, packing, texObj, texImage );
-
-   t->dirty_images[0] |= (1 << level);
-}
-
-static void intelTexSubImage1D( GLcontext *ctx, 
-			       GLenum target,
-			       GLint level,	
-			       GLint xoffset,
-				GLsizei width,
-			       GLenum format, GLenum type,
-			       const GLvoid *pixels,
-			       const struct gl_pixelstore_attrib *packing,
-			       struct gl_texture_object *texObj,
-			       struct gl_texture_image *texImage )
-{
-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-
-   assert(t);
-   intelFlush( ctx );
-   driSwapOutTextureObject( t );
-
-   _mesa_store_texsubimage1d(ctx, target, level, xoffset, width, 
-			     format, type, pixels, packing, texObj,
-			     texImage);
-}
-
-
-/* Handles 2D, CUBE, RECT:
- */
-static void intelTexImage2D( GLcontext *ctx, GLenum target, GLint level,
-			    GLint internalFormat,
-			    GLint width, GLint height, GLint border,
-			    GLenum format, GLenum type, const GLvoid *pixels,
-			    const struct gl_pixelstore_attrib *packing,
-			    struct gl_texture_object *texObj,
-			    struct gl_texture_image *texImage )
-{
-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-   GLuint face;
-
-   /* which cube face or ordinary 2D image */
-   switch (target) {
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
-      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
-      ASSERT(face < 6);
-      break;
-   default:
-      face = 0;
-   }
-
-   assert(t);
-   intelFlush( ctx );
-   driSwapOutTextureObject( t );
-   texImage->IsClientData = GL_FALSE;
-
-   if (intelValidateClientStorage( INTEL_CONTEXT(ctx), target, 
-				   internalFormat, 
-				   width, height, 
-				   format, type, pixels, 
-				   packing, texObj, texImage)) {
-      if (INTEL_DEBUG & DEBUG_TEXTURE)
-	 fprintf(stderr, "%s: Using client storage\n", __FUNCTION__); 
-   }
-   else {
-      _mesa_store_teximage2d( ctx, target, level, internalFormat,
-			      width, height, border, format, type,
-			      pixels, packing, texObj, texImage );
-
-      t->dirty_images[face] |= (1 << level);
-   }
-}
-
-static void intelTexSubImage2D( GLcontext *ctx, 
-			       GLenum target,
-			       GLint level,	
-			       GLint xoffset, GLint yoffset,
-			       GLsizei width, GLsizei height,
-			       GLenum format, GLenum type,
-			       const GLvoid *pixels,
-			       const struct gl_pixelstore_attrib *packing,
-			       struct gl_texture_object *texObj,
-			       struct gl_texture_image *texImage )
-{
-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-   GLuint face;
-
-   /* which cube face or ordinary 2D image */
-   switch (target) {
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
-      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
-      ASSERT(face < 6);
-      break;
-   default:
-      face = 0;
-   }
-
-   if (texImage->IsClientData &&
-       (char *)pixels == (char *)texImage->Data + 
-       ((xoffset + yoffset * texImage->RowStride) * 
-	texImage->TexFormat->TexelBytes)) {
-
-      /* Notification only - no upload required */
-   }
-   else {
-      assert( t ); /* this _should_ be true */
-      intelFlush( ctx );
-      driSwapOutTextureObject( t );
-
-      _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width, 
-				height, format, type, pixels, packing, texObj,
-				texImage);
-
-      t->dirty_images[face] |= (1 << level);
-   }
-}
-
-static void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
-                              GLint internalFormat,
-                              GLint width, GLint height, GLint border,
-                              GLsizei imageSize, const GLvoid *data,
-                              struct gl_texture_object *texObj,
-                              struct gl_texture_image *texImage )
-{
-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-   GLuint face;
-
-   /* which cube face or ordinary 2D image */
-   switch (target) {
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
-      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
-      ASSERT(face < 6);
-      break;
-   default:
-      face = 0;
-   }
-
-   assert(t);
-   intelFlush( ctx );
-   
-   driSwapOutTextureObject( t );
-   texImage->IsClientData = GL_FALSE;
-
-   if (INTEL_DEBUG & DEBUG_TEXTURE)
-     fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__); 
-   
-   _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width,
-				     height, border, imageSize, data, texObj, texImage);
-   
-   t->dirty_images[face] |= (1 << level);
-}
-
-
-static void intelCompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
-                                 GLint xoffset, GLint yoffset,
-                                 GLsizei width, GLsizei height,
-                                 GLenum format,
-                                 GLsizei imageSize, const GLvoid *data,
-                                 struct gl_texture_object *texObj,
-                                 struct gl_texture_image *texImage )
-{
-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-   GLuint face;
-
-
-   /* which cube face or ordinary 2D image */
-   switch (target) {
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
-      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
-      ASSERT(face < 6);
-      break;
-   default:
-      face = 0;
-   }
-
-   assert( t ); /* this _should_ be true */
-   intelFlush( ctx );
-   driSwapOutTextureObject( t );
-   
-   _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
-					height, format, imageSize, data, texObj, texImage);
-   
-   t->dirty_images[face] |= (1 << level);
-}
-
-
-static void intelTexImage3D( GLcontext *ctx, GLenum target, GLint level,
-                            GLint internalFormat,
-                            GLint width, GLint height, GLint depth,
-                            GLint border,
-                            GLenum format, GLenum type, const GLvoid *pixels,
-                            const struct gl_pixelstore_attrib *packing,
-                            struct gl_texture_object *texObj,
-                            struct gl_texture_image *texImage )
-{
-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-
-   assert(t);
-   driSwapOutTextureObject( t );
-   texImage->IsClientData = GL_FALSE;
-
-   _mesa_store_teximage3d(ctx, target, level, internalFormat,
-			  width, height, depth, border,
-			  format, type, pixels,
-			  &ctx->Unpack, texObj, texImage);
-   
-   t->dirty_images[0] |= (1 << level);
-}
-
-
-static void
-intelTexSubImage3D( GLcontext *ctx, GLenum target, GLint level,
-                   GLint xoffset, GLint yoffset, GLint zoffset,
-                   GLsizei width, GLsizei height, GLsizei depth,
-                   GLenum format, GLenum type,
-                   const GLvoid *pixels,
-                   const struct gl_pixelstore_attrib *packing,
-                   struct gl_texture_object *texObj,
-                   struct gl_texture_image *texImage )
-{
-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-
-   assert( t ); /* this _should_ be true */
-   driSwapOutTextureObject( t );
-
-   _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset,
-                             width, height, depth,
-                             format, type, pixels, packing, texObj, texImage);
-
-   t->dirty_images[0] |= (1 << level);
-}
-
-
-
-
-static void intelDeleteTexture( GLcontext *ctx, struct gl_texture_object *tObj )
-{
-   driTextureObject * t = (driTextureObject *) tObj->DriverData;
-
-   if ( t != NULL ) {
-      intelFlush( ctx );
-      driDestroyTextureObject( t );
-   }
-   
-   /* Free mipmap images and the texture object itself */
-   _mesa_delete_texture_object(ctx, tObj);
-}
-
-
-static const struct gl_texture_format *
-intelChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
-			 GLenum format, GLenum type )
-{
-   intelContextPtr intel = INTEL_CONTEXT( ctx );
-   const GLboolean do32bpt = ( intel->intelScreen->cpp == 4 &&
-			       intel->intelScreen->tex.size > 4*1024*1024);
-
-   switch ( internalFormat ) {
-   case 4:
-   case GL_RGBA:
-   case GL_COMPRESSED_RGBA:
-      if ( format == GL_BGRA ) {
-	 if ( type == GL_UNSIGNED_INT_8_8_8_8_REV ) {
-	    return &_mesa_texformat_argb8888;
-	 }
-         else if ( type == GL_UNSIGNED_SHORT_4_4_4_4_REV ) {
-            return &_mesa_texformat_argb4444;
-	 }
-         else if ( type == GL_UNSIGNED_SHORT_1_5_5_5_REV ) {
-	    return &_mesa_texformat_argb1555;
-	 }
-      }
-      return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444;
-
-   case 3:
-   case GL_RGB:
-   case GL_COMPRESSED_RGB:
-      if ( format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 ) {
-	 return &_mesa_texformat_rgb565;
-      }
-      return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_rgb565;
-
-   case GL_RGBA8:
-   case GL_RGB10_A2:
-   case GL_RGBA12:
-   case GL_RGBA16:
-      return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444;
-
-   case GL_RGBA4:
-   case GL_RGBA2:
-      return &_mesa_texformat_argb4444;
-
-   case GL_RGB5_A1:
-      return &_mesa_texformat_argb1555;
-
-   case GL_RGB8:
-   case GL_RGB10:
-   case GL_RGB12:
-   case GL_RGB16:
-      return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_rgb565;
-
-   case GL_RGB5:
-   case GL_RGB4:
-   case GL_R3_G3_B2:
-      return &_mesa_texformat_rgb565;
-
-   case GL_ALPHA:
-   case GL_ALPHA4:
-   case GL_ALPHA8:
-   case GL_ALPHA12:
-   case GL_ALPHA16:
-   case GL_COMPRESSED_ALPHA:
-      return &_mesa_texformat_a8;
-
-   case 1:
-   case GL_LUMINANCE:
-   case GL_LUMINANCE4:
-   case GL_LUMINANCE8:
-   case GL_LUMINANCE12:
-   case GL_LUMINANCE16:
-   case GL_COMPRESSED_LUMINANCE:
-      return &_mesa_texformat_l8;
-
-   case 2:
-   case GL_LUMINANCE_ALPHA:
-   case GL_LUMINANCE4_ALPHA4:
-   case GL_LUMINANCE6_ALPHA2:
-   case GL_LUMINANCE8_ALPHA8:
-   case GL_LUMINANCE12_ALPHA4:
-   case GL_LUMINANCE12_ALPHA12:
-   case GL_LUMINANCE16_ALPHA16:
-   case GL_COMPRESSED_LUMINANCE_ALPHA:
-      return &_mesa_texformat_al88;
-
-   case GL_INTENSITY:
-   case GL_INTENSITY4:
-   case GL_INTENSITY8:
-   case GL_INTENSITY12:
-   case GL_INTENSITY16:
-   case GL_COMPRESSED_INTENSITY:
-      return &_mesa_texformat_i8;
-
-   case GL_YCBCR_MESA:
-      if (type == GL_UNSIGNED_SHORT_8_8_MESA ||
-	  type == GL_UNSIGNED_BYTE)
-         return &_mesa_texformat_ycbcr;
-      else
-         return &_mesa_texformat_ycbcr_rev;
-
-   case GL_COMPRESSED_RGB_FXT1_3DFX:
-     return &_mesa_texformat_rgb_fxt1;
-   case GL_COMPRESSED_RGBA_FXT1_3DFX:
-     return &_mesa_texformat_rgba_fxt1;
-
-   case GL_RGB_S3TC:
-   case GL_RGB4_S3TC:
-   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
-     return &_mesa_texformat_rgb_dxt1;
-
-   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
-     return &_mesa_texformat_rgba_dxt1;
-
-   case GL_RGBA_S3TC:
-   case GL_RGBA4_S3TC:
-   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
-     return &_mesa_texformat_rgba_dxt3;
-
-   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
-      return &_mesa_texformat_rgba_dxt5;
-
-   case GL_DEPTH_COMPONENT:
-   case GL_DEPTH_COMPONENT16:
-   case GL_DEPTH_COMPONENT24:
-   case GL_DEPTH_COMPONENT32:
-      return &_mesa_texformat_z16;
-
-   default:
-      fprintf(stderr, "unexpected texture format %s in %s\n", 
-	      _mesa_lookup_enum_by_nr(internalFormat),
-	      __FUNCTION__);
-      return NULL;
-   }
-
-   return NULL; /* never get here */
-}
-
-
-
-void intelDestroyTexObj(intelContextPtr intel, intelTextureObjectPtr t)
-{
-   unsigned   i;
-
-   if ( intel == NULL ) 
-      return;
-
-   if ( t->age > intel->dirtyAge )
-      intel->dirtyAge = t->age;
-
-   for ( i = 0 ; i < MAX_TEXTURE_UNITS ; i++ ) {
-      if ( t == intel->CurrentTexObj[ i ] ) 
-	 intel->CurrentTexObj[ i ] = NULL;
-   }
-}
-
-
-
-/* Upload an image from mesa's internal copy.  Image may be 1D, 2D or
- * 3D.  Cubemaps are expanded elsewhere.
- */
-static void intelUploadTexImage( intelContextPtr intel,
-				 intelTextureObjectPtr t,
-				 const struct gl_texture_image *image,
-				 const GLuint offset )
-{
-
-   if (!image || !image->Data) 
-      return;
-
-   if (image->Depth == 1 && image->IsClientData) {
-      if (INTEL_DEBUG & DEBUG_TEXTURE)
-	 fprintf(stderr, "Blit uploading\n");
-
-      /* Do it with a blit.
-       */
-      intelEmitCopyBlitLocked( intel,
-			       image->TexFormat->TexelBytes,
-			       image->RowStride, /* ? */
-			       intelGetMemoryOffsetMESA( NULL, 0, image->Data ),
-			       t->Pitch / image->TexFormat->TexelBytes,
-			       intelGetMemoryOffsetMESA( NULL, 0, t->BufAddr + offset ),
-			       0, 0,
-			       0, 0,
-			       image->Width,
-			       image->Height);
-   }
-   else if (image->IsCompressed) {
-      GLuint row_len = 0;
-      GLubyte *dst = (GLubyte *)(t->BufAddr + offset);
-      GLubyte *src = (GLubyte *)image->Data;
-      GLuint j;
-
-      /* must always copy whole blocks (8/16 bytes) */
-      switch (image->InternalFormat) {
-	case GL_COMPRESSED_RGB_FXT1_3DFX:
-	case GL_COMPRESSED_RGBA_FXT1_3DFX:
-	case GL_RGB_S3TC:
-	case GL_RGB4_S3TC:
-	case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
-	case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
-	  row_len = (image->Width * 2 + 7) & ~7;
-	  break;
-	case GL_RGBA_S3TC:
-	case GL_RGBA4_S3TC:
-	case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
-	case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
-	  row_len = (image->Width * 4 + 15) & ~15;
-	  break;
-	default:
-	  fprintf(stderr,"Internal Compressed format not supported %d\n", image->InternalFormat);
-	  break;
-      }
-
-      if (INTEL_DEBUG & DEBUG_TEXTURE)
-	 fprintf(stderr, 
-		 "Upload image %dx%dx%d offset %xm row_len %x "
-		 "pitch %x depth_pitch %x\n",
-		 image->Width, image->Height, image->Depth, offset,
-		 row_len, t->Pitch, t->depth_pitch);
-
-      if (row_len) {
-	 for (j = 0 ; j < (image->Height + 3)/4 ; j++, dst += (t->Pitch)) {
-	   __memcpy(dst, src, row_len );
-	   src += row_len;
-	 }
-      }
-   }
-   /* Time for another vtbl entry:
-    */
-   else if (intel->intelScreen->deviceID == PCI_CHIP_I945_G ||
-            intel->intelScreen->deviceID == PCI_CHIP_I945_GM ||
-            intel->intelScreen->deviceID == PCI_CHIP_I945_GME ||
-            intel->intelScreen->deviceID == PCI_CHIP_G33_G ||
-            intel->intelScreen->deviceID == PCI_CHIP_Q33_G ||
-            intel->intelScreen->deviceID == PCI_CHIP_Q35_G) {
-      GLuint row_len = image->Width * image->TexFormat->TexelBytes;
-      GLubyte *dst = (GLubyte *)(t->BufAddr + offset);
-      GLubyte *src = (GLubyte *)image->Data;
-      GLuint d, j;
-
-      if (INTEL_DEBUG & DEBUG_TEXTURE)
-	 fprintf(stderr, 
-		 "Upload image %dx%dx%d offset %xm row_len %x "
-		 "pitch %x depth_pitch %x\n",
-		 image->Width, image->Height, image->Depth, offset,
-		 row_len, t->Pitch, t->depth_pitch);
-
-      if (row_len == t->Pitch) {
-	 memcpy( dst, src, row_len * image->Height * image->Depth );
-      }
-      else { 
-	 GLuint x = 0, y = 0;
-
-	 for (d = 0 ; d < image->Depth ; d++) {
-	    GLubyte *dst0 = dst + x + y * t->Pitch;
-
-	    for (j = 0 ; j < image->Height ; j++) {
-	       __memcpy(dst0, src, row_len );
-	       src += row_len;
-	       dst0 += t->Pitch;
-	    }
-
-	    x += MIN2(4, row_len); /* Guess: 4 byte minimum alignment */
-	    if (x > t->Pitch) {
-	       x = 0;
-	       y += image->Height;
-	    }
-	 }
-      }
-
-   }
-   else {
-      GLuint row_len = image->Width * image->TexFormat->TexelBytes;
-      GLubyte *dst = (GLubyte *)(t->BufAddr + offset);
-      GLubyte *src = (GLubyte *)image->Data;
-      GLuint d, j;
-
-      if (INTEL_DEBUG & DEBUG_TEXTURE)
-	 fprintf(stderr, 
-		 "Upload image %dx%dx%d offset %xm row_len %x "
-		 "pitch %x depth_pitch %x\n",
-		 image->Width, image->Height, image->Depth, offset,
-		 row_len, t->Pitch, t->depth_pitch);
-
-      if (row_len == t->Pitch) {
-	 for (d = 0; d < image->Depth; d++) {
-	    memcpy( dst, src, t->Pitch * image->Height );
-	    dst += t->depth_pitch;
-	    src += row_len * image->Height;
-	 }
-      }
-      else { 
-	 for (d = 0 ; d < image->Depth ; d++) {
-	    for (j = 0 ; j < image->Height ; j++) {
-	       __memcpy(dst, src, row_len );
-	       src += row_len;
-	       dst += t->Pitch;
-	    }
-
-	    dst += t->depth_pitch - (t->Pitch * image->Height);
-	 }
-      }
-   }
-}
-
-
-
-int intelUploadTexImages( intelContextPtr intel, 
-			  intelTextureObjectPtr t,
-			  GLuint face)
-{
-   const int numLevels = t->base.lastLevel - t->base.firstLevel + 1;
-   const struct gl_texture_image *firstImage = t->image[face][t->base.firstLevel].image;
-   int pitch = firstImage->RowStride * firstImage->TexFormat->TexelBytes;
-
-   /* Can we texture out of the existing client data? */
-   if ( numLevels == 1 &&
-	firstImage->IsClientData &&
-	(pitch & 3) == 0) {
-
-      if (INTEL_DEBUG & DEBUG_TEXTURE)
-	 fprintf(stderr, "AGP texturing from client memory\n");
-
-      t->TextureOffset = intelAgpOffsetFromVirtual( intel, firstImage->Data );
-      t->BufAddr = 0;
-      t->dirty = ~0;
-      return GL_TRUE;
-   }
-   else {
-      if (INTEL_DEBUG & DEBUG_TEXTURE) 
-	 fprintf(stderr, "Uploading client data to agp\n");
-
-      INTEL_FIREVERTICES( intel );
-      LOCK_HARDWARE( intel );
-
-      if ( t->base.memBlock == NULL ) {
-	 int heap;
-
-	 heap = driAllocateTexture( intel->texture_heaps, intel->nr_heaps,
-				    (driTextureObject *) t );
-	 if ( heap == -1 ) {
-	    UNLOCK_HARDWARE( intel );
-	    return GL_FALSE;
-	 }
-
-	 /* Set the base offset of the texture image */
-	 t->BufAddr = (GLubyte *) (intel->intelScreen->tex.map + 
-				   t->base.memBlock->ofs);
-	 t->TextureOffset = intel->intelScreen->tex.offset + t->base.memBlock->ofs;
-	 t->dirty = ~0;
-      }
-
-
-      /* Let the world know we've used this memory recently.
-       */
-      driUpdateTextureLRU( (driTextureObject *) t );
-
-
-      /* Upload any images that are new */
-      if (t->base.dirty_images[face]) {
-	 int i;
-
- 	 intelWaitForIdle( intel );
-	    
-	 for (i = 0 ; i < numLevels ; i++) { 
-	    int level = i + t->base.firstLevel;
-
-	    if (t->base.dirty_images[face] & (1<<level)) {
-
-	       const struct gl_texture_image *image = t->image[face][i].image;
-	       GLuint offset = t->image[face][i].offset;
-
-     	       if (INTEL_DEBUG & DEBUG_TEXTURE)
-	          fprintf(stderr, "upload level %d, offset %x\n", 
-			  level, offset);
-
-	       intelUploadTexImage( intel, t, image, offset );
-	    }
-	 }
-	 t->base.dirty_images[face] = 0;
-	 intel->perf_boxes |= I830_BOX_TEXTURE_LOAD;
-      }
-      
-      UNLOCK_HARDWARE( intel );
-      return GL_TRUE;
-   }
-}
-
-/**
- * Allocate a new texture object.
- * Called via ctx->Driver.NewTextureObject.
- * Note: this function will be called during context creation to
- * allocate the default texture objects.
- * Note: we could use containment here to 'derive' the driver-specific
- * texture object from the core mesa gl_texture_object.  Not done at this time.
- */
-static struct gl_texture_object *
-intelNewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
-{
-   struct gl_texture_object *obj = _mesa_new_texture_object(ctx, name, target);
-   INTEL_CONTEXT(ctx)->vtbl.alloc_tex_obj( obj );
-   return obj;
-}
-
-
-void intelInitTextureFuncs( struct dd_function_table *functions )
-{
-   functions->NewTextureObject          = intelNewTextureObject;
-   functions->ChooseTextureFormat       = intelChooseTextureFormat;
-   functions->TexImage1D                = intelTexImage1D;
-   functions->TexImage2D                = intelTexImage2D;
-   functions->TexImage3D                = intelTexImage3D;
-   functions->TexSubImage1D             = intelTexSubImage1D;
-   functions->TexSubImage2D             = intelTexSubImage2D;
-   functions->TexSubImage3D             = intelTexSubImage3D;
-   functions->CopyTexImage1D            = _swrast_copy_teximage1d;
-   functions->CopyTexImage2D            = _swrast_copy_teximage2d;
-   functions->CopyTexSubImage1D         = _swrast_copy_texsubimage1d;
-   functions->CopyTexSubImage2D         = _swrast_copy_texsubimage2d;
-   functions->CopyTexSubImage3D         = _swrast_copy_texsubimage3d;
-   functions->DeleteTexture             = intelDeleteTexture;
-   functions->UpdateTexturePalette      = NULL;
-   functions->IsTextureResident         = driIsTextureResident;
-   functions->TestProxyTexImage         = _mesa_test_proxy_teximage;
-   functions->DeleteTexture             = intelDeleteTexture;
-   functions->CompressedTexImage2D      = intelCompressedTexImage2D;
-   functions->CompressedTexSubImage2D   = intelCompressedTexSubImage2D;
-}
diff --git a/i915/intel_texmem.c b/i915/intel_texmem.c
deleted file mode 100644
index 09beec9..0000000
--- a/i915/intel_texmem.c
+++ /dev/null
@@ -1,72 +0,0 @@
-#include "texmem.h"
-#include "simple_list.h"
-#include "imports.h"
-#include "macros.h"
-
-#include "intel_tex.h"
-
-static GLuint
-driLog2( GLuint n )
-{
-   GLuint log2;
-
-   for ( log2 = 1 ; n > 1 ; log2++ ) {
-      n >>= 1;
-   }
-
-   return log2;
-}
-
-static void calculate_heap_size( driTexHeap * heap, unsigned size, 
-				 unsigned nr_regions, unsigned alignmentShift )
-{
-   unsigned     l;
-
-   l = driLog2( (size - 1) / nr_regions );
-   if ( l < alignmentShift )
-   {
-      l = alignmentShift;
-   }
-
-   heap->logGranularity = l;
-   heap->size = size & ~((1L << l) - 1);
-}
-
-
-GLboolean 
-intel_driReinitTextureHeap( driTexHeap *heap,
-			    unsigned size )
-{
-   driTextureObject *t, *tmp;
-
-   /* Kick out everything:
-    */
-   foreach_s ( t, tmp, & heap->texture_objects ) {
-      if ( t->tObj != NULL ) {
-	 driSwapOutTextureObject( t );
-      }
-      else {
-	 driDestroyTextureObject( t );
-      }
-   }
-   
-   /* Destroy the memory manager:
-    */
-   mmDestroy( heap->memory_heap );
-      
-   /* Recreate the memory manager:
-    */
-   calculate_heap_size(heap, size, heap->nrRegions, heap->alignmentShift);
-   heap->memory_heap = mmInit( 0, heap->size );
-   if ( heap->memory_heap == NULL ) {
-      fprintf(stderr, "driReinitTextureHeap: couldn't recreate memory heap\n");
-      FREE( heap );
-      return GL_FALSE;
-   }
-
-   make_empty_list( & heap->texture_objects );
-
-   return GL_TRUE;
-}
-
-
diff --git a/i915/intel_tris.c b/i915/intel_tris.c
index b2787ee..bbb4e0f 100644
--- a/i915/intel_tris.c
+++ b/i915/intel_tris.c
@@ -29,6 +29,8 @@
 #include "context.h"
 #include "macros.h"
 #include "enums.h"
+#include "texobj.h"
+#include "state.h"
 #include "dd.h"
 
 #include "swrast/swrast.h"
@@ -38,19 +40,117 @@
 #include "tnl/t_vertex.h"
 
 #include "intel_screen.h"
+#include "intel_context.h"
 #include "intel_tris.h"
 #include "intel_batchbuffer.h"
+#include "intel_buffers.h"
 #include "intel_reg.h"
 #include "intel_span.h"
+#include "intel_tex.h"
 
-/* XXX we shouldn't include these headers in this file, but we need them
- * for fallbackStrings, below.
+static void intelRenderPrimitive(GLcontext * ctx, GLenum prim);
+static void intelRasterPrimitive(GLcontext * ctx, GLenum rprim,
+                                 GLuint hwprim);
+
+/* Close the open inline primitive: write the final command into the slot at
+ * prim.start_ptr, or rewind the batch when under 8 bytes were emitted. */
+static void
+intel_flush_inline_primitive(struct intel_context *intel)
+{
+   GLuint used = intel->batch->ptr - intel->prim.start_ptr;   /* bytes emitted since the reserved slot */
+
+   assert(intel->prim.primitive != ~0);   /* ~0 is the "no primitive open" value stored on exit below */
+
+/*    _mesa_printf("/\n"); */
+
+   if (used < 8)
+      goto do_discard;
+
+   *(int *) intel->prim.start_ptr = (_3DPRIMITIVE |
+                                     intel->prim.primitive | (used / 4 - 2));   /* OR in (dword count - 2) */
+
+   goto finished;
+
+ do_discard:
+   intel->batch->ptr -= used;   /* batch pointer returns to prim.start_ptr */
+
+ finished:
+   intel->prim.primitive = ~0;   /* reset state: no primitive, no slot, no flush hook */
+   intel->prim.start_ptr = 0;
+   intel->prim.flush = 0;
+}
+
+
+/* Begin an inline primitive: emit state, then reserve the batch slot for its command.
  */
-#include "i830_context.h"
-#include "i915_context.h"
+void
+intelStartInlinePrimitive(struct intel_context *intel,
+                          GLuint prim, GLuint batch_flags)
+{
+   BATCH_LOCALS;
+
+   intel_wait_flips(intel);
+
+   intel->vtbl.emit_state(intel);
+
+   intel->no_batch_wrap = GL_TRUE;   /* held only while the slot is reserved; cleared below */
+
+/*    _mesa_printf("%s *", __progname); */
+
+   /* Emit a slot which will be filled with the inline primitive
+    * command later.
+    */
+   BEGIN_BATCH(2, batch_flags);
+   OUT_BATCH(0);   /* first of the two dwords; stays 0 (presumably padding - TODO confirm) */
+
+   assert((intel->batch->dirty_state & (1<<1)) == 0);
+
+   intel->prim.start_ptr = intel->batch->ptr;   /* taken between the two OUT_BATCH calls */
+   intel->prim.primitive = prim;
+   intel->prim.flush = intel_flush_inline_primitive;   /* hook that later fills in or discards the slot */
+
+   OUT_BATCH(0);   /* placeholder, presumably landing at prim.start_ptr - TODO confirm */
+   ADVANCE_BATCH();
+
+   intel->no_batch_wrap = GL_FALSE;
+
+/*    _mesa_printf(">"); */
+}
+
+/* Finish the open inline primitive, flush the batch, then start the same primitive again. */
+void
+intelWrapInlinePrimitive(struct intel_context *intel)
+{
+   GLuint prim = intel->prim.primitive;   /* saved first: the flush below resets prim.primitive to ~0 */
+   enum cliprect_mode cliprect_mode = intel->batch->cliprect_mode;   /* read before the batch is flushed */
+
+   intel_flush_inline_primitive(intel);
+   intel_batchbuffer_flush(intel->batch);
+   intelStartInlinePrimitive(intel, prim, cliprect_mode);  /* ??? (cliprect_mode goes in the batch_flags parameter) */
+}
+/* Reserve `dwords` GLuints of batch space for the open inline primitive and return a pointer to them. */
+GLuint *
+intelExtendInlinePrimitive(struct intel_context *intel, GLuint dwords)
+{
+   GLuint sz = dwords * sizeof(GLuint);   /* requested size in bytes */
+   GLuint *ptr;
+
+   assert(intel->prim.flush == intel_flush_inline_primitive);   /* this hook is installed by intelStartInlinePrimitive */
+
+   if (intel_batchbuffer_space(intel->batch) < sz)
+      intelWrapInlinePrimitive(intel);   /* not enough room: flush and restart the primitive */
+
+/*    _mesa_printf("."); */
+
+   intel->vtbl.assert_not_dirty(intel);
+
+   ptr = (GLuint *) intel->batch->ptr;
+   intel->batch->ptr += sz;   /* the caller writes the vertex dwords into [ptr, ptr + dwords) */
+
+   return ptr;
+}
+
 
-static void intelRenderPrimitive( GLcontext *ctx, GLenum prim );
-static void intelRasterPrimitive( GLcontext *ctx, GLenum rprim, GLuint hwprim );
 
 /***********************************************************************
  *                    Emit primitives as inline vertices               *
@@ -69,75 +169,81 @@ do {								\
 #else
 #define COPY_DWORDS( j, vb, vertsize, v )	\
 do {						\
-   if (0) fprintf(stderr, "\n");	\
    for ( j = 0 ; j < vertsize ; j++ ) {		\
-      if (0) fprintf(stderr, "   -- v(%d): %x/%f\n",j,	\
-	      ((GLuint *)v)[j],			\
-	      ((GLfloat *)v)[j]);		\
       vb[j] = ((GLuint *)v)[j];			\
    }						\
    vb += vertsize;				\
 } while (0)
 #endif
 
-static void __inline__ intel_draw_quad( intelContextPtr intel,
-					intelVertexPtr v0,
-					intelVertexPtr v1,
-					intelVertexPtr v2,
-					intelVertexPtr v3 )
+static void
+intel_draw_quad(struct intel_context *intel,
+                intelVertexPtr v0,
+                intelVertexPtr v1, intelVertexPtr v2, intelVertexPtr v3)
 {
    GLuint vertsize = intel->vertex_size;
-   GLuint *vb = intelExtendInlinePrimitive( intel, 6 * vertsize );
+   GLuint *vb = intelExtendInlinePrimitive(intel, 6 * vertsize);
    int j;
 
-   COPY_DWORDS( j, vb, vertsize, v0 );
-   COPY_DWORDS( j, vb, vertsize, v1 );
-   COPY_DWORDS( j, vb, vertsize, v3 );
-   COPY_DWORDS( j, vb, vertsize, v1 );
-   COPY_DWORDS( j, vb, vertsize, v2 );
-   COPY_DWORDS( j, vb, vertsize, v3 );
+   COPY_DWORDS(j, vb, vertsize, v0);
+   COPY_DWORDS(j, vb, vertsize, v1);
+
+   /* If smooth shading, draw like a trifan which gives better
+    * rasterization.  Otherwise draw as two triangles with provoking
+    * vertex in third position as required for flat shading.
+    */
+   if (intel->ctx.Light.ShadeModel == GL_FLAT) {
+      COPY_DWORDS(j, vb, vertsize, v3);
+      COPY_DWORDS(j, vb, vertsize, v1);
+   }
+   else {
+      COPY_DWORDS(j, vb, vertsize, v2);
+      COPY_DWORDS(j, vb, vertsize, v0);
+   }
+
+   COPY_DWORDS(j, vb, vertsize, v2);
+   COPY_DWORDS(j, vb, vertsize, v3);
 }
 
-static void __inline__ intel_draw_triangle( intelContextPtr intel,
-					    intelVertexPtr v0,
-					    intelVertexPtr v1,
-					    intelVertexPtr v2 )
+static void
+intel_draw_triangle(struct intel_context *intel,
+                    intelVertexPtr v0, intelVertexPtr v1, intelVertexPtr v2)
 {
    GLuint vertsize = intel->vertex_size;
-   GLuint *vb = intelExtendInlinePrimitive( intel, 3 * vertsize );
+   GLuint *vb = intelExtendInlinePrimitive(intel, 3 * vertsize);
    int j;
-   
-   COPY_DWORDS( j, vb, vertsize, v0 );
-   COPY_DWORDS( j, vb, vertsize, v1 );
-   COPY_DWORDS( j, vb, vertsize, v2 );
+
+   COPY_DWORDS(j, vb, vertsize, v0);
+   COPY_DWORDS(j, vb, vertsize, v1);
+   COPY_DWORDS(j, vb, vertsize, v2);
 }
 
 
-static __inline__ void intel_draw_line( intelContextPtr intel,
-					intelVertexPtr v0,
-					intelVertexPtr v1 )
+static void
+intel_draw_line(struct intel_context *intel,
+                intelVertexPtr v0, intelVertexPtr v1)
 {
    GLuint vertsize = intel->vertex_size;
-   GLuint *vb = intelExtendInlinePrimitive( intel, 2 * vertsize );
+   GLuint *vb = intelExtendInlinePrimitive(intel, 2 * vertsize);
    int j;
 
-   COPY_DWORDS( j, vb, vertsize, v0 );
-   COPY_DWORDS( j, vb, vertsize, v1 );
+   COPY_DWORDS(j, vb, vertsize, v0);
+   COPY_DWORDS(j, vb, vertsize, v1);
 }
 
 
-static __inline__ void intel_draw_point( intelContextPtr intel,
-					 intelVertexPtr v0 )
+static void
+intel_draw_point(struct intel_context *intel, intelVertexPtr v0)
 {
    GLuint vertsize = intel->vertex_size;
-   GLuint *vb = intelExtendInlinePrimitive( intel, vertsize );
+   GLuint *vb = intelExtendInlinePrimitive(intel, vertsize);
    int j;
 
    /* Adjust for sub pixel position -- still required for conform. */
-   *(float *)&vb[0] = v0->v.x - 0.125;
-   *(float *)&vb[1] = v0->v.y - 0.125;
-   for (j = 2 ; j < vertsize ; j++)
-     vb[j] = v0->ui[j];
+   *(float *) &vb[0] = v0->v.x;
+   *(float *) &vb[1] = v0->v.y;
+   for (j = 2; j < vertsize; j++)
+      vb[j] = v0->ui[j];
 }
 
 
@@ -146,13 +252,17 @@ static __inline__ void intel_draw_point( intelContextPtr intel,
  *                Fixup for ARB_point_parameters                       *
  ***********************************************************************/
 
-static void intel_atten_point( intelContextPtr intel, intelVertexPtr v0 )
+/* Currently not working - VERT_ATTRIB_POINTSIZE isn't correctly
+ * represented in the fragment program InputsRead field.
+ */
+static void
+intel_atten_point(struct intel_context *intel, intelVertexPtr v0)
 {
    GLcontext *ctx = &intel->ctx;
    GLfloat psz[4], col[4], restore_psz, restore_alpha;
 
-   _tnl_get_attr( ctx, v0, _TNL_ATTRIB_POINTSIZE, psz );
-   _tnl_get_attr( ctx, v0, _TNL_ATTRIB_COLOR0, col );
+   _tnl_get_attr(ctx, v0, _TNL_ATTRIB_POINTSIZE, psz);
+   _tnl_get_attr(ctx, v0, _TNL_ATTRIB_COLOR0, col);
 
    restore_psz = psz[0];
    restore_alpha = col[3];
@@ -170,19 +280,19 @@ static void intel_atten_point( intelContextPtr intel, intelVertexPtr v0 )
       psz[0] = 1.0;
 
    if (restore_psz != psz[0] || restore_alpha != col[3]) {
-      _tnl_set_attr( ctx, v0, _TNL_ATTRIB_POINTSIZE, psz);
-      _tnl_set_attr( ctx, v0, _TNL_ATTRIB_COLOR0, col);
-   
-      intel_draw_point( intel, v0 );
+      _tnl_set_attr(ctx, v0, _TNL_ATTRIB_POINTSIZE, psz);
+      _tnl_set_attr(ctx, v0, _TNL_ATTRIB_COLOR0, col);
+
+      intel_draw_point(intel, v0);
 
       psz[0] = restore_psz;
       col[3] = restore_alpha;
 
-      _tnl_set_attr( ctx, v0, _TNL_ATTRIB_POINTSIZE, psz);
-      _tnl_set_attr( ctx, v0, _TNL_ATTRIB_COLOR0, col);
+      _tnl_set_attr(ctx, v0, _TNL_ATTRIB_POINTSIZE, psz);
+      _tnl_set_attr(ctx, v0, _TNL_ATTRIB_COLOR0, col);
    }
    else
-      intel_draw_point( intel, v0 );
+      intel_draw_point(intel, v0);
 }
 
 
@@ -195,45 +305,59 @@ static void intel_atten_point( intelContextPtr intel, intelVertexPtr v0 )
 
 
 
-static void intel_wpos_triangle( intelContextPtr intel,
-				 intelVertexPtr v0,
-				 intelVertexPtr v1,
-				 intelVertexPtr v2 )
+static void
+intel_wpos_triangle(struct intel_context *intel,
+                    intelVertexPtr v0, intelVertexPtr v1, intelVertexPtr v2)
 {
    GLuint offset = intel->wpos_offset;
    GLuint size = intel->wpos_size;
-   
-   __memcpy( ((char *)v0) + offset, v0, size );
-   __memcpy( ((char *)v1) + offset, v1, size );
-   __memcpy( ((char *)v2) + offset, v2, size );
+   GLfloat *v0_wpos = (GLfloat *)((char *)v0 + offset);
+   GLfloat *v1_wpos = (GLfloat *)((char *)v1 + offset);
+   GLfloat *v2_wpos = (GLfloat *)((char *)v2 + offset);
+
+   __memcpy(v0_wpos, v0, size);
+   __memcpy(v1_wpos, v1, size);
+   __memcpy(v2_wpos, v2, size);
+
+   v0_wpos[1] = -v0_wpos[1] + intel->driDrawable->h;
+   v1_wpos[1] = -v1_wpos[1] + intel->driDrawable->h;
+   v2_wpos[1] = -v2_wpos[1] + intel->driDrawable->h;
+
 
-   intel_draw_triangle( intel, v0, v1, v2 );
+   intel_draw_triangle(intel, v0, v1, v2);
 }
 
 
-static void intel_wpos_line( intelContextPtr intel,
-			     intelVertexPtr v0,
-			     intelVertexPtr v1 )
+static void
+intel_wpos_line(struct intel_context *intel,
+                intelVertexPtr v0, intelVertexPtr v1)
 {
    GLuint offset = intel->wpos_offset;
    GLuint size = intel->wpos_size;
+   GLfloat *v0_wpos = (GLfloat *)((char *)v0 + offset);
+   GLfloat *v1_wpos = (GLfloat *)((char *)v1 + offset);
+
+   __memcpy(v0_wpos, v0, size);
+   __memcpy(v1_wpos, v1, size);
 
-   __memcpy( ((char *)v0) + offset, v0, size );
-   __memcpy( ((char *)v1) + offset, v1, size );
+   v0_wpos[1] = -v0_wpos[1] + intel->driDrawable->h;
+   v1_wpos[1] = -v1_wpos[1] + intel->driDrawable->h;
 
-   intel_draw_line( intel, v0, v1 );
+   intel_draw_line(intel, v0, v1);
 }
 
 
-static void intel_wpos_point( intelContextPtr intel,
-			      intelVertexPtr v0 )
+static void
+intel_wpos_point(struct intel_context *intel, intelVertexPtr v0)
 {
    GLuint offset = intel->wpos_offset;
    GLuint size = intel->wpos_size;
+   GLfloat *v0_wpos = (GLfloat *)((char *)v0 + offset);
 
-   __memcpy( ((char *)v0) + offset, v0, size );
+   __memcpy(v0_wpos, v0, size);
+   v0_wpos[1] = -v0_wpos[1] + intel->driDrawable->h;
 
-   intel_draw_point( intel, v0 );
+   intel_draw_point(intel, v0);
 }
 
 
@@ -290,11 +414,12 @@ do { 						\
 #define INTEL_MAX_TRIFUNC	0x10
 
 
-static struct {
-   tnl_points_func	        points;
-   tnl_line_func		line;
-   tnl_triangle_func	triangle;
-   tnl_quad_func		quad;
+static struct
+{
+   tnl_points_func points;
+   tnl_line_func line;
+   tnl_triangle_func triangle;
+   tnl_quad_func quad;
 } rast_tab[INTEL_MAX_TRIFUNC];
 
 
@@ -355,10 +480,10 @@ do {							\
 #define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx]
 
 #define LOCAL_VARS(n)							\
-   intelContextPtr intel = INTEL_CONTEXT(ctx);				\
-   GLuint color[n], spec[n];						\
-   GLuint coloroffset = intel->coloroffset;		\
-   GLboolean specoffset = intel->specoffset;			\
+   struct intel_context *intel = intel_context(ctx);			\
+   GLuint color[n] = { 0, }, spec[n] = { 0, };				\
+   GLuint coloroffset = intel->coloroffset;				\
+   GLboolean specoffset = intel->specoffset;				\
    (void) color; (void) spec; (void) coloroffset; (void) specoffset;
 
 
@@ -366,7 +491,7 @@ do {							\
  *                Helpers for rendering unfilled primitives            *
  ***********************************************************************/
 
-static const GLuint hw_prim[GL_POLYGON+1] = {
+static const GLuint hw_prim[GL_POLYGON + 1] = {
    PRIM3D_POINTLIST,
    PRIM3D_LINELIST,
    PRIM3D_LINELIST,
@@ -456,7 +581,8 @@ static const GLuint hw_prim[GL_POLYGON+1] = {
 #include "tnl_dd/t_dd_tritmp.h"
 
 
-static void init_rast_tab( void )
+static void
+init_rast_tab(void)
 {
    init();
    init_offset();
@@ -487,10 +613,8 @@ static void init_rast_tab( void )
  * primitives.
  */
 static void
-intel_fallback_tri( intelContextPtr intel,
-		   intelVertex *v0,
-		   intelVertex *v1,
-		   intelVertex *v2 )
+intel_fallback_tri(struct intel_context *intel,
+                   intelVertex * v0, intelVertex * v1, intelVertex * v2)
 {
    GLcontext *ctx = &intel->ctx;
    SWvertex v[3];
@@ -498,19 +622,20 @@ intel_fallback_tri( intelContextPtr intel,
    if (0)
       fprintf(stderr, "\n%s\n", __FUNCTION__);
 
-   _swsetup_Translate( ctx, v0, &v[0] );
-   _swsetup_Translate( ctx, v1, &v[1] );
-   _swsetup_Translate( ctx, v2, &v[2] );
-   intelSpanRenderStart( ctx );
-   _swrast_Triangle( ctx, &v[0], &v[1], &v[2] );
-   intelSpanRenderFinish( ctx );
+   INTEL_FIREVERTICES(intel);
+
+   _swsetup_Translate(ctx, v0, &v[0]);
+   _swsetup_Translate(ctx, v1, &v[1]);
+   _swsetup_Translate(ctx, v2, &v[2]);
+   intelSpanRenderStart(ctx);
+   _swrast_Triangle(ctx, &v[0], &v[1], &v[2]);
+   intelSpanRenderFinish(ctx);
 }
 
 
 static void
-intel_fallback_line( intelContextPtr intel,
-		    intelVertex *v0,
-		    intelVertex *v1 )
+intel_fallback_line(struct intel_context *intel,
+                    intelVertex * v0, intelVertex * v1)
 {
    GLcontext *ctx = &intel->ctx;
    SWvertex v[2];
@@ -518,17 +643,18 @@ intel_fallback_line( intelContextPtr intel,
    if (0)
       fprintf(stderr, "\n%s\n", __FUNCTION__);
 
-   _swsetup_Translate( ctx, v0, &v[0] );
-   _swsetup_Translate( ctx, v1, &v[1] );
-   intelSpanRenderStart( ctx );
-   _swrast_Line( ctx, &v[0], &v[1] );
-   intelSpanRenderFinish( ctx );
-}
+   INTEL_FIREVERTICES(intel);
 
+   _swsetup_Translate(ctx, v0, &v[0]);
+   _swsetup_Translate(ctx, v1, &v[1]);
+   intelSpanRenderStart(ctx);
+   _swrast_Line(ctx, &v[0], &v[1]);
+   intelSpanRenderFinish(ctx);
+}
 
 static void
-intel_fallback_point( intelContextPtr intel,
-		     intelVertex *v0 )
+intel_fallback_point(struct intel_context *intel,
+		     intelVertex * v0)
 {
    GLcontext *ctx = &intel->ctx;
    SWvertex v[1];
@@ -536,12 +662,13 @@ intel_fallback_point( intelContextPtr intel,
    if (0)
       fprintf(stderr, "\n%s\n", __FUNCTION__);
 
-   _swsetup_Translate( ctx, v0, &v[0] );
-   intelSpanRenderStart( ctx );
-   _swrast_Point( ctx, &v[0] );
-   intelSpanRenderFinish( ctx );
-}
+   INTEL_FIREVERTICES(intel);
 
+   _swsetup_Translate(ctx, v0, &v[0]);
+   intelSpanRenderStart(ctx);
+   _swrast_Point(ctx, &v[0]);
+   intelSpanRenderFinish(ctx);
+}
 
 
 /**********************************************************************/
@@ -558,7 +685,7 @@ intel_fallback_point( intelContextPtr intel,
 #define INIT(x) intelRenderPrimitive( ctx, x )
 #undef LOCAL_VARS
 #define LOCAL_VARS						\
-    intelContextPtr intel = INTEL_CONTEXT(ctx);			\
+    struct intel_context *intel = intel_context(ctx);			\
     GLubyte *vertptr = (GLubyte *)intel->verts;			\
     const GLuint vertsize = intel->vertex_size;       	\
     const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
@@ -581,10 +708,10 @@ intel_fallback_point( intelContextPtr intel,
 
 
 
-static void intelRenderClippedPoly( GLcontext *ctx, const GLuint *elts,
-				   GLuint n )
+static void
+intelRenderClippedPoly(GLcontext * ctx, const GLuint * elts, GLuint n)
 {
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   struct intel_context *intel = intel_context(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
    GLuint prim = intel->render_primitive;
@@ -593,39 +720,40 @@ static void intelRenderClippedPoly( GLcontext *ctx, const GLuint *elts,
     */
    {
       GLuint *tmp = VB->Elts;
-      VB->Elts = (GLuint *)elts;
-      tnl->Driver.Render.PrimTabElts[GL_POLYGON]( ctx, 0, n, 
-						  PRIM_BEGIN|PRIM_END );
+      VB->Elts = (GLuint *) elts;
+      tnl->Driver.Render.PrimTabElts[GL_POLYGON] (ctx, 0, n,
+                                                  PRIM_BEGIN | PRIM_END);
       VB->Elts = tmp;
    }
 
    /* Restore the render primitive
     */
    if (prim != GL_POLYGON)
-      tnl->Driver.Render.PrimitiveNotify( ctx, prim );
+      tnl->Driver.Render.PrimitiveNotify(ctx, prim);
 }
 
-static void intelRenderClippedLine( GLcontext *ctx, GLuint ii, GLuint jj )
+static void
+intelRenderClippedLine(GLcontext * ctx, GLuint ii, GLuint jj)
 {
    TNLcontext *tnl = TNL_CONTEXT(ctx);
 
-   tnl->Driver.Render.Line( ctx, ii, jj );
+   tnl->Driver.Render.Line(ctx, ii, jj);
 }
 
-static void intelFastRenderClippedPoly( GLcontext *ctx, const GLuint *elts,
-				       GLuint n )
+static void
+intelFastRenderClippedPoly(GLcontext * ctx, const GLuint * elts, GLuint n)
 {
-   intelContextPtr intel = INTEL_CONTEXT( ctx );
+   struct intel_context *intel = intel_context(ctx);
    const GLuint vertsize = intel->vertex_size;
-   GLuint *vb = intelExtendInlinePrimitive( intel, (n-2) * 3 * vertsize );
-   GLubyte *vertptr = (GLubyte *)intel->verts;
-   const GLuint *start = (const GLuint *)V(elts[0]);
-   int i,j;
-
-   for (i = 2 ; i < n ; i++) {
-      COPY_DWORDS( j, vb, vertsize, V(elts[i-1]) );
-      COPY_DWORDS( j, vb, vertsize, V(elts[i]) );
-      COPY_DWORDS( j, vb, vertsize, start );
+   GLuint *vb = intelExtendInlinePrimitive(intel, (n - 2) * 3 * vertsize);
+   GLubyte *vertptr = (GLubyte *) intel->verts;
+   const GLuint *start = (const GLuint *) V(elts[0]);
+   int i, j;
+
+   for (i = 2; i < n; i++) {
+      COPY_DWORDS(j, vb, vertsize, V(elts[i - 1]));
+      COPY_DWORDS(j, vb, vertsize, V(elts[i]));
+      COPY_DWORDS(j, vb, vertsize, start);
    }
 }
 
@@ -636,68 +764,75 @@ static void intelFastRenderClippedPoly( GLcontext *ctx, const GLuint *elts,
 
 
 
-#define POINT_FALLBACK (0)
-#define LINE_FALLBACK (DD_LINE_STIPPLE)
-#define TRI_FALLBACK (0)
-#define ANY_FALLBACK_FLAGS (POINT_FALLBACK|LINE_FALLBACK|TRI_FALLBACK|\
-                            DD_TRI_STIPPLE|DD_POINT_ATTEN)
-#define ANY_RASTER_FLAGS (DD_TRI_LIGHT_TWOSIDE|DD_TRI_OFFSET|DD_TRI_UNFILLED)
+#define ANY_FALLBACK_FLAGS (DD_LINE_STIPPLE | DD_TRI_STIPPLE | DD_POINT_ATTEN | DD_POINT_SMOOTH | DD_TRI_SMOOTH)
+#define ANY_RASTER_FLAGS (DD_TRI_LIGHT_TWOSIDE | DD_TRI_OFFSET | DD_TRI_UNFILLED)
 
-void intelChooseRenderState(GLcontext *ctx)
+void
+intelChooseRenderState(GLcontext * ctx)
 {
    TNLcontext *tnl = TNL_CONTEXT(ctx);
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   struct intel_context *intel = intel_context(ctx);
    GLuint flags = ctx->_TriangleCaps;
    const struct gl_fragment_program *fprog = ctx->FragmentProgram._Current;
    GLboolean have_wpos = (fprog && (fprog->Base.InputsRead & FRAG_BIT_WPOS));
    GLuint index = 0;
 
    if (INTEL_DEBUG & DEBUG_STATE)
-     fprintf(stderr,"\n%s\n",__FUNCTION__);
+      fprintf(stderr, "\n%s\n", __FUNCTION__);
 
-   if ((flags & (ANY_FALLBACK_FLAGS|ANY_RASTER_FLAGS)) || have_wpos) {
+   if ((flags & (ANY_FALLBACK_FLAGS | ANY_RASTER_FLAGS)) || have_wpos) {
 
       if (flags & ANY_RASTER_FLAGS) {
-	 if (flags & DD_TRI_LIGHT_TWOSIDE)    index |= INTEL_TWOSIDE_BIT;
-	 if (flags & DD_TRI_OFFSET)	      index |= INTEL_OFFSET_BIT;
-	 if (flags & DD_TRI_UNFILLED)	      index |= INTEL_UNFILLED_BIT;
+         if (flags & DD_TRI_LIGHT_TWOSIDE)
+            index |= INTEL_TWOSIDE_BIT;
+         if (flags & DD_TRI_OFFSET)
+            index |= INTEL_OFFSET_BIT;
+         if (flags & DD_TRI_UNFILLED)
+            index |= INTEL_UNFILLED_BIT;
       }
 
       if (have_wpos) {
-	 intel->draw_point = intel_wpos_point;
-	 intel->draw_line = intel_wpos_line;
-	 intel->draw_tri = intel_wpos_triangle;
+         intel->draw_point = intel_wpos_point;
+         intel->draw_line = intel_wpos_line;
+         intel->draw_tri = intel_wpos_triangle;
 
-	 /* Make sure these get called:
-	  */
-	 index |= INTEL_FALLBACK_BIT;
+         /* Make sure these get called:
+          */
+         index |= INTEL_FALLBACK_BIT;
       }
       else {
-	 intel->draw_point = intel_draw_point;
-	 intel->draw_line = intel_draw_line;
-	 intel->draw_tri = intel_draw_triangle;
+         intel->draw_point = intel_draw_point;
+         intel->draw_line = intel_draw_line;
+         intel->draw_tri = intel_draw_triangle;
       }
 
       /* Hook in fallbacks for specific primitives.
        */
-      if (flags & ANY_FALLBACK_FLAGS)
-      {
-	 if (flags & POINT_FALLBACK)
-	    intel->draw_point = intel_fallback_point;
-
-	 if (flags & LINE_FALLBACK)
-	    intel->draw_line = intel_fallback_line;
-
-	 if (flags & TRI_FALLBACK)
-	    intel->draw_tri = intel_fallback_tri;
-
-	 if ((flags & DD_TRI_STIPPLE) && !intel->hw_stipple) 
-	    intel->draw_tri = intel_fallback_tri;
-
-	 if (flags & DD_POINT_ATTEN)
-	    intel->draw_point = intel_atten_point;
-
-	 index |= INTEL_FALLBACK_BIT;
+      if (flags & ANY_FALLBACK_FLAGS) {
+         if (flags & DD_LINE_STIPPLE)
+            intel->draw_line = intel_fallback_line;
+
+         if ((flags & DD_TRI_STIPPLE) && !intel->hw_stipple)
+            intel->draw_tri = intel_fallback_tri;
+
+         if (flags & DD_TRI_SMOOTH) {
+	    if (intel->strict_conformance)
+	       intel->draw_tri = intel_fallback_tri;
+	 }
+
+         if (flags & DD_POINT_ATTEN) {
+	    if (0)
+	       intel->draw_point = intel_atten_point;
+	    else
+	       intel->draw_point = intel_fallback_point;
+	 }
+
+	 if (flags & DD_POINT_SMOOTH) {
+	    if (intel->strict_conformance)
+	       intel->draw_point = intel_fallback_point;
+	 }
+
+         index |= INTEL_FALLBACK_BIT;
       }
    }
 
@@ -710,20 +845,21 @@ void intelChooseRenderState(GLcontext *ctx)
       tnl->Driver.Render.Quad = rast_tab[index].quad;
 
       if (index == 0) {
-	 tnl->Driver.Render.PrimTabVerts = intel_render_tab_verts;
-	 tnl->Driver.Render.PrimTabElts = intel_render_tab_elts;
-	 tnl->Driver.Render.ClippedLine = line; /* from tritmp.h */
-	 tnl->Driver.Render.ClippedPolygon = intelFastRenderClippedPoly;
-      } else {
-	 tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
-	 tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
-	 tnl->Driver.Render.ClippedLine = intelRenderClippedLine;
-	 tnl->Driver.Render.ClippedPolygon = intelRenderClippedPoly;
+         tnl->Driver.Render.PrimTabVerts = intel_render_tab_verts;
+         tnl->Driver.Render.PrimTabElts = intel_render_tab_elts;
+         tnl->Driver.Render.ClippedLine = line; /* from tritmp.h */
+         tnl->Driver.Render.ClippedPolygon = intelFastRenderClippedPoly;
+      }
+      else {
+         tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+         tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+         tnl->Driver.Render.ClippedLine = intelRenderClippedLine;
+         tnl->Driver.Render.ClippedPolygon = intelRenderClippedPoly;
       }
    }
 }
 
-static const GLenum reduced_prim[GL_POLYGON+1] = {
+static const GLenum reduced_prim[GL_POLYGON + 1] = {
    GL_POINTS,
    GL_LINES,
    GL_LINES,
@@ -744,35 +880,52 @@ static const GLenum reduced_prim[GL_POLYGON+1] = {
 
 
 
-static void intelRunPipeline( GLcontext *ctx )
+static void
+intelRunPipeline(GLcontext * ctx)
 {
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   struct intel_context *intel = intel_context(ctx);
+   /* Validate state, then run the tnl pipeline, with textures locked. */
+   _mesa_lock_context_textures(ctx);
+   /* Update core state (if flagged) before testing intel->NewGLState. */
+   if (ctx->NewState)
+      _mesa_update_state_locked(ctx);
 
    if (intel->NewGLState) {
       if (intel->NewGLState & _NEW_TEXTURE) {
-	 intel->vtbl.update_texture_state( intel ); 
+         intel->vtbl.update_texture_state(intel);
       }
 
       if (!intel->Fallback) {
-	 if (intel->NewGLState & _INTEL_NEW_RENDERSTATE)
-	    intelChooseRenderState( ctx );
+         if (intel->NewGLState & _INTEL_NEW_RENDERSTATE)
+            intelChooseRenderState(ctx);
       }
 
       intel->NewGLState = 0;
    }
 
-   _tnl_run_pipeline( ctx );
+   _tnl_run_pipeline(ctx);
+   /* Pipeline has run; drop the texture lock taken at the top. */
+   _mesa_unlock_context_textures(ctx);
 }
 
-static void intelRenderStart( GLcontext *ctx )
+static void
+intelRenderStart(GLcontext * ctx)
 {
-   INTEL_CONTEXT(ctx)->vtbl.render_start( INTEL_CONTEXT(ctx) );
+   struct intel_context *intel = intel_context(ctx);
+   /* tnl Render.Start hook: chip-specific render_start, then emit_state. */
+   intel->vtbl.render_start(intel);
+   intel->vtbl.emit_state(intel);
 }
 
-static void intelRenderFinish( GLcontext *ctx )
+static void
+intelRenderFinish(GLcontext * ctx)
 {
-   if (INTEL_CONTEXT(ctx)->RenderIndex & INTEL_FALLBACK_BIT)
-      _swrast_flush( ctx );
+   struct intel_context *intel = intel_context(ctx);
+   /* swrast is flushed only when RenderIndex carries the fallback bit. */
+   if (intel->RenderIndex & INTEL_FALLBACK_BIT)
+      _swrast_flush(ctx);
+   /* NOTE(review): presumably submits vertices still queued -- confirm. */
+   INTEL_FIREVERTICES(intel);
 }
 
 
@@ -781,28 +934,33 @@ static void intelRenderFinish( GLcontext *ctx )
  /* System to flush dma and emit state changes based on the rasterized
   * primitive.
   */
-static void intelRasterPrimitive( GLcontext *ctx, GLenum rprim, GLuint hwprim )
+static void
+intelRasterPrimitive(GLcontext * ctx, GLenum rprim, GLuint hwprim)
 {
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   struct intel_context *intel = intel_context(ctx);
 
    if (0)
-      fprintf(stderr, "%s %s %x\n", __FUNCTION__, 
-	      _mesa_lookup_enum_by_nr(rprim), hwprim);
+      fprintf(stderr, "%s %s %x\n", __FUNCTION__,
+              _mesa_lookup_enum_by_nr(rprim), hwprim);
+   /* Hand the reduced primitive type to the chip-specific vtbl hook. */
+   intel->vtbl.reduced_primitive_state(intel, rprim);
 
-   intel->vtbl.reduced_primitive_state( intel, rprim );
-    
    /* Start a new primitive.  Arrange to have it flushed later on.
     */
-   if (hwprim != intel->prim.primitive) 
-      intelStartInlinePrimitive( intel, hwprim );
+   if (hwprim != intel->prim.primitive) {
+      INTEL_FIREVERTICES(intel);
+      /* NOTE(review): the fire above presumably ends the old primitive. */
+      intelStartInlinePrimitive(intel, hwprim, LOOP_CLIPRECTS);
+   }
 }
 
 
-/* 
- */
-static void intelRenderPrimitive( GLcontext *ctx, GLenum prim )
+ /* 
+  */
+static void
+intelRenderPrimitive(GLcontext * ctx, GLenum prim)
 {
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   struct intel_context *intel = intel_context(ctx);
 
    if (0)
       fprintf(stderr, "%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(prim));
@@ -817,63 +975,54 @@ static void intelRenderPrimitive( GLcontext *ctx, GLenum prim )
     * lower level functions in that case, potentially pingponging the
     * state:
     */
-   if (reduced_prim[prim] == GL_TRIANGLES && 
+   if (reduced_prim[prim] == GL_TRIANGLES &&
        (ctx->_TriangleCaps & DD_TRI_UNFILLED))
       return;
 
    /* Set some primitive-dependent state and Start? a new primitive.
     */
-   intelRasterPrimitive( ctx, reduced_prim[prim], hw_prim[prim] );
+   intelRasterPrimitive(ctx, reduced_prim[prim], hw_prim[prim]);
 }
 
 
-/**********************************************************************/
-/*           Transition to/from hardware rasterization.               */
-/**********************************************************************/
-
-static struct {
-   GLuint bit;
-   const char *str;
-} fallbackStrings[] = {
-   { INTEL_FALLBACK_DRAW_BUFFER, "Draw buffer" },
-   { INTEL_FALLBACK_READ_BUFFER, "Read buffer" },
-   { INTEL_FALLBACK_USER, "User" },
-   { INTEL_FALLBACK_NO_BATCHBUFFER, "No Batchbuffer" },
-   { INTEL_FALLBACK_NO_TEXMEM, "No Texmem" },
-   { INTEL_FALLBACK_RENDERMODE, "Rendermode" },
-
-   { I830_FALLBACK_TEXTURE, "i830 texture" },
-   { I830_FALLBACK_COLORMASK, "i830 colormask" },
-   { I830_FALLBACK_STENCIL, "i830 stencil" },
-   { I830_FALLBACK_STIPPLE, "i830 stipple" },
-   { I830_FALLBACK_LOGICOP, "i830 logicop" },
-
-   { I915_FALLBACK_TEXTURE, "i915 texture" },
-   { I915_FALLBACK_COLORMASK, "i915 colormask" },
-   { I915_FALLBACK_STENCIL, "i915 stencil" },
-   { I915_FALLBACK_STIPPLE, "i915 stipple" },
-   { I915_FALLBACK_PROGRAM, "i915 program" },
-   { I915_FALLBACK_LOGICOP, "i915 logicop" },
-   { I915_FALLBACK_POLYGON_SMOOTH, "i915 polygon smooth" },
-   { I915_FALLBACK_POINT_SMOOTH, "i915 point smooth" },
-
-   { 0, NULL }
+ /**********************************************************************/
+ /*           Transition to/from hardware rasterization.               */
+ /**********************************************************************/
+/* Debug names for fallback flags, indexed by bit number; gaps are NULL. */
+static char *fallbackStrings[] = {
+   [0] = "Draw buffer",
+   [1] = "Read buffer",
+   [2] = "Depth buffer",
+   [3] = "Stencil buffer",
+   [4] = "User disable",
+   [5] = "Render mode",
+   [12] = "Texture",            /* bit numbers 6-11 have no name */
+   [13] = "Color mask",
+   [14] = "Stencil",
+   [15] = "Stipple",
+   [16] = "Program",
+   [17] = "Logic op",
+   [18] = "Smooth polygon",
+   [19] = "Smooth point",
 };
 
 
-static const char *
+static char *
 getFallbackString(GLuint bit)
 {
-   int i;
-   for (i = 0; fallbackStrings[i].bit; i++) {
-      if (fallbackStrings[i].bit == bit)
-         return fallbackStrings[i].str;
+   GLuint i;                    /* becomes the index of bit's top set bit */
+   for (i = 0; bit > 1; bit >>= 1)
+      i++;
+   if (i < sizeof(fallbackStrings) / sizeof(fallbackStrings[0]) && fallbackStrings[i]) {
+      return fallbackStrings[i];   /* in range and named */
    }
    return "unknown fallback bit";
 }
 
 
-void intelFallback( intelContextPtr intel, GLuint bit, GLboolean mode )
+
+void
+intelFallback(struct intel_context *intel, GLuint bit, GLboolean mode)
 {
    GLcontext *ctx = &intel->ctx;
    TNLcontext *tnl = TNL_CONTEXT(ctx);
@@ -883,20 +1032,19 @@ void intelFallback( intelContextPtr intel, GLuint bit, GLboolean mode )
       intel->Fallback |= bit;
       if (oldfallback == 0) {
          intelFlush(ctx);
-         if (INTEL_DEBUG & DEBUG_FALLBACKS) 
-            fprintf(stderr, "ENTER FALLBACK 0x%x: %s\n",
+         if (INTEL_DEBUG & DEBUG_FALLBACKS)
+            fprintf(stderr, "ENTER FALLBACK %x: %s\n",
                     bit, getFallbackString(bit));
-         _swsetup_Wakeup( ctx );
+         _swsetup_Wakeup(ctx);
          intel->RenderIndex = ~0;
       }
    }
    else {
       intel->Fallback &= ~bit;
       if (oldfallback == bit) {
-         _swrast_flush( ctx );
-         if (INTEL_DEBUG & DEBUG_FALLBACKS) 
-            fprintf(stderr, "LEAVE FALLBACK 0x%x: %s\n",
-                    bit, getFallbackString(bit));
+         _swrast_flush(ctx);
+         if (INTEL_DEBUG & DEBUG_FALLBACKS)
+            fprintf(stderr, "LEAVE FALLBACK %s\n", getFallbackString(bit));
          tnl->Driver.Render.Start = intelRenderStart;
          tnl->Driver.Render.PrimitiveNotify = intelRenderPrimitive;
          tnl->Driver.Render.Finish = intelRenderFinish;
@@ -904,18 +1052,94 @@ void intelFallback( intelContextPtr intel, GLuint bit, GLboolean mode )
          tnl->Driver.Render.CopyPV = _tnl_copy_pv;
          tnl->Driver.Render.Interp = _tnl_interp;
 
-         _tnl_invalidate_vertex_state( ctx, ~0 );
-         _tnl_invalidate_vertices( ctx, ~0 );
-         _tnl_install_attrs( ctx, 
-                             intel->vertex_attrs, 
-                             intel->vertex_attr_count,
-                             intel->ViewportMatrix.m, 0 ); 
+         _tnl_invalidate_vertex_state(ctx, ~0);
+         _tnl_invalidate_vertices(ctx, ~0);
+         _tnl_install_attrs(ctx,
+                            intel->vertex_attrs,
+                            intel->vertex_attr_count,
+                            intel->ViewportMatrix.m, 0);
 
          intel->NewGLState |= _INTEL_NEW_RENDERSTATE;
       }
    }
 }
 
+union fi   /* one vertex dword, written either as a float or as an int */
+{
+   GLfloat f;   /* float view: x, y, z, s, t in intel_meta_draw_poly() */
+   GLint i;     /* integer view: the color value */
+};
+
+
+/**********************************************************************/
+/*             Used only with the metaops callbacks.                  */
+/**********************************************************************/
+static void
+intel_meta_draw_poly(struct intel_context *intel,
+                     GLuint n,
+                     GLfloat xy[][2],
+                     GLfloat z, GLuint color, GLfloat tex[][2])
+{
+   /* Emit n vertices as one PRIM3D_TRIFAN, six dwords per vertex
+    * (x, y, z, color, s, t), locking the hardware only if it is not
+    * already locked. */
+   const GLboolean had_lock = intel->locked;
+   union fi *out;
+   GLint v;
+
+   if (!had_lock)
+      LOCK_HARDWARE(intel);
+
+   /* All 3d primitives should be emitted with LOOP_CLIPRECTS,
+    * otherwise the drawing origin (DR4) might not be set correctly.
+    */
+   intelStartInlinePrimitive(intel, PRIM3D_TRIFAN, LOOP_CLIPRECTS);
+   out = (union fi *) intelExtendInlinePrimitive(intel, n * 6);
+
+   for (v = 0; v < n; v++, out += 6) {
+      out[0].f = xy[v][0];
+      out[1].f = xy[v][1];
+      out[2].f = z;
+      out[3].i = color;
+      out[4].f = tex[v][0];
+      out[5].f = tex[v][1];
+   }
+   INTEL_FIREVERTICES(intel);
+   if (!had_lock)
+      UNLOCK_HARDWARE(intel);
+}
+
+static void
+intel_meta_draw_quad(struct intel_context *intel,
+                     GLfloat x0, GLfloat x1,
+                     GLfloat y0, GLfloat y1,
+                     GLfloat z,
+                     GLuint color,
+                     GLfloat s0, GLfloat s1, GLfloat t0, GLfloat t1)
+{
+   /* Draw the rectangle with opposite corners (x0,y0) and (x1,y1) at
+    * depth z as a four-vertex polygon via intel_meta_draw_poly().
+    * Texcoord (s0,t0) goes with corner (x0,y0) and (s1,t1) with
+    * corner (x1,y1); color is passed through unchanged.
+    */
+   GLfloat xy[4][2];
+   GLfloat tex[4][2];
+   GLuint corner;
+
+   /* Corner order: (x0,y0), (x1,y0), (x1,y1), (x0,y1). */
+   for (corner = 0; corner < 4; corner++) {
+      /* Corners 1 and 2 use x1/s1; corners 2 and 3 use y1/t1. */
+      const GLboolean use_x1 = (corner == 1 || corner == 2);
+      const GLboolean use_y1 = (corner >= 2);
+
+      xy[corner][0] = use_x1 ? x1 : x0;
+      xy[corner][1] = use_y1 ? y1 : y0;
+      tex[corner][0] = use_x1 ? s1 : s0;
+      tex[corner][1] = use_y1 ? t1 : t0;
+   }
+
+   intel_meta_draw_poly(intel, 4, xy, z, color, tex);
+}
 
 
 
@@ -924,8 +1148,10 @@ void intelFallback( intelContextPtr intel, GLuint bit, GLboolean mode )
 /**********************************************************************/
 
 
-void intelInitTriFuncs( GLcontext *ctx )
+void
+intelInitTriFuncs(GLcontext * ctx)
 {
+   struct intel_context *intel = intel_context(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    static int firsttime = 1;
 
@@ -942,4 +1168,6 @@ void intelInitTriFuncs( GLcontext *ctx )
    tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
    tnl->Driver.Render.CopyPV = _tnl_copy_pv;
    tnl->Driver.Render.Interp = _tnl_interp;
+
+   intel->vtbl.meta_draw_quad = intel_meta_draw_quad;
 }
diff --git a/i915/intel_tris.h b/i915/intel_tris.h
index d7e382f..021e5c6 100644
--- a/i915/intel_tris.h
+++ b/i915/intel_tris.h
@@ -30,6 +30,8 @@
 
 #include "mtypes.h"
 
+
+
 #define _INTEL_NEW_RENDERSTATE (_DD_NEW_LINE_STIPPLE |		\
 			       _DD_NEW_TRI_UNFILLED |		\
 			       _DD_NEW_TRI_LIGHT_TWOSIDE |	\
@@ -38,9 +40,15 @@
 			       _NEW_PROGRAM |		\
 			       _NEW_POLYGONSTIPPLE)
 
-extern void intelInitTriFuncs( GLcontext *ctx );
+extern void intelInitTriFuncs(GLcontext * ctx);
+
+extern void intelChooseRenderState(GLcontext * ctx);
+
+extern void intelStartInlinePrimitive(struct intel_context *intel,
+                                      GLuint prim, GLuint flags);
+extern void intelWrapInlinePrimitive(struct intel_context *intel);
 
-extern void intelPrintRenderState( const char *msg, GLuint state );
-extern void intelChooseRenderState( GLcontext *ctx );
+GLuint *intelExtendInlinePrimitive(struct intel_context *intel,
+                                   GLuint dwords);
 
 #endif
diff --git a/i915/server/i830_common.h b/i915/server/i830_common.h
deleted file mode 100644
index fb6ceaa..0000000
--- a/i915/server/i830_common.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/**************************************************************************
-
-Copyright 2001 VA Linux Systems Inc., Fremont, California.
-Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas.
-
-All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a
-copy of this software and associated documentation files (the "Software"),
-to deal in the Software without restriction, including without limitation
-on the rights to use, copy, modify, merge, publish, distribute, sub
-license, and/or sell copies of the Software, and to permit persons to whom
-the Software is furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice (including the next
-paragraph) shall be included in all copies or substantial portions of the
-Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
-ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
-DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
-OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
-USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/i810/i830_common.h,v 1.1 2002/09/11 00:29:32 dawes Exp $ */
-
-#ifndef _I830_COMMON_H_
-#define _I830_COMMON_H_
-
-
-#define I830_NR_TEX_REGIONS 255	/* maximum due to use of chars for next/prev */
-#define I830_LOG_MIN_TEX_REGION_SIZE 14
-
-
-/* Driver specific DRM command indices
- * NOTE: these are not OS specific, but they are driver specific
- */
-#define DRM_I830_INIT                     0x00
-#define DRM_I830_FLUSH                    0x01
-#define DRM_I830_FLIP                     0x02
-#define DRM_I830_BATCHBUFFER              0x03
-#define DRM_I830_IRQ_EMIT                 0x04
-#define DRM_I830_IRQ_WAIT                 0x05
-#define DRM_I830_GETPARAM                 0x06
-#define DRM_I830_SETPARAM                 0x07
-#define DRM_I830_ALLOC                    0x08
-#define DRM_I830_FREE                     0x09
-#define DRM_I830_INIT_HEAP                0x0a
-#define DRM_I830_CMDBUFFER                0x0b
-#define DRM_I830_DESTROY_HEAP             0x0c
-
-typedef struct {
-   enum {
-      I830_INIT_DMA = 0x01,
-      I830_CLEANUP_DMA = 0x02,
-      I830_RESUME_DMA = 0x03
-   } func;
-   unsigned int mmio_offset;
-   int sarea_priv_offset;
-   unsigned int ring_start;
-   unsigned int ring_end;
-   unsigned int ring_size;
-   unsigned int front_offset;
-   unsigned int back_offset;
-   unsigned int depth_offset;
-   unsigned int w;
-   unsigned int h;
-   unsigned int pitch;
-   unsigned int pitch_bits;
-   unsigned int back_pitch;
-   unsigned int depth_pitch;
-   unsigned int cpp;
-   unsigned int chipset;
-} drmI830Init;
-
-typedef struct {
-	drmTextureRegion texList[I830_NR_TEX_REGIONS+1];
-        int last_upload;	/* last time texture was uploaded */
-        int last_enqueue;	/* last time a buffer was enqueued */
-	int last_dispatch;	/* age of the most recently dispatched buffer */
-	int ctxOwner;		/* last context to upload state */
-	int texAge;
-        int pf_enabled;		/* is pageflipping allowed? */
-        int pf_active;               
-        int pf_current_page;	/* which buffer is being displayed? */
-        int perf_boxes;	        /* performance boxes to be displayed */   
-	int width, height;      /* screen size in pixels */
-
-	drm_handle_t front_handle;
-	int front_offset;
-	int front_size;
-
-	drm_handle_t back_handle;
-	int back_offset;
-	int back_size;
-
-	drm_handle_t depth_handle;
-	int depth_offset;
-	int depth_size;
-
-	drm_handle_t tex_handle;
-	int tex_offset;
-	int tex_size;
-	int log_tex_granularity;
-	int pitch;
-	int rotation;           /* 0, 90, 180 or 270 */
-	int rotated_offset;
-	int rotated_size;
-	int rotated_pitch;
-	int virtualX, virtualY;
-
-	unsigned int front_tiled;
-	unsigned int back_tiled;
-	unsigned int depth_tiled;
-	unsigned int rotated_tiled;
-	unsigned int rotated2_tiled;
-
-	int pipeA_x;
-	int pipeA_y;
-	int pipeA_w;
-	int pipeA_h;
-	int pipeB_x;
-	int pipeB_y;
-	int pipeB_w;
-	int pipeB_h;
-} drmI830Sarea;
-
-/* Flags for perf_boxes
- */
-#define I830_BOX_RING_EMPTY    0x1 /* populated by kernel */
-#define I830_BOX_FLIP          0x2 /* populated by kernel */
-#define I830_BOX_WAIT          0x4 /* populated by kernel & client */
-#define I830_BOX_TEXTURE_LOAD  0x8 /* populated by kernel */
-#define I830_BOX_LOST_CONTEXT  0x10 /* populated by client */
-
-
-typedef struct {
-   	int start;		/* agp offset */
-	int used;		/* nr bytes in use */
-	int DR1;		/* hw flags for GFX_OP_DRAWRECT_INFO */
-        int DR4;		/* window origin for GFX_OP_DRAWRECT_INFO*/
-	int num_cliprects;	/* mulitpass with multiple cliprects? */
-        drm_clip_rect_t *cliprects; /* pointer to userspace cliprects */
-} drmI830BatchBuffer;
-
-typedef struct {
-   	char *buf;		/* agp offset */
-	int sz; 		/* nr bytes in use */
-	int DR1;		/* hw flags for GFX_OP_DRAWRECT_INFO */
-        int DR4;		/* window origin for GFX_OP_DRAWRECT_INFO*/
-	int num_cliprects;	/* mulitpass with multiple cliprects? */
-        drm_clip_rect_t *cliprects; /* pointer to userspace cliprects */
-} drmI830CmdBuffer;
- 
-typedef struct {
-	int *irq_seq;
-} drmI830IrqEmit;
-
-typedef struct {
-	int irq_seq;
-} drmI830IrqWait;
-
-typedef struct {
-	int param;
-	int *value;
-} drmI830GetParam;
-
-#define I830_PARAM_IRQ_ACTIVE     1
-#define I830_PARAM_ALLOW_BATCHBUFFER   2 
-
-typedef struct {
-	int param;
-	int value;
-} drmI830SetParam;
-
-#define I830_SETPARAM_USE_MI_BATCHBUFFER_START  1
-#define I830_SETPARAM_TEX_LRU_LOG_GRANULARITY   2
-#define I830_SETPARAM_ALLOW_BATCHBUFFER         3
-
-
-/* A memory manager for regions of shared memory:
- */
-#define I830_MEM_REGION_AGP 1
-
-typedef struct {
-	int region;
-	int alignment;
-	int size;
-	int *region_offset;	/* offset from start of fb or agp */
-} drmI830MemAlloc;
-
-typedef struct {
-	int region;
-	int region_offset;
-} drmI830MemFree;
-
-typedef struct {
-	int region;
-	int size;
-	int start;	
-} drmI830MemInitHeap;
-
-typedef struct {
-	int region;
-} drmI830MemDestroyHeap;
-
-
-#endif /* _I830_DRM_H_ */
diff --git a/i915/server/i830_dri.h b/i915/server/i830_dri.h
deleted file mode 100644
index 6c9a709..0000000
--- a/i915/server/i830_dri.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/i810/i830_dri.h,v 1.4 2002/10/30 12:52:18 alanh Exp $ */
-
-#ifndef _I830_DRI_H
-#define _I830_DRI_H
-
-#include "xf86drm.h"
-#include "i830_common.h"
-
-#define I830_MAX_DRAWABLES 256
-
-#define I830_MAJOR_VERSION 1
-#define I830_MINOR_VERSION 3
-#define I830_PATCHLEVEL 0
-
-#define I830_REG_SIZE 0x80000
-
-typedef struct _I830DRIRec {
-   drm_handle_t regs;
-   drmSize regsSize;
-
-   drmSize backbufferSize;
-   drm_handle_t backbuffer;
-
-   drmSize depthbufferSize;
-   drm_handle_t depthbuffer;
-
-   drmSize rotatedSize;
-   drm_handle_t rotatedbuffer;
-
-   drm_handle_t textures;
-   int textureSize;
-
-   drm_handle_t agp_buffers;
-   drmSize agp_buf_size;
-
-   int deviceID;
-   int width;
-   int height;
-   int mem;
-   int cpp;
-   int bitsPerPixel;
-
-   int fbOffset;
-   int fbStride;
-
-   int backOffset;
-   int backPitch;
-
-   int depthOffset;
-   int depthPitch;
-
-   int rotatedOffset;
-   int rotatedPitch;
-
-   int logTextureGranularity;
-   int textureOffset;
-
-   int irq;
-   int sarea_priv_offset;
-} I830DRIRec, *I830DRIPtr;
-
-typedef struct {
-   /* Nothing here yet */
-   int dummy;
-} I830ConfigPrivRec, *I830ConfigPrivPtr;
-
-typedef struct {
-   /* Nothing here yet */
-   int dummy;
-} I830DRIContextRec, *I830DRIContextPtr;
-
-
-#endif
diff --git a/i965/Makefile.am b/i965/Makefile.am
index 163ad0f..b1b816c 100644
--- a/i965/Makefile.am
+++ b/i965/Makefile.am
@@ -1,30 +1,38 @@
 AM_CFLAGS = -DIN_DRI_DRIVER -DGLX_DIRECT_RENDERING -DGLX_INDIRECT_RENDERING
 
+I965_CFLAGS = -I../shared -I../shared/server
+
 i965_dri_la_LTLIBRARIES = i965_dri.la
-i965_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) -Iserver -I../shared
+i965_dri_la_CFLAGS = $(AM_CFLAGS) $(DRM_CFLAGS) $(DRI_CFLAGS) $(I965_CFLAGS)
 i965_dri_la_LDFLAGS = -module -noprefix -avoid-version -lm -ldl\
 	 $(DRM_LIBS) $(DRI_LIBS)
 i965_dri_ladir = @libdir@/dri
 i965_dri_la_SOURCES = \
-	bufmgr_fake.c \
-	intel_batchbuffer.c \
-	intel_blit.c \
-	intel_buffer_objects.c \
-	intel_buffers.c \
-	intel_context.c \
-	intel_ioctl.c \
-	intel_mipmap_tree.c \
-	intel_regions.c \
-	intel_screen.c \
-	intel_span.c \
-	intel_pixel_copy.c \
-	intel_pixel_bitmap.c \
+	../shared/intel_batchbuffer.c \
+	../shared/intel_blit.c \
+	../shared/intel_buffer_objects.c \
+	../shared/intel_buffers.c \
+	../shared/intel_bufmgr_ttm.c \
+	../shared/intel_context.c \
+	../shared/intel_decode.c \
+	../shared/intel_depthstencil.c \
+	../shared/intel_fbo.c \
+	../shared/intel_ioctl.c \
+	../shared/intel_mipmap_tree.c \
+	../shared/intel_regions.c \
+	../shared/intel_screen.c \
+	../shared/intel_span.c \
+	../shared/intel_pixel.c \
+	../shared/intel_pixel_copy.c \
+	../shared/intel_pixel_bitmap.c \
 	intel_state.c \
-	intel_tex.c \
+	../shared/intel_tex.c \
+	../shared/intel_tex_copy.c \
+	../shared/intel_tex_format.c \
+	../shared/intel_tex_image.c \
 	../shared/intel_tex_layout.c \
-	intel_tex_validate.c \
-	brw_aub.c \
-	brw_aub_playback.c \
+	../shared/intel_tex_subimage.c \
+	../shared/intel_tex_validate.c \
 	brw_cc.c \
 	brw_clip.c \
 	brw_clip_line.c \
@@ -45,7 +53,6 @@ i965_dri_la_SOURCES = \
 	brw_gs.c \
 	brw_gs_emit.c \
 	brw_gs_state.c \
-	brw_hal.c \
 	brw_metaops.c \
 	brw_misc_state.c \
 	brw_program.c \
@@ -54,7 +61,7 @@ i965_dri_la_SOURCES = \
 	brw_sf_state.c \
 	brw_state_batch.c \
 	brw_state_cache.c \
-	brw_state_pool.c \
+	brw_state_dump.c \
 	brw_state_upload.c \
 	brw_tex.c \
 	brw_tex_layout.c \
@@ -71,9 +78,10 @@ i965_dri_la_SOURCES = \
 	brw_wm_emit.c \
 	brw_wm_fp.c \
 	brw_wm_iz.c \
+	brw_wm_glsl.c \
 	brw_wm_pass0.c \
 	brw_wm_pass1.c \
 	brw_wm_pass2.c \
 	brw_wm_sampler_state.c \
 	brw_wm_state.c \
-	brw_wm_surface_state.c 
+	brw_wm_surface_state.c
diff --git a/i965/brw_aub.c b/i965/brw_aub.c
deleted file mode 100644
index c549f7a..0000000
--- a/i965/brw_aub.c
+++ /dev/null
@@ -1,353 +0,0 @@
-/*
- Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
- develop this 3D driver.
- 
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
- 
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
- 
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- **********************************************************************/
- /*
-  * Authors:
-  *   Keith Whitwell <keith@tungstengraphics.com>
-  */
-
-#include "brw_context.h"
-#include "brw_aub.h"
-#include "intel_regions.h"
-#include <stdio.h>
-
-extern char *__progname;
-
-
-/* Registers to control page table
- */
-#define PGETBL_CTL       0x2020
-#define PGETBL_ENABLED   0x1
-
-#define NR_GTT_ENTRIES  65536	/* 256 mb */
-
-#define FAIL										\
-do {											\
-   fprintf(stderr, "failed to write aub data at %s/%d\n", __FUNCTION__, __LINE__);	\
-   exit(1);										\
-} while (0)
-
-
-/* Emit the headers at the top of each aubfile.  Initialize the GTT.
- */
-static void init_aubfile( FILE *aub_file )
-{   
-   struct aub_file_header fh;
-   struct aub_block_header bh;
-   unsigned int data;
-
-   static int nr;
-   
-   nr++;
-
-   /* Emit the aub header:
-    */
-   memset(&fh, 0, sizeof(fh));
-
-   fh.instruction_type = AUB_FILE_HEADER;
-   fh.minor = 0x0;
-   fh.major = 0x7;
-   memcpy(fh.application, __progname, sizeof(fh.application));
-   fh.day = (nr>>24) & 0xff;
-   fh.month = 0x0;
-   fh.year = 0x0;
-   fh.timezone = 0x0;
-   fh.second = nr & 0xff;
-   fh.minute = (nr>>8) & 0xff;
-   fh.hour = (nr>>16) & 0xff;
-   fh.comment_length = 0x0;   
-
-   if (fwrite(&fh, sizeof(fh), 1, aub_file) < 1) 
-      FAIL;
-         
-   /* Setup the GTT starting at main memory address zero (!):
-    */
-   memset(&bh, 0, sizeof(bh));
-   
-   bh.instruction_type = AUB_BLOCK_HEADER;
-   bh.operation = BH_MMI0_WRITE32;
-   bh.type = 0x0;
-   bh.address_space = ADDR_GTT;	/* ??? */
-   bh.general_state_type = 0x0;
-   bh.surface_state_type = 0x0;
-   bh.address = PGETBL_CTL;
-   bh.length = 0x4;
-
-   if (fwrite(&bh, sizeof(bh), 1, aub_file) < 1) 
-      FAIL;
-
-   data = 0x0 | PGETBL_ENABLED;
-
-   if (fwrite(&data, sizeof(data), 1, aub_file) < 1) 
-      FAIL;
-}
-
-
-static void init_aub_gtt( struct brw_context *brw,
-			  GLuint start_offset, 
-			  GLuint size )
-{
-   FILE *aub_file = brw->intel.aub_file;
-   struct aub_block_header bh;
-   unsigned int i;
-
-   assert(start_offset + size < NR_GTT_ENTRIES * 4096);
-
-
-   memset(&bh, 0, sizeof(bh));
-   
-   bh.instruction_type = AUB_BLOCK_HEADER;
-   bh.operation = BH_DATA_WRITE;
-   bh.type = 0x0;
-   bh.address_space = ADDR_MAIN;
-   bh.general_state_type = 0x0;
-   bh.surface_state_type = 0x0;
-   bh.address =  start_offset / 4096 * 4;
-   bh.length = size / 4096 * 4;
-
-   if (fwrite(&bh, sizeof(bh), 1, aub_file) < 1) 
-      FAIL;
-
-   for (i = 0; i < size / 4096; i++) {
-      GLuint data = brw->next_free_page | 1;
-
-      brw->next_free_page += 4096;
-
-      if (fwrite(&data, sizeof(data), 1, aub_file) < 1) 
-	 FAIL;
-   }
-
-}
-
-static void write_block_header( FILE *aub_file,
-				struct aub_block_header *bh,
-				const GLuint *data,
-				GLuint sz )
-{
-   sz = (sz + 3) & ~3;
-
-   if (fwrite(bh, sizeof(*bh), 1, aub_file) < 1) 
-      FAIL;
-
-   if (fwrite(data, sz, 1, aub_file) < 1) 
-      FAIL;
-
-   fflush(aub_file);
-}
-
-
-static void write_dump_bmp( FILE *aub_file,
-			    struct aub_dump_bmp *db )
-{
-   if (fwrite(db, sizeof(*db), 1, aub_file) < 1) 
-      FAIL;
-
-   fflush(aub_file);
-}
-
-
-
-static void brw_aub_gtt_data( struct intel_context *intel,
-			      GLuint offset,
-			      const void *data,
-			      GLuint sz,
-			      GLuint type,
-			      GLuint state_type )
-{
-   struct aub_block_header bh;
-
-   bh.instruction_type = AUB_BLOCK_HEADER;
-   bh.operation = BH_DATA_WRITE;
-   bh.type = type;
-   bh.address_space = ADDR_GTT;
-   bh.pad0 = 0;
-
-   if (type == DW_GENERAL_STATE) {
-      bh.general_state_type = state_type;
-      bh.surface_state_type = 0;
-   }
-   else {
-      bh.general_state_type = 0;
-      bh.surface_state_type = state_type;
-   }
-
-   bh.pad1 = 0;
-   bh.address = offset;
-   bh.length = sz;
-
-   write_block_header(intel->aub_file, &bh, data, sz);
-}
-
-
-
-static void brw_aub_gtt_cmds( struct intel_context *intel,
-			      GLuint offset,
-			      const void *data,
-			      GLuint sz )
-{
-   struct brw_context *brw = brw_context(&intel->ctx);
-   struct aub_block_header bh;   
-   GLuint type = CW_PRIMARY_RING_A;
-   
-
-   bh.instruction_type = AUB_BLOCK_HEADER;
-   bh.operation = BH_COMMAND_WRITE;
-   bh.type = type;
-   bh.address_space = ADDR_GTT;
-   bh.pad0 = 0;
-   bh.general_state_type = 0;
-   bh.surface_state_type = 0;
-   bh.pad1 = 0;
-   bh.address = offset;
-   bh.length = sz;
-
-   write_block_header(brw->intel.aub_file, &bh, data, sz);
-}
-
-static void brw_aub_dump_bmp( struct intel_context *intel,
-			      GLuint buffer )
-{
-   struct brw_context *brw = brw_context(&intel->ctx);
-   intelScreenPrivate *intelScreen = brw->intel.intelScreen;
-   struct aub_dump_bmp db;
-   GLuint format;
-
-   if (intelScreen->cpp == 4)
-      format = 0x7;
-   else
-      format = 0x3;
-
-
-   if (buffer == 0) {
-      db.instruction_type = AUB_DUMP_BMP;
-      db.xmin = 0;
-      db.ymin = 0;
-      db.format = format;
-      db.bpp = intelScreen->cpp * 8;
-      db.pitch = intelScreen->front.pitch / intelScreen->cpp;
-      db.xsize = intelScreen->width;
-      db.ysize = intelScreen->height;
-      db.addr = intelScreen->front.offset;
-      db.unknown = 0x0;		/* 4: xmajor tiled, 0: not tiled */
-
-      write_dump_bmp(brw->intel.aub_file, &db);
-   }
-   else {
-      db.instruction_type = AUB_DUMP_BMP;
-      db.xmin = 0;
-      db.ymin = 0;
-      db.format = format;
-      db.bpp = intel->back_region->cpp * 8;
-      db.pitch = intel->back_region->pitch;
-      db.xsize = intel->back_region->pitch;
-      db.ysize = intel->back_region->height;
-      db.addr = intelScreen->back.offset;
-      db.unknown = intel->back_region->tiled ? 0x4 : 0x0;
-
-      write_dump_bmp(brw->intel.aub_file, &db);
-   }
-}
-
-/* Attempt to prevent monster aubfiles by closing and reopening when
- * the state pools wrap.
- */
-static void brw_aub_wrap( struct intel_context *intel )
-{
-   struct brw_context *brw = brw_context(&intel->ctx);   
-   if (intel->aub_file) {
-      brw_aub_destroy(brw);
-      brw_aub_init(brw);
-   }
-   brw->wrap = 1;		/* ??? */
-}
-
-
-int brw_aub_init( struct brw_context *brw )
-{
-   struct intel_context *intel = &brw->intel;
-   intelScreenPrivate *intelScreen = intel->intelScreen;
-   char filename[80];
-   int val;
-   static int i = 0;
-
-   i++;
-
-   if (_mesa_getenv("INTEL_REPLAY"))
-      return 0;
-
-   if (_mesa_getenv("INTEL_AUBFILE")) {
-      val = snprintf(filename, sizeof(filename), "%s%d.aub", _mesa_getenv("INTEL_AUBFILE"), i%4);
-      _mesa_printf("--> Aub file: %s\n", filename);
-      brw->intel.aub_file = fopen(filename, "w");
-   }
-   else if (_mesa_getenv("INTEL_AUB")) {
-      val = snprintf(filename, sizeof(filename), "%s.aub", __progname);
-      if (val < 0 || val > sizeof(filename)) 
-	 strcpy(filename, "default.aub");   
-   
-      _mesa_printf("--> Aub file: %s\n", filename);
-      brw->intel.aub_file = fopen(filename, "w");
-   }
-   else {
-      return 0;
-   }
-
-   if (!brw->intel.aub_file) {
-      _mesa_printf("couldn't open aubfile\n");
-      exit(1);
-   }
-
-   brw->intel.vtbl.aub_commands = brw_aub_gtt_cmds;
-   brw->intel.vtbl.aub_dump_bmp = brw_aub_dump_bmp;
-   brw->intel.vtbl.aub_gtt_data = brw_aub_gtt_data;
-   brw->intel.vtbl.aub_wrap = brw_aub_wrap;
-   
-   init_aubfile(brw->intel.aub_file);
-
-   /* The GTT is located starting address zero in main memory.  Pages
-    * to populate the gtt start after this point.
-    */
-   brw->next_free_page = (NR_GTT_ENTRIES * 4 + 4095) & ~4095;
-
-   /* More or less correspond with all the agp regions mapped by the
-    * driver:
-    */
-   init_aub_gtt(brw, 0, 4096*4); /* so new fulsim doesn't crash */
-   init_aub_gtt(brw, intelScreen->front.offset, intelScreen->back.size);
-   init_aub_gtt(brw, intelScreen->back.offset, intelScreen->back.size);
-   init_aub_gtt(brw, intelScreen->depth.offset, intelScreen->back.size);
-   init_aub_gtt(brw, intelScreen->tex.offset, intelScreen->tex.size);
-
-   return 0;
-}
-
-void brw_aub_destroy( struct brw_context *brw )
-{
-   if (brw->intel.aub_file) {
-      fclose(brw->intel.aub_file);
-      brw->intel.aub_file = NULL;
-   }
-}
diff --git a/i965/brw_aub.h b/i965/brw_aub.h
deleted file mode 100644
index 198e36d..0000000
--- a/i965/brw_aub.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
- develop this 3D driver.
- 
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
- 
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
- 
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- **********************************************************************/
- /*
-  * Authors:
-  *   Keith Whitwell <keith@tungstengraphics.com>
-  */
-
-#ifndef BRW_AUB_H
-#define BRW_AUB_H
-
-struct aub_file_header {
-   unsigned int instruction_type;
-   unsigned int pad0:16;
-   unsigned int minor:8;
-   unsigned int major:8;
-   unsigned char application[8*4];
-   unsigned int day:8;
-   unsigned int month:8;
-   unsigned int year:16;
-   unsigned int timezone:8;
-   unsigned int second:8;
-   unsigned int minute:8;
-   unsigned int hour:8;
-   unsigned int comment_length:16;   
-   unsigned int pad1:16;
-};
-
-struct aub_block_header {
-   unsigned int instruction_type;
-   unsigned int operation:8;
-   unsigned int type:8;
-   unsigned int address_space:8;
-   unsigned int pad0:8;
-   unsigned int general_state_type:8;
-   unsigned int surface_state_type:8;
-   unsigned int pad1:16;
-   unsigned int address;
-   unsigned int length;
-};
-
-struct aub_dump_bmp {
-   unsigned int instruction_type;
-   unsigned int xmin:16;
-   unsigned int ymin:16;
-   unsigned int pitch:16;
-   unsigned int bpp:8;
-   unsigned int format:8;
-   unsigned int xsize:16;
-   unsigned int ysize:16;
-   unsigned int addr;
-   unsigned int unknown;
-};
-
-enum bh_operation {
-   BH_COMMENT,
-   BH_DATA_WRITE,
-   BH_COMMAND_WRITE,
-   BH_MMI0_WRITE32,
-   BH_END_SCENE,
-   BH_CONFIG_MEMORY_MAP,
-   BH_MAX_OPERATION
-};
-
-enum command_write_type {
-   CW_HWB_RING = 1,
-   CW_PRIMARY_RING_A,
-   CW_PRIMARY_RING_B,		/* XXX - disagreement with listaub! */
-   CW_PRIMARY_RING_C,
-   CW_MAX_TYPE
-};
-
-enum data_write_type {
-   DW_NOTYPE,
-   DW_BATCH_BUFFER,
-   DW_BIN_BUFFER,
-   DW_BIN_POINTER_LIST,
-   DW_SLOW_STATE_BUFFER,
-   DW_VERTEX_BUFFER,
-   DW_2D_MAP,
-   DW_CUBE_MAP,
-   DW_INDIRECT_STATE_BUFFER,
-   DW_VOLUME_MAP,
-   DW_1D_MAP,
-   DW_CONSTANT_BUFFER,
-   DW_CONSTANT_URB_ENTRY,
-   DW_INDEX_BUFFER,
-   DW_GENERAL_STATE,
-   DW_SURFACE_STATE,
-   DW_MEDIA_OBJECT_INDIRECT_DATA,
-   DW_MAX_TYPE
-};
-
-enum data_write_general_state_type {
-   DWGS_NOTYPE,
-   DWGS_VERTEX_SHADER_STATE,
-   DWGS_GEOMETRY_SHADER_STATE ,
-   DWGS_CLIPPER_STATE,
-   DWGS_STRIPS_FANS_STATE,
-   DWGS_WINDOWER_IZ_STATE,
-   DWGS_COLOR_CALC_STATE,
-   DWGS_CLIPPER_VIEWPORT_STATE,	/* was 0x7 */
-   DWGS_STRIPS_FANS_VIEWPORT_STATE,
-   DWGS_COLOR_CALC_VIEWPORT_STATE, /* was 0x9 */
-   DWGS_SAMPLER_STATE,
-   DWGS_KERNEL_INSTRUCTIONS,
-   DWGS_SCRATCH_SPACE,
-   DWGS_SAMPLER_DEFAULT_COLOR,
-   DWGS_INTERFACE_DESCRIPTOR,
-   DWGS_VLD_STATE,
-   DWGS_VFE_STATE,
-   DWGS_MAX_TYPE
-};
-
-enum data_write_surface_state_type {
-   DWSS_NOTYPE,
-   DWSS_BINDING_TABLE_STATE,
-   DWSS_SURFACE_STATE,
-   DWSS_MAX_TYPE
-};
-
-enum memory_map_type {
-   MM_DEFAULT,
-   MM_DYNAMIC,
-   MM_MAX_TYPE
-};
-
-enum address_space {
-   ADDR_GTT,
-   ADDR_LOCAL,
-   ADDR_MAIN,
-   ADDR_MAX
-};
-
-
-#define AUB_FILE_HEADER 0xe085000b
-#define AUB_BLOCK_HEADER 0xe0c10003
-#define AUB_DUMP_BMP 0xe09e0004
-
-struct brw_context;
-struct intel_context;
-
-int brw_aub_init( struct brw_context *brw );
-void brw_aub_destroy( struct brw_context *brw );
-
-int brw_playback_aubfile(struct brw_context *brw,
-			 const char *filename);
-
-#endif
diff --git a/i965/brw_aub_playback.c b/i965/brw_aub_playback.c
deleted file mode 100644
index 99d9475..0000000
--- a/i965/brw_aub_playback.c
+++ /dev/null
@@ -1,446 +0,0 @@
-
-#include <stdio.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <fcntl.h>
-
-#include "brw_aub.h"
-#include "brw_defines.h"
-#include "brw_context.h"
-#include "intel_ioctl.h"
-#include "bufmgr.h"
-
-struct aub_state {
-   struct intel_context *intel;
-   const char *map;
-   unsigned int csr;
-   unsigned int sz;
-};
-
-
-static int gobble( struct aub_state *s, int size )
-{
-   if (s->csr + size > s->sz) {
-      _mesa_printf("EOF in %s\n", __FUNCTION__);
-      return 1;
-   }
-
-   s->csr += size;
-   return 0;
-}
-
-static void flush_and_fence( struct aub_state *s )
-{
-   struct intel_context *intel = s->intel;
-   GLuint buf[2];
-
-   buf[0] = intel->vtbl.flush_cmd();
-   buf[1] = 0;
-
-   intel_cmd_ioctl(intel, (char *)&buf, sizeof(buf));
-      
-   intelWaitIrq( intel, intelEmitIrqLocked( intel ));
-}
-
-static void flush_cmds( struct aub_state *s,
-			const void *data,
-			int len )
-{
-   DBG("%s %d\n", __FUNCTION__, len);
-
-   if (len & 0x4) {
-      unsigned int *tmp = malloc(len + 4);
-      DBG("padding to octword\n");
-      memcpy(tmp, data, len);
-      tmp[len/4] = MI_NOOP;
-      flush_cmds(s, tmp, len+4);
-      free(tmp);
-      return;
-   }
-
-   /* For ring data, just send off immediately via an ioctl.
-    * This differs slightly from how the stream was executed
-    * initially as this would have been a batchbuffer.
-    */
-   intel_cmd_ioctl(s->intel, (void *)data, len);
-
-   if (1)
-      flush_and_fence(s);
-}
-
-static const char *pstrings[] = {
-   "none",
-   "POINTLIST",
-   "LINELIST",
-   "LINESTRIP",
-   "TRILIST",
-   "TRISTRIP",
-   "TRIFAN",
-   "QUADLIST",
-   "QUADSTRIP",
-   "LINELIST_ADJ",
-   "LINESTRIP_ADJ",
-   "TRILIST_ADJ",
-   "TRISTRIP_ADJ",
-   "TRISTRIP_REVERSE",
-   "POLYGON",
-   "RECTLIST",
-   "LINELOOP",
-   "POINTLIST_BF",
-   "LINESTRIP_CONT",
-   "LINESTRIP_BF",
-   "LINESTRIP_CONT_BF",
-   "TRIFAN_NOSTIPPLE",
-};
-
-static void do_3d_prim( struct aub_state *s,
-			const void *data,
-			int len )
-{
-   struct brw_3d_primitive prim;
-   const struct brw_3d_primitive *orig = data;
-   int i;
-
-   assert(len == sizeof(prim));
-   memcpy(&prim, data, sizeof(prim));
-
-#define START 0
-#define BLOCK (12*28)
-
-   if (orig->verts_per_instance < BLOCK)
-      flush_cmds(s, &prim, sizeof(prim));
-   else {
-      for (i = START; i + BLOCK < orig->verts_per_instance; i += BLOCK/2) {
-	 prim.start_vert_location = i;
-	 prim.verts_per_instance = BLOCK;
-	 _mesa_printf("%sprim %d/%s verts %d..%d (of %d)\n", 
-		      prim.header.indexed ? "INDEXED " : "",
-		      prim.header.topology, pstrings[prim.header.topology%16],
-		      prim.start_vert_location, 
-		      prim.start_vert_location + prim.verts_per_instance,
-		      orig->verts_per_instance);
-	 flush_cmds(s, &prim, sizeof(prim));
-      }
-   }
-}
-
-
-
-static struct {
-   int cmd;
-   const char *name;
-   int has_length;
-} cmd_info[] = {
-   { 0, "NOOP", 0 },
-   { 0x5410, "XY_COLOR_BLT_RGB", 1 },
-   { 0x5430, "XY_COLOR_BLT_RGBA", 1 },
-   { 0x54d0, "XY_SRC_COPY_BLT_RGB", 1 },
-   { 0x54f0, "XY_SRC_COPY_BLT_RGBA", 1 },
-   { CMD_URB_FENCE, "URB_FENCE",  1 },
-   { CMD_CONST_BUFFER_STATE, "CONST_BUFFER_STATE",  1 },
-   { CMD_CONST_BUFFER, "CONST_BUFFER",  1 },
-   { CMD_STATE_BASE_ADDRESS, "STATE_BASE_ADDRESS",  1 },
-   { CMD_STATE_INSN_POINTER, "STATE_INSN_POINTER",  1 },
-   { CMD_PIPELINE_SELECT_965, "PIPELINE_SELECT", 0, },
-   { CMD_PIPELINE_SELECT_IGD, "PIPELINE_SELECT", 0,},
-   { CMD_PIPELINED_STATE_POINTERS, "PIPELINED_STATE_POINTERS", 1 },
-   { CMD_BINDING_TABLE_PTRS, "BINDING_TABLE_PTRS", 1 },
-   { CMD_VERTEX_BUFFER, "VERTEX_BUFFER", 1 },
-   { CMD_VERTEX_ELEMENT, "VERTEX_ELEMENT", 1 },
-   { CMD_INDEX_BUFFER, "INDEX_BUFFER", 1 },
-   { CMD_VF_STATISTICS_965, "VF_STATISTICS", 0 },
-   { CMD_VF_STATISTICS_IGD, "VF_STATISTICS", 0 },
-   { CMD_DRAW_RECT, "DRAW_RECT", 1 },
-   { CMD_BLEND_CONSTANT_COLOR, "BLEND_CONSTANT_COLOR", 1 },
-   { CMD_CHROMA_KEY, "CHROMA_KEY", 1 },
-   { CMD_DEPTH_BUFFER, "DEPTH_BUFFER", 1 },
-   { CMD_POLY_STIPPLE_OFFSET, "POLY_STIPPLE_OFFSET", 1 },
-   { CMD_POLY_STIPPLE_PATTERN, "POLY_STIPPLE_PATTERN", 1 },
-   { CMD_LINE_STIPPLE_PATTERN, "LINE_STIPPLE_PATTERN", 1 },
-   { CMD_AA_LINE_PARAMETERS, "AA_LINE_PARAMETERS", 1},
-   { CMD_GLOBAL_DEPTH_OFFSET_CLAMP, "GLOBAL_DEPTH_OFFSET_CLAMP", 1 },
-   { CMD_PIPE_CONTROL, "PIPE_CONTROL", 1 },
-   { CMD_MI_FLUSH, "MI_FLUSH", 0 },
-   { CMD_3D_PRIM, "3D_PRIM", 1 },
-};
-
-#define NR_CMDS (sizeof(cmd_info)/sizeof(cmd_info[0]))
-
-
-static int find_command( unsigned int cmd )
-{
-   int i;
-
-   for (i = 0; i < NR_CMDS; i++) 
-      if (cmd == cmd_info[i].cmd) 
-	 return i;
-
-   return -1;
-}
-
-
-
-static int parse_commands( struct aub_state *s,
-			   const unsigned int *data,
-			   int len )
-{
-   while (len) {
-      int cmd = data[0] >> 16;
-      int dwords;
-      int i;
-
-      i = find_command(cmd);
-
-      if (i < 0) {
-	 _mesa_printf("couldn't find info for cmd %x\n", cmd);
-	 return 1;
-      }
-
-      if (cmd_info[i].has_length)
-	 dwords = (data[0] & 0xff) + 2;
-      else
-	 dwords = 1;
-
-      _mesa_printf("%s (%d dwords) 0x%x\n", cmd_info[i].name, dwords, data[0]);
-
-      if (len < dwords * 4) {
-	 _mesa_printf("EOF in %s (%d bytes)\n", __FUNCTION__, len);
-	 return 1;
-      }
-
-
-      if (0 && cmd == CMD_3D_PRIM)
-	 do_3d_prim(s, data, dwords * 4);
-      else
-	 flush_cmds(s, data, dwords * 4);
-
-      data += dwords;
-      len -= dwords * 4;
-   }
-
-   return 0;
-}
-
-
-
-static void parse_data_write( struct aub_state *s,
-			     const struct aub_block_header *bh,
-			     void *dest,
-			     const unsigned int *data,
-			     int len )
-{
-   switch (bh->type) {
-   case DW_GENERAL_STATE:
-      switch (bh->general_state_type) {
-      case DWGS_VERTEX_SHADER_STATE: {
-	 struct brw_vs_unit_state vs;
-	 assert(len == sizeof(vs));
-
-	 _mesa_printf("DWGS_VERTEX_SHADER_STATE\n");
-	 memcpy(&vs, data, sizeof(vs));
-
-/* 	 vs.vs6.vert_cache_disable = 1;  */
-/*  	 vs.thread4.max_threads = 4;  */
-
-	 memcpy(dest, &vs, sizeof(vs));
-	 return;
-      }
-      case DWGS_CLIPPER_STATE: {
-	 struct brw_clip_unit_state clip;
-	 assert(len == sizeof(clip));
-
-	 _mesa_printf("DWGS_CLIPPER_STATE\n");
-	 memcpy(&clip, data, sizeof(clip));
-
-/* 	 clip.thread4.max_threads = 0; */
-/*   	 clip.clip5.clip_mode = BRW_CLIPMODE_REJECT_ALL;   */
-
-	 memcpy(dest, &clip, sizeof(clip));
-	 return;
-      }
-
-      case DWGS_NOTYPE:
-      case DWGS_GEOMETRY_SHADER_STATE:
-      case DWGS_STRIPS_FANS_STATE:
-	 break;
-
-      case DWGS_WINDOWER_IZ_STATE: {
-	    struct brw_wm_unit_state wm;
-	    assert(len == sizeof(wm));
-
-	    _mesa_printf("DWGS_WINDOWER_IZ_STATE\n");
-	    memcpy(&wm, data, sizeof(wm));
-
-/* 	    wm.wm5.max_threads = 10; */
-
-	    memcpy(dest, &wm, sizeof(wm));
-	    return;
-	 }
-
-      case DWGS_COLOR_CALC_STATE:
-      case DWGS_CLIPPER_VIEWPORT_STATE:
-      case DWGS_STRIPS_FANS_VIEWPORT_STATE:
-      case DWGS_COLOR_CALC_VIEWPORT_STATE:
-      case DWGS_SAMPLER_STATE:
-      case DWGS_KERNEL_INSTRUCTIONS:
-      case DWGS_SCRATCH_SPACE:
-      case DWGS_SAMPLER_DEFAULT_COLOR:
-      case DWGS_INTERFACE_DESCRIPTOR:
-      case DWGS_VLD_STATE:
-      case DWGS_VFE_STATE:
-      default:
-	 break;
-      }
-      break;
-   case DW_SURFACE_STATE:
-      break;
-   case DW_1D_MAP:
-   case DW_2D_MAP:
-   case DW_CUBE_MAP:
-   case DW_VOLUME_MAP:
-   case DW_CONSTANT_BUFFER:
-   case DW_CONSTANT_URB_ENTRY:
-   case DW_VERTEX_BUFFER:
-   case DW_INDEX_BUFFER:
-   default:
-      break;
-   }
-
-   memcpy(dest, data, len);
-}
-
-
-/* In order to work, the memory layout has to be the same as the X
- * server which created the aubfile.
- */
-static int parse_block_header( struct aub_state *s )
-{
-   struct aub_block_header *bh = (struct aub_block_header *)(s->map + s->csr);
-   void *data = (void *)(bh + 1);
-   unsigned int len = (bh->length + 3) & ~3;
-
-   _mesa_printf("block header at 0x%x\n", s->csr);
-
-   if (s->csr + len + sizeof(*bh) > s->sz) {
-      _mesa_printf("EOF in data in %s\n", __FUNCTION__);
-      return 1;
-   }
-
-   if (bh->address_space == ADDR_GTT) {
-
-      switch (bh->operation)
-      {
-      case BH_DATA_WRITE: {
-	 void *dest = bmFindVirtual( s->intel, bh->address, len );
-	 if (dest == NULL) {
-	    _mesa_printf("Couldn't find virtual address for offset %x\n", bh->address);
-	    return 1;
-	 }
-
-#if 1
-	 parse_data_write(s, bh, dest, data, len);
-#else
-	 memcpy(dest, data, len);
-#endif
-	 break;
-      }
-      case BH_COMMAND_WRITE:
-#if 0
-	 intel_cmd_ioctl(s->intel, (void *)data, len);
-#else
-	 if (parse_commands(s, data, len) != 0)
-	    _mesa_printf("parse_commands failed\n");
-#endif
-	 break;
-      default:
-	 break;
-      }
-   }
-
-   s->csr += sizeof(*bh) + len;
-   return 0;
-}
-
-
-#define AUB_FILE_HEADER 0xe085000b
-#define AUB_BLOCK_HEADER 0xe0c10003
-#define AUB_DUMP_BMP 0xe09e0004
-
-int brw_playback_aubfile(struct brw_context *brw,
-			 const char *filename)
-{
-   struct intel_context *intel = &brw->intel;
-   struct aub_state state;
-   struct stat sb;
-   int fd;
-   int retval = 0;
-
-   state.intel = intel;
-
-   fd = open(filename, O_RDONLY, 0);
-   if (fd < 0) {
-      _mesa_printf("couldn't open aubfile: %s\n", filename);
-      return 1;
-   }
-
-   if (fstat(fd, &sb) != 0) {
-      _mesa_printf("couldn't open %s\n", filename);
-      return 1;
-   }
-
-   state.csr = 0;
-   state.sz = sb.st_size;
-   state.map = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
-   
-   if (state.map == NULL) {
-      _mesa_printf("couldn't mmap %s\n", filename);
-      return 1;
-   }
-
-   LOCK_HARDWARE(intel); 
-   {
-      /* Make sure we don't confuse anything that might happen to be
-       * going on with the hardware:
-       */
-/*       bmEvictAll(intel); */
-/*       intel->vtbl.lost_hardware(intel); */
-      
-
-      /* Replay the aubfile item by item: 
-       */
-      while (retval == 0 && 
-	     state.csr != state.sz) {
-	 unsigned int insn = *(unsigned int *)(state.map + state.csr);
-
-	 switch (insn) {
-	 case AUB_FILE_HEADER:
-	    retval = gobble(&state, sizeof(struct aub_file_header));
-	    break;
-	 
-	 case AUB_BLOCK_HEADER:   
-	    retval = parse_block_header(&state);
-	    break;
-	 
-	 case AUB_DUMP_BMP:
-	    retval = gobble(&state, sizeof(struct aub_dump_bmp));
-	    break;
-	 
-	 default:
-	    _mesa_printf("unknown instruction %x\n", insn);
-	    retval = 1;
-	    break;
-	 }
-      }
-   }
-   UNLOCK_HARDWARE(intel);
-   return retval;
-}
-
-
-
-
-
-
-		  
diff --git a/i965/brw_cc.c b/i965/brw_cc.c
index 8a1d152..9d8984f 100644
--- a/i965/brw_cc.c
+++ b/i965/brw_cc.c
@@ -37,7 +37,7 @@
 #include "macros.h"
 #include "enums.h"
 
-static void upload_cc_vp( struct brw_context *brw )
+static int upload_cc_vp( struct brw_context *brw )
 {
    struct brw_cc_viewport ccv;
 
@@ -46,7 +46,9 @@ static void upload_cc_vp( struct brw_context *brw )
    ccv.min_depth = 0.0;
    ccv.max_depth = 1.0;
 
-   brw->cc.vp_gs_offset = brw_cache_data( &brw->cache[BRW_CC_VP], &ccv );
+   dri_bo_unreference(brw->cc.vp_bo);
+   brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 );
+   return dri_bufmgr_check_aperture_space(brw->cc.vp_bo);
 }
 
 const struct brw_tracked_state brw_cc_vp = {
@@ -55,57 +57,148 @@ const struct brw_tracked_state brw_cc_vp = {
       .brw = BRW_NEW_CONTEXT,
       .cache = 0
    },
-   .update = upload_cc_vp
+   .prepare = upload_cc_vp
 };
 
+struct brw_cc_unit_key {
+   GLboolean stencil, stencil_two_side, color_blend, alpha_enabled;
 
-static void upload_cc_unit( struct brw_context *brw )
+   GLenum stencil_func[2], stencil_fail_op[2];
+   GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2];
+   GLubyte stencil_ref[2], stencil_write_mask[2], stencil_test_mask[2];
+   GLenum logic_op;
+
+   GLenum blend_eq_rgb, blend_eq_a;
+   GLenum blend_src_rgb, blend_src_a;
+   GLenum blend_dst_rgb, blend_dst_a;
+
+   GLenum alpha_func;
+   GLclampf alpha_ref;
+
+   GLboolean dither;
+
+   GLboolean depth_test, depth_write;
+   GLenum depth_func;
+};
+
+static void
+cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key)
+{
+   struct gl_stencil_attrib *stencil = brw->attribs.Stencil;
+
+   memset(key, 0, sizeof(*key));
+
+   key->stencil = stencil->Enabled;
+   key->stencil_two_side = stencil->_TestTwoSide;
+
+   if (key->stencil) {
+      key->stencil_func[0] = stencil->Function[0];
+      key->stencil_fail_op[0] = stencil->FailFunc[0];
+      key->stencil_pass_depth_fail_op[0] = stencil->ZFailFunc[0];
+      key->stencil_pass_depth_pass_op[0] = stencil->ZPassFunc[0];
+      key->stencil_ref[0] = stencil->Ref[0];
+      key->stencil_write_mask[0] = stencil->WriteMask[0];
+      key->stencil_test_mask[0] = stencil->ValueMask[0];
+   }
+   if (key->stencil_two_side) {
+      key->stencil_func[1] = stencil->Function[1];
+      key->stencil_fail_op[1] = stencil->FailFunc[1];
+      key->stencil_pass_depth_fail_op[1] = stencil->ZFailFunc[1];
+      key->stencil_pass_depth_pass_op[1] = stencil->ZPassFunc[1];
+      key->stencil_ref[1] = stencil->Ref[1];
+      key->stencil_write_mask[1] = stencil->WriteMask[1];
+      key->stencil_test_mask[1] = stencil->ValueMask[1];
+   }
+
+   if (brw->attribs.Color->_LogicOpEnabled)
+      key->logic_op = brw->attribs.Color->LogicOp;
+   else
+      key->logic_op = GL_COPY;
+
+   key->color_blend = brw->attribs.Color->BlendEnabled;
+   if (key->color_blend) {
+      key->blend_eq_rgb = brw->attribs.Color->BlendEquationRGB;
+      key->blend_eq_a = brw->attribs.Color->BlendEquationA;
+      key->blend_src_rgb = brw->attribs.Color->BlendSrcRGB;
+      key->blend_dst_rgb = brw->attribs.Color->BlendDstRGB;
+      key->blend_src_a = brw->attribs.Color->BlendSrcA;
+      key->blend_dst_a = brw->attribs.Color->BlendDstA;
+   }
+
+   key->alpha_enabled = brw->attribs.Color->AlphaEnabled;
+   if (key->alpha_enabled) {
+      key->alpha_func = brw->attribs.Color->AlphaFunc;
+      key->alpha_ref = brw->attribs.Color->AlphaRef;
+   }
+
+   key->dither = brw->attribs.Color->DitherFlag;
+
+   key->depth_test = brw->attribs.Depth->Test;
+   if (key->depth_test) {
+      key->depth_func = brw->attribs.Depth->Func;
+      key->depth_write = brw->attribs.Depth->Mask;
+   }
+}
+
+/**
+ * Creates the state cache entry for the given CC unit key.
+ */
+static dri_bo *
+cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
 {
    struct brw_cc_unit_state cc;
-   
+   dri_bo *bo;
+
    memset(&cc, 0, sizeof(cc));
 
    /* _NEW_STENCIL */
-   if (brw->attribs.Stencil->Enabled) {
-      cc.cc0.stencil_enable = brw->attribs.Stencil->Enabled;
-      cc.cc0.stencil_func = intel_translate_compare_func(brw->attribs.Stencil->Function[0]);
-      cc.cc0.stencil_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->FailFunc[0]);
-      cc.cc0.stencil_pass_depth_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->ZFailFunc[0]);
-      cc.cc0.stencil_pass_depth_pass_op = intel_translate_stencil_op(brw->attribs.Stencil->ZPassFunc[0]);
-      cc.cc1.stencil_ref = brw->attribs.Stencil->Ref[0];
-      cc.cc1.stencil_write_mask = brw->attribs.Stencil->WriteMask[0];
-      cc.cc1.stencil_test_mask = brw->attribs.Stencil->ValueMask[0];
-
-      if (brw->attribs.Stencil->TestTwoSide) {
-	 cc.cc0.bf_stencil_enable = brw->attribs.Stencil->TestTwoSide;
-	 cc.cc0.bf_stencil_func = intel_translate_compare_func(brw->attribs.Stencil->Function[1]);
-	 cc.cc0.bf_stencil_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->FailFunc[1]);
-	 cc.cc0.bf_stencil_pass_depth_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->ZFailFunc[1]);
-	 cc.cc0.bf_stencil_pass_depth_pass_op = intel_translate_stencil_op(brw->attribs.Stencil->ZPassFunc[1]);
-	 cc.cc1.bf_stencil_ref = brw->attribs.Stencil->Ref[1];
-	 cc.cc2.bf_stencil_write_mask = brw->attribs.Stencil->WriteMask[1];
-	 cc.cc2.bf_stencil_test_mask = brw->attribs.Stencil->ValueMask[1];
+   if (key->stencil) {
+      cc.cc0.stencil_enable = 1;
+      cc.cc0.stencil_func =
+	 intel_translate_compare_func(key->stencil_func[0]);
+      cc.cc0.stencil_fail_op =
+	 intel_translate_stencil_op(key->stencil_fail_op[0]);
+      cc.cc0.stencil_pass_depth_fail_op =
+	 intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]);
+      cc.cc0.stencil_pass_depth_pass_op =
+	 intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]);
+      cc.cc1.stencil_ref = key->stencil_ref[0];
+      cc.cc1.stencil_write_mask = key->stencil_write_mask[0];
+      cc.cc1.stencil_test_mask = key->stencil_test_mask[0];
+
+      if (key->stencil_two_side) {
+	 cc.cc0.bf_stencil_enable = 1;
+	 cc.cc0.bf_stencil_func =
+	    intel_translate_compare_func(key->stencil_func[1]);
+	 cc.cc0.bf_stencil_fail_op =
+	    intel_translate_stencil_op(key->stencil_fail_op[1]);
+	 cc.cc0.bf_stencil_pass_depth_fail_op =
+	    intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]);
+	 cc.cc0.bf_stencil_pass_depth_pass_op =
+	    intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]);
+	 cc.cc1.bf_stencil_ref = key->stencil_ref[1];
+	 cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1];
+	 cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1];
       }
 
       /* Not really sure about this:
        */
-      if (brw->attribs.Stencil->WriteMask[0] ||
-	  (brw->attribs.Stencil->TestTwoSide && brw->attribs.Stencil->WriteMask[1]))
+      if (key->stencil_write_mask[0] ||
+	  (key->stencil_two_side && key->stencil_write_mask[1]))
 	 cc.cc0.stencil_write_enable = 1;
    }
 
    /* _NEW_COLOR */
-   if (brw->attribs.Color->_LogicOpEnabled) {
+   if (key->logic_op != GL_COPY) {
       cc.cc2.logicop_enable = 1;
-      cc.cc5.logicop_func = intel_translate_logic_op( brw->attribs.Color->LogicOp );
-   }
-   else if (brw->attribs.Color->BlendEnabled) {
-      GLenum eqRGB = brw->attribs.Color->BlendEquationRGB;
-      GLenum eqA = brw->attribs.Color->BlendEquationA;
-      GLenum srcRGB = brw->attribs.Color->BlendSrcRGB;
-      GLenum dstRGB = brw->attribs.Color->BlendDstRGB;
-      GLenum srcA = brw->attribs.Color->BlendSrcA;
-      GLenum dstA = brw->attribs.Color->BlendDstA;
+      cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op);
+   } else if (key->color_blend) {
+      GLenum eqRGB = key->blend_eq_rgb;
+      GLenum eqA = key->blend_eq_a;
+      GLenum srcRGB = key->blend_src_rgb;
+      GLenum dstRGB = key->blend_dst_rgb;
+      GLenum srcA = key->blend_src_a;
+      GLenum dstA = key->blend_dst_a;
 
       if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
 	 srcRGB = dstRGB = GL_ONE;
@@ -115,49 +208,78 @@ static void upload_cc_unit( struct brw_context *brw )
 	 srcA = dstA = GL_ONE;
       }
 
-      cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); 
-      cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); 
-      cc.cc6.blend_function = brw_translate_blend_equation( eqRGB );
+      cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB);
+      cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB);
+      cc.cc6.blend_function = brw_translate_blend_equation(eqRGB);
 
-      cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); 
-      cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); 
-      cc.cc5.ia_blend_function = brw_translate_blend_equation( eqA );
+      cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
+      cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA);
+      cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA);
 
       cc.cc3.blend_enable = 1;
-      cc.cc3.ia_blend_enable = (srcA != srcRGB || 
-				dstA != dstRGB || 
+      cc.cc3.ia_blend_enable = (srcA != srcRGB ||
+				dstA != dstRGB ||
 				eqA != eqRGB);
    }
 
-   if (brw->attribs.Color->AlphaEnabled) {
+   if (key->alpha_enabled) {
       cc.cc3.alpha_test = 1;
-      cc.cc3.alpha_test_func = intel_translate_compare_func(brw->attribs.Color->AlphaFunc);
-
-      UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], brw->attribs.Color->AlphaRef);
-
+      cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func);
       cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
+
+      UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref);
    }
 
-   if (brw->attribs.Color->DitherFlag) {
+   if (key->dither) {
       cc.cc5.dither_enable = 1;
-      cc.cc6.y_dither_offset = 0; 
-      cc.cc6.x_dither_offset = 0;     
+      cc.cc6.y_dither_offset = 0;
+      cc.cc6.x_dither_offset = 0;
    }
 
    /* _NEW_DEPTH */
-   if (brw->attribs.Depth->Test) {
-      cc.cc2.depth_test = brw->attribs.Depth->Test;
-      cc.cc2.depth_test_function = intel_translate_compare_func(brw->attribs.Depth->Func);
-      cc.cc2.depth_write_enable = brw->attribs.Depth->Mask;
+   if (key->depth_test) {
+      cc.cc2.depth_test = 1;
+      cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func);
+      cc.cc2.depth_write_enable = key->depth_write;
    }
- 
+
    /* CACHE_NEW_CC_VP */
-   cc.cc4.cc_viewport_state_offset =  brw->cc.vp_gs_offset >> 5;
- 
+   cc.cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */
+
    if (INTEL_DEBUG & DEBUG_STATS)
-      cc.cc5.statistics_enable = 1; 
+      cc.cc5.statistics_enable = 1;
+
+   bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT,
+			 key, sizeof(*key),
+			 &brw->cc.vp_bo, 1,
+			 &cc, sizeof(cc),
+			 NULL, NULL);
+
+   /* Emit CC viewport relocation */
+   dri_emit_reloc(bo,
+		  DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		  0,
+		  offsetof(struct brw_cc_unit_state, cc4),
+		  brw->cc.vp_bo);
+
+   return bo;
+}
+
+static int prepare_cc_unit( struct brw_context *brw )
+{
+   struct brw_cc_unit_key key;
+
+   cc_unit_populate_key(brw, &key);
+
+   dri_bo_unreference(brw->cc.state_bo);
+   brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_CC_UNIT,
+				       &key, sizeof(key),
+				       &brw->cc.vp_bo, 1,
+				       NULL);
 
-   brw->cc.state_gs_offset = brw_cache_data( &brw->cache[BRW_CC_UNIT], &cc );
+   if (brw->cc.state_bo == NULL)
+      brw->cc.state_bo = cc_unit_create_from_key(brw, &key);
+   return dri_bufmgr_check_aperture_space(brw->cc.state_bo);
 }
 
 const struct brw_tracked_state brw_cc_unit = {
@@ -166,7 +288,7 @@ const struct brw_tracked_state brw_cc_unit = {
       .brw = 0,
       .cache = CACHE_NEW_CC_VP
    },
-   .update = upload_cc_unit
+   .prepare = prepare_cc_unit,
 };
 
 
diff --git a/i965/brw_clip.c b/i965/brw_clip.c
index 8f907be..540108e 100644
--- a/i965/brw_clip.c
+++ b/i965/brw_clip.c
@@ -119,31 +119,19 @@ static void compile_clip_prog( struct brw_context *brw,
 
    /* Upload
     */
-   brw->clip.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_CLIP_PROG],
-						&c.key,
-						sizeof(c.key),
-						program,
-						program_size,
-						&c.prog_data,
-						&brw->clip.prog_data );
+   dri_bo_unreference(brw->clip.prog_bo);
+   brw->clip.prog_bo = brw_upload_cache( &brw->cache,
+					 BRW_CLIP_PROG,
+					 &c.key, sizeof(c.key),
+					 NULL, 0,
+					 program, program_size,
+					 &c.prog_data,
+					 &brw->clip.prog_data );
 }
 
-
-static GLboolean search_cache( struct brw_context *brw, 
-			       struct brw_clip_prog_key *key )
-{
-   return brw_search_cache(&brw->cache[BRW_CLIP_PROG], 
-			   key, sizeof(*key),
-			   &brw->clip.prog_data,
-			   &brw->clip.prog_gs_offset);
-}
-
-
-
-
 /* Calculate interpolants for triangle and line rasterization.
  */
-static void upload_clip_prog( struct brw_context *brw )
+static int upload_clip_prog( struct brw_context *brw )
 {
    GLcontext *ctx = &brw->intel.ctx;
    struct brw_clip_prog_key key;
@@ -180,12 +168,10 @@ static void upload_clip_prog( struct brw_context *brw )
 	       offset_front = 0;
 	       break;
 	    case GL_LINE:
-	       key.do_unfilled = 1;
 	       fill_front = CLIP_LINE;
 	       offset_front = brw->attribs.Polygon->OffsetLine;
 	       break;
 	    case GL_POINT:
-	       key.do_unfilled = 1;
 	       fill_front = CLIP_POINT;
 	       offset_front = brw->attribs.Polygon->OffsetPoint;
 	       break;
@@ -200,26 +186,23 @@ static void upload_clip_prog( struct brw_context *brw )
 	       offset_back = 0;
 	       break;
 	    case GL_LINE:
-	       key.do_unfilled = 1;
 	       fill_back = CLIP_LINE;
 	       offset_back = brw->attribs.Polygon->OffsetLine;
 	       break;
 	    case GL_POINT:
-	       key.do_unfilled = 1;
 	       fill_back = CLIP_POINT;
 	       offset_back = brw->attribs.Polygon->OffsetPoint;
 	       break;
 	    }
 	 }
 
-    if (brw->attribs.Polygon->BackMode != GL_FILL ||
-        brw->attribs.Polygon->FrontMode != GL_FILL)
-        key.do_unfilled = 1;
+	 if (brw->attribs.Polygon->BackMode != GL_FILL ||
+	     brw->attribs.Polygon->FrontMode != GL_FILL) {
+	    key.do_unfilled = 1;
 
-	 /* Most cases the fixed function units will handle.  Cases where
-	  * one or more polygon faces are unfilled will require help:
-	  */
-	 if (key.do_unfilled) {
+	    /* Most cases the fixed function units will handle.  Cases where
+	     * one or more polygon faces are unfilled will require help:
+	     */
 	    key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED;
 
 	    if (offset_back || offset_front) {
@@ -252,8 +235,15 @@ static void upload_clip_prog( struct brw_context *brw )
       }
    }
 
-   if (!search_cache(brw, &key))
+   dri_bo_unreference(brw->clip.prog_bo);
+   brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG,
+					&key, sizeof(key),
+					NULL, 0,
+					&brw->clip.prog_data);
+   if (brw->clip.prog_bo == NULL)
       compile_clip_prog( brw, &key );
+
+   return dri_bufmgr_check_aperture_space(brw->clip.prog_bo);
 }
 
 
@@ -266,5 +256,5 @@ const struct brw_tracked_state brw_clip_prog = {
       .brw   = (BRW_NEW_REDUCED_PRIMITIVE),
       .cache = CACHE_NEW_VS_PROG
    },
-   .update = upload_clip_prog
+   .prepare = upload_clip_prog
 };
diff --git a/i965/brw_clip.h b/i965/brw_clip.h
index 49b2770..e067478 100644
--- a/i965/brw_clip.h
+++ b/i965/brw_clip.h
@@ -42,7 +42,7 @@
  * up polygon offset and flatshading at this point:
  */
 struct brw_clip_prog_key {
-   GLuint attrs:16;		
+   GLuint attrs:32;		
    GLuint primitive:4;
    GLuint nr_userclip:3;
    GLuint do_flat_shading:1;
@@ -51,7 +51,7 @@ struct brw_clip_prog_key {
    GLuint fill_ccw:2;		/* includes cull information */
    GLuint offset_cw:1;
    GLuint offset_ccw:1;
-   GLuint pad0:1;
+   GLuint pad0:17;
 
    GLuint copy_bfc_cw:1;
    GLuint copy_bfc_ccw:1;
@@ -167,4 +167,9 @@ void brw_clip_copy_colors( struct brw_clip_compile *c,
 
 void brw_clip_init_clipmask( struct brw_clip_compile *c );
 
+struct brw_reg get_tmp( struct brw_clip_compile *c );
+
+void brw_clip_project_position(struct brw_clip_compile *c,
+             struct brw_reg pos );
+
 #endif
diff --git a/i965/brw_clip_line.c b/i965/brw_clip_line.c
index 8318227..0930e6a 100644
--- a/i965/brw_clip_line.c
+++ b/i965/brw_clip_line.c
@@ -130,6 +130,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
    struct brw_instruction *plane_loop;
    struct brw_instruction *plane_active;
    struct brw_instruction *is_negative;
+   struct brw_instruction *is_neg2;
    struct brw_instruction *not_culled;
    struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD);
 
@@ -146,6 +147,16 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
    brw_clip_init_planes(c);
    brw_clip_init_clipmask(c);
 
+   /* -ve rhw workaround */
+   if (!(BRW_IS_GM45(p->brw) || BRW_IS_G4X(p->brw))) {
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
+              brw_imm_ud(1<<20));
+      brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f));
+   }
+
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
    plane_loop = brw_DO(p, BRW_EXECUTE_1);
    {
       /* if (planemask & 1)
@@ -183,13 +194,20 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
 	    /* Coming back in.  We know that both cannot be negative
 	     * because the line would have been culled in that case.
 	     */
-	    brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1));
-	    brw_math_invert(p, c->reg.t, c->reg.t);
-	    brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0);
 
-	    brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 );
-	    brw_MOV(p, c->reg.t0, c->reg.t);
-	    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+	    /* If both are positive, do nothing */
+             brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
+             is_neg2 = brw_IF(p, BRW_EXECUTE_1);
+             {
+		brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1));
+		brw_math_invert(p, c->reg.t, c->reg.t);
+		brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0);
+
+		brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 );
+		brw_MOV(p, c->reg.t0, c->reg.t);
+		brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+	     }
+	     brw_ENDIF(p, is_neg2);
 	 }
 	 brw_ENDIF(p, is_negative);	 
       }
diff --git a/i965/brw_clip_state.c b/i965/brw_clip_state.c
index 37a25a9..2d0b24c 100644
--- a/i965/brw_clip_state.c
+++ b/i965/brw_clip_state.c
@@ -34,46 +34,75 @@
 #include "brw_defines.h"
 #include "macros.h"
 
+struct brw_clip_unit_key {
+   unsigned int total_grf;
+   unsigned int urb_entry_read_length;
+   unsigned int curb_entry_read_length;
+   unsigned int clip_mode;
 
+   unsigned int curbe_offset;
 
-static void upload_clip_unit( struct brw_context *brw )
-{
-   struct brw_clip_unit_state clip;
+   unsigned int nr_urb_entries, urb_size;
+};
 
-   memset(&clip, 0, sizeof(clip));
+static void
+clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
+{
+   memset(key, 0, sizeof(*key));
 
    /* CACHE_NEW_CLIP_PROG */
-   clip.thread0.grf_reg_count = ((brw->clip.prog_data->total_grf-1) & ~15) / 16;
-   clip.thread0.kernel_start_pointer = brw->clip.prog_gs_offset >> 6;
-   clip.thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length;
-   clip.thread3.const_urb_entry_read_length = brw->clip.prog_data->curb_read_length;
-   clip.clip5.clip_mode = brw->clip.prog_data->clip_mode;
+   key->total_grf = brw->clip.prog_data->total_grf;
+   key->urb_entry_read_length = brw->clip.prog_data->urb_read_length;
+   key->curb_entry_read_length = brw->clip.prog_data->curb_read_length;
+   key->clip_mode = brw->clip.prog_data->clip_mode;
 
    /* BRW_NEW_CURBE_OFFSETS */
-   clip.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2;
+   key->curbe_offset = brw->curbe.clip_start;
 
    /* BRW_NEW_URB_FENCE */
-   clip.thread4.nr_urb_entries = brw->urb.nr_clip_entries; 
-   clip.thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
-   clip.thread4.max_threads = 0; /* Hmm, maybe the max is 1 or 2 threads */
+   key->nr_urb_entries = brw->urb.nr_clip_entries;
+   key->urb_size = brw->urb.vsize;
+}
 
-   if (INTEL_DEBUG & DEBUG_STATS)
-      clip.thread4.stats_enable = 1; 
+static dri_bo *
+clip_unit_create_from_key(struct brw_context *brw,
+			  struct brw_clip_unit_key *key)
+{
+   struct brw_clip_unit_state clip;
+   dri_bo *bo;
+
+   memset(&clip, 0, sizeof(clip));
+
+   clip.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+   /* reloc */
+   clip.thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6;
 
-   /* CONSTANT */
    clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
    clip.thread1.single_program_flow = 1;
+
+   clip.thread3.urb_entry_read_length = key->urb_entry_read_length;
+   clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
+   clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
    clip.thread3.dispatch_grf_start_reg = 1;
    clip.thread3.urb_entry_read_offset = 0;
+
+   clip.thread4.nr_urb_entries = key->nr_urb_entries;
+   clip.thread4.urb_entry_allocation_size = key->urb_size - 1;
+   clip.thread4.max_threads = 1; /* 2 threads */
+
+   if (INTEL_DEBUG & DEBUG_STATS)
+      clip.thread4.stats_enable = 1;
+
    clip.clip5.userclip_enable_flags = 0x7f;
    clip.clip5.userclip_must_clip = 1;
    clip.clip5.guard_band_enable = 0;
    clip.clip5.viewport_z_clip_enable = 1;
    clip.clip5.viewport_xy_clip_enable = 1;
    clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
-   clip.clip5.api_mode = BRW_CLIP_API_OGL;   
+   clip.clip5.api_mode = BRW_CLIP_API_OGL;
+   clip.clip5.clip_mode = key->clip_mode;
 
-   if (BRW_IS_IGD(brw))
+   if (BRW_IS_GM45(brw) || BRW_IS_G4X(brw))
       clip.clip5.negative_w_clip_test = 1;
 
    clip.clip6.clipper_viewport_state_ptr = 0;
@@ -82,9 +111,42 @@ static void upload_clip_unit( struct brw_context *brw )
    clip.viewport_ymin = -1;
    clip.viewport_ymax = 1;
 
-   brw->clip.state_gs_offset = brw_cache_data( &brw->cache[BRW_CLIP_UNIT], &clip );
+   bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
+			 key, sizeof(*key),
+			 &brw->clip.prog_bo, 1,
+			 &clip, sizeof(clip),
+			 NULL, NULL);
+
+   /* Emit clip program relocation */
+   assert(brw->clip.prog_bo);
+   dri_emit_reloc(bo,
+		  DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		  clip.thread0.grf_reg_count << 1,
+		  offsetof(struct brw_clip_unit_state, thread0),
+		  brw->clip.prog_bo);
+
+   return bo;
 }
 
+static int upload_clip_unit( struct brw_context *brw ) /* .prepare hook of brw_clip_unit: select the CLIP unit state BO */
+{
+   struct brw_clip_unit_key key;
+   int ret = 0;
+
+   clip_unit_populate_key(brw, &key); /* zeroes the key, then fills it from the clip program data, CURBE and URB state */
+
+   dri_bo_unreference(brw->clip.state_bo); /* release the BO selected by the previous call before replacing the pointer */
+   brw->clip.state_bo = brw_search_cache(&brw->cache, BRW_CLIP_UNIT,
+					 &key, sizeof(key),
+					 &brw->clip.prog_bo, 1,
+					 NULL);
+   if (brw->clip.state_bo == NULL) { /* nothing cached for this key + program BO: build and upload a new unit state */
+      brw->clip.state_bo = clip_unit_create_from_key(brw, &key);
+   }
+
+   ret = dri_bufmgr_check_aperture_space(brw->clip.state_bo); /* result is handed back to the caller unchanged */
+   return ret;
+}
 
 const struct brw_tracked_state brw_clip_unit = {
    .dirty = {
@@ -93,5 +155,5 @@ const struct brw_tracked_state brw_clip_unit = {
 		BRW_NEW_URB_FENCE),
       .cache = CACHE_NEW_CLIP_PROG
    },
-   .update = upload_clip_unit
+   .prepare = upload_clip_unit,
 };
diff --git a/i965/brw_clip_tri.c b/i965/brw_clip_tri.c
index 0fc7306..0003901 100644
--- a/i965/brw_clip_tri.c
+++ b/i965/brw_clip_tri.c
@@ -42,6 +42,10 @@
 #include "brw_util.h"
 #include "brw_clip.h"
 
+static void release_tmps( struct brw_clip_compile *c ) /* give back every temporary register in one step */
+{
+   c->last_tmp = c->first_tmp; /* get_tmp() builds its register from c->last_tmp, so this rewinds allocation to first_tmp */
+}
 
 
 void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, 
@@ -435,15 +439,103 @@ static void maybe_do_clip_tri( struct brw_clip_compile *c )
    brw_ENDIF(p, do_clip);
 }
 
-
+static void brw_clip_test( struct brw_clip_compile *c ) /* emitted by brw_emit_tri_clip() inside the "bit 20 of R0 dword 2 is set" IF */
+{
+    struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+    struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+    struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+    struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+
+    struct brw_reg v0 = get_tmp(c);
+    struct brw_reg v1 = get_tmp(c);
+    struct brw_reg v2 = get_tmp(c);
+
+    struct brw_indirect vt0 = brw_indirect(0, 0);
+    struct brw_indirect vt1 = brw_indirect(1, 0);
+    struct brw_indirect vt2 = brw_indirect(2, 0);
+
+    struct brw_compile *p = &c->func;
+
+    brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0]));
+    brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1]));
+    brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2]));
+    brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS])); /* v0 = HPOS of vertex 0 */
+    brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS])); /* v1 = HPOS of vertex 1 */
+    brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS])); /* v2 = HPOS of vertex 2 */
+
+    /* test nearz, xmin, ymin plane */
+    brw_CMP(p, t1, BRW_CONDITIONAL_LE, negate(v0), get_element(v0, 3)); /* per component: -v0 <= v0.w */
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, t2, BRW_CONDITIONAL_LE, negate(v1), get_element(v1, 3)); 
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, t3, BRW_CONDITIONAL_LE, negate(v2), get_element(v2, 3)); 
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_XOR(p, t, t1, t2); /* non-zero where vertices 0 and 1 gave different results */
+    brw_XOR(p, t1, t2, t3); /* non-zero where vertices 1 and 2 gave different results */
+    brw_OR(p, t, t, t1); /* non-zero where any two vertices disagree */
+
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, 
+	    get_element(t, 0), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5))); /* x disagreement -> bit 5; presumably predicated on the CMP above - TODO confirm */
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, 
+	    get_element(t, 1), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3))); /* y disagreement -> bit 3 */
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, 
+	    get_element(t, 2), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1))); /* z disagreement -> bit 1 */
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+    /* test farz, xmax, ymax plane */
+    brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, get_element(v0, 3)); /* per component: v0 < v0.w */
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, get_element(v1, 3)); 
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, get_element(v2, 3)); 
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+    brw_XOR(p, t, t1, t2);
+    brw_XOR(p, t1, t2, t3);
+    brw_OR(p, t, t, t1); /* same disagreement mask as above, for the second comparison */
+
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, 
+	    get_element(t, 0), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4))); /* x disagreement -> bit 4 */
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, 
+	    get_element(t, 1), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2))); /* y disagreement -> bit 2 */
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, 
+	    get_element(t, 2), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0))); /* z disagreement -> bit 0 */
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+    release_tmps(c); /* resets c->last_tmp to c->first_tmp */
+}
 
 
 void brw_emit_tri_clip( struct brw_clip_compile *c )
 {
+   struct brw_instruction *neg_rhw;
+   struct brw_compile *p = &c->func;
    brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
    brw_clip_tri_init_vertices(c);
    brw_clip_init_clipmask(c);
 
+   /* if -ve rhw workaround bit is set, 
+      do cliptest */
+   if (!(BRW_IS_GM45(p->brw) || BRW_IS_G4X(p->brw))) {
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), 
+              brw_imm_ud(1<<20));
+      neg_rhw = brw_IF(p, BRW_EXECUTE_1); 
+      {
+         brw_clip_test(c);
+      }
+      brw_ENDIF(p, neg_rhw);
+   }
    /* Can't push into do_clip_tri because with polygon (or quad)
     * flatshading, need to apply the flatshade here because we don't
     * respect the PV when converting to trifan for emit:
@@ -462,6 +554,3 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
     */
    brw_clip_kill_thread(c);
 }
-
-
-
diff --git a/i965/brw_clip_unfilled.c b/i965/brw_clip_unfilled.c
index 918e000..6f20d79 100644
--- a/i965/brw_clip_unfilled.c
+++ b/i965/brw_clip_unfilled.c
@@ -58,10 +58,30 @@ static void compute_tri_direction( struct brw_clip_compile *c )
    struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]); 
 
 
+   struct brw_reg v0n = get_tmp(c);
+   struct brw_reg v1n = get_tmp(c);
+   struct brw_reg v2n = get_tmp(c);
+
+   /* Convert to NDC.
+    * NOTE: We can't modify the original vertex coordinates,
+    * as it may impact further operations.
+    * So, we have to keep normalized coordinates in temp registers.
+    *
+    * TBD-KC
+    * Try to optimize away the unnecessary MOVs.
+    */
+   brw_MOV(p, v0n, v0);
+   brw_MOV(p, v1n, v1);
+   brw_MOV(p, v2n, v2);
+
+   brw_clip_project_position(c, v0n);
+   brw_clip_project_position(c, v1n);
+   brw_clip_project_position(c, v2n);
+
    /* Calculate the vectors of two edges of the triangle:
     */
-   brw_ADD(p, e, v0, negate(v2)); 
-   brw_ADD(p, f, v1, negate(v2)); 
+   brw_ADD(p, e, v0n, negate(v2n)); 
+   brw_ADD(p, f, v1n, negate(v2n)); 
 
    /* Take their crossproduct:
     */
@@ -220,8 +240,8 @@ static void apply_one_offset( struct brw_clip_compile *c,
 			  struct brw_indirect vert )
 {
    struct brw_compile *p = &c->func;
-   struct brw_reg pos = deref_4f(vert, c->offset[VERT_RESULT_HPOS]);
-   struct brw_reg z = get_element(pos, 2);
+   struct brw_reg z = deref_1f(vert, c->header_position_offset +
+			       2 * type_sz(BRW_REGISTER_TYPE_F));
 
    brw_ADD(p, z, z, vec1(c->reg.offset));
 }
diff --git a/i965/brw_clip_util.c b/i965/brw_clip_util.c
index 41d9b75..c32bd4e 100644
--- a/i965/brw_clip_util.c
+++ b/i965/brw_clip_util.c
@@ -46,8 +46,7 @@
 
 
 
-
-static struct brw_reg get_tmp( struct brw_clip_compile *c )
+struct brw_reg get_tmp( struct brw_clip_compile *c )
 {
    struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0);
 
@@ -90,7 +89,7 @@ void brw_clip_init_planes( struct brw_clip_compile *c )
 
 /* Project 'pos' to screen space (or back again), overwrite with results:
  */
-static void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos )
+void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos )
 {
    struct brw_compile *p = &c->func;
 
@@ -272,6 +271,7 @@ void brw_clip_kill_thread(struct brw_clip_compile *c)
 
 
 
+
 struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c )
 {
    return brw_address(c->reg.fixed_planes);
@@ -327,8 +327,7 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c )
    
    /* Shift so that lowest outcode bit is rightmost: 
     */
-   brw_MOV(p, c->reg.planemask, incoming);
-   brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(26));
+   brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26));
 
    if (c->key.nr_userclip) {
       struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD);
@@ -342,15 +341,5 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c )
       
       release_tmp(c, tmp);
    }
-
-   if (!BRW_IS_IGD(p->brw)) {
-       /* Test for -ve rhw workaround 
-        */
-       brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
-       brw_AND(p, vec1(brw_null_reg()), incoming, brw_imm_ud(1<<20));
-       brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f));
-   }
-
-   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 }
 
diff --git a/i965/brw_context.c b/i965/brw_context.c
index 397a9bd..1c7ad5c 100644
--- a/i965/brw_context.c
+++ b/i965/brw_context.c
@@ -31,7 +31,6 @@
 
 
 #include "brw_context.h"
-#include "brw_aub.h"
 #include "brw_defines.h"
 #include "brw_draw.h"
 #include "brw_vs.h"
@@ -39,33 +38,35 @@
 #include "intel_tex.h"
 #include "intel_blit.h"
 #include "intel_batchbuffer.h"
+#include "intel_pixel.h"
+#include "intel_span.h"
+#include "tnl/t_pipeline.h"
 
 #include "utils.h"
 #include "api_noop.h"
 #include "vtxfmt.h"
 
+#include "shader/shader_api.h"
+
 /***************************************
  * Mesa's Driver Functions
  ***************************************/
 
-static const struct dri_extension brw_extensions[] =
+static void brwUseProgram(GLcontext *ctx, GLuint program)
 {
-    { "GL_ARB_depth_texture",              NULL },
-    { "GL_ARB_fragment_program",           NULL },
-    { "GL_ARB_shadow",                     NULL },
-    { "GL_EXT_shadow_funcs",               NULL },
-    /* ARB extn won't work if not enabled */
-    { "GL_SGIX_depth_texture",             NULL },
-    { "GL_ARB_texture_env_crossbar",       NULL },
-    { NULL,                                NULL }
-};
-
+   _mesa_use_program(ctx, program);
+}
 
+static void brwInitProgFuncs( struct dd_function_table *functions ) /* install the driver's program hook in the function table */
+{
+   functions->UseProgram = brwUseProgram; /* brwUseProgram() forwards to _mesa_use_program() */
+}
 static void brwInitDriverFunctions( struct dd_function_table *functions )
 {
    intelInitDriverFunctions( functions );
-   brwInitTextureFuncs( functions );
+
    brwInitFragProgFuncs( functions );
+   brwInitProgFuncs( functions );
 }
 
 
@@ -116,10 +117,15 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
       return GL_FALSE;
    }
 
+   /* Initialize swrast, tnl driver tables: */
+   intelInitSpanFuncs(ctx);
+
+   TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
+
    ctx->Const.MaxTextureUnits = BRW_MAX_TEX_UNIT;
    ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
    ctx->Const.MaxTextureCoordUnits = BRW_MAX_TEX_UNIT;
-
+   ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */
 
    /* Advertise the full hardware capabilities.  The new memory
     * manager should cope much better with overload situations:
@@ -132,11 +138,6 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
    
 /*    ctx->Const.MaxNativeVertexProgramTemps = 32; */
 
-
-   driInitExtensions( ctx, brw_extensions, GL_FALSE );
-
-   brw_aub_init( brw );
-
    brw_init_attribs( brw );
    brw_init_metaops( brw );
    brw_init_state( brw );
@@ -144,8 +145,6 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
    brw->state.dirty.mesa = ~0;
    brw->state.dirty.brw = ~0;
 
-   memset(&brw->wm.bind, ~0, sizeof(brw->wm.bind));
-
    brw->emit_state_always = 0;
 
    ctx->FragmentProgram._MaintainTexEnvProgram = 1;
@@ -154,16 +153,6 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
 
    brw_ProgramCacheInit( ctx );
 
-   brw_FrameBufferTexInit( brw );
-
-   {
-      const char *filename = getenv("INTEL_REPLAY");
-      if (filename) {
-	 brw_playback_aubfile(brw, filename);
-	 exit(0);
-      }
-   }
-
    return GL_TRUE;
 }
 
diff --git a/i965/brw_context.h b/i965/brw_context.h
index 08fdc54..32e0554 100644
--- a/i965/brw_context.h
+++ b/i965/brw_context.h
@@ -135,13 +135,23 @@ struct brw_context;
 #define BRW_NEW_METAOPS                 0x1000
 #define BRW_NEW_FENCE                   0x2000
 #define BRW_NEW_LOCK                    0x4000
-
-
+/**
+ * Used for any batch entry with a relocated pointer that will be used
+ * by any 3D rendering.
+ */
+#define BRW_NEW_BATCH			0x8000
+/** brw->depth_region updated */
+#define BRW_NEW_DEPTH_BUFFER		0x10000
 
 struct brw_state_flags {
+   /** State update flags signalled by mesa internals */
    GLuint mesa;
-   GLuint cache;
+   /**
+    * State update flags signalled as the result of brw_tracked_state updates
+    */
    GLuint brw;
+   /** State update flags signalled by brw_state_cache.c searches */
+   GLuint cache;
 };
 
 struct brw_vertex_program {
@@ -230,32 +240,46 @@ struct brw_vs_ouput_sizes {
 
 
 #define BRW_MAX_TEX_UNIT 8
-#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1
-
-/* Create a fixed sized struct for caching binding tables:
- */
-struct brw_surface_binding_table {
-   GLuint surf_ss_offset[BRW_WM_MAX_SURF];
-};
-
-
-struct brw_cache;
+#define BRW_WM_MAX_SURF (BRW_MAX_TEX_UNIT + MAX_DRAW_BUFFERS) /* parenthesized so the sum survives use inside larger expressions */
 
-struct brw_mem_pool {
-   struct buffer *buffer;
-
-   GLuint size;
-   GLuint offset;		/* offset of first free byte */
+enum brw_cache_id {
+   BRW_CC_VP,
+   BRW_CC_UNIT,
+   BRW_WM_PROG,
+   BRW_SAMPLER_DEFAULT_COLOR,
+   BRW_SAMPLER,
+   BRW_WM_UNIT,
+   BRW_SF_PROG,
+   BRW_SF_VP,
+   BRW_SF_UNIT,
+   BRW_VS_UNIT,
+   BRW_VS_PROG,
+   BRW_GS_UNIT,
+   BRW_GS_PROG,
+   BRW_CLIP_VP,
+   BRW_CLIP_UNIT,
+   BRW_CLIP_PROG,
+   BRW_SS_SURFACE,
+   BRW_SS_SURF_BIND,
 
-   struct brw_context *brw;
+   BRW_MAX_CACHE
 };
 
 struct brw_cache_item {
+   /**
+    * Effectively part of the key, cache_id identifies what kind of state
+    * buffer is involved, and also which brw->state.dirty.cache flag should
+    * be set when this cache item is chosen.
+    */
+   enum brw_cache_id cache_id;
+   /** 32-bit hash of the key data */
    GLuint hash;
    GLuint key_size;		/* for variable-sized keys */
    const void *key;
+   dri_bo **reloc_bufs;
+   GLuint nr_reloc_bufs;
 
-   GLuint offset;		/* offset within pool's buffer */
+   dri_bo *bo;
    GLuint data_size;
 
    struct brw_cache_item *next;
@@ -264,23 +288,19 @@ struct brw_cache_item {
 
 
 struct brw_cache {
-   GLuint id;
-
-   const char *name;
-
    struct brw_context *brw;
-   struct brw_mem_pool *pool;
 
    struct brw_cache_item **items;
    GLuint size, n_items;
-   
-   GLuint key_size;		/* for fixed-size keys */
-   GLuint aux_size;
 
-   GLuint aub_type;
-   GLuint aub_sub_type;
-   
-   GLuint last_addr;			/* offset of active item */
+   GLuint key_size[BRW_MAX_CACHE];		/* for fixed-size keys */
+   GLuint aux_size[BRW_MAX_CACHE];
+   char *name[BRW_MAX_CACHE];
+
+   /* Record of the last BOs chosen for each cache_id.  Used to set
+    * brw->state.dirty.cache when a new cache item is chosen.
+    */
+   dri_bo *last_bo[BRW_MAX_CACHE];
 };
 
 
@@ -312,34 +332,8 @@ struct brw_state_pointers {
  */
 struct brw_tracked_state {
    struct brw_state_flags dirty;
-   void (*update)( struct brw_context *brw );
-};
-
-
-enum brw_cache_id {
-   BRW_CC_VP,
-   BRW_CC_UNIT,
-   BRW_WM_PROG,
-   BRW_SAMPLER_DEFAULT_COLOR,
-   BRW_SAMPLER,
-   BRW_WM_UNIT,
-   BRW_SF_PROG,
-   BRW_SF_VP,
-   BRW_SF_UNIT,
-   BRW_VS_UNIT,
-   BRW_VS_PROG,
-   BRW_GS_UNIT,
-   BRW_GS_PROG,
-   BRW_CLIP_VP,
-   BRW_CLIP_UNIT,
-   BRW_CLIP_PROG,
-
-   /* These two are in the SS pool:
-    */
-   BRW_SS_SURFACE,
-   BRW_SS_SURF_BIND,
-
-   BRW_MAX_CACHE
+   int (*prepare)( struct brw_context *brw );
+   void (*emit)( struct brw_context *brw );
 };
 
 /* Flags for brw->state.cache.
@@ -363,16 +357,6 @@ enum brw_cache_id {
 #define CACHE_NEW_SURFACE                (1<<BRW_SS_SURFACE)
 #define CACHE_NEW_SURF_BIND              (1<<BRW_SS_SURF_BIND)
 
-
-
-
-enum brw_mempool_id {
-   BRW_GS_POOL,
-   BRW_SS_POOL,
-   BRW_MAX_POOL
-};
-
-
 struct brw_cached_batch_item {
    struct header *header;
    GLuint sz;
@@ -389,12 +373,16 @@ struct brw_cached_batch_item {
 struct brw_vertex_element {
    const struct gl_client_array *glarray;
 
-   struct brw_vertex_element_state *vep;
-
-   GLuint index;
+   /** Size of a complete element */
    GLuint element_size;
+   /** Number of uploaded elements for this input. */
    GLuint count;
-   GLuint vbo_rebase_offset;
+   /** Byte stride between elements in the uploaded array */
+   GLuint stride;
+   /** Offset of the first element within the buffer object */
+   unsigned int offset;
+   /** Buffer object containing the uploaded vertex data */
+   dri_bo *bo;
 };
 
 
@@ -431,40 +419,31 @@ struct brw_context
    GLboolean emit_state_always;
    GLboolean wrap;
    GLboolean tmp_fallback;
+   GLboolean no_batch_wrap;
 
    struct {
       struct brw_state_flags dirty;
       struct brw_tracked_state **atoms;
       GLuint nr_atoms;
 
-
-      struct intel_region *draw_region;
+      GLuint nr_draw_regions;
+      struct intel_region *draw_regions[MAX_DRAW_BUFFERS];
       struct intel_region *depth_region;
    } state;
 
    struct brw_state_pointers attribs;
-   struct brw_mem_pool pool[BRW_MAX_POOL];
-   struct brw_cache cache[BRW_MAX_CACHE];
+   struct brw_cache cache;
    struct brw_cached_batch_item *cached_batch_items;
 
    struct {
-
-      /* Arrays with buffer objects to copy non-bufferobj arrays into
-       * for upload:
-       */
-      struct gl_client_array vbo_array[VERT_ATTRIB_MAX];
-
       struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
 
 #define BRW_NR_UPLOAD_BUFS 17
 #define BRW_UPLOAD_INIT_SIZE (128*1024)
 
       struct {
-	 struct gl_buffer_object *vbo[BRW_NR_UPLOAD_BUFS];
-	 GLuint buf;
+	 dri_bo *bo;
 	 GLuint offset;
-	 GLuint size;
-	 GLuint wrap;
       } upload;
 
       /* Summary of size and varying of active arrays, so we can check
@@ -483,9 +462,12 @@ struct brw_context
       struct gl_buffer_object *vbo;
 
       struct intel_region *saved_draw_region;
+      GLuint saved_nr_draw_regions;
       struct intel_region *saved_depth_region;
 
-      GLuint restore_draw_mask;
+      GLuint restore_draw_buffers[MAX_DRAW_BUFFERS];
+      GLuint restore_num_draw_buffers;
+
       struct gl_fragment_program *restore_fp;
       
       GLboolean active;
@@ -552,7 +534,11 @@ struct brw_context
        */
       struct brw_tracked_state tracked_state;
 
-      GLuint gs_offset;
+      dri_bo *curbe_bo;
+      /** Offset within curbe_bo of space for current curbe entry */
+      GLuint curbe_offset;
+      /** Offset within curbe_bo of space for next curbe entry */
+      GLuint curbe_next_offset;
 
       GLfloat *last_buf;
       GLuint last_bufsz;
@@ -561,33 +547,33 @@ struct brw_context
    struct {
       struct brw_vs_prog_data *prog_data;
 
-      GLuint prog_gs_offset;
-      GLuint state_gs_offset;	
+      dri_bo *prog_bo;
+      dri_bo *state_bo;
    } vs;
 
    struct {
       struct brw_gs_prog_data *prog_data;
 
       GLboolean prog_active;
-      GLuint prog_gs_offset;
-      GLuint state_gs_offset;	
+      dri_bo *prog_bo;
+      dri_bo *state_bo;
    } gs;
 
    struct {
       struct brw_clip_prog_data *prog_data;
 
-      GLuint prog_gs_offset;
-      GLuint vp_gs_offset;
-      GLuint state_gs_offset;	
+      dri_bo *prog_bo;
+      dri_bo *state_bo;
+      dri_bo *vp_bo;
    } clip;
 
 
    struct {
       struct brw_sf_prog_data *prog_data;
 
-      GLuint prog_gs_offset;
-      GLuint vp_gs_offset;
-      GLuint state_gs_offset;
+      dri_bo *prog_bo;
+      dri_bo *state_bo;
+      dri_bo *vp_bo;
    } sf;
 
    struct {
@@ -598,33 +584,31 @@ struct brw_context
        */
       GLuint input_size_masks[4];
 
-
-      /* State structs
-       */
-      struct brw_sampler_default_color sdc[BRW_MAX_TEX_UNIT];
-      struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT];
+      /** Array of sampler default colors (texture border color) */
+      dri_bo *sdc_bo[BRW_MAX_TEX_UNIT];
 
       GLuint render_surf;
       GLuint nr_surfaces;      
 
       GLuint max_threads;
-      struct buffer *scratch_buffer;
-      GLuint scratch_buffer_size;
+      dri_bo *scratch_buffer;
 
       GLuint sampler_count;
-      GLuint sampler_gs_offset;
+      dri_bo *sampler_bo;
 
-      struct brw_surface_binding_table bind;
-      GLuint bind_ss_offset;
+      /** Binding table of pointers to surf_bo entries */
+      dri_bo *bind_bo;
+      dri_bo *surf_bo[BRW_WM_MAX_SURF];
 
-      GLuint prog_gs_offset;
-      GLuint state_gs_offset;
+      dri_bo *prog_bo;
+      dri_bo *state_bo;
    } wm;
 
 
    struct {
-      GLuint vp_gs_offset;
-      GLuint state_gs_offset;
+      dri_bo *prog_bo;
+      dri_bo *state_bo;
+      dri_bo *vp_bo;
    } cc;
 
    
@@ -657,19 +641,24 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
 /*======================================================================
  * brw_state.c
  */
-void brw_validate_state( struct brw_context *brw );
+int brw_validate_state( struct brw_context *brw );
 void brw_init_state( struct brw_context *brw );
 void brw_destroy_state( struct brw_context *brw );
 
 
+/*======================================================================
+ * brw_state_dump.c
+ */
+void brw_debug_batch(struct intel_context *intel);
 
 /*======================================================================
  * brw_tex.c
  */
 void brwUpdateTextureState( struct intel_context *intel );
-void brwInitTextureFuncs( struct dd_function_table *functions );
-void brw_FrameBufferTexInit( struct brw_context *brw );
+void brw_FrameBufferTexInit( struct brw_context *brw,
+			     struct intel_region *region );
 void brw_FrameBufferTexDestroy( struct brw_context *brw );
+void brw_validate_textures( struct brw_context *brw );
 
 /*======================================================================
  * brw_metaops.c
@@ -696,11 +685,13 @@ void brw_upload_constant_buffer_state(struct brw_context *brw);
  * Inline conversion functions.  These are better-typed than the
  * macros used previously:
  */
-static inline struct brw_context *
+static INLINE struct brw_context *
 brw_context( GLcontext *ctx )
 {
    return (struct brw_context *)ctx;
 }
 
+#define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1)
+
 #endif
 
diff --git a/i965/brw_curbe.c b/i965/brw_curbe.c
index 5bf0ed5..5ff4e29 100644
--- a/i965/brw_curbe.c
+++ b/i965/brw_curbe.c
@@ -42,12 +42,11 @@
 #include "brw_defines.h"
 #include "brw_state.h"
 #include "brw_util.h"
-#include "brw_aub.h"
 
 
 /* Partition the CURBE between the various users of constant values:
  */
-static void calculate_curbe_offsets( struct brw_context *brw )
+static int calculate_curbe_offsets( struct brw_context *brw )
 {
    /* CACHE_NEW_WM_PROG */
    GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
@@ -118,6 +117,7 @@ static void calculate_curbe_offsets( struct brw_context *brw )
 
       brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;
    }
+   return 0;
 }
 
 
@@ -127,7 +127,7 @@ const struct brw_tracked_state brw_curbe_offsets = {
       .brw  = BRW_NEW_VERTEX_PROGRAM,
       .cache = CACHE_NEW_WM_PROG
    },
-   .update = calculate_curbe_offsets
+   .prepare = calculate_curbe_offsets
 };
 
 
@@ -183,12 +183,11 @@ static GLfloat fixed_plane[6][4] = {
  * cache mechanism, but maybe would benefit from a comparison against
  * the current uploaded set of constants.
  */
-static void upload_constant_buffer(struct brw_context *brw)
+static int prepare_constant_buffer(struct brw_context *brw)
 {
    GLcontext *ctx = &brw->intel.ctx;
    struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
    struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program;
-   struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL];
    GLuint sz = brw->curbe.total_size;
    GLuint bufsz = sz * 16 * sizeof(GLfloat);
    GLfloat *buf;
@@ -202,13 +201,6 @@ static void upload_constant_buffer(struct brw_context *brw)
    brw->curbe.tracked_state.dirty.mesa |= fp->param_state;
 
    if (sz == 0) {
-      struct brw_constant_buffer cb;
-      cb.header.opcode = CMD_CONST_BUFFER;
-      cb.header.length = sizeof(cb)/4 - 2;
-      cb.header.valid = 0;
-      cb.bits0.buffer_length = 0;
-      cb.bits0.buffer_address = 0;
-      BRW_BATCH_STRUCT(brw, &cb);
 
       if (brw->curbe.last_buf) {
 	 free(brw->curbe.last_buf);
@@ -216,7 +208,7 @@ static void upload_constant_buffer(struct brw_context *brw)
 	 brw->curbe.last_bufsz  = 0;
       }
        
-      return;
+      return 0;
    }
 
    buf = (GLfloat *)malloc(bufsz);
@@ -290,11 +282,11 @@ static void upload_constant_buffer(struct brw_context *brw)
 		   brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
    }
 
-   if (brw->curbe.last_buf &&
+   if (brw->curbe.curbe_bo != NULL &&
+       brw->curbe.last_buf &&
        bufsz == brw->curbe.last_bufsz &&
        memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
       free(buf);
-/*       return; */
    } 
    else {
       if (brw->curbe.last_buf)
@@ -302,61 +294,69 @@ static void upload_constant_buffer(struct brw_context *brw)
       brw->curbe.last_buf = buf;
       brw->curbe.last_bufsz = bufsz;
 
-      
-      if (!brw_pool_alloc(pool, 
-			  bufsz,
-			  6,
-			  &brw->curbe.gs_offset)) {
-	 _mesa_printf("out of GS memory for curbe\n");
-	 assert(0);
-	 return;
+      if (brw->curbe.curbe_bo != NULL &&
+	  brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size)
+      {
+	 dri_bo_unreference(brw->curbe.curbe_bo);
+	 brw->curbe.curbe_bo = NULL;
       }
-            
+
+      if (brw->curbe.curbe_bo == NULL) {
+	 /* Allocate a single page for CURBE entries for this batchbuffer.
+	  * They're generally around 64b.
+	  */
+	 brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE",
+					    4096, 1 << 6,
+					    DRM_BO_FLAG_MEM_LOCAL |
+					    DRM_BO_FLAG_CACHED |
+					    DRM_BO_FLAG_CACHED_MAPPED);
+	 brw->curbe.curbe_next_offset = 0;
+      }
+
+      brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
+      brw->curbe.curbe_next_offset += bufsz;
+      brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64);
 
       /* Copy data to the buffer:
        */
-      bmBufferSubDataAUB(&brw->intel,
-			 pool->buffer,
-			 brw->curbe.gs_offset, 
-			 bufsz, 
-			 buf,
-			 DW_CONSTANT_BUFFER,
-			 0);
+      dri_bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, bufsz, buf);
    }
 
-   /* TODO: only emit the constant_buffer packet when necessary, ie:
-      - contents have changed
-      - offset has changed
-      - hw requirements due to other packets emitted.
-   */
-   {
-      struct brw_constant_buffer cb;
-      
-      memset(&cb, 0, sizeof(cb));
-
-      cb.header.opcode = CMD_CONST_BUFFER;
-      cb.header.length = sizeof(cb)/4 - 2;
-      cb.header.valid = 1;
-      cb.bits0.buffer_length = sz - 1;
-      cb.bits0.buffer_address = brw->curbe.gs_offset >> 6;
-      
-      /* Because this provokes an action (ie copy the constants into the
-       * URB), it shouldn't be shortcircuited if identical to the
-       * previous time - because eg. the urb destination may have
-       * changed, or the urb contents different to last time.  
-       *
-       * Note that the data referred to is actually copied internally,
-       * not just used in place according to passed pointer.
-       *
-       * It appears that the CS unit takes care of using each available
-       * URB entry (Const URB Entry == CURBE) in turn, and issuing
-       * flushes as necessary when doublebuffering of CURBEs isn't
-       * possible.
-       */
-/*       intel_batchbuffer_align(brw->intel.batch, 64, sizeof(cb)); */
-      BRW_BATCH_STRUCT(brw, &cb);
-/*       intel_batchbuffer_align(brw->intel.batch, 64, 0); */
+
+   /* Because this provokes an action (ie copy the constants into the
+    * URB), it shouldn't be shortcircuited if identical to the
+    * previous time - because eg. the urb destination may have
+    * changed, or the urb contents different to last time.
+    *
+    * Note that the data referred to is actually copied internally,
+    * not just used in place according to passed pointer.
+    *
+    * It appears that the CS unit takes care of using each available
+    * URB entry (Const URB Entry == CURBE) in turn, and issuing
+    * flushes as necessary when doublebuffering of CURBEs isn't
+    * possible.
+    */
+
+   /* check aperture space for this bo */
+   return dri_bufmgr_check_aperture_space(brw->curbe.curbe_bo);
+}
+
+
+static void emit_constant_buffer(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   GLuint sz = brw->curbe.total_size;
+
+   BEGIN_BATCH(2, IGNORE_CLIPRECTS);
+   if (sz == 0) {
+      OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
+      OUT_BATCH(0);
+   } else {
+      OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
+      OUT_RELOC(brw->curbe.curbe_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		(sz - 1) + brw->curbe.curbe_offset);
    }
+   ADVANCE_BATCH();
 }
 
 /* This tracked state is unique in that the state it monitors varies
@@ -372,9 +372,11 @@ const struct brw_tracked_state brw_constant_buffer = {
 	       BRW_NEW_VERTEX_PROGRAM |
 	       BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
 	       BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
-	       BRW_NEW_CURBE_OFFSETS),
+	       BRW_NEW_CURBE_OFFSETS |
+	       BRW_NEW_BATCH),
       .cache = (CACHE_NEW_WM_PROG) 
    },
-   .update = upload_constant_buffer
+   .prepare = prepare_constant_buffer,
+   .emit = emit_constant_buffer,
 };
 
diff --git a/i965/brw_defines.h b/i965/brw_defines.h
index 101828b..92c058a 100644
--- a/i965/brw_defines.h
+++ b/i965/brw_defines.h
@@ -240,6 +240,8 @@
 #define BRW_FRONTWINDING_CW      0
 #define BRW_FRONTWINDING_CCW     1
 
+#define BRW_SPRITE_POINT_ENABLE  16
+
 #define BRW_INDEX_BYTE     0
 #define BRW_INDEX_WORD     1
 #define BRW_INDEX_DWORD    2
@@ -485,20 +487,6 @@
 #define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS  0
 #define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS  1
 
-#define BRW_VERTEXBUFFER_ACCESS_VERTEXDATA     0
-#define BRW_VERTEXBUFFER_ACCESS_INSTANCEDATA   1
-
-#define BRW_VFCOMPONENT_NOSTORE      0
-#define BRW_VFCOMPONENT_STORE_SRC    1
-#define BRW_VFCOMPONENT_STORE_0      2
-#define BRW_VFCOMPONENT_STORE_1_FLT  3
-#define BRW_VFCOMPONENT_STORE_1_INT  4
-#define BRW_VFCOMPONENT_STORE_VID    5
-#define BRW_VFCOMPONENT_STORE_IID    6
-#define BRW_VFCOMPONENT_STORE_PID    7
-
-
-
 /* Execution Unit (EU) defines
  */
 
@@ -816,15 +804,39 @@
 #define CMD_STATE_BASE_ADDRESS        0x6101
 #define CMD_STATE_INSN_POINTER        0x6102
 #define CMD_PIPELINE_SELECT_965       0x6104
-#define CMD_PIPELINE_SELECT_IGD       0x6904
+#define CMD_PIPELINE_SELECT_GM45      0x6904
 
 #define CMD_PIPELINED_STATE_POINTERS  0x7800
 #define CMD_BINDING_TABLE_PTRS        0x7801
+
 #define CMD_VERTEX_BUFFER             0x7808
+# define BRW_VB0_INDEX_SHIFT		27
+# define BRW_VB0_ACCESS_VERTEXDATA	(0 << 26)
+# define BRW_VB0_ACCESS_INSTANCEDATA	(1 << 26)
+# define BRW_VB0_PITCH_SHIFT		0
+
 #define CMD_VERTEX_ELEMENT            0x7809
+# define BRW_VE0_INDEX_SHIFT		27
+# define BRW_VE0_FORMAT_SHIFT		16
+# define BRW_VE0_VALID			(1 << 26)
+# define BRW_VE0_SRC_OFFSET_SHIFT	0
+# define BRW_VE1_COMPONENT_NOSTORE	0
+# define BRW_VE1_COMPONENT_STORE_SRC	1
+# define BRW_VE1_COMPONENT_STORE_0	2
+# define BRW_VE1_COMPONENT_STORE_1_FLT	3
+# define BRW_VE1_COMPONENT_STORE_1_INT	4
+# define BRW_VE1_COMPONENT_STORE_VID	5
+# define BRW_VE1_COMPONENT_STORE_IID	6
+# define BRW_VE1_COMPONENT_STORE_PID	7
+# define BRW_VE1_COMPONENT_0_SHIFT	28
+# define BRW_VE1_COMPONENT_1_SHIFT	24
+# define BRW_VE1_COMPONENT_2_SHIFT	20
+# define BRW_VE1_COMPONENT_3_SHIFT	16
+# define BRW_VE1_DST_OFFSET_SHIFT	0
+
 #define CMD_INDEX_BUFFER              0x780a
 #define CMD_VF_STATISTICS_965         0x780b
-#define CMD_VF_STATISTICS_IGD         0x680b
+#define CMD_VF_STATISTICS_GM45        0x680b
 
 #define CMD_DRAW_RECT                 0x7900
 #define CMD_BLEND_CONSTANT_COLOR      0x7901
@@ -848,9 +860,12 @@
 #define R02_PRIM_END    0x1
 #define R02_PRIM_START  0x2
 
-#define BRW_IS_IGD(brw)     ((brw)->intel.intelScreen->deviceID == PCI_CHIP_IGD_GM)
-#define CMD_PIPELINE_SELECT(brw)       ((BRW_IS_IGD(brw)) ? CMD_PIPELINE_SELECT_IGD : CMD_PIPELINE_SELECT_965)
-#define CMD_VF_STATISTICS(brw)         ((BRW_IS_IGD(brw)) ? CMD_VF_STATISTICS_IGD : CMD_VF_STATISTICS_965)
-#define URB_SIZES(brw)                 ((BRW_IS_IGD(brw)) ? 384 : 256)  /* 512 bit unit */
+#include "intel_chipset.h"
+
+#define BRW_IS_GM45(brw)        (IS_GM45_GM((brw)->intel.intelScreen->deviceID))
+#define BRW_IS_G4X(brw)         (IS_G4X((brw)->intel.intelScreen->deviceID))
+#define CMD_PIPELINE_SELECT(brw)        ((BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965)
+#define CMD_VF_STATISTICS(brw)          ((BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965)
+#define URB_SIZES(brw)                  ((BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) ? 384 : 256)  /* 512 bit unit */
 
 #endif
diff --git a/i965/brw_draw.c b/i965/brw_draw.c
index f796472..f90c5f7 100644
--- a/i965/brw_draw.c
+++ b/i965/brw_draw.c
@@ -36,7 +36,6 @@
 #include "brw_draw.h"
 #include "brw_defines.h"
 #include "brw_context.h"
-#include "brw_aub.h"
 #include "brw_state.h"
 #include "brw_fallback.h"
 
@@ -47,8 +46,9 @@
 #include "tnl/tnl.h"
 #include "vbo/vbo_context.h"
 #include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
 
-
+#define FILE_DEBUG_FLAG DEBUG_BATCH
 
 static GLuint hw_prim[GL_POLYGON+1] = {
    _3DPRIM_POINTLIST,
@@ -83,8 +83,9 @@ static const GLenum reduced_prim[GL_POLYGON+1] = {
  * programs be immune to the active primitive (ie. cope with all
  * possibilities).  That may not be realistic however.
  */
-static GLuint brw_set_prim(struct brw_context *brw, GLenum prim)
+static GLuint brw_set_prim(struct brw_context *brw, GLenum prim, GLboolean *need_flush)
 {
+   int ret;
    if (INTEL_DEBUG & DEBUG_PRIMS)
       _mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim));
    
@@ -105,7 +106,9 @@ static GLuint brw_set_prim(struct brw_context *brw, GLenum prim)
 	 brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
       }
 
-      brw_validate_state(brw);
+      ret = brw_validate_state(brw);
+      if (ret)
+         *need_flush = GL_TRUE;
    }
 
    return hw_prim[prim];
@@ -123,30 +126,12 @@ static GLuint trim(GLenum prim, GLuint length)
 }
 
 
-static void brw_emit_cliprect( struct brw_context *brw, 
-			       const drm_clip_rect_t *rect )
-{
-   struct brw_drawrect bdr;
-
-   bdr.header.opcode = CMD_DRAW_RECT;
-   bdr.header.length = sizeof(bdr)/4 - 2;
-   bdr.xmin = rect->x1;
-   bdr.xmax = rect->x2 - 1;
-   bdr.ymin = rect->y1;
-   bdr.ymax = rect->y2 - 1;
-   bdr.xorg = brw->intel.drawX;
-   bdr.yorg = brw->intel.drawY;
-
-   intel_batchbuffer_data( brw->intel.batch, &bdr, sizeof(bdr), 
-			   INTEL_BATCH_NO_CLIPRECTS);
-}
-
-
 static void brw_emit_prim( struct brw_context *brw, 
 			   const struct _mesa_prim *prim )
 
 {
    struct brw_3d_primitive prim_packet;
+   GLboolean need_flush = GL_FALSE;
 
    if (INTEL_DEBUG & DEBUG_PRIMS)
       _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), 
@@ -155,7 +140,7 @@ static void brw_emit_prim( struct brw_context *brw,
    prim_packet.header.opcode = CMD_3D_PRIM;
    prim_packet.header.length = sizeof(prim_packet)/4 - 2;
    prim_packet.header.pad = 0;
-   prim_packet.header.topology = brw_set_prim(brw, prim->mode);
+   prim_packet.header.topology = brw_set_prim(brw, prim->mode, &need_flush);
    prim_packet.header.indexed = prim->indexed;
 
    prim_packet.verts_per_instance = trim(prim->mode, prim->count);
@@ -165,9 +150,11 @@ static void brw_emit_prim( struct brw_context *brw,
    prim_packet.base_vert_location = 0;
 
    if (prim_packet.verts_per_instance) {
-      intel_batchbuffer_data( brw->intel.batch, &prim_packet, sizeof(prim_packet), 
-			      INTEL_BATCH_NO_CLIPRECTS);
+      intel_batchbuffer_data( brw->intel.batch, &prim_packet,
+			      sizeof(prim_packet), LOOP_CLIPRECTS);
    }
+
+   assert(need_flush == GL_FALSE);
 }
 
 static void brw_merge_inputs( struct brw_context *brw,
@@ -270,11 +257,17 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
    struct intel_context *intel = intel_context(ctx);
    struct brw_context *brw = brw_context(ctx);
    GLboolean retval = GL_FALSE;
-   GLuint i, j;
+   GLuint i;
+   GLuint ib_offset;
+   dri_bo *ib_bo;
+   GLboolean force_flush = GL_FALSE;
+   int ret;
 
    if (ctx->NewState)
       _mesa_update_state( ctx );
 
+   brw_validate_textures( brw );
+
    /* Bind all inputs, derive varying and size information:
     */
    brw_merge_inputs( brw, arrays );
@@ -289,19 +282,41 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
    LOCK_HARDWARE(intel);
 
    if (brw->intel.numClipRects == 0) {
-      assert(intel->batch->ptr == intel->batch->map + intel->batch->offset);
       UNLOCK_HARDWARE(intel);
       return GL_TRUE;
    }
 
    {
+      /* Flush the batch if it's approaching full, so that we don't wrap while
+       * we've got validated state that needs to be in the same batch as the
+       * primitives.  This fraction is just a guess (minimal full state plus
+       * a primitive is around 512 bytes), and would be better if we had
+       * an upper bound of how much we might emit in a single
+       * brw_try_draw_prims().
+       */
+   flush:
+      if (force_flush)
+         brw->no_batch_wrap = GL_FALSE;
+
+      if (intel->batch->ptr - intel->batch->map > intel->batch->size * 3 / 4
+	/* brw_emit_prim may change the cliprect_mode to LOOP_CLIPRECTS */
+	  || intel->batch->cliprect_mode != LOOP_CLIPRECTS || (force_flush == GL_TRUE))
+	      intel_batchbuffer_flush(intel->batch);
+
+      force_flush = GL_FALSE;
+      brw->no_batch_wrap = GL_TRUE;
+
       /* Set the first primitive early, ahead of validate_state:
        */
-      brw_set_prim(brw, prim[0].mode);
+      brw_set_prim(brw, prim[0].mode, &force_flush);
 
       /* XXX:  Need to separate validate and upload of state.  
        */
-      brw_validate_state( brw );
+      ret = brw_validate_state( brw );
+      if (ret) {
+         force_flush = GL_TRUE;
+         goto flush;
+      }
 
       /* Various fallback checks:
        */
@@ -310,76 +325,42 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
 
       if (check_fallbacks( brw, prim, nr_prims ))
 	 goto out;
+
+      /* need to account for index buffer and vertex buffer */
+      if (ib) {
+         ret = brw_prepare_indices( brw, ib , &ib_bo, &ib_offset);
+         if (ret) {
+            force_flush = GL_TRUE;
+            goto flush;
+         }
+      }
+
+      ret = brw_prepare_vertices( brw, min_index, max_index);
+      if (ret < 0)
+         goto out;
+
+      if (ret > 0) {
+         force_flush = GL_TRUE;
+         goto flush;
+      }
 	  
       /* Upload index, vertex data: 
        */
       if (ib)
-	 brw_upload_indices( brw, ib );
+	brw_emit_indices( brw, ib, ib_bo, ib_offset);
 
-      if (!brw_upload_vertices( brw, min_index, max_index)) {
-	 goto out;
-      }
+      brw_emit_vertices( brw, min_index, max_index);
 
-      /* For single cliprect, state is already emitted: 
-       */
-      if (brw->intel.numClipRects == 1) {
-	 for (i = 0; i < nr_prims; i++) {
-	    brw_emit_prim(brw, &prim[i]);   
-	 }
-      }
-      else {
-	 /* Otherwise, explicitly do the cliprects at this point:
-	  */
-          GLuint nprims = 0;
-	 for (j = 0; j < brw->intel.numClipRects; j++) {
-	    brw_emit_cliprect(brw, &brw->intel.pClipRects[j]);
-
-	    /* Emit prims to batchbuffer: 
-	     */
-	    for (i = 0; i < nr_prims; i++) {
-	       brw_emit_prim(brw, &prim[i]);   
-
-          if (++nprims == VBO_MAX_PRIM) {
-              intel_batchbuffer_flush(brw->intel.batch);
-              nprims = 0;
-          }
-	    }
-	 }
+      for (i = 0; i < nr_prims; i++) {
+	 brw_emit_prim(brw, &prim[i]);
       }
-      
-      intel->need_flush = GL_TRUE;
+
       retval = GL_TRUE;
    }
 
  out:
 
-   /* Currently have to do this to synchronize with the map/unmap of
-    * the vertex buffer in brw_exec_api.c.  Not sure if there is any
-    * way around this, as not every flush is due to a buffer filling
-    * up.
-    */
-   if (!intel_batchbuffer_flush( brw->intel.batch )) {
-      DBG("%s intel_batchbuffer_flush failed\n", __FUNCTION__);
-      retval = GL_FALSE;
-   }
-
-   if (retval && intel->thrashing) {
-      bmSetFence(intel);
-   }
-
-   /* Free any old data so it doesn't clog up texture memory - we
-    * won't be referencing it again.
-    */
-   while (brw->vb.upload.wrap != brw->vb.upload.buf) {
-      ctx->Driver.BufferData(ctx,
-			     GL_ARRAY_BUFFER_ARB,
-			     BRW_UPLOAD_INIT_SIZE,
-			     NULL,
-			     GL_DYNAMIC_DRAW_ARB,
-			     brw->vb.upload.vbo[brw->vb.upload.wrap]);
-      brw->vb.upload.wrap++;
-      brw->vb.upload.wrap %= BRW_NR_UPLOAD_BUFS;
-   }
+   brw->no_batch_wrap = GL_FALSE;
 
    UNLOCK_HARDWARE(intel);
 
@@ -425,7 +406,6 @@ void brw_draw_prims( GLcontext *ctx,
 		     GLuint min_index,
 		     GLuint max_index )
 {
-   struct intel_context *intel = intel_context(ctx);
    GLboolean retval;
 
    /* Decide if we want to rebase.  If so we end up recursing once
@@ -445,20 +425,6 @@ void brw_draw_prims( GLcontext *ctx,
     */
    retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
 
-   
-   /* This looks like out-of-memory but potentially we have
-    * situation where there is enough memory but it has become
-    * fragmented.  Clear out all heaps and start from scratch by
-    * faking a contended lock event:  (done elsewhere)
-    */
-   if (!retval && !intel->Fallback && bmError(intel)) {
-      DBG("retrying\n");
-      /* Then try a second time only to upload textures and draw the
-       * primitives:
-       */
-      retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
-   }
-
    /* Otherwise, we really are out of memory.  Pass the drawing
     * command to the software tnl module and which will in turn call
     * swrast to do the drawing.
@@ -467,57 +433,22 @@ void brw_draw_prims( GLcontext *ctx,
        _swsetup_Wakeup(ctx);
       _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
    }
-
-   if (intel->aub_file && (INTEL_DEBUG & DEBUG_SYNC)) {
-      intelFinish( &intel->ctx );
-      intel->aub_wrap = 1;
-   }
-}
-
-
-static void brw_invalidate_vbo_cb( struct intel_context *intel, void *ptr )
-{
-   /* nothing to do, we don't rely on the contents being preserved */
 }
 
-
 void brw_draw_init( struct brw_context *brw )
 {
    GLcontext *ctx = &brw->intel.ctx;
    struct vbo_context *vbo = vbo_context(ctx);
-   GLuint i;
-   
+
    /* Register our drawing function: 
     */
    vbo->draw_prims = brw_draw_prims;
-
-   brw->vb.upload.size = BRW_UPLOAD_INIT_SIZE;
-
-   for (i = 0; i < BRW_NR_UPLOAD_BUFS; i++) {
-      brw->vb.upload.vbo[i] = ctx->Driver.NewBufferObject(ctx, 1, GL_ARRAY_BUFFER_ARB);
-      
-      /* NOTE:  These are set to no-backing-store.
-       */
-      bmBufferSetInvalidateCB(&brw->intel,
-			      intel_bufferobj_buffer(intel_buffer_object(brw->vb.upload.vbo[i])),
-			      brw_invalidate_vbo_cb,
-			      &brw->intel,
-			      GL_TRUE);
-   }
-
-   ctx->Driver.BufferData( ctx, 
-			   GL_ARRAY_BUFFER_ARB, 
-			   BRW_UPLOAD_INIT_SIZE,
-			   NULL,
-			   GL_DYNAMIC_DRAW_ARB,
-			   brw->vb.upload.vbo[0] );
 }
 
 void brw_draw_destroy( struct brw_context *brw )
 {
-   GLcontext *ctx = &brw->intel.ctx;
-   GLuint i;
-   
-   for (i = 0; i < BRW_NR_UPLOAD_BUFS; i++)
-      ctx->Driver.DeleteBuffer(ctx, brw->vb.upload.vbo[i]);
+   if (brw->vb.upload.bo != NULL) {
+      dri_bo_unreference(brw->vb.upload.bo);
+      brw->vb.upload.bo = NULL;
+   }
 }
diff --git a/i965/brw_draw.h b/i965/brw_draw.h
index 0f7b738..b354740 100644
--- a/i965/brw_draw.h
+++ b/i965/brw_draw.h
@@ -31,6 +31,7 @@
 #include "mtypes.h"		/* for GLcontext... */
 #include "vbo/vbo.h"
 
+#include "dri_bufmgr.h"
 struct brw_context;
 
 
@@ -53,10 +54,21 @@ void brw_init_current_values(GLcontext *ctx,
 
 /* brw_draw_upload.c
  */
-void brw_upload_indices( struct brw_context *brw,
-			 const struct _mesa_index_buffer *index_buffer);
+int brw_prepare_indices( struct brw_context *brw,
+			 const struct _mesa_index_buffer *index_buffer,
+			 dri_bo **bo_return,
+			 GLuint *offset_return);
 
-GLboolean brw_upload_vertices( struct brw_context *brw,
+void brw_emit_indices( struct brw_context *brw,
+		       const struct _mesa_index_buffer *index_buffer,
+		       dri_bo *bo,
+		       GLuint offset);
+
+int brw_prepare_vertices( struct brw_context *brw,
+			       GLuint min_index,
+			       GLuint max_index );
+
+void brw_emit_vertices( struct brw_context *brw,
 			       GLuint min_index,
 			       GLuint max_index );
 
diff --git a/i965/brw_draw_upload.c b/i965/brw_draw_upload.c
index 6150cac..7946ffd 100644
--- a/i965/brw_draw_upload.c
+++ b/i965/brw_draw_upload.c
@@ -36,43 +36,13 @@
 #include "brw_draw.h"
 #include "brw_defines.h"
 #include "brw_context.h"
-#include "brw_aub.h"
 #include "brw_state.h"
 #include "brw_fallback.h"
 
 #include "intel_ioctl.h"
 #include "intel_batchbuffer.h"
 #include "intel_buffer_objects.h"
-
-
-struct brw_array_state {
-   union header_union header;
-
-   struct {
-      union {
-	 struct {
-	    GLuint pitch:11; 
-	    GLuint pad:15;
-	    GLuint access_type:1; 
-	    GLuint vb_index:5; 
-	 } bits;
-	 GLuint dword;
-      } vb0;
-   
-      struct buffer *buffer;
-      GLuint offset;
-
-      GLuint max_index;   
-      GLuint instance_data_step_rate;
-
-   } vb[BRW_VBP_MAX];
-};
-
-
-static struct buffer *array_buffer( const struct gl_client_array *array )
-{
-   return intel_bufferobj_buffer(intel_buffer_object(array->BufferObj));
-}
+#include "intel_tex.h"
 
 static GLuint double_types[5] = {
    0,
@@ -247,194 +217,169 @@ static GLuint get_index_type(GLenum type)
    }
 }
 
-static void copy_strided_array( GLubyte *dest, 
-				const GLubyte *src, 
-				GLuint size, 
-				GLuint stride,
-				GLuint count )
-{
-   if (size == stride) 
-      do_memcpy(dest, src, count * size);
-   else {
-      GLuint i,j;
-   
-      for (i = 0; i < count; i++) {
-	 for (j = 0; j < size; j++)
-	    *dest++ = *src++;
-	 src += (stride - size);
-      }
-   }
-}
-
 static void wrap_buffers( struct brw_context *brw,
 			  GLuint size )
 {
-   GLcontext *ctx = &brw->intel.ctx;
-
    if (size < BRW_UPLOAD_INIT_SIZE)
       size = BRW_UPLOAD_INIT_SIZE;
 
-   brw->vb.upload.buf++;
-   brw->vb.upload.buf %= BRW_NR_UPLOAD_BUFS;
    brw->vb.upload.offset = 0;
 
-   ctx->Driver.BufferData(ctx,
-			  GL_ARRAY_BUFFER_ARB,
-			  size,
-			  NULL,
-			  GL_DYNAMIC_DRAW_ARB,
-			  brw->vb.upload.vbo[brw->vb.upload.buf]);
+   if (brw->vb.upload.bo != NULL)
+      dri_bo_unreference(brw->vb.upload.bo);
+   brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO",
+				    size, 1,
+				    DRM_BO_FLAG_MEM_LOCAL |
+				    DRM_BO_FLAG_CACHED |
+				    DRM_BO_FLAG_CACHED_MAPPED);
+
+   /* Set the internal VBOs to no-backing-store.  We only use them as a
+    * temporary within a brw_try_draw_prims while the lock is held.
+    */
+   /* DON'T DO THIS: IF WE HAVE TO RE-ORG MEMORY, WE NEED SOMEWHERE WITH
+      FAKE TO PUSH THIS STUFF. */
+//   if (!brw->intel.ttm)
+//      dri_bo_fake_disable_backing_store(brw->vb.upload.bo, NULL, NULL);
 }
 
 static void get_space( struct brw_context *brw,
 		       GLuint size,
-		       struct gl_buffer_object **vbo_return,
+		       dri_bo **bo_return,
 		       GLuint *offset_return )
 {
-   size = (size + 63) & ~63;
-   
-   if (brw->vb.upload.offset + size > BRW_UPLOAD_INIT_SIZE)
+   size = ALIGN(size, 64);
+
+   if (brw->vb.upload.bo == NULL ||
+       brw->vb.upload.offset + size > brw->vb.upload.bo->size) {
       wrap_buffers(brw, size);
+   }
 
-   *vbo_return = brw->vb.upload.vbo[brw->vb.upload.buf];
+   dri_bo_reference(brw->vb.upload.bo);
+   *bo_return = brw->vb.upload.bo;
    *offset_return = brw->vb.upload.offset;
 
    brw->vb.upload.offset += size;
 }
 
-
-
-static struct gl_client_array *
+static void
 copy_array_to_vbo_array( struct brw_context *brw,
-			 GLuint i,
-			 const struct gl_client_array *array,
-			 GLuint element_size,
-			 GLuint count)
+			 struct brw_vertex_element *element,
+			 GLuint dst_stride)
 {
-   GLcontext *ctx = &brw->intel.ctx;
-   struct gl_client_array *vbo_array = &brw->vb.vbo_array[i];
-   GLuint size = count * element_size;
-   struct gl_buffer_object *vbo;
-   GLuint offset;
-   GLuint new_stride;
+   GLuint size = element->count * dst_stride;
 
-   get_space(brw, size, &vbo, &offset);
+   get_space(brw, size, &element->bo, &element->offset);
 
-   if (array->StrideB == 0) {
-      assert(count == 1);
-      new_stride = 0;
+   if (element->glarray->StrideB == 0) {
+      assert(element->count == 1);
+      element->stride = 0;
+   } else {
+      element->stride = dst_stride;
    }
-   else 
-      new_stride = element_size;
-
-   vbo_array->Size = array->Size;
-   vbo_array->Type = array->Type;
-   vbo_array->Stride = new_stride;
-   vbo_array->StrideB = new_stride;   
-   vbo_array->Ptr = (const void *)offset;
-   vbo_array->Enabled = 1;
-   vbo_array->Normalized = array->Normalized;
-   vbo_array->_MaxElement = array->_MaxElement;	/* ? */
-   vbo_array->BufferObj = vbo;
-
-   {
-      GLubyte *map = ctx->Driver.MapBuffer(ctx,
-					   GL_ARRAY_BUFFER_ARB,
-					   GL_DYNAMIC_DRAW_ARB,
-					   vbo);
-   
-      map += offset;
 
-      copy_strided_array( map, 
-			  array->Ptr,
-			  element_size,
-			  array->StrideB,
-			  count);
+   if (dst_stride == element->glarray->StrideB) {
+      dri_bo_subdata(element->bo,
+		     element->offset,
+		     size,
+		     element->glarray->Ptr);
+   } else {
+      void *data;
+      char *dest;
+      const char *src = element->glarray->Ptr;
+      int i;
+
+      data = _mesa_malloc(dst_stride * element->count);
+      dest = data;
+      for (i = 0; i < element->count; i++) {
+	 memcpy(dest, src, dst_stride);
+	 src += element->glarray->StrideB;
+	 dest += dst_stride;
+      }
 
-      ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, vbo_array->BufferObj);
+      dri_bo_subdata(element->bo,
+		     element->offset,
+		     size,
+		     data);
+      _mesa_free(data);
    }
-
-   return vbo_array;
 }
 
-
-
-static struct gl_client_array *
-interleaved_vbo_array( struct brw_context *brw,
-		       GLuint i,
-		       const struct gl_client_array *uploaded_array,
-		       const struct gl_client_array *array,
-		       const char *ptr)
-{
-   struct gl_client_array *vbo_array = &brw->vb.vbo_array[i];
-
-   vbo_array->Size = array->Size;
-   vbo_array->Type = array->Type;
-   vbo_array->Stride = array->Stride;
-   vbo_array->StrideB = array->StrideB;   
-   vbo_array->Ptr = (const void *)((const char *)uploaded_array->Ptr + 
-				   ((const char *)array->Ptr - ptr));
-   vbo_array->Enabled = 1;
-   vbo_array->Normalized = array->Normalized;
-   vbo_array->_MaxElement = array->_MaxElement;	
-   vbo_array->BufferObj = uploaded_array->BufferObj;
-
-   return vbo_array;
-}
-
-
-GLboolean brw_upload_vertices( struct brw_context *brw,
+int brw_prepare_vertices( struct brw_context *brw,
 			       GLuint min_index,
 			       GLuint max_index )
 {
    GLcontext *ctx = &brw->intel.ctx;
    struct intel_context *intel = intel_context(ctx);
    GLuint tmp = brw->vs.prog_data->inputs_read; 
-   struct brw_vertex_element_packet vep;
-   struct brw_array_state vbp;
    GLuint i;
-   const void *ptr = NULL;
+   const unsigned char *ptr = NULL;
    GLuint interleave = 0;
+   int ret = 0;
 
    struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
    GLuint nr_enabled = 0;
 
    struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
    GLuint nr_uploads = 0;
-   
-
-   memset(&vbp, 0, sizeof(vbp));
-   memset(&vep, 0, sizeof(vep));
 
    /* First build an array of pointers to ve's in vb.inputs_read
     */
    if (0)
       _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
-   
+
+   /* Accumulate the list of enabled arrays. */
    while (tmp) {
       GLuint i = _mesa_ffsll(tmp)-1;
       struct brw_vertex_element *input = &brw->vb.inputs[i];
 
       tmp &= ~(1<<i);
       enabled[nr_enabled++] = input;
+   }
+
+   /* XXX: In the rare cases where this happens we fall back all
+    * the way to software rasterization, although a tnl fallback
+    * would be sufficient.  I don't know of *any* real world
+    * cases with > 17 vertex attributes enabled, so it probably
+    * isn't an issue at this point.
+    */
+   if (nr_enabled >= BRW_VEP_MAX)
+       return -1;
+
+   for (i = 0; i < nr_enabled; i++) {
+      struct brw_vertex_element *input = enabled[i];
 
-      input->index = i;
       input->element_size = get_size(input->glarray->Type) * input->glarray->Size;
       input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1;
 
-      if (!input->glarray->BufferObj->Name) {
+      if (input->glarray->BufferObj->Name != 0) {
+	 struct intel_buffer_object *intel_buffer =
+	    intel_buffer_object(input->glarray->BufferObj);
+
+	 /* Named buffer object: Just reference its contents directly. */
+	 input->bo = intel_bufferobj_buffer(intel, intel_buffer,
+					    INTEL_READ);
+	 dri_bo_reference(input->bo);
+	 input->offset = (unsigned long)input->glarray->Ptr;
+	 input->stride = input->glarray->StrideB;
+
+	 ret |= dri_bufmgr_check_aperture_space(input->bo);
+      } else {
+	 /* Queue the buffer object up to be uploaded in the next pass,
+	  * when we've decided if we're doing interleaved or not.
+	  */
 	 if (i == 0) {
 	    /* Position array not properly enabled:
 	     */
 	    if (input->glarray->StrideB == 0)
-	       return GL_FALSE;
+	      return -1;
 
 	    interleave = input->glarray->StrideB;
 	    ptr = input->glarray->Ptr;
 	 }
 	 else if (interleave != input->glarray->StrideB ||
-		  (const char *)input->glarray->Ptr - (const char *)ptr < 0 ||
-		  (const char *)input->glarray->Ptr - (const char *)ptr > interleave) {
+		  (const unsigned char *)input->glarray->Ptr - ptr < 0 ||
+		  (const unsigned char *)input->glarray->Ptr - ptr > interleave)
+	 {
 	    interleave = 0;
 	 }
 
@@ -451,131 +396,137 @@ GLboolean brw_upload_vertices( struct brw_context *brw,
       }
    }
 
-   /* Upload interleaved arrays if all uploads are interleaved
-    */
-   if (nr_uploads > 1 && 
-       interleave && 
-       interleave <= 256) {
-      struct brw_vertex_element *input0 = upload[0];
-
-      input0->glarray = copy_array_to_vbo_array(brw, 0,
-						input0->glarray, 
-						interleave,
-						input0->count);
+   /* Handle any arrays to be uploaded. */
+   if (nr_uploads > 1 && interleave && interleave <= 256) {
+      /* All uploads are interleaved, so upload the arrays together as
+       * interleaved.  First, upload the contents and set up upload[0].
+       */
+      copy_array_to_vbo_array(brw, upload[0], interleave);
 
+      ret |= dri_bufmgr_check_aperture_space(upload[0]->bo);
       for (i = 1; i < nr_uploads; i++) {
-	 upload[i]->glarray = interleaved_vbo_array(brw,
-						    i,
-						    input0->glarray,
-						    upload[i]->glarray,
-						    ptr);
+	 /* Then, just point upload[i] at upload[0]'s buffer. */
+	 upload[i]->stride = interleave;
+	 upload[i]->offset = upload[0]->offset +
+	    ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
+	 upload[i]->bo = upload[0]->bo;
+	 dri_bo_reference(upload[i]->bo);
       }
    }
    else {
+      /* Upload non-interleaved arrays */
       for (i = 0; i < nr_uploads; i++) {
-	 struct brw_vertex_element *input = upload[i];
-
-	 input->glarray = copy_array_to_vbo_array(brw, i, 
-						  input->glarray,
-						  input->element_size,
-						  input->count);
-
+          copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size);
+          if (upload[i]->bo) {
+              ret |= dri_bufmgr_check_aperture_space(upload[i]->bo);
+          }
       }
    }
 
-   /* XXX: In the rare cases where this happens we fallback all
-    * the way to software rasterization, although a tnl fallback
-    * would be sufficient.  I don't know of *any* real world
-    * cases with > 17 vertex attributes enabled, so it probably
-    * isn't an issue at this point.
-    */
-   if (nr_enabled >= BRW_VEP_MAX)
-	 return GL_FALSE;
 
-   /* This still defines a hardware VB for each input, even if they
-    * are interleaved or from the same VBO.  TBD if this makes a
-    * performance difference.
-    */
-   for (i = 0; i < nr_enabled; i++) {
-      struct brw_vertex_element *input = enabled[i];
+   if (ret)
+     return 1;
 
-      input->vep = &vep.ve[i];
-      input->vep->ve0.src_format = get_surface_type(input->glarray->Type, 
-						    input->glarray->Size,
-						    input->glarray->Normalized);
-      input->vep->ve0.valid = 1;
-      input->vep->ve1.dst_offset = (i) * 4;
-      input->vep->ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC;
-      input->vep->ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC;
-      input->vep->ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC;
-      input->vep->ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC;
 
-      switch (input->glarray->Size) {
-      case 0: input->vep->ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_0;
-      case 1: input->vep->ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0;
-      case 2: input->vep->ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0;
-      case 3: input->vep->ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT;
-	 break;
-      }
+   return 0;
+}
 
-      input->vep->ve0.vertex_buffer_index = i;
-      input->vep->ve0.src_offset = 0;
+void brw_emit_vertices( struct brw_context *brw,
+                        GLuint min_index,   /* not referenced in this function */
+                        GLuint max_index )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct intel_context *intel = intel_context(ctx);
+   GLuint tmp = brw->vs.prog_data->inputs_read;   /* bitmask of inputs still to visit */
+   struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
+   GLuint i;
+   GLuint nr_enabled = 0;
 
-      vbp.vb[i].vb0.bits.pitch = input->glarray->StrideB;
-      vbp.vb[i].vb0.bits.pad = 0;
-      vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA;
-      vbp.vb[i].vb0.bits.vb_index = i;
-      vbp.vb[i].offset = (GLuint)input->glarray->Ptr;
-      vbp.vb[i].buffer = array_buffer(input->glarray);
-      vbp.vb[i].max_index = max_index;
-   }
+  /* Accumulate the list of enabled arrays: one entry per bit set in inputs_read. */
+   while (tmp) {
+      i = _mesa_ffsll(tmp)-1;   /* presumably the 0-based index of the lowest set bit -- confirm _mesa_ffsll */
+      struct brw_vertex_element *input = &brw->vb.inputs[i];
 
+      tmp &= ~(1<<i);   /* clear the bit just handled so the loop terminates */
+      enabled[nr_enabled++] = input;
+   }
 
 
-   /* Now emit VB and VEP state packets:
+   /* Now emit VB and VEP state packets.
+    *
+    * This still defines a hardware VB for each input, even if they
+    * are interleaved or from the same VBO.  TBD if this makes a
+    * performance difference.
     */
-   vbp.header.bits.length = (1 + nr_enabled * 4) - 2;
-   vbp.header.bits.opcode = CMD_VERTEX_BUFFER;
+   BEGIN_BATCH(1 + nr_enabled * 4, IGNORE_CLIPRECTS);   /* 1 header dword + 4 dwords per buffer */
+   OUT_BATCH((CMD_VERTEX_BUFFER << 16) |
+	     ((1 + nr_enabled * 4) - 2));   /* low bits: total dword count minus 2 */
 
-   BEGIN_BATCH(vbp.header.bits.length+2, 0);
-   OUT_BATCH( vbp.header.dword );
-   
    for (i = 0; i < nr_enabled; i++) {
-      OUT_BATCH( vbp.vb[i].vb0.dword );
-      OUT_BATCH( bmBufferOffset(&brw->intel, vbp.vb[i].buffer) + vbp.vb[i].offset);
-      OUT_BATCH( vbp.vb[i].max_index );
-      OUT_BATCH( vbp.vb[i].instance_data_step_rate );
+      struct brw_vertex_element *input = enabled[i];
+
+      OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) |
+		BRW_VB0_ACCESS_VERTEXDATA |
+		(input->stride << BRW_VB0_PITCH_SHIFT));
+      OUT_RELOC(input->bo,
+		DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		input->offset);
+      OUT_BATCH(max_index);
+      OUT_BATCH(0); /* Instance data step rate */
+
+      /* Unreference the buffer so it can get freed, now that we won't
+       * touch it any more.
+       */
+      dri_bo_unreference(input->bo);
+      input->bo = NULL;
    }
    ADVANCE_BATCH();
 
-   vep.header.length = (1 + nr_enabled * sizeof(vep.ve[0])/4) - 2;
-   vep.header.opcode = CMD_VERTEX_ELEMENT;
-   brw_cached_batch_struct(brw, &vep, 4 + nr_enabled * sizeof(vep.ve[0]));
-
-   return GL_TRUE;
-}
+   BEGIN_BATCH(1 + nr_enabled * 2, IGNORE_CLIPRECTS);   /* 1 header dword + 2 dwords per element */
+   OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr_enabled * 2) - 2));
+   for (i = 0; i < nr_enabled; i++) {
+      struct brw_vertex_element *input = enabled[i];
+      uint32_t format = get_surface_type(input->glarray->Type,
+					 input->glarray->Size,
+					 input->glarray->Normalized);
+      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
+      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
+      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
+      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
 
+      switch (input->glarray->Size) {   /* components at index >= Size get a constant instead of source data */
+      case 0: comp0 = BRW_VE1_COMPONENT_STORE_0;   /* fallthrough */
+      case 1: comp1 = BRW_VE1_COMPONENT_STORE_0;   /* fallthrough */
+      case 2: comp2 = BRW_VE1_COMPONENT_STORE_0;   /* fallthrough */
+      case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT;
+	 break;
+      }
 
-static GLuint element_size( GLenum type )
-{
-   switch(type) {
-   case GL_UNSIGNED_INT: return 4;
-   case GL_UNSIGNED_SHORT: return 2;
-   case GL_UNSIGNED_BYTE: return 1;
-   default: assert(0); return 0;
+      OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) |
+		BRW_VE0_VALID |
+		(format << BRW_VE0_FORMAT_SHIFT) |
+		(0 << BRW_VE0_SRC_OFFSET_SHIFT));
+      OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
+		(comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
+		(comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
+		(comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
+		((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
    }
+   ADVANCE_BATCH();
 }
 
-
-
-void brw_upload_indices( struct brw_context *brw,
-			 const struct _mesa_index_buffer *index_buffer )
+int brw_prepare_indices( struct brw_context *brw,
+			 const struct _mesa_index_buffer *index_buffer,
+			 dri_bo **bo_return,
+			 GLuint *offset_return)
 {
    GLcontext *ctx = &brw->intel.ctx;
    struct intel_context *intel = &brw->intel;
    GLuint ib_size = get_size(index_buffer->type) * index_buffer->count;
+   dri_bo *bo;
    struct gl_buffer_object *bufferobj = index_buffer->obj;
    GLuint offset = (GLuint)index_buffer->ptr;
+   int ret;
 
    /* Turn into a proper VBO:
     */
@@ -583,23 +534,51 @@ void brw_upload_indices( struct brw_context *brw,
      
       /* Get new bufferobj, offset:
        */
-      get_space(brw, ib_size, &bufferobj, &offset);
+      get_space(brw, ib_size, &bo, &offset);
 
       /* Straight upload
        */
-      ctx->Driver.BufferSubData( ctx,
-				 GL_ELEMENT_ARRAY_BUFFER_ARB,
-				 offset, 
-				 ib_size,
-				 index_buffer->ptr,
-				 bufferobj);
+      dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
+   } else {
+      /* If the index buffer isn't aligned to its element size, we have to
+       * rebase it into a temporary.
+       */
+       if ((get_size(index_buffer->type) - 1) & offset) {
+           GLubyte *map = ctx->Driver.MapBuffer(ctx,
+                                                GL_ELEMENT_ARRAY_BUFFER_ARB,
+                                                GL_DYNAMIC_DRAW_ARB,
+                                                bufferobj);
+           map += offset;
+
+	   get_space(brw, ib_size, &bo, &offset);
+
+	   dri_bo_subdata(bo, offset, ib_size, map);
+
+           ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj);
+       } else {
+	  bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj),
+				      INTEL_READ);
+	  dri_bo_reference(bo);
+       }
    }
 
+   *bo_return = bo;
+   *offset_return = offset;
+   ret = dri_bufmgr_check_aperture_space(bo);
+   return ret;
+}
+
+void brw_emit_indices(struct brw_context *brw,
+                      const struct _mesa_index_buffer *index_buffer,
+                      dri_bo *bo,
+                      GLuint offset)
+{
+   struct intel_context *intel = &brw->intel;
+   GLuint ib_size = get_size(index_buffer->type) * index_buffer->count;
    /* Emit the indexbuffer packet:
     */
    {
       struct brw_indexbuffer ib;
-      struct buffer *buffer = intel_bufferobj_buffer(intel_buffer_object(bufferobj));
 
       memset(&ib, 0, sizeof(ib));
    
@@ -609,11 +588,15 @@ void brw_upload_indices( struct brw_context *brw,
       ib.header.bits.cut_index_enable = 0;
    
 
-      BEGIN_BATCH(4, 0);
+      BEGIN_BATCH(4, IGNORE_CLIPRECTS);
       OUT_BATCH( ib.header.dword );
-      OUT_BATCH( bmBufferOffset(intel, buffer) + offset );
-      OUT_BATCH( bmBufferOffset(intel, buffer) + offset + ib_size );
+      OUT_RELOC( bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, offset);
+      OUT_RELOC( bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		 offset + ib_size);
       OUT_BATCH( 0 );
       ADVANCE_BATCH();
+
+      dri_bo_unreference(bo);
    }
 }
+
diff --git a/i965/brw_eu.h b/i965/brw_eu.h
index 9d46aac..207b8b7 100644
--- a/i965/brw_eu.h
+++ b/i965/brw_eu.h
@@ -110,7 +110,7 @@ struct brw_compile {
 
 
 
-static __inline int type_sz( GLuint type )
+static INLINE int type_sz( GLuint type )
 {
    switch( type ) {
    case BRW_REGISTER_TYPE_UD:
@@ -129,7 +129,7 @@ static __inline int type_sz( GLuint type )
    }
 }
 
-static __inline struct brw_reg brw_reg( GLuint file,
+static INLINE struct brw_reg brw_reg( GLuint file,
 					GLuint nr,
 					GLuint subnr,
 					GLuint type,
@@ -166,7 +166,7 @@ static __inline struct brw_reg brw_reg( GLuint file,
    return reg;
 }
 
-static __inline struct brw_reg brw_vec16_reg( GLuint file,
+static INLINE struct brw_reg brw_vec16_reg( GLuint file,
 					      GLuint nr,
 					      GLuint subnr )
 {
@@ -181,7 +181,7 @@ static __inline struct brw_reg brw_vec16_reg( GLuint file,
 		  WRITEMASK_XYZW);
 }
 
-static __inline struct brw_reg brw_vec8_reg( GLuint file,
+static INLINE struct brw_reg brw_vec8_reg( GLuint file,
 					     GLuint nr,
 					     GLuint subnr )
 {
@@ -197,7 +197,7 @@ static __inline struct brw_reg brw_vec8_reg( GLuint file,
 }
 
 
-static __inline struct brw_reg brw_vec4_reg( GLuint file,
+static INLINE struct brw_reg brw_vec4_reg( GLuint file,
 					      GLuint nr,
 					      GLuint subnr )
 {
@@ -213,7 +213,7 @@ static __inline struct brw_reg brw_vec4_reg( GLuint file,
 }
 
 
-static __inline struct brw_reg brw_vec2_reg( GLuint file,
+static INLINE struct brw_reg brw_vec2_reg( GLuint file,
 					      GLuint nr,
 					      GLuint subnr )
 {
@@ -228,7 +228,7 @@ static __inline struct brw_reg brw_vec2_reg( GLuint file,
 		  WRITEMASK_XY);
 }
 
-static __inline struct brw_reg brw_vec1_reg( GLuint file,
+static INLINE struct brw_reg brw_vec1_reg( GLuint file,
 					     GLuint nr,
 					     GLuint subnr )
 {
@@ -244,14 +244,14 @@ static __inline struct brw_reg brw_vec1_reg( GLuint file,
 }
 
 
-static __inline struct brw_reg retype( struct brw_reg reg,
+static INLINE struct brw_reg retype( struct brw_reg reg,
 				       GLuint type )
 {
    reg.type = type;
    return reg;
 }
 
-static __inline struct brw_reg suboffset( struct brw_reg reg,
+static INLINE struct brw_reg suboffset( struct brw_reg reg,
 					  GLuint delta )
 {   
    reg.subnr += delta * type_sz(reg.type);
@@ -259,7 +259,7 @@ static __inline struct brw_reg suboffset( struct brw_reg reg,
 }
 
 
-static __inline struct brw_reg offset( struct brw_reg reg,
+static INLINE struct brw_reg offset( struct brw_reg reg,
 				       GLuint delta )
 {
    reg.nr += delta;
@@ -267,7 +267,7 @@ static __inline struct brw_reg offset( struct brw_reg reg,
 }
 
 
-static __inline struct brw_reg byte_offset( struct brw_reg reg,
+static INLINE struct brw_reg byte_offset( struct brw_reg reg,
 					    GLuint bytes )
 {
    GLuint newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
@@ -277,28 +277,28 @@ static __inline struct brw_reg byte_offset( struct brw_reg reg,
 }
    
 
-static __inline struct brw_reg brw_uw16_reg( GLuint file,
+static INLINE struct brw_reg brw_uw16_reg( GLuint file,
 					     GLuint nr,
 					     GLuint subnr )
 {
    return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
 }
 
-static __inline struct brw_reg brw_uw8_reg( GLuint file,
+static INLINE struct brw_reg brw_uw8_reg( GLuint file,
 					    GLuint nr,
 					    GLuint subnr )
 {
    return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
 }
 
-static __inline struct brw_reg brw_uw1_reg( GLuint file,
+static INLINE struct brw_reg brw_uw1_reg( GLuint file,
 					    GLuint nr,
 					    GLuint subnr )
 {
    return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
 }
 
-static __inline struct brw_reg brw_imm_reg( GLuint type )
+static INLINE struct brw_reg brw_imm_reg( GLuint type )
 {
    return brw_reg( BRW_IMMEDIATE_VALUE,
 		   0,
@@ -311,38 +311,38 @@ static __inline struct brw_reg brw_imm_reg( GLuint type )
 		   0);      
 }
 
-static __inline struct brw_reg brw_imm_f( GLfloat f )
+static INLINE struct brw_reg brw_imm_f( GLfloat f )
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
    imm.dw1.f = f;
    return imm;
 }
 
-static __inline struct brw_reg brw_imm_d( GLint d )
+static INLINE struct brw_reg brw_imm_d( GLint d )
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
    imm.dw1.d = d;
    return imm;
 }
 
-static __inline struct brw_reg brw_imm_ud( GLuint ud )
+static INLINE struct brw_reg brw_imm_ud( GLuint ud )
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
    imm.dw1.ud = ud;
    return imm;
 }
 
-static __inline struct brw_reg brw_imm_uw( GLushort uw )
+static INLINE struct brw_reg brw_imm_uw( GLushort uw )
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
-   imm.dw1.ud = uw;
+   imm.dw1.ud = uw | ((GLuint)uw << 16);   /* same 16 bits in both halves; shift done as GLuint so bit 15 never lands in int's sign bit */
    return imm;
 }
 
-static __inline struct brw_reg brw_imm_w( GLshort w )
+static INLINE struct brw_reg brw_imm_w( GLshort w )
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
-   imm.dw1.d = w;
+   imm.dw1.ud = (GLushort)w | ((GLuint)(GLushort)w << 16);   /* mask off sign extension first: a negative w must be copied into the high half, not leave it 0xffff */
    return imm;
 }
 
@@ -352,7 +352,7 @@ static __inline struct brw_reg brw_imm_w( GLshort w )
 
 /* Vector of eight signed half-byte values: 
  */
-static __inline struct brw_reg brw_imm_v( GLuint v )
+static INLINE struct brw_reg brw_imm_v( GLuint v )
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
    imm.vstride = BRW_VERTICAL_STRIDE_0;
@@ -364,7 +364,7 @@ static __inline struct brw_reg brw_imm_v( GLuint v )
 
 /* Vector of four 8-bit float values:
  */
-static __inline struct brw_reg brw_imm_vf( GLuint v )
+static INLINE struct brw_reg brw_imm_vf( GLuint v )
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
    imm.vstride = BRW_VERTICAL_STRIDE_0;
@@ -378,7 +378,7 @@ static __inline struct brw_reg brw_imm_vf( GLuint v )
 #define VF_ONE  0x30
 #define VF_NEG  (1<<7)
 
-static __inline struct brw_reg brw_imm_vf4( GLuint v0, 
+static INLINE struct brw_reg brw_imm_vf4( GLuint v0, 
 					    GLuint v1, 
 					    GLuint v2,
 					    GLuint v3)
@@ -395,51 +395,51 @@ static __inline struct brw_reg brw_imm_vf4( GLuint v0,
 }
 
 
-static __inline struct brw_reg brw_address( struct brw_reg reg )
+static INLINE struct brw_reg brw_address( struct brw_reg reg )
 {
    return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
 }
 
 
-static __inline struct brw_reg brw_vec1_grf( GLuint nr,
+static INLINE struct brw_reg brw_vec1_grf( GLuint nr,
 					       GLuint subnr )
 {
    return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
 }
 
-static __inline struct brw_reg brw_vec8_grf( GLuint nr,
+static INLINE struct brw_reg brw_vec8_grf( GLuint nr,
 					     GLuint subnr )
 {
    return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
 }
 
-static __inline struct brw_reg brw_vec4_grf( GLuint nr,
+static INLINE struct brw_reg brw_vec4_grf( GLuint nr,
 					     GLuint subnr )
 {
    return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
 }
 
 
-static __inline struct brw_reg brw_vec2_grf( GLuint nr,
+static INLINE struct brw_reg brw_vec2_grf( GLuint nr,
 					     GLuint subnr )
 {
    return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
 }
 
-static __inline struct brw_reg brw_uw8_grf( GLuint nr,
+static INLINE struct brw_reg brw_uw8_grf( GLuint nr,
 					    GLuint subnr )
 {
    return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
 }
 
-static __inline struct brw_reg brw_null_reg( void )
+static INLINE struct brw_reg brw_null_reg( void )
 {
    return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, 
 		       BRW_ARF_NULL, 
 		       0);
 }
 
-static __inline struct brw_reg brw_address_reg( GLuint subnr )
+static INLINE struct brw_reg brw_address_reg( GLuint subnr )
 {
    return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, 
 		      BRW_ARF_ADDRESS, 
@@ -450,7 +450,7 @@ static __inline struct brw_reg brw_address_reg( GLuint subnr )
  * aren't xyzw.  This goes against the convention for other scalar
  * regs:
  */
-static __inline struct brw_reg brw_ip_reg( void )
+static INLINE struct brw_reg brw_ip_reg( void )
 {
    return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, 
 		  BRW_ARF_IP, 
@@ -463,7 +463,7 @@ static __inline struct brw_reg brw_ip_reg( void )
 		  WRITEMASK_XYZW); /* NOTE! */
 }
 
-static __inline struct brw_reg brw_acc_reg( void )
+static INLINE struct brw_reg brw_acc_reg( void )
 {
    return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, 
 		       BRW_ARF_ACCUMULATOR, 
@@ -471,7 +471,7 @@ static __inline struct brw_reg brw_acc_reg( void )
 }
 
 
-static __inline struct brw_reg brw_flag_reg( void )
+static INLINE struct brw_reg brw_flag_reg( void )
 {
    return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
 		      BRW_ARF_FLAG,
@@ -479,14 +479,14 @@ static __inline struct brw_reg brw_flag_reg( void )
 }
 
 
-static __inline struct brw_reg brw_mask_reg( GLuint subnr )
+static INLINE struct brw_reg brw_mask_reg( GLuint subnr )
 {
    return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
 		      BRW_ARF_MASK,
 		      subnr);
 }
 
-static __inline struct brw_reg brw_message_reg( GLuint nr )
+static INLINE struct brw_reg brw_message_reg( GLuint nr )
 {
    return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE,
 		       nr,
@@ -499,7 +499,7 @@ static __inline struct brw_reg brw_message_reg( GLuint nr )
 /* This is almost always called with a numeric constant argument, so
  * make things easy to evaluate at compile time:
  */
-static __inline GLuint cvt( GLuint val )
+static INLINE GLuint cvt( GLuint val )
 {
    switch (val) {
    case 0: return 0;
@@ -513,7 +513,7 @@ static __inline GLuint cvt( GLuint val )
    return 0;
 }
 
-static __inline struct brw_reg stride( struct brw_reg reg,
+static INLINE struct brw_reg stride( struct brw_reg reg,
 				       GLuint vstride,
 				       GLuint width,
 				       GLuint hstride )
@@ -525,43 +525,43 @@ static __inline struct brw_reg stride( struct brw_reg reg,
    return reg;
 }
 
-static __inline struct brw_reg vec16( struct brw_reg reg )
+static INLINE struct brw_reg vec16( struct brw_reg reg )
 {
    return stride(reg, 16,16,1);
 }
 
-static __inline struct brw_reg vec8( struct brw_reg reg )
+static INLINE struct brw_reg vec8( struct brw_reg reg )
 {
    return stride(reg, 8,8,1);
 }
 
-static __inline struct brw_reg vec4( struct brw_reg reg )
+static INLINE struct brw_reg vec4( struct brw_reg reg )
 {
    return stride(reg, 4,4,1);
 }
 
-static __inline struct brw_reg vec2( struct brw_reg reg )
+static INLINE struct brw_reg vec2( struct brw_reg reg )
 {
    return stride(reg, 2,2,1);
 }
 
-static __inline struct brw_reg vec1( struct brw_reg reg )
+static INLINE struct brw_reg vec1( struct brw_reg reg )
 {
    return stride(reg, 0,1,0);
 }
 
-static __inline struct brw_reg get_element( struct brw_reg reg, GLuint elt )
+static INLINE struct brw_reg get_element( struct brw_reg reg, GLuint elt )
 {
    return vec1(suboffset(reg, elt));
 }
 
-static __inline struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt )
+static INLINE struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt )
 {
    return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt));
 }
 
 
-static __inline struct brw_reg brw_swizzle( struct brw_reg reg,
+static INLINE struct brw_reg brw_swizzle( struct brw_reg reg,
 					    GLuint x,
 					    GLuint y, 
 					    GLuint z,
@@ -575,33 +575,33 @@ static __inline struct brw_reg brw_swizzle( struct brw_reg reg,
 }
 
 
-static __inline struct brw_reg brw_swizzle1( struct brw_reg reg,
+static INLINE struct brw_reg brw_swizzle1( struct brw_reg reg,
 					     GLuint x )
 {
    return brw_swizzle(reg, x, x, x, x);
 }
 
-static __inline struct brw_reg brw_writemask( struct brw_reg reg,
+static INLINE struct brw_reg brw_writemask( struct brw_reg reg,
 					      GLuint mask )
 {
    reg.dw1.bits.writemask &= mask;
    return reg;
 }
 
-static __inline struct brw_reg brw_set_writemask( struct brw_reg reg,
+static INLINE struct brw_reg brw_set_writemask( struct brw_reg reg,
 						  GLuint mask )
 {
    reg.dw1.bits.writemask = mask;
    return reg;
 }
 
-static __inline struct brw_reg negate( struct brw_reg reg )
+static INLINE struct brw_reg negate( struct brw_reg reg )
 {
    reg.negate ^= 1;
    return reg;
 }
 
-static __inline struct brw_reg brw_abs( struct brw_reg reg )
+static INLINE struct brw_reg brw_abs( struct brw_reg reg )
 {
    reg.abs = 1;
    return reg;
@@ -609,7 +609,7 @@ static __inline struct brw_reg brw_abs( struct brw_reg reg )
 
 /***********************************************************************
  */
-static __inline struct brw_reg brw_vec4_indirect( GLuint subnr,
+static INLINE struct brw_reg brw_vec4_indirect( GLuint subnr,
 						  GLint offset )
 {
    struct brw_reg reg =  brw_vec4_grf(0, 0);
@@ -619,7 +619,7 @@ static __inline struct brw_reg brw_vec4_indirect( GLuint subnr,
    return reg;
 }
 
-static __inline struct brw_reg brw_vec1_indirect( GLuint subnr,
+static INLINE struct brw_reg brw_vec1_indirect( GLuint subnr,
 						  GLint offset )
 {
    struct brw_reg reg =  brw_vec1_grf(0, 0);
@@ -629,38 +629,48 @@ static __inline struct brw_reg brw_vec1_indirect( GLuint subnr,
    return reg;
 }
 
-static __inline struct brw_reg deref_4f(struct brw_indirect ptr, GLint offset)
+static INLINE struct brw_reg deref_4f(struct brw_indirect ptr, GLint offset)
 {
    return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
 }
 
-static __inline struct brw_reg deref_1f(struct brw_indirect ptr, GLint offset)
+static INLINE struct brw_reg deref_1f(struct brw_indirect ptr, GLint offset)
 {
    return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
 }
 
-static __inline struct brw_reg deref_4b(struct brw_indirect ptr, GLint offset)
+static INLINE struct brw_reg deref_4b(struct brw_indirect ptr, GLint offset)
 {
    return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);
 }
 
-static __inline struct brw_reg deref_1uw(struct brw_indirect ptr, GLint offset)
+static INLINE struct brw_reg deref_1uw(struct brw_indirect ptr, GLint offset)
 {
    return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);
 }
 
-static __inline struct brw_reg get_addr_reg(struct brw_indirect ptr)
+static INLINE struct brw_reg deref_1d(struct brw_indirect ptr, GLint offset)
+{
+   return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D);   /* deref_1f() operand with its type field set to D */
+}
+
+static INLINE struct brw_reg deref_1ud(struct brw_indirect ptr, GLint offset)
+{
+   return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);   /* deref_1f() operand with its type field set to UD */
+}
+
+static INLINE struct brw_reg get_addr_reg(struct brw_indirect ptr)
 {
    return brw_address_reg(ptr.addr_subnr);
 }
 
-static __inline struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, GLint offset )
+static INLINE struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, GLint offset )
 {
    ptr.addr_offset += offset;
    return ptr;
 }
 
-static __inline struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset )
+static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset )
 {
    struct brw_indirect ptr;
    ptr.addr_subnr = addr_subnr;
@@ -669,7 +679,10 @@ static __inline struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offse
    return ptr;
 }
 
-
+static INLINE struct brw_instruction *current_insn( struct brw_compile *p)
+{
+	return &p->store[p->nr_insn];   /* slot at index nr_insn; NOTE(review): presumably the next instruction to be emitted -- confirm against next_insn() */
+}
 
 void brw_pop_insn_state( struct brw_compile *p );
 void brw_push_insn_state( struct brw_compile *p );
@@ -809,9 +822,11 @@ void brw_ENDIF(struct brw_compile *p,
 struct brw_instruction *brw_DO(struct brw_compile *p,
 			       GLuint execute_size);
 
-void brw_WHILE(struct brw_compile *p, 
+struct brw_instruction *brw_WHILE(struct brw_compile *p, 
 	       struct brw_instruction *patch_insn);
 
+struct brw_instruction *brw_BREAK(struct brw_compile *p);
+struct brw_instruction *brw_CONT(struct brw_compile *p);
 /* Forward jumps:
  */
 void brw_land_fwd_jump(struct brw_compile *p, 
@@ -861,5 +876,6 @@ void brw_math_invert( struct brw_compile *p,
 		      struct brw_reg dst,
 		      struct brw_reg src);
 
-
+void brw_set_src1( struct brw_instruction *insn,
+                          struct brw_reg reg );
 #endif
diff --git a/i965/brw_eu_emit.c b/i965/brw_eu_emit.c
index 1c717e4..6b97f8b 100644
--- a/i965/brw_eu_emit.c
+++ b/i965/brw_eu_emit.c
@@ -164,7 +164,7 @@ static void brw_set_src0( struct brw_instruction *insn,
 }
 
 
-static void brw_set_src1( struct brw_instruction *insn,
+void brw_set_src1( struct brw_instruction *insn,
 			  struct brw_reg reg )
 {
    assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
@@ -186,7 +186,7 @@ static void brw_set_src1( struct brw_instruction *insn,
        * in the future:
        */
       assert (reg.address_mode == BRW_ADDRESS_DIRECT);
-      assert (reg.file == BRW_GENERAL_REGISTER_FILE);
+      //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
 
       if (insn->header.access_mode == BRW_ALIGN_1) {
 	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
@@ -319,7 +319,7 @@ static void brw_set_dp_read_message( struct brw_instruction *insn,
 }
 
 static void brw_set_sampler_message(struct brw_context *brw,
-				     struct brw_instruction *insn,
+                 struct brw_instruction *insn,
 				     GLuint binding_table_index,
 				     GLuint sampler,
 				     GLuint msg_type,
@@ -329,14 +329,14 @@ static void brw_set_sampler_message(struct brw_context *brw,
 {
    brw_set_src1(insn, brw_imm_d(0));
 
-   if (BRW_IS_IGD(brw)) {
-      insn->bits3.sampler_igd.binding_table_index = binding_table_index;
-      insn->bits3.sampler_igd.sampler = sampler;
-      insn->bits3.sampler_igd.msg_type = msg_type;
-      insn->bits3.sampler_igd.response_length = response_length;
-      insn->bits3.sampler_igd.msg_length = msg_length;
-      insn->bits3.sampler_igd.end_of_thread = eot;
-      insn->bits3.sampler_igd.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
+   if (BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) {
+      insn->bits3.sampler_gm45_g4x.binding_table_index = binding_table_index;
+      insn->bits3.sampler_gm45_g4x.sampler = sampler;
+      insn->bits3.sampler_gm45_g4x.msg_type = msg_type;
+      insn->bits3.sampler_gm45_g4x.response_length = response_length;
+      insn->bits3.sampler_gm45_g4x.msg_length = msg_length;
+      insn->bits3.sampler_gm45_g4x.end_of_thread = eot;
+      insn->bits3.sampler_gm45_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
    } else {
       insn->bits3.sampler.binding_table_index = binding_table_index;
       insn->bits3.sampler.sampler = sampler;
@@ -608,6 +608,34 @@ void brw_ENDIF(struct brw_compile *p,
    }
 }
 
+/* Emit a BREAK (dest/src0 = IP register, src1 = immediate 0) and return it. */
+struct brw_instruction *brw_BREAK(struct brw_compile *p)
+{
+   struct brw_instruction *brk = next_insn(p, BRW_OPCODE_BREAK);
+   brw_set_dest(brk, brw_ip_reg());
+   brw_set_src0(brk, brw_ip_reg());
+   brw_set_src1(brk, brw_imm_d(0x0));
+   brk->header.compression_control = BRW_COMPRESSION_NONE;
+   brk->header.execution_size = BRW_EXECUTE_8;
+   brk->header.mask_control = BRW_MASK_DISABLE;
+   brk->bits3.if_else.pad0 = 0;
+   return brk;
+}
+
+/* Emit a CONTINUE (dest/src0 = IP register, src1 = immediate 0) and return it. */
+struct brw_instruction *brw_CONT(struct brw_compile *p)
+{
+   struct brw_instruction *cont = next_insn(p, BRW_OPCODE_CONTINUE);
+   brw_set_dest(cont, brw_ip_reg());
+   brw_set_src0(cont, brw_ip_reg());
+   brw_set_src1(cont, brw_imm_d(0x0));
+   cont->header.compression_control = BRW_COMPRESSION_NONE;
+   cont->header.execution_size = BRW_EXECUTE_8;
+   cont->header.mask_control = BRW_MASK_DISABLE;
+   cont->bits3.if_else.pad0 = 0;
+   return cont;
+}
+
 /* DO/WHILE loop:
  */
 struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
@@ -619,13 +647,15 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
 
       /* Override the defaults for this instruction:
        */
-      brw_set_dest(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
-      brw_set_src0(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
-      brw_set_src1(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
+      brw_set_dest(insn, brw_null_reg());
+      brw_set_src0(insn, brw_null_reg());
+      brw_set_src1(insn, brw_null_reg());
 
       insn->header.compression_control = BRW_COMPRESSION_NONE;
       insn->header.execution_size = execute_size;
+      insn->header.predicate_control = BRW_PREDICATE_NONE;
       /* insn->header.mask_control = BRW_MASK_ENABLE; */
+      insn->header.mask_control = BRW_MASK_DISABLE;
 
       return insn;
    }
@@ -633,7 +663,7 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
 
 
 
-void brw_WHILE(struct brw_compile *p, 
+struct brw_instruction *brw_WHILE(struct brw_compile *p, 
 	       struct brw_instruction *do_insn)
 {
    struct brw_instruction *insn;
@@ -657,14 +687,16 @@ void brw_WHILE(struct brw_compile *p,
       insn->header.execution_size = do_insn->header.execution_size;
 
       assert(do_insn->header.opcode == BRW_OPCODE_DO);
-      insn->bits3.if_else.jump_count = do_insn - insn;
+      insn->bits3.if_else.jump_count = do_insn - insn + 1;
       insn->bits3.if_else.pop_count = 0;
       insn->bits3.if_else.pad0 = 0;
    }
 
 /*    insn->header.mask_control = BRW_MASK_ENABLE; */
 
+   insn->header.mask_control = BRW_MASK_DISABLE;
    p->current->header.predicate_control = BRW_PREDICATE_NONE;   
+   return insn;
 }
 
 
diff --git a/i965/brw_exec_generic.c b/i965/brw_exec_generic.c
deleted file mode 100644
index 11d1ef7..0000000
--- a/i965/brw_exec_generic.c
+++ /dev/null
@@ -1,530 +0,0 @@
-/**************************************************************************
-
-Copyright 2004 Tungsten Graphics Inc., Cedar Park, Texas.
-
-All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a
-copy of this software and associated documentation files (the "Software"),
-to deal in the Software without restriction, including without limitation
-on the rights to use, copy, modify, merge, publish, distribute, sub
-license, and/or sell copies of the Software, and to permit persons to whom
-the Software is furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice (including the next
-paragraph) shall be included in all copies or substantial portions of the
-Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
-ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
-DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
-OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
-USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/*
- * Authors:
- *   Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "glheader.h"
-#include "context.h"
-#include "macros.h"
-#include "vtxfmt.h"
-#include "dlist.h"
-#include "state.h"
-#include "light.h"
-#include "api_arrayelt.h"
-#include "api_noop.h"
-
-#include "brw_exec.h"
-
-
-/* Versions of all the entrypoints for situations where codegen isn't
- * available.  
- *
- * Note: Only one size for each attribute may be active at once.
- * Eg. if Color3f is installed/active, then Color4f may not be, even
- * if the vertex actually contains 4 color coordinates.  This is
- * because the 3f version won't otherwise set color[3] to 1.0 -- this
- * is the job of the chooser function when switching between Color4f
- * and Color3f.
- */
-#define ATTRFV( ATTR, N )				\
-static void attrib_##ATTR##_##N( const GLfloat *v )	\
-{							\
-   GET_CURRENT_CONTEXT( ctx );				\
-   struct brw_exec_context *exec = IMM_CONTEXT(ctx)->exec;			\
-							\
-   if ((ATTR) == 0) {					\
-      GLuint i;						\
-							\
-      if (N>0) exec->vtx.vbptr[0] = v[0];		\
-      if (N>1) exec->vtx.vbptr[1] = v[1];		\
-      if (N>2) exec->vtx.vbptr[2] = v[2];		\
-      if (N>3) exec->vtx.vbptr[3] = v[3];		\
-							\
-      for (i = N; i < exec->vtx.vertex_size; i++)	\
-	 exec->vtx.vbptr[i] = exec->vtx.vertex[i];	\
-							\
-      exec->vtx.vbptr += exec->vtx.vertex_size;		\
-      exec->ctx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; \
-							\
-      if (++exec->vtx.vert_count >= exec->vtx.max_vert)	\
-	 brw_exec_vtx_wrap( exec );		\
-   }							\
-   else {						\
-      GLfloat *dest = exec->vtx.attrptr[ATTR];		\
-      if (N>0) dest[0] = v[0];				\
-      if (N>1) dest[1] = v[1];				\
-      if (N>2) dest[2] = v[2];				\
-      if (N>3) dest[3] = v[3];				\
-   }							\
-}
-
-#define INIT(TAB, ATTR)						\
-   TAB[ATTR][0] = attrib_##ATTR##_1;				\
-   TAB[ATTR][1] = attrib_##ATTR##_2;				\
-   TAB[ATTR][2] = attrib_##ATTR##_3;				\
-   TAB[ATTR][3] = attrib_##ATTR##_4;
-
-
-#define ATTRS( ATTRIB )				\
-   ATTRFV( ATTRIB, 1 )				\
-   ATTRFV( ATTRIB, 2 )				\
-   ATTRFV( ATTRIB, 3 )				\
-   ATTRFV( ATTRIB, 4 )			
-
-ATTRS( 0 )
-ATTRS( 1 )
-ATTRS( 2 )
-ATTRS( 3 )
-ATTRS( 4 )
-ATTRS( 5 )
-ATTRS( 6 )
-ATTRS( 7 )
-ATTRS( 8 )
-ATTRS( 9 )
-ATTRS( 10 )
-ATTRS( 11 )
-ATTRS( 12 )
-ATTRS( 13 )
-ATTRS( 14 )
-ATTRS( 15 )
-
-void brw_exec_generic_attr_table_init( brw_attrfv_func (*tab)[4] )
-{
-   INIT( tab, 0 );
-   INIT( tab, 1 );
-   INIT( tab, 2 );
-   INIT( tab, 3 );
-   INIT( tab, 4 );
-   INIT( tab, 5 );
-   INIT( tab, 6 );
-   INIT( tab, 7 );
-   INIT( tab, 8 );
-   INIT( tab, 9 );
-   INIT( tab, 10 );
-   INIT( tab, 11 );
-   INIT( tab, 12 );
-   INIT( tab, 13 );
-   INIT( tab, 14 );
-   INIT( tab, 15 );
-}
-
-/* These can be made efficient with codegen.  Further, by adding more
- * logic to do_choose(), the double-dispatch for legacy entrypoints
- * like glVertex3f() can be removed.
- */
-#define DISPATCH_ATTRFV( ATTR, COUNT, P )	\
-do {						\
-   GET_CURRENT_CONTEXT( ctx ); 			\
-   struct brw_exec_context *exec = IMM_CONTEXT(ctx)->exec; 		\
-   exec->vtx.tabfv[ATTR][COUNT-1]( P );		\
-} while (0)
-
-#define DISPATCH_ATTR1FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 1, V )
-#define DISPATCH_ATTR2FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 2, V )
-#define DISPATCH_ATTR3FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 3, V )
-#define DISPATCH_ATTR4FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 4, V )
-
-#define DISPATCH_ATTR1F( ATTR, S ) DISPATCH_ATTRFV( ATTR, 1, &(S) )
-
-#define DISPATCH_ATTR2F( ATTR, S,T ) 		\
-do { 						\
-   GLfloat v[2]; 				\
-   v[0] = S; v[1] = T;				\
-   DISPATCH_ATTR2FV( ATTR, v );			\
-} while (0)
-#define DISPATCH_ATTR3F( ATTR, S,T,R ) 		\
-do { 						\
-   GLfloat v[3]; 				\
-   v[0] = S; v[1] = T; v[2] = R;		\
-   DISPATCH_ATTR3FV( ATTR, v );			\
-} while (0)
-#define DISPATCH_ATTR4F( ATTR, S,T,R,Q )	\
-do { 						\
-   GLfloat v[4]; 				\
-   v[0] = S; v[1] = T; v[2] = R; v[3] = Q;	\
-   DISPATCH_ATTR4FV( ATTR, v );			\
-} while (0)
-
-
-static void GLAPIENTRY brw_Vertex2f( GLfloat x, GLfloat y )
-{
-   DISPATCH_ATTR2F( BRW_ATTRIB_POS, x, y );
-}
-
-static void GLAPIENTRY brw_Vertex2fv( const GLfloat *v )
-{
-   DISPATCH_ATTR2FV( BRW_ATTRIB_POS, v );
-}
-
-static void GLAPIENTRY brw_Vertex3f( GLfloat x, GLfloat y, GLfloat z )
-{
-   DISPATCH_ATTR3F( BRW_ATTRIB_POS, x, y, z );
-}
-
-static void GLAPIENTRY brw_Vertex3fv( const GLfloat *v )
-{
-   DISPATCH_ATTR3FV( BRW_ATTRIB_POS, v );
-}
-
-static void GLAPIENTRY brw_Vertex4f( GLfloat x, GLfloat y, GLfloat z, 
-				      GLfloat w )
-{
-   DISPATCH_ATTR4F( BRW_ATTRIB_POS, x, y, z, w );
-}
-
-static void GLAPIENTRY brw_Vertex4fv( const GLfloat *v )
-{
-   DISPATCH_ATTR4FV( BRW_ATTRIB_POS, v );
-}
-
-static void GLAPIENTRY brw_TexCoord1f( GLfloat x )
-{
-   DISPATCH_ATTR1F( BRW_ATTRIB_TEX0, x );
-}
-
-static void GLAPIENTRY brw_TexCoord1fv( const GLfloat *v )
-{
-   DISPATCH_ATTR1FV( BRW_ATTRIB_TEX0, v );
-}
-
-static void GLAPIENTRY brw_TexCoord2f( GLfloat x, GLfloat y )
-{
-   DISPATCH_ATTR2F( BRW_ATTRIB_TEX0, x, y );
-}
-
-static void GLAPIENTRY brw_TexCoord2fv( const GLfloat *v )
-{
-   DISPATCH_ATTR2FV( BRW_ATTRIB_TEX0, v );
-}
-
-static void GLAPIENTRY brw_TexCoord3f( GLfloat x, GLfloat y, GLfloat z )
-{
-   DISPATCH_ATTR3F( BRW_ATTRIB_TEX0, x, y, z );
-}
-
-static void GLAPIENTRY brw_TexCoord3fv( const GLfloat *v )
-{
-   DISPATCH_ATTR3FV( BRW_ATTRIB_TEX0, v );
-}
-
-static void GLAPIENTRY brw_TexCoord4f( GLfloat x, GLfloat y, GLfloat z,
-					GLfloat w )
-{
-   DISPATCH_ATTR4F( BRW_ATTRIB_TEX0, x, y, z, w );
-}
-
-static void GLAPIENTRY brw_TexCoord4fv( const GLfloat *v )
-{
-   DISPATCH_ATTR4FV( BRW_ATTRIB_TEX0, v );
-}
-
-static void GLAPIENTRY brw_Normal3f( GLfloat x, GLfloat y, GLfloat z )
-{
-   DISPATCH_ATTR3F( BRW_ATTRIB_NORMAL, x, y, z );
-}
-
-static void GLAPIENTRY brw_Normal3fv( const GLfloat *v )
-{
-   DISPATCH_ATTR3FV( BRW_ATTRIB_NORMAL, v );
-}
-
-static void GLAPIENTRY brw_FogCoordfEXT( GLfloat x )
-{
-   DISPATCH_ATTR1F( BRW_ATTRIB_FOG, x );
-}
-
-static void GLAPIENTRY brw_FogCoordfvEXT( const GLfloat *v )
-{
-   DISPATCH_ATTR1FV( BRW_ATTRIB_FOG, v );
-}
-
-static void GLAPIENTRY brw_Color3f( GLfloat x, GLfloat y, GLfloat z )
-{
-   DISPATCH_ATTR3F( BRW_ATTRIB_COLOR0, x, y, z );
-}
-
-static void GLAPIENTRY brw_Color3fv( const GLfloat *v )
-{
-   DISPATCH_ATTR3FV( BRW_ATTRIB_COLOR0, v );
-}
-
-static void GLAPIENTRY brw_Color4f( GLfloat x, GLfloat y, GLfloat z, 
-				     GLfloat w )
-{
-   DISPATCH_ATTR4F( BRW_ATTRIB_COLOR0, x, y, z, w );
-}
-
-static void GLAPIENTRY brw_Color4fv( const GLfloat *v )
-{
-   DISPATCH_ATTR4FV( BRW_ATTRIB_COLOR0, v );
-}
-
-static void GLAPIENTRY brw_SecondaryColor3fEXT( GLfloat x, GLfloat y, 
-						 GLfloat z )
-{
-   DISPATCH_ATTR3F( BRW_ATTRIB_COLOR1, x, y, z );
-}
-
-static void GLAPIENTRY brw_SecondaryColor3fvEXT( const GLfloat *v )
-{
-   DISPATCH_ATTR3FV( BRW_ATTRIB_COLOR1, v );
-}
-
-static void GLAPIENTRY brw_MultiTexCoord1f( GLenum target, GLfloat x  )
-{
-   GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0;
-   DISPATCH_ATTR1F( attr, x );
-}
-
-static void GLAPIENTRY brw_MultiTexCoord1fv( GLenum target,
-					      const GLfloat *v )
-{
-   GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0;
-   DISPATCH_ATTR1FV( attr, v );
-}
-
-static void GLAPIENTRY brw_MultiTexCoord2f( GLenum target, GLfloat x, 
-					     GLfloat y )
-{
-   GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0;
-   DISPATCH_ATTR2F( attr, x, y );
-}
-
-static void GLAPIENTRY brw_MultiTexCoord2fv( GLenum target, 
-					      const GLfloat *v )
-{
-   GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0;
-   DISPATCH_ATTR2FV( attr, v );
-}
-
-static void GLAPIENTRY brw_MultiTexCoord3f( GLenum target, GLfloat x, 
-					     GLfloat y, GLfloat z)
-{
-   GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0;
-   DISPATCH_ATTR3F( attr, x, y, z );
-}
-
-static void GLAPIENTRY brw_MultiTexCoord3fv( GLenum target, 
-					      const GLfloat *v )
-{
-   GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0;
-   DISPATCH_ATTR3FV( attr, v );
-}
-
-static void GLAPIENTRY brw_MultiTexCoord4f( GLenum target, GLfloat x, 
-					     GLfloat y, GLfloat z,
-					     GLfloat w )
-{
-   GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0;
-   DISPATCH_ATTR4F( attr, x, y, z, w );
-}
-
-static void GLAPIENTRY brw_MultiTexCoord4fv( GLenum target, 
-					      const GLfloat *v )
-{
-   GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0;
-   DISPATCH_ATTR4FV( attr, v );
-}
-
-
-static void GLAPIENTRY brw_VertexAttrib1fNV( GLuint index, GLfloat x )
-{
-   if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB;
-   DISPATCH_ATTR1F( index, x );
-}
-
-static void GLAPIENTRY brw_VertexAttrib1fvNV( GLuint index, 
-					       const GLfloat *v )
-{
-   if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB;
-   DISPATCH_ATTR1FV( index, v );
-}
-
-static void GLAPIENTRY brw_VertexAttrib2fNV( GLuint index, GLfloat x, 
-					      GLfloat y )
-{
-   if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB;
-   DISPATCH_ATTR2F( index, x, y );
-}
-
-static void GLAPIENTRY brw_VertexAttrib2fvNV( GLuint index,
-					       const GLfloat *v )
-{
-   if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB;
-   DISPATCH_ATTR2FV( index, v );
-}
-
-static void GLAPIENTRY brw_VertexAttrib3fNV( GLuint index, GLfloat x,
-					      GLfloat y, GLfloat z )
-{
-   if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB;
-   DISPATCH_ATTR3F( index, x, y, z );
-}
-
-static void GLAPIENTRY brw_VertexAttrib3fvNV( GLuint index,
-					       const GLfloat *v )
-{
-   if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB;
-   DISPATCH_ATTR3FV( index, v );
-}
-
-static void GLAPIENTRY brw_VertexAttrib4fNV( GLuint index, GLfloat x,
-					      GLfloat y, GLfloat z,
-					      GLfloat w )
-{
-   if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB;
-   DISPATCH_ATTR4F( index, x, y, z, w );
-}
-
-static void GLAPIENTRY brw_VertexAttrib4fvNV( GLuint index, 
-					       const GLfloat *v )
-{
-   if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB;
-   DISPATCH_ATTR4FV( index, v );
-}
-
-
-/*
- * XXX adjust index
- */
-
-static void GLAPIENTRY brw_VertexAttrib1fARB( GLuint index, GLfloat x )
-{
-   if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB;
-   DISPATCH_ATTR1F( index, x );
-}
-
-static void GLAPIENTRY brw_VertexAttrib1fvARB( GLuint index, 
-					       const GLfloat *v )
-{
-   if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB;
-   DISPATCH_ATTR1FV( index, v );
-}
-
-static void GLAPIENTRY brw_VertexAttrib2fARB( GLuint index, GLfloat x, 
-					      GLfloat y )
-{
-   if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB;
-   DISPATCH_ATTR2F( index, x, y );
-}
-
-static void GLAPIENTRY brw_VertexAttrib2fvARB( GLuint index,
-					       const GLfloat *v )
-{
-   if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB;
-   DISPATCH_ATTR2FV( index, v );
-}
-
-static void GLAPIENTRY brw_VertexAttrib3fARB( GLuint index, GLfloat x,
-					      GLfloat y, GLfloat z )
-{
-   if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB;
-   DISPATCH_ATTR3F( index, x, y, z );
-}
-
-static void GLAPIENTRY brw_VertexAttrib3fvARB( GLuint index,
-					       const GLfloat *v )
-{
-   if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB;
-   DISPATCH_ATTR3FV( index, v );
-}
-
-static void GLAPIENTRY brw_VertexAttrib4fARB( GLuint index, GLfloat x,
-					      GLfloat y, GLfloat z,
-					      GLfloat w )
-{
-   if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB;
-   DISPATCH_ATTR4F( index, x, y, z, w );
-}
-
-static void GLAPIENTRY brw_VertexAttrib4fvARB( GLuint index, 
-					       const GLfloat *v )
-{
-   if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB;
-   DISPATCH_ATTR4FV( index, v );
-}
-
-
-/* Install the generic versions of the 2nd level dispatch
- * functions.  Some of these have a codegen alternative.
- */
-void brw_exec_vtx_generic_init( struct brw_exec_context *exec )
-{
-   GLvertexformat *vfmt = &exec->vtxfmt;
-
-   vfmt->Color3f = brw_Color3f;
-   vfmt->Color3fv = brw_Color3fv;
-   vfmt->Color4f = brw_Color4f;
-   vfmt->Color4fv = brw_Color4fv;
-   vfmt->FogCoordfEXT = brw_FogCoordfEXT;
-   vfmt->FogCoordfvEXT = brw_FogCoordfvEXT;
-   vfmt->MultiTexCoord1fARB = brw_MultiTexCoord1f;
-   vfmt->MultiTexCoord1fvARB = brw_MultiTexCoord1fv;
-   vfmt->MultiTexCoord2fARB = brw_MultiTexCoord2f;
-   vfmt->MultiTexCoord2fvARB = brw_MultiTexCoord2fv;
-   vfmt->MultiTexCoord3fARB = brw_MultiTexCoord3f;
-   vfmt->MultiTexCoord3fvARB = brw_MultiTexCoord3fv;
-   vfmt->MultiTexCoord4fARB = brw_MultiTexCoord4f;
-   vfmt->MultiTexCoord4fvARB = brw_MultiTexCoord4fv;
-   vfmt->Normal3f = brw_Normal3f;
-   vfmt->Normal3fv = brw_Normal3fv;
-   vfmt->SecondaryColor3fEXT = brw_SecondaryColor3fEXT;
-   vfmt->SecondaryColor3fvEXT = brw_SecondaryColor3fvEXT;
-   vfmt->TexCoord1f = brw_TexCoord1f;
-   vfmt->TexCoord1fv = brw_TexCoord1fv;
-   vfmt->TexCoord2f = brw_TexCoord2f;
-   vfmt->TexCoord2fv = brw_TexCoord2fv;
-   vfmt->TexCoord3f = brw_TexCoord3f;
-   vfmt->TexCoord3fv = brw_TexCoord3fv;
-   vfmt->TexCoord4f = brw_TexCoord4f;
-   vfmt->TexCoord4fv = brw_TexCoord4fv;
-   vfmt->Vertex2f = brw_Vertex2f;
-   vfmt->Vertex2fv = brw_Vertex2fv;
-   vfmt->Vertex3f = brw_Vertex3f;
-   vfmt->Vertex3fv = brw_Vertex3fv;
-   vfmt->Vertex4f = brw_Vertex4f;
-   vfmt->Vertex4fv = brw_Vertex4fv;
-   vfmt->VertexAttrib1fNV = brw_VertexAttrib1fNV;
-   vfmt->VertexAttrib1fvNV = brw_VertexAttrib1fvNV;
-   vfmt->VertexAttrib2fNV = brw_VertexAttrib2fNV;
-   vfmt->VertexAttrib2fvNV = brw_VertexAttrib2fvNV;
-   vfmt->VertexAttrib3fNV = brw_VertexAttrib3fNV;
-   vfmt->VertexAttrib3fvNV = brw_VertexAttrib3fvNV;
-   vfmt->VertexAttrib4fNV = brw_VertexAttrib4fNV;
-   vfmt->VertexAttrib4fvNV = brw_VertexAttrib4fvNV;
-   vfmt->VertexAttrib1fARB = brw_VertexAttrib1fARB;
-   vfmt->VertexAttrib1fvARB = brw_VertexAttrib1fvARB;
-   vfmt->VertexAttrib2fARB = brw_VertexAttrib2fARB;
-   vfmt->VertexAttrib2fvARB = brw_VertexAttrib2fvARB;
-   vfmt->VertexAttrib3fARB = brw_VertexAttrib3fARB;
-   vfmt->VertexAttrib3fvARB = brw_VertexAttrib3fvARB;
-   vfmt->VertexAttrib4fARB = brw_VertexAttrib4fARB;
-   vfmt->VertexAttrib4fvARB = brw_VertexAttrib4fvARB;
-}
diff --git a/i965/brw_fallback.c b/i965/brw_fallback.c
index 86464b2..8a8fb50 100644
--- a/i965/brw_fallback.c
+++ b/i965/brw_fallback.c
@@ -39,38 +39,32 @@
 #include "macros.h"
 #include "mtypes.h"
 
-
-
-
-
-
+#define FILE_DEBUG_FLAG DEBUG_FALLBACKS
 
 static GLboolean do_check_fallback(struct brw_context *brw)
 {
    GLcontext *ctx = &brw->intel.ctx;
    GLuint i;
-   
+
    /* BRW_NEW_METAOPS
     */
    if (brw->metaops.active)
       return GL_FALSE;
 
-   if (brw->intel.no_rast)
-      return GL_TRUE;
-   
-   /* _NEW_BUFFERS
-    */
-   if (ctx->DrawBuffer->_ColorDrawBufferMask[0] != BUFFER_BIT_FRONT_LEFT &&
-       ctx->DrawBuffer->_ColorDrawBufferMask[0] != BUFFER_BIT_BACK_LEFT)
+   if (brw->intel.no_rast) {
+      DBG("FALLBACK: rasterization disabled\n");
       return GL_TRUE;
+   }
 
    /* _NEW_RENDERMODE
     *
     * XXX: need to save/restore RenderMode in metaops state, or
     * somehow move to a new attribs pointer:
     */
-   if (ctx->RenderMode != GL_RENDER)
+   if (ctx->RenderMode != GL_RENDER) {
+      DBG("FALLBACK: render mode\n");
       return GL_TRUE;
+   }
 
    /* _NEW_TEXTURE:
     */
@@ -79,8 +73,13 @@ static GLboolean do_check_fallback(struct brw_context *brw)
       if (texUnit->_ReallyEnabled) {
 	 struct intel_texture_object *intelObj = intel_texture_object(texUnit->_Current);
 	 struct gl_texture_image *texImage = intelObj->base.Image[0][intelObj->firstLevel];
-	 if (texImage->Border)
+	 if (texImage->Border ||
+         ((texImage->_BaseFormat == GL_DEPTH_COMPONENT) &&
+          ((texImage->TexObject->WrapS == GL_CLAMP_TO_BORDER) || 
+           (texImage->TexObject->WrapT == GL_CLAMP_TO_BORDER)))) {
+	    DBG("FALLBACK: texture border\n");
 	    return GL_TRUE;
+	 }
       }
    }
    
@@ -88,6 +87,7 @@ static GLboolean do_check_fallback(struct brw_context *brw)
     */
    if (brw->attribs.Stencil->Enabled && 
        !brw->intel.hw_stencil) {
+      DBG("FALLBACK: stencil\n");
       return GL_TRUE;
    }
 
@@ -95,9 +95,10 @@ static GLboolean do_check_fallback(struct brw_context *brw)
    return GL_FALSE;
 }
 
-static void check_fallback(struct brw_context *brw)
+static int check_fallback(struct brw_context *brw)
 {
    brw->intel.Fallback = do_check_fallback(brw);
+   return 0;
 }
 
 const struct brw_tracked_state brw_check_fallback = {
@@ -106,7 +107,7 @@ const struct brw_tracked_state brw_check_fallback = {
       .brw  = BRW_NEW_METAOPS,
       .cache = 0
    },
-   .update = check_fallback
+   .prepare = check_fallback
 };
 
 
diff --git a/i965/brw_gs.c b/i965/brw_gs.c
index 119d07d..9419315 100644
--- a/i965/brw_gs.c
+++ b/i965/brw_gs.c
@@ -119,26 +119,15 @@ static void compile_gs_prog( struct brw_context *brw,
 
    /* Upload
     */
-   brw->gs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_GS_PROG],
-					      &c.key,
-					      sizeof(c.key),
-					      program,
-					      program_size,
-					      &c.prog_data,
-					      &brw->gs.prog_data );
+   dri_bo_unreference(brw->gs.prog_bo);
+   brw->gs.prog_bo = brw_upload_cache( &brw->cache, BRW_GS_PROG,
+				       &c.key, sizeof(c.key),
+				       NULL, 0,
+				       program, program_size,
+				       &c.prog_data,
+				       &brw->gs.prog_data );
 }
 
-
-static GLboolean search_cache( struct brw_context *brw, 
-			       struct brw_gs_prog_key *key )
-{
-   return brw_search_cache(&brw->cache[BRW_GS_PROG], 
-			   key, sizeof(*key),
-			   &brw->gs.prog_data,
-			   &brw->gs.prog_gs_offset);
-}
-
-
 static const GLenum gs_prim[GL_POLYGON+1] = {  
    GL_POINTS,
    GL_LINES,
@@ -173,10 +162,10 @@ static void populate_key( struct brw_context *brw,
 
 /* Calculate interpolants for triangle and line rasterization.
  */
-static void upload_gs_prog( struct brw_context *brw )
+static int prepare_gs_prog( struct brw_context *brw )
 {
    struct brw_gs_prog_key key;
-
+   int ret = 0;
    /* Populate the key:
     */
    populate_key(brw, &key);
@@ -187,9 +176,18 @@ static void upload_gs_prog( struct brw_context *brw )
    }
 
    if (brw->gs.prog_active) {
-      if (!search_cache(brw, &key))
+      dri_bo_unreference(brw->gs.prog_bo);
+      brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG,
+					 &key, sizeof(key),
+					 NULL, 0,
+					 &brw->gs.prog_data);
+      if (brw->gs.prog_bo == NULL)
 	 compile_gs_prog( brw, &key );
+
+      ret |= dri_bufmgr_check_aperture_space(brw->gs.prog_bo);
    }
+
+   return ret;
 }
 
 
@@ -199,5 +197,5 @@ const struct brw_tracked_state brw_gs_prog = {
       .brw   = BRW_NEW_PRIMITIVE,
       .cache = CACHE_NEW_VS_PROG
    },
-   .update = upload_gs_prog
+   .prepare = prepare_gs_prog
 };
diff --git a/i965/brw_gs.h b/i965/brw_gs.h
index 29a4e80..18a4537 100644
--- a/i965/brw_gs.h
+++ b/i965/brw_gs.h
@@ -40,11 +40,11 @@
 #define MAX_GS_VERTS (4)	     
 
 struct brw_gs_prog_key {
+   GLuint attrs:32;
    GLuint primitive:4;
-   GLuint attrs:16;		
    GLuint hint_gs_always:1;
    GLuint need_gs_prog:1;
-   GLuint pad:10;
+   GLuint pad:26;
 };
 
 struct brw_gs_compile {
diff --git a/i965/brw_gs_state.c b/i965/brw_gs_state.c
index 5826c01..f1f9e01 100644
--- a/i965/brw_gs_state.c
+++ b/i965/brw_gs_state.c
@@ -36,47 +36,102 @@
 #include "brw_defines.h"
 #include "macros.h"
 
+struct brw_gs_unit_key {
+   unsigned int total_grf;
+   unsigned int urb_entry_read_length;
 
+   unsigned int curbe_offset;
 
-static void upload_gs_unit( struct brw_context *brw )
-{
-   struct brw_gs_unit_state gs;
+   unsigned int nr_urb_entries, urb_size;
+   GLboolean prog_active;
+};
 
-   memset(&gs, 0, sizeof(gs));
+static void
+gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key)
+{
+   memset(key, 0, sizeof(*key));
 
    /* CACHE_NEW_GS_PROG */
-   if (brw->gs.prog_active) {
-      gs.thread0.grf_reg_count = ((brw->gs.prog_data->total_grf-1) & ~15) / 16;
-      gs.thread0.kernel_start_pointer = brw->gs.prog_gs_offset >> 6;
-      gs.thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length;
-   }
-   else {
-      gs.thread0.grf_reg_count = 0;
-      gs.thread0.kernel_start_pointer = 0;
-      gs.thread3.urb_entry_read_length = 1;
+   key->prog_active = brw->gs.prog_active;
+   if (key->prog_active) {
+      key->total_grf = brw->gs.prog_data->total_grf;
+      key->urb_entry_read_length = brw->gs.prog_data->urb_read_length;
+   } else {
+      key->total_grf = 1;
+      key->urb_entry_read_length = 1;
    }
 
+   /* BRW_NEW_CURBE_OFFSETS */
+   key->curbe_offset = brw->curbe.clip_start;
+
    /* BRW_NEW_URB_FENCE */
-   gs.thread4.nr_urb_entries = brw->urb.nr_gs_entries; 
-   gs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
+   key->nr_urb_entries = brw->urb.nr_gs_entries;
+   key->urb_size = brw->urb.vsize;
+}
 
-   gs.thread4.max_threads = 0; /* Hardware requirement */
+static dri_bo *
+gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
+{
+   struct brw_gs_unit_state gs;
+   dri_bo *bo;
 
-   if (INTEL_DEBUG & DEBUG_STATS)
-      gs.thread4.stats_enable = 1; 
+   memset(&gs, 0, sizeof(gs));
+
+   gs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+   if (key->prog_active) /* reloc */
+      gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6;
 
-   /* CONSTANT */
    gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
    gs.thread1.single_program_flow = 1;
+
    gs.thread3.dispatch_grf_start_reg = 1;
    gs.thread3.const_urb_entry_read_offset = 0;
    gs.thread3.const_urb_entry_read_length = 0;
    gs.thread3.urb_entry_read_offset = 0;
-   
+   gs.thread3.urb_entry_read_length = key->urb_entry_read_length;
+
+   gs.thread4.nr_urb_entries = key->nr_urb_entries;
+   gs.thread4.urb_entry_allocation_size = key->urb_size - 1;
+
+   gs.thread4.max_threads = 0; /* Hardware requirement */
+
+   if (INTEL_DEBUG & DEBUG_STATS)
+      gs.thread4.stats_enable = 1;
+
+   bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT,
+			 key, sizeof(*key),
+			 &brw->gs.prog_bo, 1,
+			 &gs, sizeof(gs),
+			 NULL, NULL);
 
-   brw->gs.state_gs_offset = brw_cache_data( &brw->cache[BRW_GS_UNIT], &gs );
+   if (key->prog_active) {
+      /* Emit GS program relocation */
+      dri_emit_reloc(bo,
+		     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		     gs.thread0.grf_reg_count << 1,
+		     offsetof(struct brw_gs_unit_state, thread0),
+		     brw->gs.prog_bo);
+   }
+
+   return bo;
 }
 
+static int prepare_gs_unit( struct brw_context *brw )
+{
+   struct brw_gs_unit_key key;
+
+   gs_unit_populate_key(brw, &key);
+
+   dri_bo_unreference(brw->gs.state_bo);
+   brw->gs.state_bo = brw_search_cache(&brw->cache, BRW_GS_UNIT,
+				       &key, sizeof(key),
+				       &brw->gs.prog_bo, 1,
+				       NULL);
+   if (brw->gs.state_bo == NULL) {
+      brw->gs.state_bo = gs_unit_create_from_key(brw, &key);
+   }
+   return dri_bufmgr_check_aperture_space(brw->gs.state_bo);
+}
 
 const struct brw_tracked_state brw_gs_unit = {
    .dirty = {
@@ -85,5 +140,5 @@ const struct brw_tracked_state brw_gs_unit = {
 		BRW_NEW_URB_FENCE),
       .cache = CACHE_NEW_GS_PROG
    },
-   .update = upload_gs_unit
+   .prepare = prepare_gs_unit,
 };
diff --git a/i965/brw_hal.c b/i965/brw_hal.c
deleted file mode 100644
index 3126102..0000000
--- a/i965/brw_hal.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- 
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
- 
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
- 
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- **********************************************************************/
-
-#include "intel_batchbuffer.h"
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-#include "brw_hal.h"
-#include <dlfcn.h>
-
-static void *brw_hal_lib;
-static GLboolean brw_hal_tried;
-
-void *
-brw_hal_find_symbol (char *symbol)
-{
-    if (!brw_hal_tried)
-    {
-	char *brw_hal_name = getenv ("INTEL_HAL");
-    
-	if (!brw_hal_name)
-	    brw_hal_name = "/usr/lib/xorg/modules/drivers/intel_hal.so";
-
-	brw_hal_lib = dlopen (brw_hal_name, RTLD_LAZY|RTLD_LOCAL);
-	brw_hal_tried = 1;
-    }
-    if (!brw_hal_lib)
-	return NULL;
-    return dlsym (brw_hal_lib, symbol);
-}
diff --git a/i965/brw_hal.h b/i965/brw_hal.h
deleted file mode 100644
index cd86e39..0000000
--- a/i965/brw_hal.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- 
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
- 
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
- 
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- **********************************************************************/
-
-void *
-brw_hal_find_symbol (char *symbol);
diff --git a/i965/brw_metaops.c b/i965/brw_metaops.c
index 6e030f1..252a899 100644
--- a/i965/brw_metaops.c
+++ b/i965/brw_metaops.c
@@ -41,6 +41,7 @@
 #include "intel_screen.h"
 #include "intel_batchbuffer.h"
 #include "intel_regions.h"
+#include "intel_buffers.h"
 
 #include "brw_context.h"
 #include "brw_defines.h"
@@ -156,7 +157,7 @@ static const char *fp_tex_prog =
  *   FragmentProgram->_Current
  *   VertexProgram->_Enabled
  *   brw->vertex_program
- *   DrawBuffer->_ColorDrawBufferMask[0]
+ *   DrawBuffer->_ColorDrawBufferIndexes[0]
  * 
  *
  * More if drawpixels-through-texture is added.  
@@ -195,7 +196,7 @@ static void init_metaops_state( struct brw_context *brw )
 				  vp_prog, strlen(vp_prog),
 				  brw->metaops.vp);
 
-   brw->metaops.attribs.VertexProgram->Current = brw->metaops.vp;
+   brw->metaops.attribs.VertexProgram->_Current = brw->metaops.vp;
    brw->metaops.attribs.VertexProgram->_Enabled = GL_TRUE;
 
    brw->metaops.attribs.FragmentProgram->_Current = brw->metaops.fp;
@@ -361,13 +362,21 @@ static void meta_draw_region( struct intel_context *intel,
    struct brw_context *brw = brw_context(&intel->ctx);
 
    if (!brw->metaops.saved_draw_region) {
-      brw->metaops.saved_draw_region = brw->state.draw_region;
+      brw->metaops.saved_draw_region = brw->state.draw_regions[0];
+      brw->metaops.saved_nr_draw_regions = brw->state.nr_draw_regions;
       brw->metaops.saved_depth_region = brw->state.depth_region;
    }
 
-   brw->state.draw_region = draw_region;
+   brw->state.draw_regions[0] = draw_region;
+   brw->state.nr_draw_regions = 1;
    brw->state.depth_region = depth_region;
 
+   if (intel->frame_buffer_texobj != NULL)
+      brw_FrameBufferTexDestroy(brw);
+
+   if (draw_region)
+       brw_FrameBufferTexInit(brw, draw_region);
+
    brw->state.dirty.mesa |= _NEW_BUFFERS;
 }
 
@@ -376,8 +385,7 @@ static void meta_draw_quad(struct intel_context *intel,
 			   GLfloat x0, GLfloat x1,
 			   GLfloat y0, GLfloat y1, 
 			   GLfloat z,
-			   GLubyte red, GLubyte green,
-			   GLubyte blue, GLubyte alpha,
+			   GLuint color,
 			   GLfloat s0, GLfloat s1,
 			   GLfloat t0, GLfloat t1)
 {
@@ -388,7 +396,6 @@ static void meta_draw_quad(struct intel_context *intel,
    struct gl_client_array *attribs[VERT_ATTRIB_MAX];
    struct _mesa_prim prim[1];
    GLfloat pos[4][3];
-   GLubyte color[4];
 
    ctx->Driver.BufferData(ctx,
 			  GL_ARRAY_BUFFER_ARB,
@@ -413,7 +420,6 @@ static void meta_draw_quad(struct intel_context *intel,
    pos[3][1] = y1;
    pos[3][2] = z;
 
-
    ctx->Driver.BufferSubData(ctx,
 			     GL_ARRAY_BUFFER_ARB,
 			     0,
@@ -421,16 +427,15 @@ static void meta_draw_quad(struct intel_context *intel,
 			     pos,
 			     brw->metaops.vbo);
 
-   color[0] = red;
-   color[1] = green;
-   color[2] = blue;
-   color[3] = alpha;
+   /* Convert incoming ARGB to required RGBA */
+   /* Note this color is stored as GL_UNSIGNED_BYTE */
+   color = (color & 0xff00ff00) | (((color >> 16) | (color << 16)) & 0xff00ff);
 
    ctx->Driver.BufferSubData(ctx,
 			     GL_ARRAY_BUFFER_ARB,
 			     sizeof(pos),
 			     sizeof(color),
-			     color,
+			     &color,
 			     brw->metaops.vbo);
 
    /* Ignoring texture coords. 
@@ -486,6 +491,7 @@ static void install_meta_state( struct intel_context *intel )
 {
    GLcontext *ctx = &intel->ctx;
    struct brw_context *brw = brw_context(ctx);
+   GLuint i;
 
    if (!brw->metaops.vbo) {
       init_metaops_state(brw);
@@ -495,12 +501,18 @@ static void install_meta_state( struct intel_context *intel )
    
    meta_no_texture(&brw->intel);
    meta_flat_shade(&brw->intel);
-   brw->metaops.restore_draw_mask = ctx->DrawBuffer->_ColorDrawBufferMask[0];
+   for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) {
+      brw->metaops.restore_draw_buffers[i]
+         = ctx->DrawBuffer->_ColorDrawBufferIndexes[i];
+   }
+   brw->metaops.restore_num_draw_buffers = ctx->DrawBuffer->_NumColorDrawBuffers;
+
    brw->metaops.restore_fp = ctx->FragmentProgram.Current;
 
    /* This works without adjusting refcounts.  Fix later? 
     */
-   brw->metaops.saved_draw_region = brw->state.draw_region;
+   brw->metaops.saved_draw_region = brw->state.draw_regions[0];
+   brw->metaops.saved_nr_draw_regions = brw->state.nr_draw_regions;
    brw->metaops.saved_depth_region = brw->state.depth_region;
    brw->metaops.active = 1;
    
@@ -511,13 +523,20 @@ static void leave_meta_state( struct intel_context *intel )
 {
    GLcontext *ctx = &intel->ctx;
    struct brw_context *brw = brw_context(ctx);
+   GLuint i;
 
    restore_attribs(brw);
 
-   ctx->DrawBuffer->_ColorDrawBufferMask[0] = brw->metaops.restore_draw_mask;
+   for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) {
+      ctx->DrawBuffer->_ColorDrawBufferIndexes[i]
+         = brw->metaops.restore_draw_buffers[i];
+   }
+   ctx->DrawBuffer->_NumColorDrawBuffers = brw->metaops.restore_num_draw_buffers;
+
    ctx->FragmentProgram.Current = brw->metaops.restore_fp;
 
-   brw->state.draw_region = brw->metaops.saved_draw_region;
+   brw->state.draw_regions[0] = brw->metaops.saved_draw_region;
+   brw->state.nr_draw_regions = brw->metaops.saved_nr_draw_regions;
    brw->state.depth_region = brw->metaops.saved_depth_region;
    brw->metaops.saved_draw_region = NULL;
    brw->metaops.saved_depth_region = NULL;
diff --git a/i965/brw_misc_state.c b/i965/brw_misc_state.c
index fe476c9..62df259 100644
--- a/i965/brw_misc_state.c
+++ b/i965/brw_misc_state.c
@@ -68,137 +68,75 @@ const struct brw_tracked_state brw_blend_constant_color = {
       .brw = 0,
       .cache = 0
    },
-   .update = upload_blend_constant_color
+   .emit = upload_blend_constant_color
 };
 
-/***********************************************************************
- * Drawing rectangle -- Need for AUB file only.
+/**
+ * Upload the binding table pointers, which point to each stage's array of
+ * surface state pointers.
+ *
+ * The binding table pointers are relative to the surface state base address,
+ * which is 0.
  */
-static void upload_drawing_rect(struct brw_context *brw)
+static void upload_binding_table_pointers(struct brw_context *brw)
 {
    struct intel_context *intel = &brw->intel;
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;
-   struct brw_drawrect bdr;
-   int x1, y1;
-   int x2, y2;
 
-   /* If there is a single cliprect, set it here.  Otherwise iterate
-    * over them in brw_draw_prim().
-    */
-   if (brw->intel.numClipRects > 1) 
-      return; 
- 
-   x1 = brw->intel.pClipRects[0].x1;
-   y1 = brw->intel.pClipRects[0].y1;
-   x2 = brw->intel.pClipRects[0].x2;
-   y2 = brw->intel.pClipRects[0].y2;
-	 
-   if (x1 < 0) x1 = 0;
-   if (y1 < 0) y1 = 0;
-   if (x2 > intel->intelScreen->width) x2 = intel->intelScreen->width;
-   if (y2 > intel->intelScreen->height) y2 = intel->intelScreen->height;
-
-   memset(&bdr, 0, sizeof(bdr));
-   bdr.header.opcode = CMD_DRAW_RECT;
-   bdr.header.length = sizeof(bdr)/4 - 2;
-   bdr.xmin = x1;
-   bdr.ymin = y1;
-   bdr.xmax = x2;
-   bdr.ymax = y2;
-   bdr.xorg = dPriv->x;
-   bdr.yorg = dPriv->y;
-
-   /* Can't use BRW_CACHED_BATCH_STRUCT because this is also emitted
-    * uncached in brw_draw.c:
-    */
-   BRW_BATCH_STRUCT(brw, &bdr);
-}
-
-const struct brw_tracked_state brw_drawing_rect = {
-   .dirty = {
-      .mesa = _NEW_WINDOW_POS,
-      .brw = 0,
-      .cache = 0
-   },
-   .update = upload_drawing_rect
-};
-
-/***********************************************************************
- * Binding table pointers
- */
-
-static void upload_binding_table_pointers(struct brw_context *brw)
-{
-   struct brw_binding_table_pointers btp;
-   memset(&btp, 0, sizeof(btp));
-
-   /* The binding table has been emitted to the SS pool already, so we
-    * know what its offset is.  When the batch buffer is fired, the
-    * binding table and surface structs will get fixed up to point to
-    * where the textures actually landed, but that won't change the
-    * value of the offsets here:
-    */
-   btp.header.opcode = CMD_BINDING_TABLE_PTRS;
-   btp.header.length = sizeof(btp)/4 - 2;
-   btp.vs = 0;
-   btp.gs = 0;
-   btp.clp = 0;
-   btp.sf = 0;
-   btp.wm = brw->wm.bind_ss_offset;
-
-   BRW_CACHED_BATCH_STRUCT(brw, &btp);
+   BEGIN_BATCH(6, IGNORE_CLIPRECTS);
+   OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
+   OUT_BATCH(0); /* vs */
+   OUT_BATCH(0); /* gs */
+   OUT_BATCH(0); /* clip */
+   OUT_BATCH(0); /* sf */
+   OUT_RELOC(brw->wm.bind_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
+   ADVANCE_BATCH();
 }
 
 const struct brw_tracked_state brw_binding_table_pointers = {
    .dirty = {
       .mesa = 0,
-      .brw = 0,
-      .cache = CACHE_NEW_SURF_BIND 
+      .brw = BRW_NEW_BATCH,
+      .cache = CACHE_NEW_SURF_BIND,
    },
-   .update = upload_binding_table_pointers
+   .emit = upload_binding_table_pointers,
 };
 
 
-/***********************************************************************
- * Pipelined state pointers.  This is the key state packet from which
- * the hardware chases pointers to all the uploaded state in VRAM.
+/**
+ * Upload pointers to the per-stage state.
+ *
+ * The state pointers in this packet are all relative to the general state
+ * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
  */
-   
 static void upload_pipelined_state_pointers(struct brw_context *brw )
 {
-   struct brw_pipelined_state_pointers psp;
-   memset(&psp, 0, sizeof(psp));
-
-   psp.header.opcode = CMD_PIPELINED_STATE_POINTERS;
-   psp.header.length = sizeof(psp)/4 - 2;
-
-   psp.vs.offset = brw->vs.state_gs_offset >> 5;
-   psp.sf.offset = brw->sf.state_gs_offset >> 5;
-   psp.wm.offset = brw->wm.state_gs_offset >> 5;
-   psp.cc.offset = brw->cc.state_gs_offset >> 5;
-
-   /* GS gets turned on and off regularly.  Need to re-emit URB fence
-    * after this occurs.  
-    */
-   if (brw->gs.prog_active) {
-      psp.gs.offset = brw->gs.state_gs_offset >> 5;
-      psp.gs.enable = 1;
-   }
-
-   if (!brw->metaops.active) {
-      psp.clp.offset = brw->clip.state_gs_offset >> 5;
-      psp.clp.enable = 1;
-   }
-
+   struct intel_context *intel = &brw->intel;
 
-   if (BRW_CACHED_BATCH_STRUCT(brw, &psp))
-      brw->state.dirty.brw |= BRW_NEW_PSP;
+   BEGIN_BATCH(7, IGNORE_CLIPRECTS);
+   OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2));
+   OUT_RELOC(brw->vs.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
+   if (brw->gs.prog_active)
+      OUT_RELOC(brw->gs.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 1);
+   else
+      OUT_BATCH(0);
+   if (!brw->metaops.active)
+      OUT_RELOC(brw->clip.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 1);
+   else
+      OUT_BATCH(0);
+   OUT_RELOC(brw->sf.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
+   OUT_RELOC(brw->wm.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
+   OUT_RELOC(brw->cc.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
+   ADVANCE_BATCH();
+
+   brw->state.dirty.brw |= BRW_NEW_PSP;
 }
 
+#if 0
+/* Combined into brw_psp_urb_cbs */
 const struct brw_tracked_state brw_pipelined_state_pointers = {
    .dirty = {
       .mesa = 0,
-      .brw = BRW_NEW_METAOPS,
+      .brw = BRW_NEW_METAOPS | BRW_NEW_BATCH,
       .cache = (CACHE_NEW_VS_UNIT | 
 		CACHE_NEW_GS_UNIT | 
 		CACHE_NEW_GS_PROG | 
@@ -207,8 +145,9 @@ const struct brw_tracked_state brw_pipelined_state_pointers = {
 		CACHE_NEW_WM_UNIT | 
 		CACHE_NEW_CC_UNIT)
    },
-   .update = upload_pipelined_state_pointers
+   .emit = upload_pipelined_state_pointers
 };
+#endif
 
 static void upload_psp_urb_cbs(struct brw_context *brw )
 {
@@ -221,7 +160,7 @@ static void upload_psp_urb_cbs(struct brw_context *brw )
 const struct brw_tracked_state brw_psp_urb_cbs = {
    .dirty = {
       .mesa = 0,
-      .brw = BRW_NEW_URB_FENCE | BRW_NEW_METAOPS,
+      .brw = BRW_NEW_URB_FENCE | BRW_NEW_METAOPS | BRW_NEW_BATCH,
       .cache = (CACHE_NEW_VS_UNIT | 
 		CACHE_NEW_GS_UNIT | 
 		CACHE_NEW_GS_PROG | 
@@ -230,74 +169,91 @@ const struct brw_tracked_state brw_psp_urb_cbs = {
 		CACHE_NEW_WM_UNIT | 
 		CACHE_NEW_CC_UNIT)
    },
-   .update = upload_psp_urb_cbs
+   .emit = upload_psp_urb_cbs,
 };
 
+/**
+ * Upload the depthbuffer offset and format.
+ *
+ * We have to do this per state validation as we need to emit the relocation
+ * in the batch buffer.
+ */
 
+static int prepare_depthbuffer(struct brw_context *brw)
+{
+   struct intel_region *region = brw->state.depth_region;
 
+   if (!region || !region->buffer)
+      return 0;
+   return dri_bufmgr_check_aperture_space(region->buffer);
+}
 
-/***********************************************************************
- * Depthbuffer - currently constant, but rotation would change that.
- */
-
-static void upload_depthbuffer(struct brw_context *brw)
+static void emit_depthbuffer(struct brw_context *brw)
 {
-   /* 0x79050003  Depth Buffer */
    struct intel_context *intel = &brw->intel;
    struct intel_region *region = brw->state.depth_region;
-   struct brw_depthbuffer bd;
-   memset(&bd, 0, sizeof(bd));
-
-   bd.header.bits.opcode = CMD_DEPTH_BUFFER;
-   bd.header.bits.length = BRW_IS_IGD(brw) ? (sizeof(bd)/4-2) : (sizeof(bd)/4-3);
-   bd.dword1.bits.pitch = (region->pitch * region->cpp) - 1;
-   
-   switch (region->cpp) {
-   case 2:
-      bd.dword1.bits.format = BRW_DEPTHFORMAT_D16_UNORM;
-      break;
-   case 4:
-      if (intel->depth_buffer_is_float)
-	 bd.dword1.bits.format = BRW_DEPTHFORMAT_D32_FLOAT;
-      else
-	 bd.dword1.bits.format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
-      break;
-   default:
-      assert(0);
-      return;
+   unsigned int len = (BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) ? sizeof(struct brw_depthbuffer_gm45_g4x) / 4 : sizeof(struct brw_depthbuffer) / 4;
+
+   if (region == NULL) {
+      BEGIN_BATCH(len, IGNORE_CLIPRECTS);
+      OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
+      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
+		(BRW_SURFACE_NULL << 29));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+
+      if (BRW_IS_GM45(brw) || BRW_IS_G4X(brw))
+         OUT_BATCH(0);
+
+      ADVANCE_BATCH();
+   } else {
+      unsigned int format;
+
+      switch (region->cpp) {
+      case 2:
+	 format = BRW_DEPTHFORMAT_D16_UNORM;
+	 break;
+      case 4:
+	 if (intel->depth_buffer_is_float)
+	    format = BRW_DEPTHFORMAT_D32_FLOAT;
+	 else
+	    format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+	 break;
+      default:
+	 assert(0);
+	 return;
+      }
+
+      BEGIN_BATCH(len, IGNORE_CLIPRECTS);
+      OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
+      OUT_BATCH(((region->pitch * region->cpp) - 1) |
+		(format << 18) |
+		(BRW_TILEWALK_YMAJOR << 26) |
+		(region->tiled << 27) |
+		(BRW_SURFACE_2D << 29));
+      OUT_RELOC(region->buffer,
+		DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, 0);
+      OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
+		((region->pitch - 1) << 6) |
+		((region->height - 1) << 19));
+      OUT_BATCH(0);
+
+      if (BRW_IS_GM45(brw) || BRW_IS_G4X(brw))
+         OUT_BATCH(0);
+
+      ADVANCE_BATCH();
    }
-
-   bd.dword1.bits.depth_offset_disable = 0; /* coordinate offset */
-
-   /* The depthbuffer can only use YMAJOR tiling...  This is a bit of
-    * a shame as it clashes with the 2d blitter which only supports
-    * XMAJOR tiling...  
-    */
-   bd.dword1.bits.tile_walk = BRW_TILEWALK_YMAJOR;
-   bd.dword1.bits.tiled_surface = intel->depth_region->tiled;
-   bd.dword1.bits.surface_type = BRW_SURFACE_2D;
-
-   /* BRW_NEW_LOCK */
-   bd.dword2_base_addr = bmBufferOffset(intel, region->buffer);    
-
-   bd.dword3.bits.mipmap_layout = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
-   bd.dword3.bits.lod = 0;
-   bd.dword3.bits.width = region->pitch - 1; /* XXX: width ? */
-   bd.dword3.bits.height = region->height - 1;
-
-   bd.dword4.bits.min_array_element = 0;
-   bd.dword4.bits.depth = 0;
-      
-   BRW_CACHED_BATCH_STRUCT(brw, &bd);
 }
 
 const struct brw_tracked_state brw_depthbuffer = {
    .dirty = {
       .mesa = 0,
-      .brw = BRW_NEW_CONTEXT | BRW_NEW_LOCK,
-      .cache = 0
+      .brw = BRW_NEW_DEPTH_BUFFER | BRW_NEW_BATCH,
+      .cache = 0,
    },
-   .update = upload_depthbuffer
+   .prepare = prepare_depthbuffer,
+   .emit = emit_depthbuffer,
 };
 
 
@@ -327,7 +283,7 @@ const struct brw_tracked_state brw_polygon_stipple = {
       .brw = 0,
       .cache = 0
    },
-   .update = upload_polygon_stipple
+   .emit = upload_polygon_stipple
 };
 
 
@@ -350,13 +306,15 @@ static void upload_polygon_stipple_offset(struct brw_context *brw)
    BRW_CACHED_BATCH_STRUCT(brw, &bpso);
 }
 
+#define _NEW_WINDOW_POS 0x40000000
+
 const struct brw_tracked_state brw_polygon_stipple_offset = {
    .dirty = {
       .mesa = _NEW_WINDOW_POS,
       .brw = 0,
       .cache = 0
    },
-   .update = upload_polygon_stipple_offset
+   .emit = upload_polygon_stipple_offset
 };
 
 /**********************************************************************
@@ -366,7 +324,7 @@ static void upload_aa_line_parameters(struct brw_context *brw)
 {
    struct brw_aa_line_parameters balp;
    
-   if (!BRW_IS_IGD(brw))
+   if (!(BRW_IS_GM45(brw) || BRW_IS_G4X(brw)))
       return;
 
    /* use legacy aa line coverage computation */
@@ -383,7 +341,7 @@ const struct brw_tracked_state brw_aa_line_parameters = {
       .brw = BRW_NEW_CONTEXT,
       .cache = 0
    },
-   .update = upload_aa_line_parameters
+   .emit = upload_aa_line_parameters
 };
 
 /***********************************************************************
@@ -418,7 +376,7 @@ const struct brw_tracked_state brw_line_stipple = {
       .brw = 0,
       .cache = 0
    },
-   .update = upload_line_stipple
+   .emit = upload_line_stipple
 };
 
 
@@ -449,10 +407,10 @@ static void upload_pipe_control(struct brw_context *brw)
 const struct brw_tracked_state brw_pipe_control = {
    .dirty = {
       .mesa = 0,
-      .brw = BRW_NEW_CONTEXT,
+      .brw = BRW_NEW_BATCH,
       .cache = 0
    },
-   .update = upload_pipe_control
+   .emit = upload_pipe_control
 };
 
 
@@ -518,43 +476,39 @@ const struct brw_tracked_state brw_invarient_state = {
       .brw = BRW_NEW_CONTEXT,
       .cache = 0
    },
-   .update = upload_invarient_state
+   .emit = upload_invarient_state
 };
 
-
-/* State pool addresses:
+/**
+ * Define the base addresses which some state is referenced from.
+ *
+ * This allows us to avoid having to emit relocations in many places for
+ * cached state, and instead emit pointers inside of large, mostly-static
+ * state pools.  This comes at the expense of memory, and more expensive cache
+ * misses.
  */
 static void upload_state_base_address( struct brw_context *brw )
 {
    struct intel_context *intel = &brw->intel;
-   struct brw_state_base_address sba;
-      
-   memset(&sba, 0, sizeof(sba));
-
-   sba.header.opcode = CMD_STATE_BASE_ADDRESS;
-   sba.header.length = 0x4;
 
-   /* BRW_NEW_LOCK */
-   sba.bits0.general_state_address = bmBufferOffset(intel, brw->pool[BRW_GS_POOL].buffer) >> 5;
-   sba.bits0.modify_enable = 1;
-
-   /* BRW_NEW_LOCK */
-   sba.bits1.surface_state_address = bmBufferOffset(intel, brw->pool[BRW_SS_POOL].buffer) >> 5;
-   sba.bits1.modify_enable = 1;
-
-   sba.bits2.modify_enable = 1;
-   sba.bits3.modify_enable = 1;
-   sba.bits4.modify_enable = 1;
-
-   BRW_CACHED_BATCH_STRUCT(brw, &sba);
+   /* Output the structure (brw_state_base_address) directly to the
+    * batchbuffer, so we can emit relocations inline.
+    */
+   BEGIN_BATCH(6, IGNORE_CLIPRECTS);
+   OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
+   OUT_BATCH(1); /* General state base address */
+   OUT_BATCH(1); /* Surface state base address */
+   OUT_BATCH(1); /* Indirect object base address */
+   OUT_BATCH(1); /* General state upper bound */
+   OUT_BATCH(1); /* Indirect object upper bound */
+   ADVANCE_BATCH();
 }
 
-
 const struct brw_tracked_state brw_state_base_address = {
    .dirty = {
       .mesa = 0,
-      .brw = BRW_NEW_CONTEXT | BRW_NEW_LOCK,
-      .cache = 0
+      .brw = BRW_NEW_CONTEXT,
+      .cache = 0,
    },
-   .update = upload_state_base_address
+   .emit = upload_state_base_address
 };
diff --git a/i965/brw_program.c b/i965/brw_program.c
index 752fe49..c38610b 100644
--- a/i965/brw_program.c
+++ b/i965/brw_program.c
@@ -29,15 +29,15 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
   
+#include "main/imports.h"
+#include "main/enums.h"
 #include "shader/prog_parameter.h"
-#include "brw_context.h"
-#include "brw_aub.h"
-#include "brw_util.h"
-#include "program.h"
-#include "imports.h"
-#include "enums.h"
+#include "shader/program.h"
+#include "shader/programopt.h"
 #include "tnl/tnl.h"
 
+#include "brw_context.h"
+#include "brw_util.h"
 
 static void brwBindProgram( GLcontext *ctx,
 			    GLenum target, 
@@ -125,6 +125,9 @@ static void brwProgramStringNotify( GLcontext *ctx,
       struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
       if (p == vp)
 	 brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+      if (p->program.IsPositionInvariant) {
+	 _mesa_insert_mvp_code(ctx, &p->program);
+      }
       p->id = brw->program_id++;      
       p->param_state = p->program.Base.Parameters->StateFlags;
 
diff --git a/i965/brw_sf.c b/i965/brw_sf.c
index 6dcfa62..0b61748 100644
--- a/i965/brw_sf.c
+++ b/i965/brw_sf.c
@@ -43,8 +43,6 @@
 #include "brw_sf.h"
 #include "brw_state.h"
 
-#define DO_SETUP_BITS ((1<<FRAG_ATTRIB_MAX)-1)
-
 static void compile_sf_prog( struct brw_context *brw,
 			     struct brw_sf_prog_key *key )
 {
@@ -74,6 +72,11 @@ static void compile_sf_prog( struct brw_context *brw,
       if (c.key.attrs & (1<<i)) {
 	 c.attr_to_idx[i] = idx;
 	 c.idx_to_attr[idx] = i;
+	 if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) {
+		 c.point_attrs[i].CoordReplace = 
+			brw->attribs.Point->CoordReplace[i - VERT_RESULT_TEX0];
+	 } else
+		 c.point_attrs[i].CoordReplace = GL_FALSE;
 	 idx++;
       }
    
@@ -90,7 +93,10 @@ static void compile_sf_prog( struct brw_context *brw,
       break;
    case SF_POINTS:
       c.nr_verts = 1;
-      brw_emit_point_setup( &c, GL_TRUE );
+      if (key->do_point_sprite)
+	  brw_emit_point_sprite_setup( &c, GL_TRUE );
+      else
+	  brw_emit_point_setup( &c, GL_TRUE );
       break;
    case SF_UNFILLED_TRIS:
       c.nr_verts = 3;
@@ -108,29 +114,18 @@ static void compile_sf_prog( struct brw_context *brw,
 
    /* Upload
     */
-   brw->sf.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_SF_PROG],
-					      &c.key,
-					      sizeof(c.key),
-					      program,
-					      program_size,
-					      &c.prog_data,
-					      &brw->sf.prog_data );
+   dri_bo_unreference(brw->sf.prog_bo);
+   brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG,
+				       &c.key, sizeof(c.key),
+				       NULL, 0,
+				       program, program_size,
+				       &c.prog_data,
+				       &brw->sf.prog_data );
 }
 
-
-static GLboolean search_cache( struct brw_context *brw, 
-			       struct brw_sf_prog_key *key )
-{
-   return brw_search_cache(&brw->cache[BRW_SF_PROG], 
-			   key, sizeof(*key),
-			   &brw->sf.prog_data,
-			   &brw->sf.prog_gs_offset);
-}
-
-
 /* Calculate interpolants for triangle and line rasterization.
  */
-static void upload_sf_prog( struct brw_context *brw )
+static int upload_sf_prog( struct brw_context *brw )
 {
    struct brw_sf_prog_key key;
 
@@ -162,7 +157,8 @@ static void upload_sf_prog( struct brw_context *brw )
       break;
    }
 
-
+   key.do_point_sprite = brw->attribs.Point->PointSprite;
+   key.SpriteOrigin = brw->attribs.Point->SpriteOrigin;
    /* _NEW_LIGHT */
    key.do_flat_shading = (brw->attribs.Light->ShadeModel == GL_FLAT);
    key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide);
@@ -171,18 +167,23 @@ static void upload_sf_prog( struct brw_context *brw )
    if (key.do_twoside_color)
       key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW);
 
-
-   if (!search_cache(brw, &key))
+   dri_bo_unreference(brw->sf.prog_bo);
+   brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG,
+				      &key, sizeof(key),
+				      NULL, 0,
+				      &brw->sf.prog_data);
+   if (brw->sf.prog_bo == NULL)
       compile_sf_prog( brw, &key );
+   return dri_bufmgr_check_aperture_space(brw->sf.prog_bo);
 }
 
 
 const struct brw_tracked_state brw_sf_prog = {
    .dirty = {
-      .mesa  = (_NEW_LIGHT|_NEW_POLYGON),
+      .mesa  = (_NEW_LIGHT|_NEW_POLYGON|_NEW_POINT),
       .brw   = (BRW_NEW_REDUCED_PRIMITIVE),
       .cache = CACHE_NEW_VS_PROG
    },
-   .update = upload_sf_prog
+   .prepare = upload_sf_prog
 };
 
diff --git a/i965/brw_sf.h b/i965/brw_sf.h
index b321cda..1c0fb70 100644
--- a/i965/brw_sf.h
+++ b/i965/brw_sf.h
@@ -34,9 +34,9 @@
 #define BRW_SF_H
 
 
+#include "shader/program.h"
 #include "brw_context.h"
 #include "brw_eu.h"
-#include "program.h"
 
 
 #define SF_POINTS    0
@@ -45,14 +45,19 @@
 #define SF_UNFILLED_TRIS   3
 
 struct brw_sf_prog_key {
+   GLuint attrs:32;
    GLuint primitive:2;
    GLuint do_twoside_color:1;
    GLuint do_flat_shading:1;
-   GLuint attrs:16;
    GLuint frontface_ccw:1;
-   GLuint pad:11;
+   GLuint do_point_sprite:1;
+   GLuint pad:10;
+   GLenum SpriteOrigin;
 };
 
+struct brw_sf_point_tex {
+	GLboolean CoordReplace;	
+};
 
 struct brw_sf_compile {
    struct brw_compile func;
@@ -94,12 +99,14 @@ struct brw_sf_compile {
 
    GLubyte attr_to_idx[VERT_RESULT_MAX];   
    GLubyte idx_to_attr[VERT_RESULT_MAX];   
+   struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX];
 };
 
  
 void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate );
 void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate );
 void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate );
 void brw_emit_anyprim_setup( struct brw_sf_compile *c );
 
 #endif
diff --git a/i965/brw_sf_emit.c b/i965/brw_sf_emit.c
index 94be815..6fba8c8 100644
--- a/i965/brw_sf_emit.c
+++ b/i965/brw_sf_emit.c
@@ -59,6 +59,35 @@ static GLboolean have_attr(struct brw_sf_compile *c,
    return (c->key.attrs & (1<<attr)) ? 1 : 0;
 }
 
+/**
+ * Sets VERT_RESULT_FOGC.Y for gl_FrontFacing.
+ *
+ * This is currently executed if the fragment program uses VERT_RESULT_FOGC
+ * at all, but this could be eliminated with a scan of the FP contents.
+ */
+static void
+do_front_facing( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func; 
+   int i;
+
+   if (!have_attr(c, VERT_RESULT_FOGC))
+      return;
+
+   brw_push_insn_state(p);
+   brw_CMP(p, brw_null_reg(), 
+        c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L,
+        c->det, brw_imm_f(0));
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+   for (i = 0; i < 3; i++) {
+       struct brw_reg fogc = get_vert_attr(c, c->vert[i],FRAG_ATTRIB_FOGC);
+       brw_MOV(p, get_element(fogc, 1), brw_imm_f(0));
+       brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+       brw_MOV(p, get_element(fogc, 1), brw_imm_f(1));
+       brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+   }
+   brw_pop_insn_state(p);
+}
 
 			 
 /*********************************************************************** 
@@ -355,6 +384,7 @@ void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate)
 
    invert_det(c);
    copy_z_inv_w(c);
+   do_front_facing(c);
 
    if (c->key.do_twoside_color) 
       do_twoside_color(c);
@@ -503,6 +533,90 @@ void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate)
    } 
 }
 
+void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+   struct brw_compile *p = &c->func;
+   GLuint i;
+
+   c->nr_verts = 1;
+
+   if (allocate)
+      alloc_regs(c);
+
+   copy_z_inv_w(c);
+   for (i = 0; i < c->nr_setup_regs; i++)
+   {
+      struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]];
+      struct brw_reg a0 = offset(c->vert[0], i);
+      GLushort pc, pc_persp, pc_linear;
+      GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+            
+      if (pc_persp)
+      {				
+	  if (!tex->CoordReplace) {
+	      brw_set_predicate_control_flag_value(p, pc_persp);
+	      brw_MUL(p, a0, a0, c->inv_w[0]);
+	  }
+      }
+
+      if (tex->CoordReplace) {
+	  /* Calculate 1.0/PointWidth */
+	  brw_math(&c->func,
+		  c->tmp,
+		  BRW_MATH_FUNCTION_INV,
+		  BRW_MATH_SATURATE_NONE,
+		  0,
+		  c->dx0,
+		  BRW_MATH_DATA_SCALAR,
+		  BRW_MATH_PRECISION_FULL);
+
+	  if (c->key.SpriteOrigin == GL_LOWER_LEFT) {
+	   	brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
+		brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
+	  	brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0]));
+		brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
+	  } else {
+	   	brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
+		brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
+	  	brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]);
+		brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
+	  }
+      } else {
+	  brw_MOV(p, c->m1Cx, brw_imm_ud(0));
+	  brw_MOV(p, c->m2Cy, brw_imm_ud(0));
+      }
+
+      {
+	 brw_set_predicate_control_flag_value(p, pc); 
+	 if (tex->CoordReplace) {
+	     if (c->key.SpriteOrigin == GL_LOWER_LEFT) {
+		 brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0));
+		 brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0));
+	     }
+	     else
+		 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
+	 } else {
+	 	brw_MOV(p, c->m3C0, a0); /* constant value */
+	 }
+
+	 /* Copy m0..m3 to URB. 
+	  */
+	 brw_urb_WRITE(p, 
+		       brw_null_reg(),
+		       0,
+		       brw_vec8_grf(0, 0),
+		       0, 	/* allocate */
+		       1,	/* used */
+		       4, 	/* msg len */
+		       0,	/* response len */
+		       last, 	/* eot */
+		       last, 	/* writes complete */
+		       i*4,	/* urb destination offset */
+		       BRW_URB_SWIZZLE_TRANSPOSE);
+      }
+   }
+}
+
 /* Points setup - several simplifications as all attributes are
  * constant across the face of the point (point sprites excluded!)
  */
@@ -569,6 +683,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c )
    struct brw_compile *p = &c->func;
    struct brw_reg ip = brw_ip_reg();
    struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
+   struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); 
    struct brw_reg primmask;
    struct brw_instruction *jmp;
    struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
@@ -623,6 +738,18 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c )
    }
    brw_land_fwd_jump(p, jmp); 
 
+   brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+   brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
+   jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
+   {
+      saveflag = p->flag_value;
+      brw_push_insn_state(p); 
+      brw_emit_point_sprite_setup( c, GL_FALSE );
+      brw_pop_insn_state(p);
+      p->flag_value = saveflag;
+   }
+   brw_land_fwd_jump(p, jmp); 
+
    brw_emit_point_setup( c, GL_FALSE );
 }
 
diff --git a/i965/brw_sf_state.c b/i965/brw_sf_state.c
index 2fd75a0..24388b7 100644
--- a/i965/brw_sf_state.c
+++ b/i965/brw_sf_state.c
@@ -35,69 +35,71 @@
 #include "brw_state.h"
 #include "brw_defines.h"
 #include "macros.h"
+#include "intel_fbo.h"
 
-static void upload_sf_vp(struct brw_context *brw)
+static int upload_sf_vp(struct brw_context *brw)
 {
+   GLcontext *ctx = &brw->intel.ctx;
+   const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
    struct brw_sf_viewport sfv;
+   struct intel_renderbuffer *irb =
+      intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]);
+   GLfloat y_scale, y_bias;
 
    memset(&sfv, 0, sizeof(sfv));
-   
-   if (brw->intel.driDrawable) 
-   {
-      /* _NEW_VIEWPORT, BRW_NEW_METAOPS */
-
-      if (!brw->metaops.active) {
-	 const GLfloat *v = brw->intel.ctx.Viewport._WindowMap.m;
-	 
-	 sfv.viewport.m00 =   v[MAT_SX];
-	 sfv.viewport.m11 = - v[MAT_SY];
-	 sfv.viewport.m22 =   v[MAT_SZ] * brw->intel.depth_scale;
-	 sfv.viewport.m30 =   v[MAT_TX];
-	 sfv.viewport.m31 = - v[MAT_TY] + brw->intel.driDrawable->h;
-	 sfv.viewport.m32 =   v[MAT_TZ] * brw->intel.depth_scale;
-      }
-      else {
-	 sfv.viewport.m00 =   1;
-	 sfv.viewport.m11 = - 1;
-	 sfv.viewport.m22 =   1;
-	 sfv.viewport.m30 =   0;
-	 sfv.viewport.m31 =   brw->intel.driDrawable->h;
-	 sfv.viewport.m32 =   0;
+
+   if (ctx->DrawBuffer->Name) {
+      /* User-created FBO */
+      if (irb && !irb->RenderToTexture) {
+	 y_scale = -1.0;
+	 y_bias = ctx->DrawBuffer->Height;
+      } else {
+	 y_scale = 1.0;
+	 y_bias = 0;
       }
+   } else {
+      y_scale = -1.0;
+      y_bias = ctx->DrawBuffer->Height;
    }
 
-   /* XXX: what state for this? */
-   if (brw->intel.driDrawable)
-   {
-      intelScreenPrivate *screen = brw->intel.intelScreen;
-      /* _NEW_SCISSOR */
-      GLint x = brw->attribs.Scissor->X;
-      GLint y = brw->attribs.Scissor->Y;
-      GLuint w = brw->attribs.Scissor->Width;
-      GLuint h = brw->attribs.Scissor->Height;
-
-      GLint x1 = x;
-      GLint y1 = brw->intel.driDrawable->h - (y + h);
-      GLint x2 = x + w - 1;
-      GLint y2 = y1 + h - 1;
-
-      if (x1 < 0) x1 = 0;
-      if (y1 < 0) y1 = 0;
-      if (x2 < 0) x2 = 0;
-      if (y2 < 0) y2 = 0;
-
-      if (x2 >= screen->width) x2 = screen->width-1;
-      if (y2 >= screen->height) y2 = screen->height-1;
-      if (x1 >= screen->width) x1 = screen->width-1;
-      if (y1 >= screen->height) y1 = screen->height-1;
-      
-      sfv.scissor.xmin = x1;
-      sfv.scissor.xmax = x2;
-      sfv.scissor.ymin = y1;
-      sfv.scissor.ymax = y2;
+   /* _NEW_VIEWPORT, BRW_NEW_METAOPS */
+
+   if (!brw->metaops.active) {
+      const GLfloat *v = ctx->Viewport._WindowMap.m;
+
+      sfv.viewport.m00 = v[MAT_SX];
+      sfv.viewport.m11 = v[MAT_SY] * y_scale;
+      sfv.viewport.m22 = v[MAT_SZ] * depth_scale;
+      sfv.viewport.m30 = v[MAT_TX];
+      sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias;
+      sfv.viewport.m32 = v[MAT_TZ] * depth_scale;
+   } else {
+      sfv.viewport.m00 =   1;
+      sfv.viewport.m11 = - 1;
+      sfv.viewport.m22 =   1;
+      sfv.viewport.m30 =   0;
+      sfv.viewport.m31 =   ctx->DrawBuffer->Height;
+      sfv.viewport.m32 =   0;
    }
 
-   brw->sf.vp_gs_offset = brw_cache_data( &brw->cache[BRW_SF_VP], &sfv );
+   /* _NEW_SCISSOR */
+
+   /* The scissor only needs to handle the intersection of drawable and
+    * scissor rect.  Clipping to the boundaries of static shared buffers
+    * for front/back/depth is covered by looping over cliprects in brw_draw.c.
+    *
+    * Note that the hardware's coordinates are inclusive, while Mesa's min is
+    * inclusive but max is exclusive.
+    */
+   sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
+   sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
+   sfv.scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax;
+   sfv.scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
+
+   dri_bo_unreference(brw->sf.vp_bo);
+   brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 );
+
+   return dri_bufmgr_check_aperture_space(brw->sf.vp_bo);
 }
 
 const struct brw_tracked_state brw_sf_vp = {
@@ -107,84 +109,129 @@ const struct brw_tracked_state brw_sf_vp = {
       .brw   = BRW_NEW_METAOPS,
       .cache = 0
    },
-   .update = upload_sf_vp
+   .prepare = upload_sf_vp
+};
+
+struct brw_sf_unit_key {
+   unsigned int total_grf;
+   unsigned int urb_entry_read_length;
+
+   unsigned int nr_urb_entries, urb_size, sfsize;
+
+   GLenum front_face, cull_face;
+   GLboolean scissor, line_smooth, point_sprite, point_attenuated;
+   float line_width;
+   float point_size;
 };
 
+static void
+sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
+{
+   memset(key, 0, sizeof(*key));
+
+   /* CACHE_NEW_SF_PROG */
+   key->total_grf = brw->sf.prog_data->total_grf;
+   key->urb_entry_read_length = brw->sf.prog_data->urb_read_length;
+
+   /* BRW_NEW_URB_FENCE */
+   key->nr_urb_entries = brw->urb.nr_sf_entries;
+   key->urb_size = brw->urb.vsize;
+   key->sfsize = brw->urb.sfsize;
 
+   key->scissor = brw->attribs.Scissor->Enabled;
+   key->front_face = brw->attribs.Polygon->FrontFace;
+
+   if (brw->attribs.Polygon->CullFlag)
+      key->cull_face = brw->attribs.Polygon->CullFaceMode;
+   else
+      key->cull_face = GL_NONE;
+
+   key->line_width = brw->attribs.Line->Width;
+   key->line_smooth = brw->attribs.Line->SmoothFlag;
+
+   key->point_sprite = brw->attribs.Point->PointSprite;
+   key->point_size = brw->attribs.Point->Size;
+   key->point_attenuated = brw->attribs.Point->_Attenuated;
+}
 
-static void upload_sf_unit( struct brw_context *brw )
+static dri_bo *
+sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
+			dri_bo **reloc_bufs)
 {
    struct brw_sf_unit_state sf;
+   dri_bo *bo;
+
    memset(&sf, 0, sizeof(sf));
 
-   /* CACHE_NEW_SF_PROG */
-   sf.thread0.grf_reg_count = ((brw->sf.prog_data->total_grf-1) & ~15) / 16;
-   sf.thread0.kernel_start_pointer = brw->sf.prog_gs_offset >> 6;
-   sf.thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length;
+   sf.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+   sf.thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */
 
    sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+
    sf.thread3.dispatch_grf_start_reg = 3;
    sf.thread3.urb_entry_read_offset = 1;
+   sf.thread3.urb_entry_read_length = key->urb_entry_read_length;
 
-   /* BRW_NEW_URB_FENCE */
-   sf.thread4.nr_urb_entries = brw->urb.nr_sf_entries;
-   sf.thread4.urb_entry_allocation_size = brw->urb.sfsize - 1;
-   sf.thread4.max_threads = MIN2(12, brw->urb.nr_sf_entries / 2) - 1;
+   sf.thread4.nr_urb_entries = key->nr_urb_entries;
+   sf.thread4.urb_entry_allocation_size = key->sfsize - 1;
+   sf.thread4.max_threads = MIN2(12, key->nr_urb_entries / 2) - 1;
 
    if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
-      sf.thread4.max_threads = 0; 
+      sf.thread4.max_threads = 0;
 
    if (INTEL_DEBUG & DEBUG_STATS)
-      sf.thread4.stats_enable = 1; 
+      sf.thread4.stats_enable = 1;
 
    /* CACHE_NEW_SF_VP */
-   sf.sf5.sf_viewport_state_offset = brw->sf.vp_gs_offset >> 5;
-   
+   sf.sf5.sf_viewport_state_offset = brw->sf.vp_bo->offset >> 5; /* reloc */
+
    sf.sf5.viewport_transform = 1;
-   
+
    /* _NEW_SCISSOR */
-   if (brw->attribs.Scissor->Enabled) 
-      sf.sf6.scissor = 1;  
+   if (key->scissor)
+      sf.sf6.scissor = 1;
 
    /* _NEW_POLYGON */
-   if (brw->attribs.Polygon->FrontFace == GL_CCW)
+   if (key->front_face == GL_CCW)
       sf.sf5.front_winding = BRW_FRONTWINDING_CCW;
    else
       sf.sf5.front_winding = BRW_FRONTWINDING_CW;
 
-   if (brw->attribs.Polygon->CullFlag) {
-      switch (brw->attribs.Polygon->CullFaceMode) {
-      case GL_FRONT:
-	 sf.sf6.cull_mode = BRW_CULLMODE_FRONT;
-	 break;
-      case GL_BACK:
-	 sf.sf6.cull_mode = BRW_CULLMODE_BACK;
-	 break;
-      case GL_FRONT_AND_BACK:
-	 sf.sf6.cull_mode = BRW_CULLMODE_BOTH;
-	 break;
-      default:
-	 assert(0);
-	 break;
-      }
-   }
-   else
+   switch (key->cull_face) {
+   case GL_FRONT:
+      sf.sf6.cull_mode = BRW_CULLMODE_FRONT;
+      break;
+   case GL_BACK:
+      sf.sf6.cull_mode = BRW_CULLMODE_BACK;
+      break;
+   case GL_FRONT_AND_BACK:
+      sf.sf6.cull_mode = BRW_CULLMODE_BOTH;
+      break;
+   case GL_NONE:
       sf.sf6.cull_mode = BRW_CULLMODE_NONE;
-      
+      break;
+   default:
+      assert(0);
+      break;
+   }
 
    /* _NEW_LINE */
-   sf.sf6.line_width = brw->attribs.Line->_Width * (1<<1);
+   /* XXX use ctx->Const.Min/MaxLineWidth here */
+   sf.sf6.line_width = CLAMP(key->line_width, 1.0, 5.0) * (1<<1);
 
    sf.sf6.line_endcap_aa_region_width = 1;
-   if (brw->attribs.Line->SmoothFlag)
+   if (key->line_smooth)
       sf.sf6.aa_enable = 1;
-   else if (sf.sf6.line_width <= 0x2) 
-       sf.sf6.line_width = 0; 
+   else if (sf.sf6.line_width <= 0x2)
+       sf.sf6.line_width = 0;
 
    /* _NEW_POINT */
-   sf.sf6.point_rast_rule = 1;	/* opengl conventions */
-   sf.sf7.point_size = brw->attribs.Point->_Size * (1<<3);
-   sf.sf7.use_point_size_state = !brw->attribs.Point->_Attenuated;
+   sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT;	/* opengl conventions */
+   /* XXX clamp max depends on AA vs. non-AA */
+
+   sf.sf7.sprite_point = key->point_sprite;
+   sf.sf7.point_size = CLAMP(nearbyint(key->point_size), 1, 255) * (1<<3);
+   sf.sf7.use_point_size_state = !key->point_attenuated;
    sf.sf7.aa_line_distance_mode = 0;
 
    /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
@@ -199,9 +246,58 @@ static void upload_sf_unit( struct brw_context *brw )
    sf.sf6.dest_org_vbias = 0x8;
    sf.sf6.dest_org_hbias = 0x8;
 
-   brw->sf.state_gs_offset = brw_cache_data( &brw->cache[BRW_SF_UNIT], &sf );
+   bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT,
+			 key, sizeof(*key),
+			 reloc_bufs, 2,
+			 &sf, sizeof(sf),
+			 NULL, NULL);
+
+   /* Emit SF program relocation */
+   dri_emit_reloc(bo,
+		  DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		  sf.thread0.grf_reg_count << 1,
+		  offsetof(struct brw_sf_unit_state, thread0),
+		  brw->sf.prog_bo);
+
+   /* Emit SF viewport relocation */
+   dri_emit_reloc(bo,
+		  DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		  sf.sf5.front_winding | (sf.sf5.viewport_transform << 1),
+		  offsetof(struct brw_sf_unit_state, sf5),
+		  brw->sf.vp_bo);
+
+   return bo;
 }
 
+static int upload_sf_unit( struct brw_context *brw )
+{
+   struct brw_sf_unit_key key;
+   dri_bo *reloc_bufs[2];
+   int ret = 0;
+
+   sf_unit_populate_key(brw, &key);
+
+   reloc_bufs[0] = brw->sf.prog_bo;
+   reloc_bufs[1] = brw->sf.vp_bo;
+
+   dri_bo_unreference(brw->sf.state_bo);
+   brw->sf.state_bo = brw_search_cache(&brw->cache, BRW_SF_UNIT,
+				       &key, sizeof(key),
+				       reloc_bufs, 2,
+				       NULL);
+   if (brw->sf.state_bo == NULL) {
+      brw->sf.state_bo = sf_unit_create_from_key(brw, &key, reloc_bufs);
+   }
+
+   if (reloc_bufs[0])
+     ret |= dri_bufmgr_check_aperture_space(reloc_bufs[0]);
+
+   if (reloc_bufs[1])
+     ret |= dri_bufmgr_check_aperture_space(reloc_bufs[1]);
+
+   ret |= dri_bufmgr_check_aperture_space(brw->sf.state_bo);
+   return ret;
+}
 
 const struct brw_tracked_state brw_sf_unit = {
    .dirty = {
@@ -214,7 +310,5 @@ const struct brw_tracked_state brw_sf_unit = {
       .cache = (CACHE_NEW_SF_VP |
 		CACHE_NEW_SF_PROG)
    },
-   .update = upload_sf_unit
+   .prepare = upload_sf_unit,
 };
-
-
diff --git a/i965/brw_state.h b/i965/brw_state.h
index 41ac095..d1fca05 100644
--- a/i965/brw_state.h
+++ b/i965/brw_state.h
@@ -48,7 +48,6 @@ const struct brw_tracked_state brw_curbe_offsets;
 const struct brw_tracked_state brw_invarient_state;
 const struct brw_tracked_state brw_gs_prog;
 const struct brw_tracked_state brw_gs_unit;
-const struct brw_tracked_state brw_drawing_rect;
 const struct brw_tracked_state brw_line_stipple;
 const struct brw_tracked_state brw_aa_line_parameters;
 const struct brw_tracked_state brw_pipelined_state_pointers;
@@ -84,64 +83,52 @@ const struct brw_tracked_state brw_clear_batch_cache;
 /***********************************************************************
  * brw_state_cache.c
  */
-GLuint brw_cache_data(struct brw_cache *cache,
-		      const void *data );
-
-GLuint brw_cache_data_sz(struct brw_cache *cache,
-			 const void *data,
-			 GLuint data_sz);
-
-GLuint brw_upload_cache( struct brw_cache *cache,
-			 const void *key,
-			 GLuint key_sz,
-			 const void *data,
-			 GLuint data_sz,
-			 const void *aux,
-			 void *aux_return );
-
-GLboolean brw_search_cache( struct brw_cache *cache,
-			    const void *key,
-			    GLuint key_size,
-			    void *aux_return,
-			    GLuint *offset_return);
-
-void brw_init_caches( struct brw_context *brw );
-void brw_destroy_caches( struct brw_context *brw );
+dri_bo *brw_cache_data(struct brw_cache *cache,
+		       enum brw_cache_id cache_id,
+		       const void *data,
+		       dri_bo **reloc_bufs,
+		       GLuint nr_reloc_bufs);
+
+dri_bo *brw_cache_data_sz(struct brw_cache *cache,
+			  enum brw_cache_id cache_id,
+			  const void *data,
+			  GLuint data_size,
+			  dri_bo **reloc_bufs,
+			  GLuint nr_reloc_bufs);
+
+dri_bo *brw_upload_cache( struct brw_cache *cache,
+			  enum brw_cache_id cache_id,
+			  const void *key,
+			  GLuint key_sz,
+			  dri_bo **reloc_bufs,
+			  GLuint nr_reloc_bufs,
+			  const void *data,
+			  GLuint data_sz,
+			  const void *aux,
+			  void *aux_return );
+
+dri_bo *brw_search_cache( struct brw_cache *cache,
+			  enum brw_cache_id cache_id,
+			  const void *key,
+			  GLuint key_size,
+			  dri_bo **reloc_bufs,
+			  GLuint nr_reloc_bufs,
+			  void *aux_return);
+void brw_state_cache_check_size( struct brw_context *brw );
+
+void brw_init_cache( struct brw_context *brw );
+void brw_destroy_cache( struct brw_context *brw );
 
 /***********************************************************************
  * brw_state_batch.c
  */
-#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), 0)
+#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS)
 #define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) )
 
 GLboolean brw_cached_batch_struct( struct brw_context *brw,
 				   const void *data,
 				   GLuint sz );
-
 void brw_destroy_batch_cache( struct brw_context *brw );
-
-
-/***********************************************************************
- * brw_state_pool.c
- */
-void brw_init_pools( struct brw_context *brw );
-void brw_destroy_pools( struct brw_context *brw );
-
-GLboolean brw_pool_alloc( struct brw_mem_pool *pool,
-			  GLuint size,
-			  GLuint alignment,
-			  GLuint *offset_return);
-
-void brw_pool_fence( struct brw_context *brw,
-		     struct brw_mem_pool *pool,
-		     GLuint fence );
-
-
-void brw_pool_check_wrap( struct brw_context *brw,
-			  struct brw_mem_pool *pool );
-
-void brw_clear_all_caches( struct brw_context *brw );
-void brw_invalidate_pools( struct brw_context *brw );
 void brw_clear_batch_cache_flush( struct brw_context *brw );
 
 #endif
diff --git a/i965/brw_state_batch.c b/i965/brw_state_batch.c
index c93d66a..77e2736 100644
--- a/i965/brw_state_batch.c
+++ b/i965/brw_state_batch.c
@@ -32,7 +32,6 @@
 
 
 #include "brw_state.h"
-#include "brw_aub.h"
 #include "intel_batchbuffer.h"
 #include "imports.h"
 
@@ -49,7 +48,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
    struct header *newheader = (struct header *)data;
 
    if (brw->emit_state_always) {
-      intel_batchbuffer_data(brw->intel.batch, data, sz, 0);
+      intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
       return GL_TRUE;
    }
 
@@ -76,7 +75,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
 
  emit:
    memcpy(item->header, newheader, sz);
-   intel_batchbuffer_data(brw->intel.batch, data, sz, 0);
+   intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
    return GL_TRUE;
 }
 
@@ -92,18 +91,10 @@ static void clear_batch_cache( struct brw_context *brw )
    }
 
    brw->cached_batch_items = NULL;
-
-
-   brw_clear_all_caches(brw);
-
-   bmReleaseBuffers(&brw->intel);
-   
-   brw_invalidate_pools(brw);
 }
 
 void brw_clear_batch_cache_flush( struct brw_context *brw )
 {
-   bmFinishFenceLock(&(brw->intel), bmSetFenceLock(&(brw->intel)));
    clear_batch_cache(brw);
 
    brw->wrap = 0;
diff --git a/i965/brw_state_cache.c b/i965/brw_state_cache.c
index 71c6938..d617650 100644
--- a/i965/brw_state_cache.c
+++ b/i965/brw_state_cache.c
@@ -28,10 +28,35 @@
   * Authors:
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
-      
+
+/** @file brw_state_cache.c
+ *
+ * This file implements a simple static state cache for 965.  The consumers
+ * can query the hash table of state using a cache_id, opaque key data,
+ * and list of buffers that will be used in relocations, and receive the
+ * corresponding state buffer object (plus associated auxiliary
+ * data) in return.
+ *
+ * The inner workings are a simple hash table keyed on a rotate-and-XOR hash
+ * of the key data and the relocation target buffer pointers.  The cache_id
+ * is compared when probing a bucket but is not part of the hash value
+ * (this should be fixed, but will likely be fixed instead by making
+ * consumers use structured keys).
+ *
+ * Replacement is not implemented.  Instead, when the cache gets too big, at
+ * a safe point (unlock) we throw out all of the cache data and let it
+ * regenerate for the next rendering operation.
+ *
+ * The reloc_buf pointers need to be included as key data, otherwise the
+ * non-unique values stuffed in the offset in key data through
+ * brw_cache_data() may result in successful probe for state buffers
+ * even when the buffer being referenced doesn't match.  The result would be
+ * that the same state cache entry is used twice for different buffers,
+ * only one of the two buffers referenced gets put into the offset, and the
+ * incorrect program is run for the other instance.
+ */
 
 #include "brw_state.h"
-#include "brw_aub.h"
 #include "intel_batchbuffer.h"
 #include "imports.h"
 
@@ -44,16 +69,8 @@
 #include "brw_sf.h"
 #include "brw_gs.h"
 
-
-/***********************************************************************
- * Check cache for uploaded version of struct, else upload new one.
- * Fail when memory is exhausted.
- *
- * XXX: FIXME: Currently search is so slow it would be quicker to
- * regenerate the data every time...
- */
-
-static GLuint hash_key( const void *key, GLuint key_size )
+static GLuint hash_key( const void *key, GLuint key_size,
+			dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
 {
    GLuint *ikey = (GLuint *)key;
    GLuint hash = 0, i;
@@ -62,23 +79,63 @@ static GLuint hash_key( const void *key, GLuint key_size )
 
    /* I'm sure this can be improved on:
     */
-   for (i = 0; i < key_size/4; i++)
+   for (i = 0; i < key_size/4; i++) {
       hash ^= ikey[i];
+      hash = (hash << 5) | (hash >> 27);
+   }
+
+   /* Include the BO pointers as key data as well */
+   ikey = (GLuint *)reloc_bufs;
+   key_size = nr_reloc_bufs * sizeof(dri_bo *);
+   for (i = 0; i < key_size/4; i++) {
+      hash ^= ikey[i];
+      hash = (hash << 5) | (hash >> 27);
+   }
 
    return hash;
 }
 
-static struct brw_cache_item *search_cache( struct brw_cache *cache,
-					     GLuint hash,
-					     const void *key,
-					     GLuint key_size)
+/**
+ * Marks bo as the buffer last chosen for the given cache id, flagging that id dirty on change.
+ */
+static void
+update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
+		  dri_bo *bo)
+{
+   if (bo == cache->last_bo[cache_id])
+      return; /* no change */
+
+   dri_bo_unreference(cache->last_bo[cache_id]);
+   cache->last_bo[cache_id] = bo;
+   dri_bo_reference(cache->last_bo[cache_id]);
+   cache->brw->state.dirty.cache |= 1 << cache_id;
+}
+
+static struct brw_cache_item *
+search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
+	     GLuint hash, const void *key, GLuint key_size,
+	     dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
 {
    struct brw_cache_item *c;
 
+#if 0
+   int bucketcount = 0;
+
+   for (c = cache->items[hash % cache->size]; c; c = c->next)
+      bucketcount++;
+
+   fprintf(stderr, "bucket %d/%d = %d/%d items\n", hash % cache->size,
+	   cache->size, bucketcount, cache->n_items);
+#endif
+
    for (c = cache->items[hash % cache->size]; c; c = c->next) {
-      if (c->hash == hash && 
+      if (c->cache_id == cache_id &&
+	  c->hash == hash &&
 	  c->key_size == key_size &&
-	  memcmp(c->key, key, key_size) == 0)
+	  memcmp(c->key, key, key_size) == 0 &&
+	  c->nr_reloc_bufs == nr_reloc_bufs &&
+	  memcmp(c->reloc_bufs, reloc_bufs,
+		 nr_reloc_bufs * sizeof(dri_bo *)) == 0)
 	 return c;
    }
 
@@ -93,8 +150,7 @@ static void rehash( struct brw_cache *cache )
    GLuint size, i;
 
    size = cache->size * 3;
-   items = (struct brw_cache_item**) _mesa_malloc(size * sizeof(*items));
-   _mesa_memset(items, 0, size * sizeof(*items));
+   items = (struct brw_cache_item**) _mesa_calloc(size * sizeof(*items));
 
    for (i = 0; i < cache->size; i++)
       for (c = cache->items[i]; c; c = next) {
@@ -108,142 +164,183 @@ static void rehash( struct brw_cache *cache )
    cache->size = size;
 }
 
-
-GLboolean brw_search_cache( struct brw_cache *cache,
-			    const void *key,
-			    GLuint key_size,
-			    void *aux_return,
-			    GLuint *offset_return)
+/**
+ * Returns a new reference to the buffer object matching cache_id, key and reloc_bufs, or NULL.
+ */
+dri_bo *brw_search_cache( struct brw_cache *cache,
+			  enum brw_cache_id cache_id,
+			  const void *key,
+			  GLuint key_size,
+			  dri_bo **reloc_bufs, GLuint nr_reloc_bufs,
+			  void *aux_return )
 {
    struct brw_cache_item *item;
-   GLuint addr = 0;
-   GLuint hash = hash_key(key, key_size);
+   GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
 
-   item = search_cache(cache, hash, key, key_size);
+   item = search_cache(cache, cache_id, hash, key, key_size,
+		       reloc_bufs, nr_reloc_bufs);
 
-   if (item) {
-      if (aux_return) 
-	 *(void **)aux_return = (void *)((char *)item->key + item->key_size);
-      
-      *offset_return = addr = item->offset;
-   }    
-    
-   if (item == NULL || addr != cache->last_addr) {
-      cache->brw->state.dirty.cache |= 1<<cache->id;
-      cache->last_addr = addr;
-   }
-   
-   return item != NULL;
+   if (item == NULL)
+      return NULL;
+
+   if (aux_return)
+      *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+
+   update_cache_last(cache, cache_id, item->bo);
+
+   dri_bo_reference(item->bo);
+   return item->bo;
 }
 
-GLuint brw_upload_cache( struct brw_cache *cache,
-			 const void *key,
-			 GLuint key_size,
-			 const void *data,
-			 GLuint data_size,
-			 const void *aux,
-			 void *aux_return )
-{   
-   GLuint offset;
+dri_bo *
+brw_upload_cache( struct brw_cache *cache,
+		  enum brw_cache_id cache_id,
+		  const void *key,
+		  GLuint key_size,
+		  dri_bo **reloc_bufs,
+		  GLuint nr_reloc_bufs,
+		  const void *data,
+		  GLuint data_size,
+		  const void *aux,
+		  void *aux_return )
+{
    struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
-   GLuint hash = hash_key(key, key_size);
-   void *tmp = _mesa_malloc(key_size + cache->aux_size);
-   
-   if (!brw_pool_alloc(cache->pool, data_size, 6, &offset)) {
-      /* Should not be possible: 
-       */
-      _mesa_printf("brw_pool_alloc failed\n");
-      exit(1);
-   }
+   GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
+   GLuint relocs_size = nr_reloc_bufs * sizeof(dri_bo *);
+   GLuint aux_size = cache->aux_size[cache_id];
+   void *tmp;
+   dri_bo *bo;
+   int i;
+
+   /* Create the buffer object to contain the data */
+   bo = dri_bo_alloc(cache->brw->intel.bufmgr,
+		     cache->name[cache_id], data_size, 1 << 6,
+		     DRM_BO_FLAG_MEM_LOCAL |
+		     DRM_BO_FLAG_CACHED |
+		     DRM_BO_FLAG_CACHED_MAPPED);
+
+
+   /* Set up the memory containing the key, aux_data, and reloc_bufs */
+   tmp = _mesa_malloc(key_size + aux_size + relocs_size);
 
    memcpy(tmp, key, key_size);
+   memcpy(tmp + key_size, aux, cache->aux_size[cache_id]);
+   memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size);
+   for (i = 0; i < nr_reloc_bufs; i++) {
+      if (reloc_bufs[i] != NULL)
+	 dri_bo_reference(reloc_bufs[i]);
+   }
 
-   if (cache->aux_size)
-      memcpy(tmp+key_size, aux, cache->aux_size);
-	 
+   item->cache_id = cache_id;
    item->key = tmp;
    item->hash = hash;
    item->key_size = key_size;
-   item->offset = offset;
+   item->reloc_bufs = tmp + key_size + aux_size;
+   item->nr_reloc_bufs = nr_reloc_bufs;
+
+   item->bo = bo;
+   dri_bo_reference(bo);
    item->data_size = data_size;
 
-   if (++cache->n_items > cache->size * 1.5)
+   if (cache->n_items > cache->size * 1.5)
       rehash(cache);
-   
+
    hash %= cache->size;
    item->next = cache->items[hash];
    cache->items[hash] = item;
-      
+   cache->n_items++;
+
    if (aux_return) {
-      assert(cache->aux_size);
+      assert(cache->aux_size[cache_id]);
       *(void **)aux_return = (void *)((char *)item->key + item->key_size);
    }
 
    if (INTEL_DEBUG & DEBUG_STATE)
-      _mesa_printf("upload %s: %d bytes to pool buffer %d offset %x\n",
-		   cache->name,
-		   data_size, 
-		   cache->pool->buffer,
-		   offset);
+      _mesa_printf("upload %s: %d bytes to cache id %d\n",
+		   cache->name[cache_id],
+		   data_size, cache_id);
 
-   /* Copy data to the buffer:
-    */
-   bmBufferSubDataAUB(&cache->brw->intel,
-		      cache->pool->buffer,
-		      offset, 
-		      data_size, 
-		      data,
-		      cache->aub_type,
-		      cache->aub_sub_type);
-   
-
-   cache->brw->state.dirty.cache |= 1<<cache->id;
-   cache->last_addr = offset;
-
-   return offset;
+   /* Copy data to the buffer */
+   dri_bo_subdata(bo, 0, data_size, data);
+
+   update_cache_last(cache, cache_id, bo);
+
+   return bo;
 }
 
 /* This doesn't really work with aux data.  Use search/upload instead
  */
-GLuint brw_cache_data_sz(struct brw_cache *cache,
-			 const void *data,
-			 GLuint data_size)
+dri_bo *
+brw_cache_data_sz(struct brw_cache *cache,
+		  enum brw_cache_id cache_id,
+		  const void *data,
+		  GLuint data_size,
+		  dri_bo **reloc_bufs,
+		  GLuint nr_reloc_bufs)
 {
-   GLuint addr;
+   dri_bo *bo;
+   struct brw_cache_item *item;
+   GLuint hash = hash_key(data, data_size, reloc_bufs, nr_reloc_bufs);
 
-   if (!brw_search_cache(cache, data, data_size, NULL, &addr)) {
-      addr = brw_upload_cache(cache, 
-			      data, data_size, 
-			      data, data_size, 
-			      NULL, NULL);
+   item = search_cache(cache, cache_id, hash, data, data_size,
+		       reloc_bufs, nr_reloc_bufs);
+   if (item) {
+      update_cache_last(cache, cache_id, item->bo);
+      dri_bo_reference(item->bo);
+      return item->bo;
    }
 
-   return addr;
+   bo = brw_upload_cache(cache, cache_id,
+			 data, data_size,
+			 reloc_bufs, nr_reloc_bufs,
+			 data, data_size,
+			 NULL, NULL);
+
+   return bo;
 }
 
-GLuint brw_cache_data(struct brw_cache *cache,
-		      const void *data)
+/**
+ * Wrapper around brw_cache_data_sz using the cache_id's canonical key size.
+ *
+ * If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be
+ * better to use, as the potentially changing offsets in the data-used-as-key
+ * will result in excessive cache misses.
+ */
+dri_bo *
+brw_cache_data(struct brw_cache *cache,
+	       enum brw_cache_id cache_id,
+	       const void *data,
+	       dri_bo **reloc_bufs,
+	       GLuint nr_reloc_bufs)
 {
-   return brw_cache_data_sz(cache, data, cache->key_size);
+   return brw_cache_data_sz(cache, cache_id, data, cache->key_size[cache_id],
+			    reloc_bufs, nr_reloc_bufs);
 }
 
+enum pool_type {
+   DW_SURFACE_STATE,
+   DW_GENERAL_STATE
+};
+
+static void
+brw_init_cache_id( struct brw_context *brw,
+		const char *name,
+		enum brw_cache_id id,
+		GLuint key_size,
+		GLuint aux_size)
+{
+   struct brw_cache *cache = &brw->cache;
 
+   cache->name[id] = strdup(name);
+   cache->key_size[id] = key_size;
+   cache->aux_size[id] = aux_size;
+}
 
-
-
-static void brw_init_cache( struct brw_context *brw, 
-			    const char *name,
-			    GLuint id,
-			    GLuint key_size,
-			    GLuint aux_size,
-			    GLuint aub_type,
-			    GLuint aub_sub_type )
+void brw_init_cache( struct brw_context *brw )
 {
-   struct brw_cache *cache = &brw->cache[id];
+   struct brw_cache *cache = &brw->cache;
+
    cache->brw = brw;
-   cache->id = id;
-   cache->name = name;
-   cache->items = NULL;
 
    cache->size = 7;
    cache->n_items = 0;
@@ -251,200 +348,133 @@ static void brw_init_cache( struct brw_context *brw,
       _mesa_calloc(cache->size * 
 		   sizeof(struct brw_cache_item));
 
-
-   cache->key_size = key_size;
-   cache->aux_size = aux_size;
-   cache->aub_type = aub_type;
-   cache->aub_sub_type = aub_sub_type;
-   switch (aub_type) {
-   case DW_GENERAL_STATE: cache->pool = &brw->pool[BRW_GS_POOL]; break;
-   case DW_SURFACE_STATE: cache->pool = &brw->pool[BRW_SS_POOL]; break;
-   default: assert(0); break;
-   }
-}
-
-void brw_init_caches( struct brw_context *brw )
-{
-
-   brw_init_cache(brw,
-		  "CC_VP",
-		  BRW_CC_VP,
-		  sizeof(struct brw_cc_viewport),
-		  0,
-		  DW_GENERAL_STATE,
-		  DWGS_COLOR_CALC_VIEWPORT_STATE);
-
-   brw_init_cache(brw,
-		  "CC_UNIT",
-		  BRW_CC_UNIT,
-		  sizeof(struct brw_cc_unit_state),
-		  0,
-		  DW_GENERAL_STATE,
-		  DWGS_COLOR_CALC_STATE);
-
-   brw_init_cache(brw,
-		  "WM_PROG",
-		  BRW_WM_PROG,
-		  sizeof(struct brw_wm_prog_key),
-		  sizeof(struct brw_wm_prog_data),
-		  DW_GENERAL_STATE,
-		  DWGS_KERNEL_INSTRUCTIONS);
-
-   brw_init_cache(brw,
-		  "SAMPLER_DEFAULT_COLOR",
-		  BRW_SAMPLER_DEFAULT_COLOR,
-		  sizeof(struct brw_sampler_default_color),
-		  0,
-		  DW_GENERAL_STATE,
-		  DWGS_SAMPLER_DEFAULT_COLOR);
-
-   brw_init_cache(brw,
-		  "SAMPLER",
-		  BRW_SAMPLER,
-		  0,		/* variable key/data size */
-		  0,
-		  DW_GENERAL_STATE,
-		  DWGS_SAMPLER_STATE);
-
-   brw_init_cache(brw,
-		  "WM_UNIT",
-		  BRW_WM_UNIT,
-		  sizeof(struct brw_wm_unit_state),
-		  0,
-		  DW_GENERAL_STATE,
-		  DWGS_WINDOWER_IZ_STATE);
-
-   brw_init_cache(brw,
-		  "SF_PROG",
-		  BRW_SF_PROG,
-		  sizeof(struct brw_sf_prog_key),
-		  sizeof(struct brw_sf_prog_data),
-		  DW_GENERAL_STATE,
-		  DWGS_KERNEL_INSTRUCTIONS);
-
-   brw_init_cache(brw,
-		  "SF_VP",
-		  BRW_SF_VP,
-		  sizeof(struct brw_sf_viewport),
-		  0,
-		  DW_GENERAL_STATE,
-		  DWGS_STRIPS_FANS_VIEWPORT_STATE);
-
-   brw_init_cache(brw,
-		  "SF_UNIT",
-		  BRW_SF_UNIT,
-		  sizeof(struct brw_sf_unit_state),
-		  0,
-		  DW_GENERAL_STATE,
-		  DWGS_STRIPS_FANS_STATE);
-
-   brw_init_cache(brw,
-		  "VS_UNIT",
-		  BRW_VS_UNIT,
-		  sizeof(struct brw_vs_unit_state),
-		  0,
-		  DW_GENERAL_STATE,
-		  DWGS_VERTEX_SHADER_STATE);
-
-   brw_init_cache(brw,
-		  "VS_PROG",
-		  BRW_VS_PROG,
-		  sizeof(struct brw_vs_prog_key),
-		  sizeof(struct brw_vs_prog_data),
-		  DW_GENERAL_STATE,
-		  DWGS_KERNEL_INSTRUCTIONS);
-
-   brw_init_cache(brw,
-		  "CLIP_UNIT",
-		  BRW_CLIP_UNIT,
-		  sizeof(struct brw_clip_unit_state),
-		  0,
-		  DW_GENERAL_STATE,
-		  DWGS_CLIPPER_STATE);
-
-   brw_init_cache(brw,
-		  "CLIP_PROG",
-		  BRW_CLIP_PROG,
-		  sizeof(struct brw_clip_prog_key),
-		  sizeof(struct brw_clip_prog_data),
-		  DW_GENERAL_STATE,
-		  DWGS_KERNEL_INSTRUCTIONS);
-
-   brw_init_cache(brw,
-		  "GS_UNIT",
-		  BRW_GS_UNIT,
-		  sizeof(struct brw_gs_unit_state),
-		  0,
-		  DW_GENERAL_STATE,
-		  DWGS_GEOMETRY_SHADER_STATE);
-
-   brw_init_cache(brw,
-		  "GS_PROG",
-		  BRW_GS_PROG,
-		  sizeof(struct brw_gs_prog_key),
-		  sizeof(struct brw_gs_prog_data),
-		  DW_GENERAL_STATE,
-		  DWGS_KERNEL_INSTRUCTIONS);
-
-   brw_init_cache(brw,
-		  "SS_SURFACE",
-		  BRW_SS_SURFACE,
-		  sizeof(struct brw_surface_state),
-		  0,
-		  DW_SURFACE_STATE,
-		  DWSS_SURFACE_STATE);
-
-   brw_init_cache(brw,
-		  "SS_SURF_BIND",
-		  BRW_SS_SURF_BIND,
-		  sizeof(struct brw_surface_binding_table),
-		  0,
-		  DW_SURFACE_STATE,
-		  DWSS_BINDING_TABLE_STATE);
+   brw_init_cache_id(brw,
+		     "CC_VP",
+		     BRW_CC_VP,
+		     sizeof(struct brw_cc_viewport),
+		     0);
+
+   brw_init_cache_id(brw,
+		     "CC_UNIT",
+		     BRW_CC_UNIT,
+		     sizeof(struct brw_cc_unit_state),
+		     0);
+
+   brw_init_cache_id(brw,
+		     "WM_PROG",
+		     BRW_WM_PROG,
+		     sizeof(struct brw_wm_prog_key),
+		     sizeof(struct brw_wm_prog_data));
+
+   brw_init_cache_id(brw,
+		     "SAMPLER_DEFAULT_COLOR",
+		     BRW_SAMPLER_DEFAULT_COLOR,
+		     sizeof(struct brw_sampler_default_color),
+		     0);
+
+   brw_init_cache_id(brw,
+		     "SAMPLER",
+		     BRW_SAMPLER,
+		     0,		/* variable key/data size */
+		     0);
+
+   brw_init_cache_id(brw,
+		     "WM_UNIT",
+		     BRW_WM_UNIT,
+		     sizeof(struct brw_wm_unit_state),
+		     0);
+
+   brw_init_cache_id(brw,
+		     "SF_PROG",
+		     BRW_SF_PROG,
+		     sizeof(struct brw_sf_prog_key),
+		     sizeof(struct brw_sf_prog_data));
+
+   brw_init_cache_id(brw,
+		     "SF_VP",
+		     BRW_SF_VP,
+		     sizeof(struct brw_sf_viewport),
+		     0);
+
+   brw_init_cache_id(brw,
+		     "SF_UNIT",
+		     BRW_SF_UNIT,
+		     sizeof(struct brw_sf_unit_state),
+		     0);
+
+   brw_init_cache_id(brw,
+		     "VS_UNIT",
+		     BRW_VS_UNIT,
+		     sizeof(struct brw_vs_unit_state),
+		     0);
+
+   brw_init_cache_id(brw,
+		     "VS_PROG",
+		     BRW_VS_PROG,
+		     sizeof(struct brw_vs_prog_key),
+		     sizeof(struct brw_vs_prog_data));
+
+   brw_init_cache_id(brw,
+		     "CLIP_UNIT",
+		     BRW_CLIP_UNIT,
+		     sizeof(struct brw_clip_unit_state),
+		     0);
+
+   brw_init_cache_id(brw,
+		     "CLIP_PROG",
+		     BRW_CLIP_PROG,
+		     sizeof(struct brw_clip_prog_key),
+		     sizeof(struct brw_clip_prog_data));
+
+   brw_init_cache_id(brw,
+		     "GS_UNIT",
+		     BRW_GS_UNIT,
+		     sizeof(struct brw_gs_unit_state),
+		     0);
+
+   brw_init_cache_id(brw,
+		     "GS_PROG",
+		     BRW_GS_PROG,
+		     sizeof(struct brw_gs_prog_key),
+		     sizeof(struct brw_gs_prog_data));
+
+   brw_init_cache_id(brw,
+		     "SS_SURFACE",
+		     BRW_SS_SURFACE,
+		     sizeof(struct brw_surface_state),
+		     0);
+
+   brw_init_cache_id(brw,
+		     "SS_SURF_BIND",
+		     BRW_SS_SURF_BIND,
+		     0,
+		     0);
 }
 
-
-/* When we lose hardware context, need to invalidate the surface cache
- * as these structs must be explicitly re-uploaded.  They are subject
- * to fixup by the memory manager as they contain absolute agp
- * offsets, so we need to ensure there is a fresh version of the
- * struct available to receive the fixup.
- *
- * XXX: Need to ensure that there aren't two versions of a surface or
- * bufferobj with different backing data active in the same buffer at
- * once?  Otherwise the cache could confuse them.  Maybe better not to
- * cache at all?
- * 
- * --> Isn't this the same as saying need to ensure batch is flushed
- *         before new data is uploaded to an existing buffer?  We
- *         already try to make sure of that.
- */
-static void clear_cache( struct brw_cache *cache )
+static void
+brw_clear_cache( struct brw_context *brw )
 {
    struct brw_cache_item *c, *next;
    GLuint i;
 
-   for (i = 0; i < cache->size; i++) {
-      for (c = cache->items[i]; c; c = next) {
+   if (INTEL_DEBUG & DEBUG_STATE)
+      _mesa_printf("%s\n", __FUNCTION__);
+
+   for (i = 0; i < brw->cache.size; i++) {
+      for (c = brw->cache.items[i]; c; c = next) {
+	 int j;
+
 	 next = c->next;
+	 for (j = 0; j < c->nr_reloc_bufs; j++)
+	    dri_bo_unreference(c->reloc_bufs[j]);
+	 dri_bo_unreference(c->bo);
 	 free((void *)c->key);
 	 free(c);
       }
-      cache->items[i] = NULL;
+      brw->cache.items[i] = NULL;
    }
 
-   cache->n_items = 0;
-}
-
-void brw_clear_all_caches( struct brw_context *brw )
-{
-   GLint i;
-
-   if (INTEL_DEBUG & DEBUG_STATE)
-      _mesa_printf("%s\n", __FUNCTION__);
-
-   for (i = 0; i < BRW_MAX_CACHE; i++)
-      clear_cache(&brw->cache[i]);      
+   brw->cache.n_items = 0;
 
    if (brw->curbe.last_buf) {
       _mesa_free(brw->curbe.last_buf);
@@ -456,14 +486,24 @@ void brw_clear_all_caches( struct brw_context *brw )
    brw->state.dirty.cache |= ~0;
 }
 
+void brw_state_cache_check_size( struct brw_context *brw )
+{
+   /* Drop every cached item once more than 1000 have accumulated.  The limit
+    * is an un-tuned guess: ~20 state objects total ~32k, so 1000 are ~1.5MB.
+    */
+   if (brw->cache.n_items > 1000)
+      brw_clear_cache(brw);
+}
 
-
-
-
-void brw_destroy_caches( struct brw_context *brw )
+void brw_destroy_cache( struct brw_context *brw )
 {
    GLuint i;
 
+   brw_clear_cache(brw);
    for (i = 0; i < BRW_MAX_CACHE; i++)
-      clear_cache(&brw->cache[i]);      
+      free(brw->cache.name[i]);
+   /* brw_clear_cache() already freed the items; now free the table itself. */
+   free(brw->cache.items);
+   brw->cache.items = NULL;
+   brw->cache.size = 0;
 }
diff --git a/i965/brw_state_dump.c b/i965/brw_state_dump.c
new file mode 100644
index 0000000..3a93f9f
--- /dev/null
+++ b/i965/brw_state_dump.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "mtypes.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+/**
+ * Prints a header, one DWORD of hardware state, and a message to stderr.
+ *
+ * \param name Name of the state object
+ * \param data Pointer to the base of the state object
+ * \param hw_offset Hardware offset of the base of the state data.
+ * \param index Index of the DWORD being output.
+ * \param fmt printf-style format for the message; its arguments follow.
+ */
+static void
+state_out(const char *name, const void *data, uint32_t hw_offset, int index,
+	  const char *fmt, ...)
+{
+    va_list va;
+
+    fprintf(stderr, "%8s: 0x%08x: 0x%08x: ",
+	    name, hw_offset + index * 4, ((const uint32_t *)data)[index]);
+    va_start(va, fmt);
+    vfprintf(stderr, fmt, va);
+    va_end(va);
+}
+
+/** Generic, undecoded dump: prints state_size / 4 DWORDs of buffer. */
+static void
+state_struct_out(const char *name, dri_bo *buffer, unsigned int state_size)
+{
+   unsigned int i;   /* unsigned, like state_size in the loop bound */
+
+   if (buffer == NULL)
+      return;
+
+   dri_bo_map(buffer, GL_FALSE);
+   for (i = 0; i < state_size / 4; i++) {
+      state_out(name, buffer->virtual, buffer->offset, i,
+		"dword %u\n", i);
+   }
+   dri_bo_unmap(buffer);
+}
+
+static const char *
+get_965_surfacetype(unsigned int surfacetype)
+{
+    /* Indexed by surface type value; NULL marks values without a name. */
+    static const char *const names[] = {
+	"1D", "2D", "3D", "CUBE", "BUFFER", NULL, NULL, "NULL"
+    };
+
+    if (surfacetype < sizeof(names) / sizeof(names[0]) && names[surfacetype])
+	return names[surfacetype];
+
+    return "unknown";
+}
+
+/* Decodes and prints DWORDs 0-4 of each WM surface state buffer to stderr. */
+static void dump_wm_surface_state(struct brw_context *brw)
+{
+   int i;
+
+   for (i = 0; i < brw->wm.nr_surfaces; i++) {
+      dri_bo *surf_bo = brw->wm.surf_bo[i];
+      unsigned int surfoff;
+      struct brw_surface_state *surf;
+      char name[20];
+
+      if (surf_bo == NULL) {
+	 fprintf(stderr, "WM SS%d: NULL\n", i);
+	 continue;
+      }
+      dri_bo_map(surf_bo, GL_FALSE);
+      surfoff = surf_bo->offset;
+      surf = (struct brw_surface_state *)(surf_bo->virtual);
+      snprintf(name, sizeof(name), "WM SS%d", i);   /* bounded by name[] */
+      state_out(name, surf, surfoff, 0, "%s\n",
+		get_965_surfacetype(surf->ss0.surface_type));
+      state_out(name, surf, surfoff, 1, "offset\n");
+      state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n",
+		surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count);
+      state_out(name, surf, surfoff, 3, "pitch %d, %stiled\n",
+		surf->ss3.pitch + 1, surf->ss3.tiled_surface ? "" : "not ");
+      state_out(name, surf, surfoff, 4, "mip base %d\n",
+		surf->ss4.min_lod);
+
+      dri_bo_unmap(surf_bo);
+   }
+}
+
+/* Dumps the SF viewport state buffer to stderr; no-op if it is NULL. */
+static void dump_sf_viewport_state(struct brw_context *brw)
+{
+   const char *name = "SF VP";
+   struct brw_sf_viewport *vp;
+   uint32_t vp_off;
+
+   if (brw->sf.vp_bo == NULL)
+      return;
+
+   dri_bo_map(brw->sf.vp_bo, GL_FALSE);
+   vp = brw->sf.vp_bo->virtual;
+   vp_off = brw->sf.vp_bo->offset;
+   /* DWORDs 0-5: the six viewport matrix elements, printed as floats. */
+   state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00);
+   state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11);
+   state_out(name, vp, vp_off, 2, "m22 = %f\n", vp->viewport.m22);
+   state_out(name, vp, vp_off, 3, "m30 = %f\n", vp->viewport.m30);
+   state_out(name, vp, vp_off, 4, "m31 = %f\n", vp->viewport.m31);
+   state_out(name, vp, vp_off, 5, "m32 = %f\n", vp->viewport.m32);
+   /* DWORDs 6-7: scissor corners (xmin,ymin then xmax,ymax). */
+   state_out(name, vp, vp_off, 6, "top left = %d,%d\n",
+	     vp->scissor.xmin, vp->scissor.ymin);
+   state_out(name, vp, vp_off, 7, "bottom right = %d,%d\n",
+	     vp->scissor.xmax, vp->scissor.ymax);
+
+   dri_bo_unmap(brw->sf.vp_bo);
+}
+
+/* Hex-dumps a program buffer to stderr; no-op if prog is NULL. */
+static void brw_debug_prog(const char *name, dri_bo *prog)
+{
+   unsigned int i;
+   uint32_t *data;
+
+   if (prog == NULL)
+      return;
+
+   dri_bo_map(prog, GL_FALSE);
+   data = prog->virtual;
+   /* One output line per complete group of four DWORDs (16 bytes). */
+   for (i = 0; i < prog->size / 4 / 4; i++) {
+      fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+	      name, (unsigned int)prog->offset + i * 4 * 4,
+	      data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]);
+   }
+
+   dri_bo_unmap(prog);
+}
+
+
+/**
+ * Print additional debug information associated with the batchbuffer
+ * when DEBUG_BATCH is set.
+ *
+ * For 965, this means mapping the state buffers that would have been referenced
+ * by the batchbuffer and dumping them: WM binding table and surfaces, then the
+ * VS, GS, SF (plus SF viewport) and WM unit state and kernel programs.
+ * The buffer offsets printed rely on the buffer containing the last offset
+ * it was validated at.
+ */
+void brw_debug_batch(struct intel_context *intel)
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+
+   state_struct_out("WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces);
+   dump_wm_surface_state(brw);
+
+   state_struct_out("VS", brw->vs.state_bo, sizeof(struct brw_vs_unit_state));
+   brw_debug_prog("VS prog", brw->vs.prog_bo);
+
+   state_struct_out("GS", brw->gs.state_bo, sizeof(struct brw_gs_unit_state));
+   brw_debug_prog("GS prog", brw->gs.prog_bo);
+
+   state_struct_out("SF", brw->sf.state_bo, sizeof(struct brw_sf_unit_state));
+   dump_sf_viewport_state(brw);
+   brw_debug_prog("SF prog", brw->sf.prog_bo);
+
+   state_struct_out("WM", brw->wm.state_bo, sizeof(struct brw_wm_unit_state));
+   brw_debug_prog("WM prog", brw->wm.prog_bo);
+}
diff --git a/i965/brw_state_pool.c b/i965/brw_state_pool.c
deleted file mode 100644
index b9926f2..0000000
--- a/i965/brw_state_pool.c
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
- develop this 3D driver.
- 
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
- 
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
- 
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- **********************************************************************/
- /*
-  * Authors:
-  *   Keith Whitwell <keith@tungstengraphics.com>
-  */
-       
-
-#include "brw_state.h"
-#include "imports.h"
-
-#include "intel_ioctl.h"
-#include "bufmgr.h"
-
-GLboolean brw_pool_alloc( struct brw_mem_pool *pool,
-			  GLuint size,
-			  GLuint align,
-			  GLuint *offset_return)
-{
-   GLuint align_mask = (1<<align)-1;
-   GLuint fixup = ((pool->offset + align_mask) & ~align_mask) - pool->offset;
-
-   size = (size + 3) & ~3;
-
-   if (pool->offset + fixup + size >= pool->size) {
-      _mesa_printf("%s failed\n", __FUNCTION__);
-      assert(0);
-      exit(0);
-   }
-
-   pool->offset += fixup;
-   *offset_return = pool->offset;
-   pool->offset += size; 
-  
-   return GL_TRUE;
-}
-
-static
-void brw_invalidate_pool( struct intel_context *intel,
-			  struct brw_mem_pool *pool )
-{
-   if (INTEL_DEBUG & DEBUG_STATE)
-      _mesa_printf("\n\n\n %s \n\n\n", __FUNCTION__);
-   
-   bmBufferData(intel,
-		pool->buffer,
-		pool->size,
-		NULL,
-		0); 
-
-   pool->offset = 0;
-
-   brw_clear_all_caches(pool->brw);
-}
-
-static void brw_invalidate_pool_cb( struct intel_context *intel, void *ptr )
-{
-   struct brw_mem_pool *pool = (struct brw_mem_pool *) ptr;
-
-   pool->offset = 0;
-   brw_clear_all_caches(pool->brw);
-}
-
-
-
-static void brw_init_pool( struct brw_context *brw,
-			   GLuint pool_id,
-			   GLuint size )
-{
-   struct brw_mem_pool *pool = &brw->pool[pool_id];
-
-   pool->size = size;   
-   pool->brw = brw;
-   
-   bmGenBuffers(&brw->intel, "pool", 1, &pool->buffer, 12);
-
-   /* Also want to say not to wait on fences when data is presented
-    */
-   bmBufferSetInvalidateCB(&brw->intel, pool->buffer, 
-			   brw_invalidate_pool_cb, 
-			   pool,
-			   GL_TRUE);   
-
-   bmBufferData(&brw->intel,
-		pool->buffer,
-		pool->size,
-		NULL,
-		0); 
-
-}
-
-static void brw_destroy_pool( struct brw_context *brw,
-			      GLuint pool_id )
-{
-   struct brw_mem_pool *pool = &brw->pool[pool_id];
-   
-   bmDeleteBuffers(&brw->intel, 1, &pool->buffer);
-}
-
-
-void brw_pool_check_wrap( struct brw_context *brw,
-			  struct brw_mem_pool *pool )
-{
-   if (pool->offset > (pool->size * 3) / 4) {
-      if (brw->intel.aub_file)
-	 brw->intel.aub_wrap = 1;
-      else
-	 brw->state.dirty.brw |= BRW_NEW_CONTEXT;
-   }
-
-}
-
-void brw_init_pools( struct brw_context *brw )
-{
-   brw_init_pool(brw, BRW_GS_POOL, 0x80000);
-   brw_init_pool(brw, BRW_SS_POOL, 0x80000);
-}
-
-void brw_destroy_pools( struct brw_context *brw )
-{
-   brw_destroy_pool(brw, BRW_GS_POOL);
-   brw_destroy_pool(brw, BRW_SS_POOL);
-}
-
-
-void brw_invalidate_pools( struct brw_context *brw )
-{
-   brw_invalidate_pool(&brw->intel, &brw->pool[BRW_GS_POOL]);
-   brw_invalidate_pool(&brw->intel, &brw->pool[BRW_SS_POOL]);
-}
diff --git a/i965/brw_state_upload.c b/i965/brw_state_upload.c
index 9bd2881..3b2ccd4 100644
--- a/i965/brw_state_upload.c
+++ b/i965/brw_state_upload.c
@@ -33,7 +33,7 @@
 
 #include "brw_context.h"
 #include "brw_state.h"
-#include "bufmgr.h"
+#include "dri_bufmgr.h"
 #include "intel_batchbuffer.h"
 
 /* This is used to initialize brw->state.atoms[].  We could use this
@@ -85,7 +85,6 @@ const struct brw_tracked_state *atoms[] =
    &brw_binding_table_pointers,
    &brw_blend_constant_color,
 
-   &brw_drawing_rect,
    &brw_depthbuffer,
 
    &brw_polygon_stipple,
@@ -112,8 +111,7 @@ void brw_init_state( struct brw_context *brw )
 {
    GLuint i;
 
-   brw_init_pools(brw);
-   brw_init_caches(brw);
+   brw_init_cache(brw);
 
    brw->state.atoms = _mesa_malloc(sizeof(atoms));
    brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms);
@@ -138,9 +136,8 @@ void brw_destroy_state( struct brw_context *brw )
       brw->state.atoms = NULL;
    }
 
-   brw_destroy_caches(brw);
+   brw_destroy_cache(brw);
    brw_destroy_batch_cache(brw);
-   brw_destroy_pools(brw);   
 }
 
 /***********************************************************************
@@ -176,10 +173,10 @@ static void xor_states( struct brw_state_flags *result,
 /***********************************************************************
  * Emit all state:
  */
-void brw_validate_state( struct brw_context *brw )
+int brw_validate_state( struct brw_context *brw )
 {
    struct brw_state_flags *state = &brw->state.dirty;
-   GLuint i;
+   GLuint i, ret, count;
 
    state->mesa |= brw->intel.NewGLState;
    brw->intel.NewGLState = 0;
@@ -205,18 +202,33 @@ void brw_validate_state( struct brw_context *brw )
    if (state->mesa == 0 &&
        state->cache == 0 &&
        state->brw == 0)
-      return;
+      return 0;
 
    if (brw->state.dirty.brw & BRW_NEW_CONTEXT)
       brw_clear_batch_cache_flush(brw);
 
+   brw->intel.Fallback = 0;
 
-   /* Make an early reference to the state pools, as we don't cope
-    * well with them being evicted from here down.
-    */
-   (void)bmBufferOffset(&brw->intel, brw->pool[BRW_GS_POOL].buffer);
-   (void)bmBufferOffset(&brw->intel, brw->pool[BRW_SS_POOL].buffer);
-   (void)bmBufferOffset(&brw->intel, brw->intel.batch->buffer);
+   count = 0;
+
+   /* do prepare stage for all atoms */
+   for (i = 0; i < Elements(atoms); i++) {
+      const struct brw_tracked_state *atom = brw->state.atoms[i];
+
+      if (brw->intel.Fallback)
+         break;
+
+      if (check_state(state, &atom->dirty)) {
+         if (atom->prepare) {
+            ret = atom->prepare(brw);
+            if (ret)
+               return ret;
+        }
+      }
+   }
+
+   if (brw->intel.Fallback)
+      return 0;
 
    if (INTEL_DEBUG) {
       /* Debug version which enforces various sanity checks on the
@@ -234,12 +246,13 @@ void brw_validate_state( struct brw_context *brw )
 	 assert(atom->dirty.mesa ||
 		atom->dirty.brw ||
 		atom->dirty.cache);
-	 assert(atom->update);
+
+	 if (brw->intel.Fallback)
+	    break;
 
 	 if (check_state(state, &atom->dirty)) {
-	    brw->state.atoms[i]->update( brw );
-	    
-/* 	    emit_foo(brw); */
+	    if (atom->emit)
+	       atom->emit( brw );
 	 }
 
 	 accumulate_state(&examined, &atom->dirty);
@@ -255,10 +268,19 @@ void brw_validate_state( struct brw_context *brw )
    }
    else {
       for (i = 0; i < Elements(atoms); i++) {	 
-	 if (check_state(state, &brw->state.atoms[i]->dirty))
-	    brw->state.atoms[i]->update( brw );
+	 const struct brw_tracked_state *atom = brw->state.atoms[i];
+
+	 if (brw->intel.Fallback)
+	    break;
+
+	 if (check_state(state, &atom->dirty)) {
+	    if (atom->emit)
+	       atom->emit( brw );
+	 }
       }
    }
 
-   memset(state, 0, sizeof(*state));
+   if (!brw->intel.Fallback)
+      memset(state, 0, sizeof(*state));
+   return 0;
 }
diff --git a/i965/brw_structs.h b/i965/brw_structs.h
index a799122..ec865c9 100644
--- a/i965/brw_structs.h
+++ b/i965/brw_structs.h
@@ -173,6 +173,48 @@ struct brw_depthbuffer
       } bits;
       GLuint dword;
    } dword4;
+};
+
+struct brw_depthbuffer_gm45_g4x
+{
+   union header_union header;
+   
+   union {
+      struct {
+	 GLuint pitch:18; 
+	 GLuint format:3; 
+	 GLuint pad:2;
+	 GLuint software_tiled_rendering_mode:2;
+	 GLuint depth_offset_disable:1; 
+	 GLuint tile_walk:1; 
+	 GLuint tiled_surface:1; 
+	 GLuint pad2:1;
+	 GLuint surface_type:3; 
+      } bits;
+      GLuint dword;
+   } dword1;
+   
+   GLuint dword2_base_addr; 
+ 
+   union {
+      struct {
+	 GLuint pad:1;
+	 GLuint mipmap_layout:1; 
+	 GLuint lod:4; 
+	 GLuint width:13; 
+	 GLuint height:13; 
+      } bits;
+      GLuint dword;
+   } dword3;
+
+   union {
+      struct {
+	 GLuint pad:10;
+	 GLuint min_array_element:11; 
+	 GLuint depth:11; 
+      } bits;
+      GLuint dword;
+   } dword4;
 
    union {
       struct {
@@ -267,39 +309,39 @@ struct brw_pipelined_state_pointers
    
    struct {
       GLuint pad:5;
-      GLuint offset:27; 
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
    } vs;
    
    struct
    {
       GLuint enable:1;
       GLuint pad:4;
-      GLuint offset:27; 
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
    } gs;
    
    struct
    {
       GLuint enable:1;
       GLuint pad:4;
-      GLuint offset:27; 
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
    } clp;
    
    struct
    {
       GLuint pad:5;
-      GLuint offset:27; 
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
    } sf;
 
    struct
    {
       GLuint pad:5;
-      GLuint offset:27; 
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
    } wm;
    
    struct
    {
       GLuint pad:5;
-      GLuint offset:27; /* KW: check me! */
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE. KW: check me! */
    } cc;
 };
 
@@ -502,7 +544,7 @@ struct thread0
    GLuint pad0:1;
    GLuint grf_reg_count:3; 
    GLuint pad1:2;
-   GLuint kernel_start_pointer:26; 
+   GLuint kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */
 };
 
 struct thread1
@@ -666,7 +708,7 @@ struct brw_cc_unit_state
    struct
    {
       GLuint pad0:5; 
-      GLuint cc_viewport_state_offset:27; 
+      GLuint cc_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */
    } cc4;
    
    struct
@@ -728,7 +770,7 @@ struct brw_sf_unit_state
       GLuint front_winding:1; 
       GLuint viewport_transform:1; 
       GLuint pad0:3;
-      GLuint sf_viewport_state_offset:27; 
+      GLuint sf_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */
    } sf5;
    
    struct
@@ -960,6 +1002,7 @@ struct brw_sf_viewport
       GLfloat m32;  
    } viewport;
 
+   /* scissor coordinates are inclusive */
    struct {
       GLshort xmin;
       GLshort ymin;
@@ -1362,7 +1405,7 @@ struct brw_instruction
          GLuint msg_target:4;
          GLuint pad1:3;
          GLuint end_of_thread:1;
-      } sampler_igd; 
+      } sampler_gm45_g4x; 
 
       struct brw_urb_immediate urb;
 
diff --git a/i965/brw_tex.c b/i965/brw_tex.c
index 9d4b986..258c626 100644
--- a/i965/brw_tex.c
+++ b/i965/brw_tex.c
@@ -44,151 +44,16 @@
 #include "intel_context.h"
 #include "intel_ioctl.h"
 #include "intel_regions.h"
+#include "intel_tex.h"
 #include "brw_context.h"
 #include "brw_defines.h"
 
 
-
-
-static const struct gl_texture_format *
-brwChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
-			 GLenum srcFormat, GLenum srcType )
-{
-   switch ( internalFormat ) {
-   case 4:
-   case GL_RGBA:
-   case GL_COMPRESSED_RGBA:
-      if (srcFormat == GL_BGRA && srcType == GL_UNSIGNED_SHORT_4_4_4_4_REV)
-	 return &_mesa_texformat_argb4444;
-      else if (srcFormat == GL_BGRA && srcType == GL_UNSIGNED_SHORT_1_5_5_5_REV)
-	 return &_mesa_texformat_argb1555;
-      else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
-	       (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE) ||
-	       (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8)) 
-	 return &_mesa_texformat_rgba8888_rev;
-      else
-	 return &_mesa_texformat_argb8888;
-
-   case GL_RGBA8:
-   case GL_RGB10_A2:
-   case GL_RGBA12:
-   case GL_RGBA16:
-      return &_mesa_texformat_argb8888; 
-
-   case GL_RGB8:
-   case GL_RGB10:
-   case GL_RGB12:
-   case GL_RGB16:
-      /* Broadwater doesn't support RGB888 textures, so these must be
-       * stored as ARGB.
-       */
-      return &_mesa_texformat_argb8888;
-
-   case 3:
-   case GL_COMPRESSED_RGB:
-   case GL_RGB:
-      if (srcFormat == GL_RGB &&
-	  srcType == GL_UNSIGNED_SHORT_5_6_5)
-	 return &_mesa_texformat_rgb565;
-      else
-	 return &_mesa_texformat_argb8888;
-
-
-   case GL_RGB5:
-   case GL_RGB5_A1:
-      return &_mesa_texformat_argb1555;
-
-   case GL_R3_G3_B2:
-   case GL_RGBA2:
-   case GL_RGBA4:
-   case GL_RGB4:
-      return &_mesa_texformat_argb4444;
-
-   case GL_ALPHA:
-   case GL_ALPHA4:
-   case GL_ALPHA8:
-   case GL_ALPHA12:
-   case GL_ALPHA16:
-   case GL_COMPRESSED_ALPHA:
-      return &_mesa_texformat_a8;
-
-   case 1:
-   case GL_LUMINANCE:
-   case GL_LUMINANCE4:
-   case GL_LUMINANCE8:
-   case GL_LUMINANCE12:
-   case GL_LUMINANCE16:
-   case GL_COMPRESSED_LUMINANCE:
-      return &_mesa_texformat_l8;
-
-   case 2:
-   case GL_LUMINANCE_ALPHA:
-   case GL_LUMINANCE4_ALPHA4:
-   case GL_LUMINANCE6_ALPHA2:
-   case GL_LUMINANCE8_ALPHA8:
-   case GL_LUMINANCE12_ALPHA4:
-   case GL_LUMINANCE12_ALPHA12:
-   case GL_LUMINANCE16_ALPHA16:
-   case GL_COMPRESSED_LUMINANCE_ALPHA:
-      return &_mesa_texformat_al88;
-
-   case GL_INTENSITY:
-   case GL_INTENSITY4:
-   case GL_INTENSITY8:
-   case GL_INTENSITY12:
-   case GL_INTENSITY16:
-   case GL_COMPRESSED_INTENSITY:
-      return &_mesa_texformat_i8;
-
-   case GL_YCBCR_MESA:
-      if (srcType == GL_UNSIGNED_SHORT_8_8_MESA ||
-	  srcType == GL_UNSIGNED_BYTE)
-         return &_mesa_texformat_ycbcr;
-      else
-         return &_mesa_texformat_ycbcr_rev;
-
-   case GL_COMPRESSED_RGB_FXT1_3DFX:
-       return &_mesa_texformat_rgb_fxt1;
-   case GL_COMPRESSED_RGBA_FXT1_3DFX:
-       return &_mesa_texformat_rgba_fxt1;
-
-   case GL_RGB_S3TC:
-   case GL_RGB4_S3TC:
-   case GL_RGBA_S3TC:
-   case GL_RGBA4_S3TC:
-   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
-   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
-   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
-   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
-     return &_mesa_texformat_rgb_dxt1; /* there is no rgba support? */
-
-   case GL_DEPTH_COMPONENT:
-   case GL_DEPTH_COMPONENT16:
-   case GL_DEPTH_COMPONENT24:
-   case GL_DEPTH_COMPONENT32:
-      return &_mesa_texformat_z16;
-
-   default:
-      fprintf(stderr, "unexpected texture format %s in %s\n", 
-	      _mesa_lookup_enum_by_nr(internalFormat),
-	      __FUNCTION__);
-      return NULL;
-   }
-
-   return NULL; /* never get here */
-}
-
-
-void brwInitTextureFuncs( struct dd_function_table *functions )
-{
-   functions->ChooseTextureFormat = brwChooseTextureFormat;
-}
-
-void brw_FrameBufferTexInit( struct brw_context *brw )
+void brw_FrameBufferTexInit( struct brw_context *brw,
+			     struct intel_region *region )
 {
    struct intel_context *intel = &brw->intel;
    GLcontext *ctx = &intel->ctx;
-   struct intel_region *region = intel->front_region;
    struct gl_texture_object *obj;
    struct gl_texture_image *img;
    
@@ -209,6 +74,26 @@ void brw_FrameBufferTexInit( struct brw_context *brw )
 
 void brw_FrameBufferTexDestroy( struct brw_context *brw )
 {
-   brw->intel.ctx.Driver.DeleteTexture( &brw->intel.ctx,
-					brw->intel.frame_buffer_texobj );
+   if (brw->intel.frame_buffer_texobj != NULL)
+      brw->intel.ctx.Driver.DeleteTexture( &brw->intel.ctx,
+					   brw->intel.frame_buffer_texobj );
+   brw->intel.frame_buffer_texobj = NULL; /* so a second call is a no-op */
+}
+
+/**
+ * Finalizes all enabled textures (intel_finalize_mipmap_tree() per unit with
+ * _ReallyEnabled set), completing any rendering needed to prepare them.
+ */
+void brw_validate_textures( struct brw_context *brw )
+{
+   struct intel_context *intel = &brw->intel;
+   int i;
+
+   for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+      struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[i];
+
+      if (texUnit->_ReallyEnabled) {
+	 intel_finalize_mipmap_tree(intel, i);
+      }
+   }
 }
diff --git a/i965/brw_tex_layout.c b/i965/brw_tex_layout.c
index d4888a4..e437c41 100644
--- a/i965/brw_tex_layout.c
+++ b/i965/brw_tex_layout.c
@@ -35,8 +35,10 @@
 
 #include "intel_mipmap_tree.h"
 #include "intel_tex_layout.h"
+#include "intel_context.h"
 #include "macros.h"
 
+#define FILE_DEBUG_FLAG DEBUG_MIPTREE
 
 GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_tree *mt )
 {
@@ -53,11 +55,20 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t
       GLuint pack_x_pitch, pack_x_nr;
       GLuint pack_y_pitch;
       GLuint level;
+      GLuint align_h = 2;
+      GLuint align_w = 4;
 
-      mt->pitch = ((mt->width0 * mt->cpp + 3) & ~3) / mt->cpp;
       mt->total_height = 0;
+      
+      if (mt->compressed) {
+          align_w = intel_compressed_alignment(mt->internal_format);
+          mt->pitch = ALIGN(width, align_w);
+          pack_y_pitch = (height + 3) / 4;
+      } else {
+          mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0);
+          pack_y_pitch = ALIGN(mt->height0, align_h);
+      }
 
-      pack_y_pitch = MAX2(mt->height0, 2);
       pack_x_pitch = mt->pitch;
       pack_x_nr = 1;
 
@@ -83,20 +94,30 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t
 
 
 	 mt->total_height += y;
-
-	 if (pack_x_pitch > 4) {
-	    pack_x_pitch >>= 1;
-	    pack_x_nr <<= 1;
-	    assert(pack_x_pitch * pack_x_nr <= mt->pitch);
-	 }
-
-	 if (pack_y_pitch > 2) {
-	    pack_y_pitch >>= 1;
-	 }
-
 	 width  = minify(width);
 	 height = minify(height);
 	 depth  = minify(depth);
+
+    if (mt->compressed) {
+        pack_y_pitch = (height + 3) / 4;
+        
+        if (pack_x_pitch > ALIGN(width, align_w)) {
+            pack_x_pitch = ALIGN(width, align_w);
+            pack_x_nr <<= 1;
+        }
+    } else {
+        if (pack_x_pitch > 4) {
+            pack_x_pitch >>= 1;
+            pack_x_nr <<= 1;
+            assert(pack_x_pitch * pack_x_nr <= mt->pitch);
+        }
+
+        if (pack_y_pitch > 2) {
+            pack_y_pitch >>= 1;
+            pack_y_pitch = ALIGN(pack_y_pitch, align_h);
+        }
+    }
+
       }
       break;
    }
diff --git a/i965/brw_urb.c b/i965/brw_urb.c
index 4ca6e99..c423dbe 100644
--- a/i965/brw_urb.c
+++ b/i965/brw_urb.c
@@ -35,7 +35,6 @@
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
-#include "brw_hal.h"
 
 #define VS 0
 #define GS 1
@@ -53,7 +52,7 @@ static const struct {
    GLuint min_entry_size;
    GLuint max_entry_size;
 } limits[CS+1] = {
-   { 8, 32, 1, 5 },			/* vs */
+   { 16, 32, 1, 5 },			/* vs */
    { 4, 8,  1, 5 },			/* gs */
    { 6, 8,  1, 5 },			/* clp */
    { 1, 8,  1, 12 },		        /* sf */
@@ -75,26 +74,12 @@ static GLboolean check_urb_layout( struct brw_context *brw )
 /* Most minimal update, forces re-emit of URB fence packet after GS
  * unit turned on/off.
  */
-static void recalculate_urb_fence( struct brw_context *brw )
+static int recalculate_urb_fence( struct brw_context *brw )
 {
    GLuint csize = brw->curbe.total_size;
    GLuint vsize = brw->vs.prog_data->urb_entry_size;
    GLuint sfsize = brw->sf.prog_data->urb_entry_size;
 
-   static GLboolean (*hal_recalculate_urb_fence) (struct brw_context *brw);
-   static GLboolean hal_tried;
-
-   if (!hal_tried)
-   {
-      hal_recalculate_urb_fence = brw_hal_find_symbol ("intel_hal_recalculate_urb_fence");
-      hal_tried = 1;
-   }
-   if (hal_recalculate_urb_fence)
-   {
-      if ((*hal_recalculate_urb_fence) (brw))
-	 return;
-   }
-   
    if (csize < limits[CS].min_entry_size)
       csize = limits[CS].min_entry_size;
 
@@ -157,6 +142,7 @@ static void recalculate_urb_fence( struct brw_context *brw )
       
       brw->state.dirty.brw |= BRW_NEW_URB_FENCE;
    }
+   return 0;
 }
 
 
@@ -167,7 +153,7 @@ const struct brw_tracked_state brw_recalculate_urb_fence = {
       .cache = (CACHE_NEW_VS_PROG |
 		CACHE_NEW_SF_PROG)
    },
-   .update = recalculate_urb_fence
+   .prepare = recalculate_urb_fence
 };
 
 
diff --git a/i965/brw_util.c b/i965/brw_util.c
index b6deee2..d8d35c5 100644
--- a/i965/brw_util.c
+++ b/i965/brw_util.c
@@ -45,86 +45,6 @@ GLuint brw_count_bits( GLuint val )
 }
 
 
-static GLuint brw_parameter_state_flags(const gl_state_index state[])
-{
-   switch (state[0]) {
-   case STATE_MATERIAL:
-   case STATE_LIGHT:
-   case STATE_LIGHTMODEL_AMBIENT:
-   case STATE_LIGHTMODEL_SCENECOLOR:
-   case STATE_LIGHTPROD:
-      return _NEW_LIGHT;
-
-   case STATE_TEXGEN:
-   case STATE_TEXENV_COLOR:
-      return _NEW_TEXTURE;
-
-   case STATE_FOG_COLOR:
-   case STATE_FOG_PARAMS:
-      return _NEW_FOG;
-
-   case STATE_CLIPPLANE:
-      return _NEW_TRANSFORM;
-
-   case STATE_POINT_SIZE:
-   case STATE_POINT_ATTENUATION:
-      return _NEW_POINT;
-
-   case STATE_MODELVIEW_MATRIX:
-      return _NEW_MODELVIEW;
-   case STATE_PROJECTION_MATRIX:
-      return _NEW_PROJECTION;
-   case STATE_MVP_MATRIX:
-      return _NEW_MODELVIEW | _NEW_PROJECTION;
-   case STATE_TEXTURE_MATRIX:
-      return _NEW_TEXTURE_MATRIX;
-   case STATE_PROGRAM_MATRIX:
-      return _NEW_TRACK_MATRIX;
-
-   case STATE_DEPTH_RANGE:
-      return _NEW_VIEWPORT;
-
-   case STATE_FRAGMENT_PROGRAM:
-   case STATE_VERTEX_PROGRAM:
-      return _NEW_PROGRAM;
-
-   case STATE_INTERNAL:
-      switch (state[1]) {
-      case STATE_NORMAL_SCALE:
-	 return _NEW_MODELVIEW;
-      case STATE_TEXRECT_SCALE:
-	 return _NEW_TEXTURE;
-      default:
-	 assert(0);
-	 return 0;
-      }
-
-   default:
-      assert(0);
-      return 0;
-   }
-}
-
-
-GLuint
-brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList)
-{
-   GLuint i;
-   GLuint result = 0;
-
-   if (!paramList)
-      return 0;
-
-   for (i = 0; i < paramList->NumParameters; i++) {
-      if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) {
-         result |= brw_parameter_state_flags(paramList->Parameters[i].StateIndexes);
-      }
-   }
-
-   return result;
-}
-
-
 GLuint brw_translate_blend_equation( GLenum mode )
 {
    switch (mode) {
diff --git a/i965/brw_vs.c b/i965/brw_vs.c
index 50826d9..f89b0e1 100644
--- a/i965/brw_vs.c
+++ b/i965/brw_vs.c
@@ -73,19 +73,17 @@ static void do_vs_prog( struct brw_context *brw,
     */
    program = brw_get_program(&c.func, &program_size);
 
-   /*
-    */
-   brw->vs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_VS_PROG],
-					      &c.key,
-					      sizeof(c.key),
-					      program,
-					      program_size,
-					      &c.prog_data,
-					      &brw->vs.prog_data);
+   dri_bo_unreference(brw->vs.prog_bo);
+   brw->vs.prog_bo = brw_upload_cache( &brw->cache, BRW_VS_PROG,
+				       &c.key, sizeof(c.key),
+				       NULL, 0,
+				       program, program_size,
+				       &c.prog_data,
+				       &brw->vs.prog_data );
 }
 
 
-static void brw_upload_vs_prog( struct brw_context *brw )
+static int brw_upload_vs_prog( struct brw_context *brw )
 {
    struct brw_vs_prog_key key;
    struct brw_vertex_program *vp = 
@@ -110,13 +108,14 @@ static void brw_upload_vs_prog( struct brw_context *brw )
 
    /* Make an early check for the key.
     */
-   if (brw_search_cache(&brw->cache[BRW_VS_PROG], 
-			&key, sizeof(key),
-			&brw->vs.prog_data,
-			&brw->vs.prog_gs_offset))
-       return;
-
-   do_vs_prog(brw, vp, &key);
+   dri_bo_unreference(brw->vs.prog_bo);
+   brw->vs.prog_bo = brw_search_cache(&brw->cache, BRW_VS_PROG,
+				      &key, sizeof(key),
+				      NULL, 0,
+				      &brw->vs.prog_data);
+   if (brw->vs.prog_bo == NULL)
+      do_vs_prog(brw, vp, &key);
+   return dri_bufmgr_check_aperture_space(brw->vs.prog_bo);
 }
 
 
@@ -128,5 +127,5 @@ const struct brw_tracked_state brw_vs_prog = {
       .brw   = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_METAOPS,
       .cache = 0
    },
-   .update = brw_upload_vs_prog
+   .prepare = brw_upload_vs_prog
 };
diff --git a/i965/brw_vs.h b/i965/brw_vs.h
index fdb5785..41a33ff 100644
--- a/i965/brw_vs.h
+++ b/i965/brw_vs.h
@@ -36,7 +36,7 @@
 
 #include "brw_context.h"
 #include "brw_eu.h"
-#include "program.h"
+#include "shader/program.h"
 
 
 struct brw_vs_prog_key {
@@ -67,6 +67,12 @@ struct brw_vs_compile {
    struct brw_reg r1;
    struct brw_reg regs[PROGRAM_ADDRESS+1][128];
    struct brw_reg tmp;
+   struct brw_reg stack;
+
+   struct {	
+       GLboolean used_in_src;
+       struct brw_reg reg;
+   } output_regs[128];
 
    struct brw_reg userplane[6];
 
diff --git a/i965/brw_vs_constval.c b/i965/brw_vs_constval.c
index caef042..a0106b8 100644
--- a/i965/brw_vs_constval.c
+++ b/i965/brw_vs_constval.c
@@ -166,7 +166,7 @@ static GLuint get_input_size(struct brw_context *brw,
 /* Calculate sizes of vertex program outputs.  Size is the largest
  * component index which might vary from [0,0,0,1]
  */
-static void calc_wm_input_sizes( struct brw_context *brw )
+static int calc_wm_input_sizes( struct brw_context *brw )
 {
    /* BRW_NEW_VERTEX_PROGRAM */
    struct brw_vertex_program *vp = 
@@ -210,6 +210,7 @@ static void calc_wm_input_sizes( struct brw_context *brw )
       memcpy(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks));
       brw->state.dirty.brw |= BRW_NEW_WM_INPUT_DIMENSIONS;
    }
+   return 0;
 }
 
 const struct brw_tracked_state brw_wm_input_sizes = {
@@ -218,6 +219,6 @@ const struct brw_tracked_state brw_wm_input_sizes = {
       .brw   = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS,
       .cache = 0
    },
-   .update = calc_wm_input_sizes
+   .prepare = calc_wm_input_sizes
 };
 
diff --git a/i965/brw_vs_emit.c b/i965/brw_vs_emit.c
index c38e998..8759826 100644
--- a/i965/brw_vs_emit.c
+++ b/i965/brw_vs_emit.c
@@ -30,8 +30,8 @@
   */
             
 
-#include "program.h"
-#include "macros.h"
+#include "main/macros.h"
+#include "shader/program.h"
 #include "shader/prog_parameter.h"
 #include "shader/prog_print.h"
 #include "brw_context.h"
@@ -134,6 +134,16 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
 					     WRITEMASK_X);
       reg++;
    }
+
+   for (i = 0; i < 128; i++) {
+       if (c->output_regs[i].used_in_src) {
+            c->output_regs[i].reg = brw_vec8_grf(reg, 0);
+            reg++;
+        }
+   }
+
+   c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
+   reg += 2;
  
    
    /* Some opcodes need an internal temporary:
@@ -201,7 +211,7 @@ static void unalias2( struct brw_vs_compile *c,
 				    struct brw_reg,
 				    struct brw_reg ))
 {
-   if ((dst.file == arg0.file && dst.nr == arg0.nr) &&
+   if ((dst.file == arg0.file && dst.nr == arg0.nr) ||
        (dst.file == arg1.file && dst.nr == arg1.nr)) {
       struct brw_compile *p = &c->func;
       struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
@@ -213,57 +223,65 @@ static void unalias2( struct brw_vs_compile *c,
    }
 }
 
+static void emit_sop( struct brw_compile *p,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1, 
+		      GLuint cond)
+{
+   brw_MOV(p, dst, brw_imm_f(0.0f));
+   brw_CMP(p, brw_null_reg(), cond, arg0, arg1);
+   brw_MOV(p, dst, brw_imm_f(1.0f));
+   brw_set_predicate_control_flag_value(p, 0xff);
+}
 
+static void emit_seq( struct brw_compile *p,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1 )
+{
+   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ);
+}
 
-
+static void emit_sne( struct brw_compile *p,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1 )
+{
+   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ);
+}
 static void emit_slt( struct brw_compile *p, 
 		      struct brw_reg dst,
 		      struct brw_reg arg0,
 		      struct brw_reg arg1 )
 {
-   /* Could be done with an if/else/endif, but this method uses half
-    * the instructions.  Note that we are careful to reference the
-    * arguments before writing the dest.  That means we emit the
-    * instructions in an odd order and have to play with the flag
-    * values.
-    */
-   brw_push_insn_state(p);
-   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0, arg1);
-
-   /* Write all values to 1:
-    */
-   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-   brw_MOV(p, dst, brw_imm_f(1.0));
+   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L);
+}
 
-   /* Where the test succeeded, overwite with zero:
-    */
-   brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
-   brw_MOV(p, dst, brw_imm_f(0.0));
-   brw_pop_insn_state(p);
+static void emit_sle( struct brw_compile *p, 
+		      struct brw_reg dst,
+		      struct brw_reg arg0,
+		      struct brw_reg arg1 )
+{
+   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE);
 }
 
+static void emit_sgt( struct brw_compile *p, 
+		      struct brw_reg dst,
+		      struct brw_reg arg0,
+		      struct brw_reg arg1 )
+{
+   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G);
+}
 
 static void emit_sge( struct brw_compile *p, 
 		      struct brw_reg dst,
 		      struct brw_reg arg0,
 		      struct brw_reg arg1 )
 {
-   brw_push_insn_state(p);
-   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0, arg1);
-
-   /* Write all values to zero:
-    */
-   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-   brw_MOV(p, dst, brw_imm_f(0));
-
-   /* Where the test succeeded, overwite with 1:
-    */
-   brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
-   brw_MOV(p, dst, brw_imm_f(1.0));
-   brw_pop_insn_state(p);
+  emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE);
 }
 
-
 static void emit_max( struct brw_compile *p, 
 		      struct brw_reg dst,
 		      struct brw_reg arg0,
@@ -592,9 +610,13 @@ static struct brw_reg get_reg( struct brw_vs_compile *c,
    case PROGRAM_TEMPORARY:
    case PROGRAM_INPUT:
    case PROGRAM_OUTPUT:
-   case PROGRAM_STATE_VAR:
       assert(c->regs[file][index].nr != 0);
       return c->regs[file][index];
+   case PROGRAM_STATE_VAR:
+   case PROGRAM_CONSTANT:
+   case PROGRAM_UNIFORM:
+      assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
+      return c->regs[PROGRAM_STATE_VAR][index];
    case PROGRAM_ADDRESS:
       assert(index == 0);
       return c->regs[file][index];
@@ -668,28 +690,28 @@ static void emit_arl( struct brw_vs_compile *c,
  * account.
  */
 static struct brw_reg get_arg( struct brw_vs_compile *c,
-			       struct prog_src_register src )
+			       struct prog_src_register *src )
 {
    struct brw_reg reg;
 
-   if (src.File == PROGRAM_UNDEFINED)
+   if (src->File == PROGRAM_UNDEFINED)
       return brw_null_reg();
 
-   if (src.RelAddr) 
-      reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index);
+   if (src->RelAddr) 
+      reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index);
    else
-      reg = get_reg(c, src.File, src.Index);
+      reg = get_reg(c, src->File, src->Index);
 
    /* Convert 3-bit swizzle to 2-bit.  
     */
-   reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src.Swizzle, 0),
-				       GET_SWZ(src.Swizzle, 1),
-				       GET_SWZ(src.Swizzle, 2),
-				       GET_SWZ(src.Swizzle, 3));
+   reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0),
+				       GET_SWZ(src->Swizzle, 1),
+				       GET_SWZ(src->Swizzle, 2),
+				       GET_SWZ(src->Swizzle, 3));
 
    /* Note this is ok for non-swizzle instructions: 
     */
-   reg.negate = src.NegateBase ? 1 : 0;   
+   reg.negate = src->NegateBase ? 1 : 0;   
 
    return reg;
 }
@@ -845,7 +867,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
        * Later, clipping will detect ucp[6] and ensure the primitive is
        * clipped against all fixed planes.
        */
-      if (!BRW_IS_IGD(p->brw) && !c->key.know_w_is_one) {
+      if (!(BRW_IS_GM45(p->brw) || BRW_IS_G4X(p->brw)) && !c->key.know_w_is_one) {
 	 brw_CMP(p,
 		 vec8(brw_null_reg()),
 		 BRW_CONDITIONAL_L,
@@ -891,17 +913,50 @@ static void emit_vertex_write( struct brw_vs_compile *c)
 
 }
 
-
-
+static void 
+post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst )
+{
+   GLuint nr_insns = c->vp->program.Base.NumInstructions;
+   GLuint insn, target_insn;
+   struct prog_instruction *inst1, *inst2;
+   struct brw_instruction *brw_inst1, *brw_inst2;
+   int offset;
+   for (insn = 0; insn < nr_insns; insn++) {
+       inst1 = &c->vp->program.Base.Instructions[insn];
+       brw_inst1 = inst1->Data;
+       switch (inst1->Opcode) {
+	   case OPCODE_CAL:
+	   case OPCODE_BRA:
+	       target_insn = inst1->BranchTarget;
+	       inst2 = &c->vp->program.Base.Instructions[target_insn];
+	       brw_inst2 = inst2->Data;
+	       offset = brw_inst2 - brw_inst1;
+	       brw_set_src1(brw_inst1, brw_imm_d(offset*16));
+	       break;
+	   case OPCODE_END:
+	       offset = end_inst - brw_inst1;
+	       brw_set_src1(brw_inst1, brw_imm_d(offset*16));
+	       break;
+	   default:
+	       break;
+       }
+   }
+}
 
 /* Emit the fragment program instructions here.
  */
-void brw_vs_emit( struct brw_vs_compile *c )
+void brw_vs_emit(struct brw_vs_compile *c )
 {
+#define MAX_IFSN 32
    struct brw_compile *p = &c->func;
    GLuint nr_insns = c->vp->program.Base.NumInstructions;
-   GLuint insn;
+   GLuint insn, if_insn = 0;
+   struct brw_instruction *end_inst;
+   struct brw_instruction *if_inst[MAX_IFSN];
+   struct brw_indirect stack_index = brw_indirect(0, 0);   
 
+   GLuint index;
+   GLuint file;
 
    if (INTEL_DEBUG & DEBUG_VS) {
       _mesa_printf("\n\n\nvs-emit:\n");
@@ -912,9 +967,24 @@ void brw_vs_emit( struct brw_vs_compile *c )
    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
    brw_set_access_mode(p, BRW_ALIGN_16);
    
+   /* Message registers can't be read, so copy outputs into GRF registers
+      if they are used as source registers */
+   for (insn = 0; insn < nr_insns; insn++) {
+       GLuint i;
+       struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
+       for (i = 0; i < 3; i++) {
+	   struct prog_src_register *src = &inst->SrcReg[i];
+	   GLuint index = src->Index;
+	   GLuint file = src->File;	
+	   if (file == PROGRAM_OUTPUT && index != VERT_RESULT_HPOS)
+	       c->output_regs[index].used_in_src = GL_TRUE;
+       }
+   }
+
    /* Static register allocation
     */
    brw_vs_alloc_regs(c);
+   brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
 
    for (insn = 0; insn < nr_insns; insn++) {
 
@@ -924,17 +994,29 @@ void brw_vs_emit( struct brw_vs_compile *c )
       
       /* Get argument regs.  SWZ is special and does this itself.
        */
+      inst->Data = &p->store[p->nr_insn];
       if (inst->Opcode != OPCODE_SWZ)
-	 for (i = 0; i < 3; i++) 
-	    args[i] = get_arg(c, inst->SrcReg[i]);
+	  for (i = 0; i < 3; i++) {
+	      struct prog_src_register *src = &inst->SrcReg[i];
+	      index = src->Index;
+	      file = src->File;	
+	      if (file == PROGRAM_OUTPUT&&c->output_regs[index].used_in_src)
+		  args[i] = c->output_regs[index].reg;
+	      else
+		  args[i] = get_arg(c, src);
+	  }
 
       /* Get dest regs.  Note that it is possible for a reg to be both
        * dst and arg, given the static allocation of registers.  So
        * care needs to be taken emitting multi-operation instructions.
-       */
-      dst = get_dst(c, inst->DstReg);
+       */ 
+      index = inst->DstReg.Index;
+      file = inst->DstReg.File;
+      if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
+	  dst = c->output_regs[index].reg;
+      else
+	  dst = get_dst(c, inst->DstReg);
 
-      
       switch (inst->Opcode) {
       case OPCODE_ABS:
 	 brw_MOV(p, dst, brw_abs(args[0]));
@@ -1003,12 +1085,25 @@ void brw_vs_emit( struct brw_vs_compile *c )
       case OPCODE_RSQ:
 	 emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
 	 break;
+
+      case OPCODE_SEQ:
+         emit_seq(p, dst, args[0], args[1]);
+         break;
+      case OPCODE_SNE:
+         emit_sne(p, dst, args[0], args[1]);
+         break;
       case OPCODE_SGE:
 	 emit_sge(p, dst, args[0], args[1]);
 	 break;
+      case OPCODE_SGT:
+         emit_sgt(p, dst, args[0], args[1]);
+        break;
       case OPCODE_SLT:
 	 emit_slt(p, dst, args[0], args[1]);
 	 break;
+      case OPCODE_SLE:
+         emit_sle(p, dst, args[0], args[1]);
+         break;
       case OPCODE_SUB:
 	 brw_ADD(p, dst, args[0], negate(args[1]));
 	 break;
@@ -1021,21 +1116,82 @@ void brw_vs_emit( struct brw_vs_compile *c )
       case OPCODE_XPD:
 	 emit_xpd(p, dst, args[0], args[1]);
 	 break;
+      case OPCODE_IF:
+	 assert(if_insn < MAX_IFSN);
+         if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
+	 break;
+      case OPCODE_ELSE:
+	 if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]);
+	 break;
+      case OPCODE_ENDIF:
+         assert(if_insn > 0);
+	 brw_ENDIF(p, if_inst[--if_insn]);
+	 break;			
+      case OPCODE_BRA:
+         brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+         brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+         brw_set_predicate_control_flag_value(p, 0xff);
+        break;
+      case OPCODE_CAL:
+	 brw_set_access_mode(p, BRW_ALIGN_1);
+	 brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
+	 brw_set_access_mode(p, BRW_ALIGN_16);
+	 brw_ADD(p, get_addr_reg(stack_index),
+			 get_addr_reg(stack_index), brw_imm_d(4));
+	 inst->Data = &p->store[p->nr_insn];
+	 brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+        break;
+      case OPCODE_RET:
+	 brw_ADD(p, get_addr_reg(stack_index),
+			 get_addr_reg(stack_index), brw_imm_d(-4));
+	 brw_set_access_mode(p, BRW_ALIGN_1);
+         brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0));
+	 brw_set_access_mode(p, BRW_ALIGN_16);
       case OPCODE_END:	
+         brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+        break;
       case OPCODE_PRINT:
+      case OPCODE_BGNSUB:
+      case OPCODE_ENDSUB:
 	 break;
       default:
+	 _mesa_printf("Unsupport opcode %d in vertex shader\n", inst->Opcode);
 	 break;
       }
 
+      if ((inst->DstReg.File == PROGRAM_OUTPUT)
+          && (inst->DstReg.Index != VERT_RESULT_HPOS)
+          && c->output_regs[inst->DstReg.Index].used_in_src) {
+         brw_MOV(p, get_dst(c, inst->DstReg), dst);
+      }
+
+      /* Result color clamping.
+       *
+       * When the destination register is an output register holding
+       * a primary/secondary front/back color, we have to clamp
+       * the result to [0,1]. This is done by enabling the
+       * saturation bit for the last instruction.
+       *
+       * We don't use brw_set_saturate() as it modifies
+       * p->current->header.saturate, which affects all the subsequent
+       * instructions. Instead, we directly modify the header
+       * of the last (already stored) instruction.
+       */
+      if (inst->DstReg.File == PROGRAM_OUTPUT) {
+         if ((inst->DstReg.Index == VERT_RESULT_COL0)
+             || (inst->DstReg.Index == VERT_RESULT_COL1)
+             || (inst->DstReg.Index == VERT_RESULT_BFC0)
+             || (inst->DstReg.Index == VERT_RESULT_BFC1)) {
+            p->store[p->nr_insn-1].header.saturate = 1;
+         }
+      }
+
       release_tmps(c);
    }
 
+   end_inst = &p->store[p->nr_insn];
    emit_vertex_write(c);
-
+   post_vs_emit(c, end_inst);
+   for (insn = 0; insn < nr_insns; insn++)
+       c->vp->program.Base.Instructions[insn].Data = NULL;
 }
-
-
-
-
-
diff --git a/i965/brw_vs_state.c b/i965/brw_vs_state.c
index c225bf8..2a64f3d 100644
--- a/i965/brw_vs_state.c
+++ b/i965/brw_vs_state.c
@@ -36,60 +36,110 @@
 #include "brw_defines.h"
 #include "macros.h"
 
-static void upload_vs_unit( struct brw_context *brw )
-{
-   struct brw_vs_unit_state vs;
-
-   memset(&vs, 0, sizeof(vs));
-
-   /* CACHE_NEW_VS_PROG */
-   vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6;
-   vs.thread0.grf_reg_count = ((brw->vs.prog_data->total_grf-1) & ~15) / 16;
-   vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length;
-   vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length;
-   vs.thread3.dispatch_grf_start_reg = 1;
+struct brw_vs_unit_key {
+   unsigned int total_grf;
+   unsigned int urb_entry_read_length;
+   unsigned int curb_entry_read_length;
 
+   unsigned int curbe_offset;
 
-   /* BRW_NEW_URB_FENCE  */
-   vs.thread4.nr_urb_entries = brw->urb.nr_vs_entries; 
-   vs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
-   vs.thread4.max_threads = MIN2(
-      MAX2(0, (brw->urb.nr_vs_entries - 6) / 2 - 1), 
-      15);
+   unsigned int nr_urb_entries, urb_size;
+};
 
+static void
+vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key)
+{
+   memset(key, 0, sizeof(*key));
 
+   /* CACHE_NEW_VS_PROG */
+   key->total_grf = brw->vs.prog_data->total_grf;
+   key->urb_entry_read_length = brw->vs.prog_data->urb_read_length;
+   key->curb_entry_read_length = brw->vs.prog_data->curb_read_length;
 
-   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
-      vs.thread4.max_threads = 0; 
+   /* BRW_NEW_URB_FENCE */
+   key->nr_urb_entries = brw->urb.nr_vs_entries;
+   key->urb_size = brw->urb.vsize;
 
    /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */
    if (brw->attribs.Transform->ClipPlanesEnabled) {
       /* Note that we read in the userclip planes as well, hence
        * clip_start:
        */
-      vs.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2;
+      key->curbe_offset = brw->curbe.clip_start;
    }
    else {
-      vs.thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2;
+      key->curbe_offset = brw->curbe.vs_start;
    }
+}
+
+static dri_bo *
+vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
+{
+   struct brw_vs_unit_state vs;
+   dri_bo *bo;
+
+   memset(&vs, 0, sizeof(vs));
 
+   vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */
+   vs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
    vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+   vs.thread3.urb_entry_read_length = key->urb_entry_read_length;
+   vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
+   vs.thread3.dispatch_grf_start_reg = 1;
    vs.thread3.urb_entry_read_offset = 0;
+   vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
+
+   vs.thread4.nr_urb_entries = key->nr_urb_entries;
+   vs.thread4.urb_entry_allocation_size = key->urb_size - 1;
+   vs.thread4.max_threads = MIN2(MAX2(0, (key->nr_urb_entries - 6) / 2 - 1),
+				 15);
+
+   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+      vs.thread4.max_threads = 0;
 
    /* No samplers for ARB_vp programs:
     */
    vs.vs5.sampler_count = 0;
 
    if (INTEL_DEBUG & DEBUG_STATS)
-      vs.thread4.stats_enable = 1; 
+      vs.thread4.stats_enable = 1;
 
-   /* Vertex program always enabled: 
+   /* Vertex program always enabled:
     */
    vs.vs6.vs_enable = 1;
 
-   brw->vs.state_gs_offset = brw_cache_data( &brw->cache[BRW_VS_UNIT], &vs );
+   bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT,
+			 key, sizeof(*key),
+			 &brw->vs.prog_bo, 1,
+			 &vs, sizeof(vs),
+			 NULL, NULL);
+
+   /* Emit VS program relocation */
+   dri_emit_reloc(bo,
+		  DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		  vs.thread0.grf_reg_count << 1,
+		  offsetof(struct brw_vs_unit_state, thread0),
+		  brw->vs.prog_bo);
+
+   return bo;
 }
 
+static int prepare_vs_unit( struct brw_context *brw )
+{
+   struct brw_vs_unit_key key;
+
+   vs_unit_populate_key(brw, &key);
+
+   dri_bo_unreference(brw->vs.state_bo);
+   brw->vs.state_bo = brw_search_cache(&brw->cache, BRW_VS_UNIT,
+				       &key, sizeof(key),
+				       &brw->vs.prog_bo, 1,
+				       NULL);
+   if (brw->vs.state_bo == NULL) {
+      brw->vs.state_bo = vs_unit_create_from_key(brw, &key);
+   }
+   return dri_bufmgr_check_aperture_space(brw->vs.state_bo);
+}
 
 const struct brw_tracked_state brw_vs_unit = {
    .dirty = {
@@ -98,5 +148,5 @@ const struct brw_tracked_state brw_vs_unit = {
 		BRW_NEW_URB_FENCE),
       .cache = CACHE_NEW_VS_PROG
    },
-   .update = upload_vs_unit
+   .prepare = prepare_vs_unit,
 };
diff --git a/i965/brw_vs_tnl.c b/i965/brw_vs_tnl.c
index 14483b3..e409620 100644
--- a/i965/brw_vs_tnl.c
+++ b/i965/brw_vs_tnl.c
@@ -404,7 +404,7 @@ static struct ureg register_const4f( struct tnl_program *p,
    values[3] = s3;
    idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
                                      &swizzle);
-   /* XXX what about swizzle? */
+   assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
    return make_ureg(PROGRAM_STATE_VAR, idx);
 }
 
@@ -524,10 +524,13 @@ static void emit_op3fn(struct tnl_program *p,
    GLuint nr = p->program->Base.NumInstructions++;
       
    if (nr >= p->nr_instructions) {
+      int new_nr_instructions = p->nr_instructions * 2;
+
       p->program->Base.Instructions = 
 	 _mesa_realloc(p->program->Base.Instructions,
 		       sizeof(struct prog_instruction) * p->nr_instructions,
-		       sizeof(struct prog_instruction) * (p->nr_instructions *= 2));
+		       sizeof(struct prog_instruction) * new_nr_instructions);
+      p->nr_instructions = new_nr_instructions;
    }
 
    {      
@@ -1000,16 +1003,16 @@ static void build_lighting( struct tnl_program *p )
 					       STATE_POSITION); 
 	    struct ureg V = get_eye_position(p);
 	    struct ureg dist = get_temp(p);
-       struct ureg tmpPpli = get_temp(p);
+	    struct ureg tmpPpli = get_temp(p);
 
 	    VPpli = get_temp(p); 
 	    half = get_temp(p);
 
-       /* In homogeneous object coordinates
-        */
-       emit_op1(p, OPCODE_RCP, dist, 0, swizzle1(Ppli, W));
-       emit_op2(p, OPCODE_MUL, tmpPpli, 0, Ppli, dist);
-
+	    /* In homogeneous object coordinates
+	     */
+	    emit_op1(p, OPCODE_RCP, dist, 0, swizzle1(Ppli, W));
+	    emit_op2(p, OPCODE_MUL, tmpPpli, 0, Ppli, dist);
+ 
 	    /* Calulate VPpli vector
 	     */
 	    emit_op2(p, OPCODE_SUB, VPpli, 0, tmpPpli, V); 
@@ -1044,7 +1047,7 @@ static void build_lighting( struct tnl_program *p )
 	    emit_normalize_vec3(p, half, half);
 
 	    release_temp(p, dist);
-       release_temp(p, tmpPpli);
+	    release_temp(p, tmpPpli);
 	 }
 
 	 /* Calculate dot products:
@@ -1161,12 +1164,19 @@ static void build_fog( struct tnl_program *p )
 {
    struct ureg fog = register_output(p, VERT_RESULT_FOGC);
    struct ureg input;
-   
+   GLuint useabs = p->state->fog_source_is_depth && p->state->fog_option &&
+		   (p->state->fog_option != FOG_EXP2);
+
    if (p->state->fog_source_is_depth) {
       input = swizzle1(get_eye_position(p), Z);
    }
    else {
       input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X);
+      if (p->state->fog_option &&
+	  p->state->tnl_do_vertex_fog)
+	  input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X);
+      else
+	  input = register_input(p, VERT_ATTRIB_FOG);
    }
 
    if (p->state->fog_option &&
@@ -1178,26 +1188,30 @@ static void build_fog( struct tnl_program *p )
 
       emit_op1(p, OPCODE_MOV, fog, 0, id);
 
+      if (useabs) {
+	 emit_op1(p, OPCODE_ABS, tmp, 0, input);
+      }
+
       switch (p->state->fog_option) {
       case FOG_LINEAR: {
-	 emit_op1(p, OPCODE_ABS, tmp, 0, input);
-	 emit_op3(p, OPCODE_MAD, tmp, 0, tmp, swizzle1(params,X), swizzle1(params,Y));
+	 emit_op3(p, OPCODE_MAD, tmp, 0, useabs ? tmp : input,
+			swizzle1(params,X), swizzle1(params,Y));
 	 emit_op2(p, OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */
 	 emit_op2(p, OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W));
 	 break;
       }
       case FOG_EXP:
-	 emit_op1(p, OPCODE_ABS, tmp, 0, input); 
-	 emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,Z));
+	 emit_op2(p, OPCODE_MUL, tmp, 0, useabs ? tmp : input,
+			swizzle1(params,Z));
 	 emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, ureg_negate(tmp));
 	 break;
       case FOG_EXP2:
 	 emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,W));
-	 emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp); 
+	 emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp);
 	 emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, ureg_negate(tmp));
 	 break;
       }
-      
+
       release_temp(p, tmp);
    }
    else {
@@ -1205,7 +1219,7 @@ static void build_fog( struct tnl_program *p )
        *
        * KW:  Is it really necessary to do anything in this case?
        */
-      emit_op1(p, OPCODE_MOV, fog, 0, input);
+      emit_op1(p, useabs ? OPCODE_ABS : OPCODE_MOV, fog, 0, input);
    }
 }
  
@@ -1567,7 +1581,7 @@ static GLuint hash_key( struct state_key *key )
    return hash;
 }
 
-static void update_tnl_program( struct brw_context *brw )
+static int prepare_tnl_program( struct brw_context *brw )
 {
    GLcontext *ctx = &brw->intel.ctx;
    struct state_key key;
@@ -1575,8 +1589,8 @@ static void update_tnl_program( struct brw_context *brw )
    struct gl_vertex_program *old = brw->tnl_program;
 
    /* _NEW_PROGRAM */
-   if (brw->attribs.VertexProgram->_Enabled) 
-      return;
+   if (brw->attribs.VertexProgram->_Current) 
+      return 0;
       
    /* Grab all the relevent state and put it in a single structure:
     */
@@ -1609,6 +1623,7 @@ static void update_tnl_program( struct brw_context *brw )
 
    if (old != brw->tnl_program)
       brw->state.dirty.brw |= BRW_NEW_TNL_PROGRAM;
+   return 0;
 }
 
 /* Note: See brw_draw.c - the vertex program must not rely on
@@ -1622,24 +1637,25 @@ const struct brw_tracked_state brw_tnl_vertprog = {
 	       _NEW_FOG | 
 	       _NEW_HINT | 
 	       _NEW_POINT | 
-	       _NEW_TEXTURE),
+	       _NEW_TEXTURE |
+          _NEW_TEXTURE_MATRIX),
       .brw = (BRW_NEW_FRAGMENT_PROGRAM | 
 	      BRW_NEW_INPUT_VARYING),
       .cache = 0
    },
-   .update = update_tnl_program
+   .prepare = prepare_tnl_program
 };
 
 
 
 
-static void update_active_vertprog( struct brw_context *brw )
+static int prepare_active_vertprog( struct brw_context *brw )
 {
    const struct gl_vertex_program *prev = brw->vertex_program;
 
    /* NEW_PROGRAM */
-   if (brw->attribs.VertexProgram->_Enabled) {
-      brw->vertex_program = brw->attribs.VertexProgram->Current;
+   if (brw->attribs.VertexProgram->_Current) {
+      brw->vertex_program = brw->attribs.VertexProgram->_Current;
    }
    else {
       /* BRW_NEW_TNL_PROGRAM */
@@ -1648,6 +1664,8 @@ static void update_active_vertprog( struct brw_context *brw )
 
    if (brw->vertex_program != prev) 
       brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+
+   return 0;
 }
 
 
@@ -1658,7 +1676,7 @@ const struct brw_tracked_state brw_active_vertprog = {
       .brw = BRW_NEW_TNL_PROGRAM,
       .cache = 0
    },
-   .update = update_active_vertprog
+   .prepare = prepare_active_vertprog
 };
 
 
diff --git a/i965/brw_vtbl.c b/i965/brw_vtbl.c
index 786f30e..31e96a2 100644
--- a/i965/brw_vtbl.c
+++ b/i965/brw_vtbl.c
@@ -47,10 +47,9 @@
 
 #include "brw_draw.h"
 #include "brw_state.h"
-#include "brw_aub.h"
 #include "brw_fallback.h"
 #include "brw_vs.h"
-
+#include <stdarg.h>
 
 
 /* called from intelDestroyContext()
@@ -60,8 +59,6 @@ static void brw_destroy_context( struct intel_context *intel )
    GLcontext *ctx = &intel->ctx;
    struct brw_context *brw = brw_context(&intel->ctx);
 
-   brw_aub_destroy(brw);
-
    brw_destroy_metaops(brw);
    brw_destroy_state(brw);
    brw_draw_destroy( brw );
@@ -73,38 +70,54 @@ static void brw_destroy_context( struct intel_context *intel )
 /* called from intelDrawBuffer()
  */
 static void brw_set_draw_region( struct intel_context *intel, 
-				  struct intel_region *draw_region,
-				  struct intel_region *depth_region)
+				  struct intel_region *draw_regions[],
+				  struct intel_region *depth_region,
+				GLuint num_regions)
 {
    struct brw_context *brw = brw_context(&intel->ctx);
-
-   intel_region_release(intel, &brw->state.draw_region);
-   intel_region_release(intel, &brw->state.depth_region);
-   intel_region_reference(&brw->state.draw_region, draw_region);
+   int i;
+   if (brw->state.depth_region != depth_region)
+      brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER;
+   for (i = 0; i < brw->state.nr_draw_regions; i++)
+       intel_region_release(&brw->state.draw_regions[i]);
+   intel_region_release(&brw->state.depth_region);
+   for (i = 0; i < num_regions; i++)
+       intel_region_reference(&brw->state.draw_regions[i], draw_regions[i]);
    intel_region_reference(&brw->state.depth_region, depth_region);
+   brw->state.nr_draw_regions = num_regions;
 }
 
 
 /* called from intelFlushBatchLocked
  */
-static void brw_lost_hardware( struct intel_context *intel )
+static void brw_new_batch( struct intel_context *intel )
 {
    struct brw_context *brw = brw_context(&intel->ctx);
 
-   /* Note that we effectively lose the context after this.
-    * 
-    * Setting this flag provokes a state buffer wrap and also flushes
-    * the hardware caches.
-    */
-   brw->state.dirty.brw |= BRW_NEW_CONTEXT;
+   /* Check that we didn't just wrap our batchbuffer at a bad time. */
+   assert(!brw->no_batch_wrap);
+
+   dri_bo_unreference(brw->curbe.curbe_bo);
+   brw->curbe.curbe_bo = NULL;
 
-   /* Which means there shouldn't be any commands already queued:
+   /* Mark all context state as needing to be re-emitted.
+    * This is probably not as severe as on 915, since almost all of our state
+    * is just in referenced buffers.
     */
-   assert(intel->batch->ptr == intel->batch->map + intel->batch->offset);
+   brw->state.dirty.brw |= BRW_NEW_CONTEXT;
 
    brw->state.dirty.mesa |= ~0;
    brw->state.dirty.brw |= ~0;
    brw->state.dirty.cache |= ~0;
+
+   /* Release the current upload buffer and reset its offset so that a new
+    * buffer is chosen next time.
+    */
+   if (brw->vb.upload.bo != NULL) {
+      dri_bo_unreference(brw->vb.upload.bo);
+      brw->vb.upload.bo = NULL;
+      brw->vb.upload.offset = 0;
+   }
 }
 
 static void brw_note_fence( struct intel_context *intel, 
@@ -115,10 +128,9 @@ static void brw_note_fence( struct intel_context *intel,
  
 static void brw_note_unlock( struct intel_context *intel )
 {
-  struct brw_context *brw = brw_context(&intel->ctx);
+   struct brw_context *brw = brw_context(&intel->ctx);
 
-   brw_pool_check_wrap(brw, &brw->pool[BRW_GS_POOL]);
-   brw_pool_check_wrap(brw, &brw->pool[BRW_SS_POOL]);
+   brw_state_cache_check_size(brw);
 
    brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_LOCK;
 }
@@ -156,9 +168,6 @@ static GLuint brw_flush_cmd( void )
    return *(GLuint *)&flush;
 }
 
-
-
-
 static void brw_invalidate_state( struct intel_context *intel, GLuint new_state )
 {
    /* nothing */
@@ -176,10 +185,11 @@ void brwInitVtbl( struct brw_context *brw )
    brw->intel.vtbl.invalidate_state = brw_invalidate_state; 
    brw->intel.vtbl.note_fence = brw_note_fence; 
    brw->intel.vtbl.note_unlock = brw_note_unlock; 
-   brw->intel.vtbl.lost_hardware = brw_lost_hardware;
+   brw->intel.vtbl.new_batch = brw_new_batch;
    brw->intel.vtbl.destroy = brw_destroy_context;
    brw->intel.vtbl.set_draw_region = brw_set_draw_region;
    brw->intel.vtbl.flush_cmd = brw_flush_cmd;
    brw->intel.vtbl.emit_flush = brw_emit_flush;
+   brw->intel.vtbl.debug_batch = brw_debug_batch;
 }
 
diff --git a/i965/brw_wm.c b/i965/brw_wm.c
index f80ba17..a470a25 100644
--- a/i965/brw_wm.c
+++ b/i965/brw_wm.c
@@ -29,12 +29,11 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
              
-
+#include "main/texformat.h"
 #include "brw_context.h"
 #include "brw_util.h"
 #include "brw_wm.h"
 #include "brw_state.h"
-#include "brw_hal.h"
 
 
 GLuint brw_wm_nr_args( GLuint opcode )
@@ -66,7 +65,11 @@ GLuint brw_wm_nr_args( GLuint opcode )
    case OPCODE_POW:
    case OPCODE_SUB:
    case OPCODE_SGE:
+   case OPCODE_SGT:
+   case OPCODE_SLE:
    case OPCODE_SLT:
+   case OPCODE_SEQ:
+   case OPCODE_SNE:
    case OPCODE_ADD:
    case OPCODE_MAX:
    case OPCODE_MIN:
@@ -116,20 +119,6 @@ GLuint brw_wm_is_scalar_result( GLuint opcode )
 }
 
 
-static void brw_wm_pass_hal (struct brw_wm_compile *c)
-{
-   static void (*hal_wm_pass) (struct brw_wm_compile *c);
-   static GLboolean hal_tried;
-   
-   if (!hal_tried)
-   {
-      hal_wm_pass = brw_hal_find_symbol ("intel_hal_wm_pass");
-      hal_tried = 1;
-   }
-   if (hal_wm_pass)
-      (*hal_wm_pass) (c);
-}
-
 static void do_wm_prog( struct brw_context *brw,
 			struct brw_fragment_program *fp, 
 			struct brw_wm_prog_key *key)
@@ -150,59 +139,53 @@ static void do_wm_prog( struct brw_context *brw,
    c->fp = fp;
    c->env_param = brw->intel.ctx.FragmentProgram.Parameters;
 
-   /* Augment fragment program.  Add instructions for pre- and
-    * post-fragment-program tasks such as interpolation and fogging.
-    */
-   brw_wm_pass_fp(c);
-   
-   /* Translate to intermediate representation.  Build register usage
-    * chains.
-    */
-   brw_wm_pass0(c);
-
-   /* Dead code removal.
-    */
-   brw_wm_pass1(c);
-
-   /* Hal optimization
-    */
-   brw_wm_pass_hal (c);
-   
-   /* Register allocation.
-    */
-   c->grf_limit = BRW_WM_MAX_GRF/2;
-
-   /* This is where we start emitting gen4 code:
-    */
-   brw_init_compile(brw, &c->func);    
-
-   brw_wm_pass2(c);
-
-   c->prog_data.total_grf = c->max_wm_grf;
-   if (c->last_scratch) {
-      c->prog_data.total_scratch =
-	 c->last_scratch + 0x40;
+    brw_init_compile(brw, &c->func);
+   if (brw_wm_is_glsl(&c->fp->program)) {
+       brw_wm_glsl_emit(brw, c);
    } else {
-      c->prog_data.total_scratch = 0;
+       /* Augment fragment program.  Add instructions for pre- and
+	* post-fragment-program tasks such as interpolation and fogging.
+	*/
+       brw_wm_pass_fp(c);
+
+       /* Translate to intermediate representation.  Build register usage
+	* chains.
+	*/
+       brw_wm_pass0(c);
+
+       /* Dead code removal.
+	*/
+       brw_wm_pass1(c);
+
+       /* Register allocation.
+	*/
+       c->grf_limit = BRW_WM_MAX_GRF/2;
+
+       brw_wm_pass2(c);
+
+       c->prog_data.total_grf = c->max_wm_grf;
+       if (c->last_scratch) {
+	   c->prog_data.total_scratch =
+	       c->last_scratch + 0x40;
+       } else {
+	   c->prog_data.total_scratch = 0;
+       }
+
+       /* Emit GEN4 code.
+	*/
+       brw_wm_emit(c);
    }
-
-   /* Emit GEN4 code.
-    */
-   brw_wm_emit(c);
-
    /* get the program
     */
    program = brw_get_program(&c->func, &program_size);
 
-   /*
-    */
-   brw->wm.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_WM_PROG],
-					      &c->key,
-					      sizeof(c->key),
-					      program,
-					      program_size,
-					      &c->prog_data,
-					      &brw->wm.prog_data );
+   dri_bo_unreference(brw->wm.prog_bo);
+   brw->wm.prog_bo = brw_upload_cache( &brw->cache, BRW_WM_PROG,
+				       &c->key, sizeof(c->key),
+				       NULL, 0,
+				       program, program_size,
+				       &c->prog_data,
+				       &brw->wm.prog_data );
 }
 
 
@@ -242,7 +225,8 @@ static void brw_wm_populate_key( struct brw_context *brw,
       lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
 
       if (brw->attribs.Stencil->WriteMask[0] ||
-	  (brw->attribs.Stencil->TestTwoSide && brw->attribs.Stencil->WriteMask[1]))
+	  (brw->attribs.Stencil->_TestTwoSide &&
+	   brw->attribs.Stencil->WriteMask[1]))
 	 lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
    }
 
@@ -284,7 +268,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
 
 
    /* BRW_NEW_WM_INPUT_DIMENSIONS */
-   key->projtex_mask = brw->wm.input_size_masks[4-1]; 
+   key->projtex_mask = brw->wm.input_size_masks[4-1] >> (FRAG_ATTRIB_TEX0 - FRAG_ATTRIB_WPOS); 
 
    /* _NEW_LIGHT */
    key->flat_shade = (brw->attribs.Light->ShadeModel == GL_FLAT);
@@ -295,17 +279,41 @@ static void brw_wm_populate_key( struct brw_context *brw,
       const struct gl_texture_object *t = unit->_Current;
 
       if (unit->_ReallyEnabled) {
-
-	 if (t->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB &&
-	     t->Image[0][t->BaseLevel]->_BaseFormat == GL_DEPTH_COMPONENT) {
-	    key->shadowtex_mask |= 1<<i;
-	 }
-
-	 if (t->Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA)
+	 if (t->Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA) {
 	    key->yuvtex_mask |= 1<<i;
+	    if (t->Image[0][t->BaseLevel]->TexFormat->MesaFormat == 
+		    MESA_FORMAT_YCBCR)
+		key->yuvtex_swap_mask |= 1<< i;
+	 }
       }
    }
-	  
+
+   /* Shadow */
+   key->shadowtex_mask = fp->program.Base.ShadowSamplers;
+
+   /* _NEW_BUFFERS */
+   /*
+    * Include the draw buffer origin and height so that we can calculate
+    * fragment position values relative to the bottom left of the drawable,
+    * from the incoming screen origin relative position we get as part of our
+    * payload.
+    *
+    * We could avoid recompiling by including this as a constant referenced by
+    * our program, but if we were to do that it would also be nice to handle
+    * getting that constant updated at batchbuffer submit time (when we
+    * hold the lock and know where the buffer really is) rather than at emit
+    * time when we don't hold the lock and are just guessing.  We could also
+    * just avoid using this as key data if the program doesn't use
+    * fragment.position.
+    *
+    * This pretty much becomes moot with DRI2 and redirected buffers anyway,
+    * as our origins will always be zero then.
+    */
+   if (brw->intel.driDrawable != NULL) {
+      key->origin_x = brw->intel.driDrawable->x;
+      key->origin_y = brw->intel.driDrawable->y;
+      key->drawable_height = brw->intel.driDrawable->h;
+   }
 
    /* Extra info:
     */
@@ -314,7 +322,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
 }
 
 
-static void brw_upload_wm_prog( struct brw_context *brw )
+static int brw_prepare_wm_prog( struct brw_context *brw )
 {
    struct brw_wm_prog_key key;
    struct brw_fragment_program *fp = (struct brw_fragment_program *)
@@ -324,13 +332,15 @@ static void brw_upload_wm_prog( struct brw_context *brw )
 
    /* Make an early check for the key.
     */
-   if (brw_search_cache(&brw->cache[BRW_WM_PROG], 
-			&key, sizeof(key),
-			&brw->wm.prog_data,
-			&brw->wm.prog_gs_offset))
-      return;
-
-   do_wm_prog(brw, fp, &key);
+   dri_bo_unreference(brw->wm.prog_bo);
+   brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG,
+				      &key, sizeof(key),
+				      NULL, 0,
+				      &brw->wm.prog_data);
+   if (brw->wm.prog_bo == NULL)
+      do_wm_prog(brw, fp, &key);
+
+   return dri_bufmgr_check_aperture_space(brw->wm.prog_bo);
 }
 
 
@@ -344,12 +354,13 @@ const struct brw_tracked_state brw_wm_prog = {
 		_NEW_POLYGON |
 		_NEW_LINE |
 		_NEW_LIGHT |
+		_NEW_BUFFERS |
 		_NEW_TEXTURE),
       .brw   = (BRW_NEW_FRAGMENT_PROGRAM |
 		BRW_NEW_WM_INPUT_DIMENSIONS |
 		BRW_NEW_REDUCED_PRIMITIVE),
       .cache = 0
    },
-   .update = brw_upload_wm_prog
+   .prepare = brw_prepare_wm_prog
 };
 
diff --git a/i965/brw_wm.h b/i965/brw_wm.h
index f5fddfd..297617e 100644
--- a/i965/brw_wm.h
+++ b/i965/brw_wm.h
@@ -34,9 +34,9 @@
 #define BRW_WM_H
 
 
+#include "shader/prog_instruction.h"
 #include "brw_context.h"
 #include "brw_eu.h"
-#include "prog_instruction.h"
 
 /* A big lookup table is used to figure out which and how many
  * additional regs will inserted before the main payload in the WM
@@ -69,9 +69,12 @@ struct brw_wm_prog_key {
    GLuint runtime_check_aads_emit:1;
    
    GLuint yuvtex_mask:8;
-   GLuint pad1:24;
+   GLuint yuvtex_swap_mask:8;	/* UV swapped */
+   GLuint pad1:16;
 
    GLuint program_string_id:32;
+   GLuint origin_x, origin_y;
+   GLuint drawable_height;
 };
 
 
@@ -140,6 +143,8 @@ struct brw_wm_instruction {
    GLuint writemask:4;
    GLuint tex_unit:4;   /* texture unit for TEX, TXD, TXP instructions */
    GLuint tex_idx:3;    /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */
+   GLuint eot:1;    	/* End of thread indicator for FB_WRITE*/
+   GLuint target:10;    /* target binding table index for FB_WRITE*/
 };
 
 
@@ -194,6 +199,8 @@ struct brw_wm_compile {
    GLuint nr_fp_insns;
    GLuint fp_temp;
    GLuint fp_interp_emitted;
+   GLuint fp_fragcolor_emitted;
+   GLuint fp_deriv_emitted;
 
    struct prog_src_register pixel_xy;
    struct prog_src_register delta_xy;
@@ -231,6 +238,15 @@ struct brw_wm_compile {
    GLuint grf_limit;
    GLuint max_wm_grf;
    GLuint last_scratch;
+
+   struct {
+	GLboolean inited;
+	struct brw_reg reg;
+   } wm_regs[PROGRAM_PAYLOAD+1][256][4];
+   struct brw_reg stack;
+   struct brw_reg emit_mask_reg;
+   GLuint reg_index;
+   GLuint tmp_index;
 };
 
 
@@ -259,4 +275,6 @@ void brw_wm_lookup_iz( GLuint line_aa,
 		       GLuint lookup,
 		       struct brw_wm_prog_key *key );
 
+GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
+void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c);
 #endif
diff --git a/i965/brw_wm_emit.c b/i965/brw_wm_emit.c
index 80bd576..9b919b9 100644
--- a/i965/brw_wm_emit.c
+++ b/i965/brw_wm_emit.c
@@ -39,7 +39,7 @@
 /* Not quite sure how correct this is - need to understand horiz
  * vs. vertical strides a little better.
  */
-static __inline struct brw_reg sechalf( struct brw_reg reg )
+static INLINE struct brw_reg sechalf( struct brw_reg reg )
 {
    if (reg.vstride)
       reg.nr++;
@@ -122,26 +122,30 @@ static void emit_delta_xy(struct brw_compile *p,
    }
 }
 
-static void emit_wpos_xy(struct brw_compile *p,
-			   const struct brw_reg *dst,
-			   GLuint mask,
-			   const struct brw_reg *arg0)
+static void emit_wpos_xy(struct brw_wm_compile *c,
+			 const struct brw_reg *dst,
+			 GLuint mask,
+			 const struct brw_reg *arg0)
 {
-   /* Calc delta X,Y by subtracting origin in r1 from the pixel
-    * centers.
+   struct brw_compile *p = &c->func;
+
+   /* Calculate the pixel offset from window bottom left into destination
+    * X and Y channels.
     */
    if (mask & WRITEMASK_X) {
-      brw_MOV(p,
+      /* X' = X - origin */
+      brw_ADD(p,
 	      dst[0],
-	      retype(arg0[0], BRW_REGISTER_TYPE_UW));
+	      retype(arg0[0], BRW_REGISTER_TYPE_W),
+	      brw_imm_d(0 - c->key.origin_x));
    }
 
    if (mask & WRITEMASK_Y) {
-      /* TODO -- window_height - Y */
-      brw_MOV(p,
+      /* Y' = height - (Y - origin_y) = height + origin_y - Y */
+      brw_ADD(p,
 	      dst[1],
-	      negate(retype(arg0[1], BRW_REGISTER_TYPE_UW)));
-
+	      negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
+	      brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
    }
 }
 
@@ -219,6 +223,10 @@ static void emit_pinterp( struct brw_compile *p,
       if (mask & (1<<i)) {
 	 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 	 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+      }
+   }
+   for(i = 0; i < 4; i++ ) {
+      if (mask & (1<<i)) {
 	 brw_MUL(p, dst[i], dst[i], w[3]);
       }
    }
@@ -229,20 +237,20 @@ static void emit_cinterp( struct brw_compile *p,
 			 GLuint mask,
 			 const struct brw_reg *arg0 )
 {
-   struct brw_reg interp[4];
-   GLuint nr = arg0[0].nr;
-   GLuint i;
-
-   interp[0] = brw_vec1_grf(nr, 0);
-   interp[1] = brw_vec1_grf(nr, 4);
-   interp[2] = brw_vec1_grf(nr+1, 0);
-   interp[3] = brw_vec1_grf(nr+1, 4);
-
-   for(i = 0; i < 4; i++ ) {
-      if (mask & (1<<i)) {
-	 brw_MOV(p, dst[i], suboffset(interp[i],3));	/* TODO: optimize away like other moves */
-      }
-   }
+	struct brw_reg interp[4];
+	GLuint nr = arg0[0].nr;
+	GLuint i;
+
+	interp[0] = brw_vec1_grf(nr, 0);
+	interp[1] = brw_vec1_grf(nr, 4);
+	interp[2] = brw_vec1_grf(nr+1, 0);
+	interp[3] = brw_vec1_grf(nr+1, 4);
+
+	for(i = 0; i < 4; i++ ) {
+		if (mask & (1<<i)) {
+			brw_MOV(p, dst[i], suboffset(interp[i],3));	/* TODO: optimize away like other moves */
+		}
+	}
 }
 
 
@@ -343,11 +351,10 @@ static void emit_lrp( struct brw_compile *p,
       }
    }
 }
-
-
-static void emit_slt( struct brw_compile *p, 
+static void emit_sop( struct brw_compile *p, 
 		      const struct brw_reg *dst,
 		      GLuint mask,
+		      GLuint cond,
 		      const struct brw_reg *arg0,
 		      const struct brw_reg *arg1 )
 {
@@ -356,34 +363,66 @@ static void emit_slt( struct brw_compile *p,
    for (i = 0; i < 4; i++) {
       if (mask & (1<<i)) {	
 	 brw_MOV(p, dst[i], brw_imm_f(0));
-	 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
+	 brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
 	 brw_MOV(p, dst[i], brw_imm_f(1.0));
 	 brw_set_predicate_control_flag_value(p, 0xff);
       }
    }
 }
 
-/* Isn't this just the same as the above with the args swapped?
- */
-static void emit_sge( struct brw_compile *p, 
+static void emit_slt( struct brw_compile *p, 
 		      const struct brw_reg *dst,
 		      GLuint mask,
 		      const struct brw_reg *arg0,
 		      const struct brw_reg *arg1 )
 {
-   GLuint i;
+	 emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
+}
 
-   for (i = 0; i < 4; i++) {
-      if (mask & (1<<i)) {	
-	 brw_MOV(p, dst[i], brw_imm_f(0));
-	 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], arg1[i]);
-	 brw_MOV(p, dst[i], brw_imm_f(1.0));
-	 brw_set_predicate_control_flag_value(p, 0xff);
-      }
-   }
+static void emit_sle( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1 )
+{
+	 emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
 }
 
+static void emit_sgt( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1 )
+{
+	 emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
+}
 
+static void emit_sge( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1 )
+{
+	 emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
+}
+
+static void emit_seq( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1 )
+{
+	 emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
+}
+
+static void emit_sne( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1 )
+{
+	 emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
+}
 
 static void emit_cmp( struct brw_compile *p, 
 		      const struct brw_reg *dst,
@@ -465,6 +504,9 @@ static void emit_dp3( struct brw_compile *p,
 		      const struct brw_reg *arg0,
 		      const struct brw_reg *arg1 )
 {
+   if (!(mask & WRITEMASK_XYZW))
+      return; /* Do not emit dead code*/
+
    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 
    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
@@ -482,6 +524,9 @@ static void emit_dp4( struct brw_compile *p,
 		      const struct brw_reg *arg0,
 		      const struct brw_reg *arg1 )
 {
+   if (!(mask & WRITEMASK_XYZW))
+      return; /* Do not emit dead code*/
+
    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 
    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
@@ -500,6 +545,9 @@ static void emit_dph( struct brw_compile *p,
 		      const struct brw_reg *arg0,
 		      const struct brw_reg *arg1 )
 {
+   if (!(mask & WRITEMASK_XYZW))
+      return; /* Do not emit dead code*/
+
    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 
    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
@@ -543,8 +591,11 @@ static void emit_math1( struct brw_compile *p,
 			GLuint mask,
 			const struct brw_reg *arg0 )
 {
-   assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
-	  function == BRW_MATH_FUNCTION_SINCOS);
+   if (!(mask & WRITEMASK_XYZW))
+      return; /* Do not emit dead code*/
+
+   //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
+   //	  function == BRW_MATH_FUNCTION_SINCOS);
    
    brw_MOV(p, brw_message_reg(2), arg0[0]);
 
@@ -567,6 +618,9 @@ static void emit_math2( struct brw_compile *p,
 			const struct brw_reg *arg0,
 			const struct brw_reg *arg1)
 {
+   if (!(mask & WRITEMASK_XYZW))
+      return; /* Do not emit dead code*/
+
    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 
    brw_push_insn_state(p);
@@ -661,7 +715,7 @@ static void emit_tex( struct brw_wm_compile *c,
 	      retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 	      1,
 	      retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
-	      inst->tex_unit + 1, /* surface */
+	      inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */
 	      inst->tex_unit,	  /* sampler */
 	      inst->writemask,
 	      (shadow ? 
@@ -670,7 +724,6 @@ static void emit_tex( struct brw_wm_compile *c,
 	      responseLength,
 	      msgLength,
 	      0);	
-
 }
 
 
@@ -712,7 +765,7 @@ static void emit_txb( struct brw_wm_compile *c,
 	      retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 	      1,
 	      retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
-	      inst->tex_unit + 1, /* surface */
+	      inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */
 	      inst->tex_unit,	  /* sampler */
 	      inst->writemask,
 	      BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
@@ -785,7 +838,9 @@ static void emit_kil( struct brw_wm_compile *c,
 
 static void fire_fb_write( struct brw_wm_compile *c,
 			   GLuint base_reg,
-			   GLuint nr )
+			   GLuint nr,
+			   GLuint target,
+			   GLuint eot )
 {
    struct brw_compile *p = &c->func;
    
@@ -808,10 +863,10 @@ static void fire_fb_write( struct brw_wm_compile *c,
 		retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
 		base_reg,
 		retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
-		0,		/* render surface always 0 */
+		target,		
 		nr,
 		0, 
-		1);
+		eot);
 }
 
 static void emit_aa( struct brw_wm_compile *c,
@@ -836,7 +891,9 @@ static void emit_aa( struct brw_wm_compile *c,
 static void emit_fb_write( struct brw_wm_compile *c,
 			   struct brw_reg *arg0,
 			   struct brw_reg *arg1,
-			   struct brw_reg *arg2)
+			   struct brw_reg *arg2,
+			   GLuint target,
+			   GLuint eot)
 {
    struct brw_compile *p = &c->func;
    GLuint nr = 2;
@@ -891,15 +948,16 @@ static void emit_fb_write( struct brw_wm_compile *c,
       GLuint off = c->key.dest_depth_reg % 2;
 
       if (off != 0) {
-	 brw_push_insn_state(p);
-	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-	 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
-	 /* 2nd half? */
-	 brw_MOV(p, brw_message_reg(nr+1), offset(arg1[comp],1));
-	 brw_pop_insn_state(p);
+         brw_push_insn_state(p);
+         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+         brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
+         /* 2nd half? */
+         brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
+         brw_pop_insn_state(p);
       }
       else {
-	 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
+         brw_MOV(p, brw_message_reg(nr), arg1[comp]);
       }
       nr += 2;
    }
@@ -909,7 +967,7 @@ static void emit_fb_write( struct brw_wm_compile *c,
       if (c->key.aa_dest_stencil_reg)
 	 emit_aa(c, arg1, 2);
 
-      fire_fb_write(c, 0, nr);
+      fire_fb_write(c, 0, nr, target, eot);
    }
    else {
       struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
@@ -926,14 +984,14 @@ static void emit_fb_write( struct brw_wm_compile *c,
       jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
       {
 	 emit_aa(c, arg1, 2);
-	 fire_fb_write(c, 0, nr);
+	 fire_fb_write(c, 0, nr, target, eot);
 	 /* note - thread killed in subroutine */
       }
       brw_land_fwd_jump(p, jmp);
 
       /* ELSE: Shuffle up one register to fill in the hole left for AA:
        */
-      fire_fb_write(c, 1, nr-1);
+      fire_fb_write(c, 1, nr-1, target, eot);
    }
 }
 
@@ -1081,7 +1139,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
 	 break;
 
       case WM_WPOSXY:
-	 emit_wpos_xy(p, dst, dst_flags, args[0]);
+	 emit_wpos_xy(c, dst, dst_flags, args[0]);
 	 break;
 
       case WM_PIXELW:
@@ -1101,7 +1159,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
 	 break;
 
       case WM_FB_WRITE:
-	 emit_fb_write(c, args[0], args[1], args[2]);
+	 emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
 	 break;
 
 	 /* Straightforward arithmetic:
@@ -1209,9 +1267,21 @@ void brw_wm_emit( struct brw_wm_compile *c )
 	 emit_slt(p, dst, dst_flags, args[0], args[1]);
 	 break;
 
+      case OPCODE_SLE:
+	 emit_sle(p, dst, dst_flags, args[0], args[1]);
+	break;
+      case OPCODE_SGT:
+	 emit_sgt(p, dst, dst_flags, args[0], args[1]);
+	break;
       case OPCODE_SGE:
 	 emit_sge(p, dst, dst_flags, args[0], args[1]);
 	 break;
+      case OPCODE_SEQ:
+	 emit_seq(p, dst, dst_flags, args[0], args[1]);
+	break;
+      case OPCODE_SNE:
+	 emit_sne(p, dst, dst_flags, args[0], args[1]);
+	break;
 
       case OPCODE_LIT:
 	 emit_lit(p, dst, dst_flags, args[0]);
@@ -1232,7 +1302,8 @@ void brw_wm_emit( struct brw_wm_compile *c )
 	 break;
 
       default:
-	 assert(0);
+	_mesa_printf("unsupport opcode %d in fragment program\n", 
+		inst->opcode);
       }
       
       for (i = 0; i < 4; i++)
diff --git a/i965/brw_wm_fp.c b/i965/brw_wm_fp.c
index dc57fd2..bc933fe 100644
--- a/i965/brw_wm_fp.c
+++ b/i965/brw_wm_fp.c
@@ -144,7 +144,7 @@ static struct prog_dst_register dst_undef( void )
 
 static struct prog_dst_register get_temp( struct brw_wm_compile *c )
 {
-   int bit = ffs( ~c->fp_temp );
+   int bit = _mesa_ffs( ~c->fp_temp );
 
    if (!bit) {
       _mesa_printf("%s: out of temporaries\n", __FILE__);
@@ -158,7 +158,7 @@ static struct prog_dst_register get_temp( struct brw_wm_compile *c )
 
 static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
 {
-   c->fp_temp &= ~1<<(temp.Index + 1 - FIRST_INTERNAL_TEMP);
+   c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
 }
 
 
@@ -176,6 +176,7 @@ static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
 {
    struct prog_instruction *inst = get_fp_inst(c);
    *inst = *inst0;
+   inst->Data = (void *)inst0;
    return inst;
 }
 
@@ -201,7 +202,6 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c,
    inst->SrcReg[0] = src0;
    inst->SrcReg[1] = src1;
    inst->SrcReg[2] = src2;
-   
    return inst;
 }
    
@@ -361,6 +361,37 @@ static void emit_interp( struct brw_wm_compile *c,
    c->fp_interp_emitted |= 1<<idx;
 }
 
+static void emit_ddx( struct brw_wm_compile *c,
+        const struct prog_instruction *inst )
+{
+    GLuint idx = inst->SrcReg[0].Index;
+    struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
+
+    c->fp_deriv_emitted |= 1<<idx;
+    emit_op(c,
+            OPCODE_DDX,
+            inst->DstReg,
+            0, 0, 0,
+            interp,
+            get_pixel_w(c),
+            src_undef());
+}
+
+static void emit_ddy( struct brw_wm_compile *c,
+        const struct prog_instruction *inst )
+{
+    GLuint idx = inst->SrcReg[0].Index;
+    struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
+
+    c->fp_deriv_emitted |= 1<<idx;
+    emit_op(c,
+            OPCODE_DDY,
+            inst->DstReg,
+            0, 0, 0,
+            interp,
+            get_pixel_w(c),
+            src_undef());
+}
 
 /***********************************************************************
  * Hacks to extend the program parameter and constant lists.
@@ -433,7 +464,7 @@ static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
    }
    
    idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
-   /* XXX what about swizzle? */
+   assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
    return src_reg(PROGRAM_STATE_VAR, idx);
 }
 
@@ -463,17 +494,20 @@ static void precalc_dst( struct brw_wm_compile *c,
 
 
    if (dst.WriteMask & WRITEMASK_XZ) {
+      struct prog_instruction *swz;
       GLuint z = GET_SWZ(src0.Swizzle, Z);
 
       /* dst.xz = swz src0.1zzz
        */
-      emit_op(c,
-	      OPCODE_SWZ,
-	      dst_mask(dst, WRITEMASK_XZ),
-	      inst->SaturateMode, 0, 0,
-	      src_swizzle(src0, SWIZZLE_ONE, z, z, z),
-	      src_undef(),
-	      src_undef());
+      swz = emit_op(c,
+		    OPCODE_SWZ,
+		    dst_mask(dst, WRITEMASK_XZ),
+		    inst->SaturateMode, 0, 0,
+		    src_swizzle(src0, SWIZZLE_ONE, z, z, z),
+		    src_undef(),
+		    src_undef());
+      /* Avoid letting negation flag of src0 affect our 1 constant. */
+      swz->SrcReg[0].NegateBase &= ~NEGATE_X;
    }
    if (dst.WriteMask & WRITEMASK_W) {
       /* dst.w = mov src1.w
@@ -496,15 +530,19 @@ static void precalc_lit( struct brw_wm_compile *c,
    struct prog_dst_register dst = inst->DstReg;
    
    if (dst.WriteMask & WRITEMASK_XW) {
+      struct prog_instruction *swz;
+
       /* dst.xw = swz src0.1111
        */
-      emit_op(c,
-	      OPCODE_SWZ,
-	      dst_mask(dst, WRITEMASK_XW),
-	      0, 0, 0,
-	      src_swizzle1(src0, SWIZZLE_ONE),
-	      src_undef(),
-	      src_undef());
+      swz = emit_op(c,
+		    OPCODE_SWZ,
+		    dst_mask(dst, WRITEMASK_XW),
+		    0, 0, 0,
+		    src_swizzle1(src0, SWIZZLE_ONE),
+		    src_undef(),
+		    src_undef());
+      /* Avoid letting the negation flag of src0 affect our 1 constant. */
+      swz->SrcReg[0].NegateBase = 0;
    }
 
 
@@ -524,6 +562,7 @@ static void precalc_tex( struct brw_wm_compile *c,
 {
    struct prog_src_register coord;
    struct prog_dst_register tmpcoord;
+   GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
 
    if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
        struct prog_instruction *out;
@@ -580,7 +619,7 @@ static void precalc_tex( struct brw_wm_compile *c,
 	 search_or_add_param5( c, 
 			       STATE_INTERNAL, 
 			       STATE_TEXRECT_SCALE,
-			       inst->TexSrcUnit,
+			       unit,
 			       0,0 );
 
       tmpcoord = get_temp(c);
@@ -606,29 +645,33 @@ static void precalc_tex( struct brw_wm_compile *c,
     * conversion requires allocating a temporary variable which we
     * don't have the facility to do that late in the compilation.
     */
-   if (!(c->key.yuvtex_mask & (1<<inst->TexSrcUnit))) {
+   if (!(c->key.yuvtex_mask & (1<<unit))) {
       emit_op(c, 
 	      OPCODE_TEX,
 	      inst->DstReg,
 	      inst->SaturateMode,
-	      inst->TexSrcUnit,
+	      unit,
 	      inst->TexSrcTarget,
 	      coord,
 	      src_undef(),
 	      src_undef());
    }
    else {
+       GLboolean  swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
+
       /* 
 	 CONST C0 = { -.5, -.0625,  -.5, 1.164 }
 	 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
 	 UYV     = TEX ...
 	 UYV.xyz = ADD UYV,     C0
 	 UYV.y   = MUL UYV.y,   C0.w
-	 RGB.xyz = MAD UYV.xxz, C1,   UYV.y
+ 	 if (UV swapped)
+	    RGB.xyz = MAD UYV.zzx, C1,   UYV.y
+	 else
+	    RGB.xyz = MAD UYV.xxz, C1,   UYV.y 
 	 RGB.y   = MAD UYV.z,   C1.w, RGB.y
       */
       struct prog_dst_register dst = inst->DstReg;
-      struct prog_src_register src0 = inst->SrcReg[0];
       struct prog_dst_register tmp = get_temp(c);
       struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
       struct prog_src_register C0 = search_or_add_const4f( c,  -.5, -.0625, -.5, 1.164 );
@@ -640,9 +683,9 @@ static void precalc_tex( struct brw_wm_compile *c,
 	      OPCODE_TEX,
 	      tmp,
 	      inst->SaturateMode,
-	      inst->TexSrcUnit,
+	      unit,
 	      inst->TexSrcTarget,
-	      src0,
+	      coord,
 	      src_undef(),
 	      src_undef());
 
@@ -658,6 +701,7 @@ static void precalc_tex( struct brw_wm_compile *c,
 
       /* YUV.y   = MUL YUV.y, C0.w
        */
+
       emit_op(c,
 	      OPCODE_MUL,
 	      dst_mask(tmp, WRITEMASK_Y),
@@ -666,13 +710,18 @@ static void precalc_tex( struct brw_wm_compile *c,
 	      src_swizzle1(C0, W),
 	      src_undef());
 
-      /* RGB.xyz = MAD YUV.xxz, C1, YUV.y
+      /* 
+       * if (UV swapped)
+       *     RGB.xyz = MAD YUV.zzx, C1, YUV.y
+       * else
+       *     RGB.xyz = MAD YUV.xxz, C1, YUV.y
        */
+
       emit_op(c,
 	      OPCODE_MAD,
 	      dst_mask(dst, WRITEMASK_XYZ),
 	      0, 0, 0,
-	      src_swizzle(tmpsrc, X,X,Z,Z),
+	      swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
 	      C1,
 	      src_swizzle1(tmpsrc, Y));
 
@@ -689,7 +738,8 @@ static void precalc_tex( struct brw_wm_compile *c,
       release_temp(c, tmp);
    }
 
-   if (inst->TexSrcTarget == GL_TEXTURE_RECTANGLE_NV) 
+   if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
+       (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
       release_temp(c, tmpcoord);
 }
 
@@ -710,7 +760,7 @@ static GLboolean projtex( struct brw_wm_compile *c,
       return 0;  /* ut2004 gun rendering !?! */
    else if (src.File == PROGRAM_INPUT && 
 	    GET_SWZ(src.Swizzle, W) == W &&
-	    (c->key.projtex_mask & (1<<src.Index)) == 0)
+           (c->key.projtex_mask & (1<<(src.Index + FRAG_ATTRIB_WPOS - FRAG_ATTRIB_TEX0))) == 0)
       return 0;
    else
       return 1;
@@ -820,14 +870,34 @@ static void emit_fb_write( struct brw_wm_compile *c )
    struct prog_src_register outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
    struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
    struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR);
+   GLuint i;
 
-   emit_op(c,
-	   WM_FB_WRITE,
-	   dst_mask(dst_undef(),0),
-	   0, 0, 0,
-	   outcolor,
-	   payload_r0_depth,
-	   outdepth);
+   struct prog_instruction *inst, *last_inst;
+   struct brw_context *brw = c->func.brw;
+
+   /* inst->Sampler is not used by backend, 
+      use it for fb write target and eot */
+
+   if (brw->state.nr_draw_regions > 1) {
+       for (i = 0 ; i < brw->state.nr_draw_regions; i++) {
+	   outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
+	   last_inst = inst = emit_op(c,
+		   WM_FB_WRITE, dst_mask(dst_undef(),0), 0, 0, 0,
+		   outcolor, payload_r0_depth, outdepth);
+	   inst->Sampler = (i<<1);
+	   if (c->fp_fragcolor_emitted) {
+	       outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR);
+	       last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
+		       0, 0, 0, outcolor, payload_r0_depth, outdepth);
+	       inst->Sampler = (i<<1);
+	   }
+       }
+       last_inst->Sampler |= 1; //eot
+   }else {
+       inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
+	       0, 0, 0, outcolor, payload_r0_depth, outdepth);
+       inst->Sampler = 1|(0<<1);
+   }
 }
 
 
@@ -853,7 +923,15 @@ static void validate_src_regs( struct brw_wm_compile *c,
    }
 }
 	 
-
+static void validate_dst_regs( struct brw_wm_compile *c,
+			       const struct prog_instruction *inst )
+{
+   if (inst->DstReg.File == PROGRAM_OUTPUT) {
+       GLuint idx = inst->DstReg.Index;
+       if (idx == FRAG_RESULT_COLR)
+	   c->fp_fragcolor_emitted = 1;
+   }
+}
 
 static void print_insns( const struct prog_instruction *insn,
 			 GLuint nr )
@@ -898,12 +976,16 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
 
    for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
       const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
+      validate_src_regs(c, inst);
+      validate_dst_regs(c, inst);
+   }
+   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
+      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
       struct prog_instruction *out;
 
       /* Check for INPUT values, emit INTERP instructions where
        * necessary:
        */
-      validate_src_regs(c, inst);
 
 
       switch (inst->Opcode) {
@@ -939,11 +1021,20 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
       case OPCODE_LIT:
 	 precalc_lit(c, inst);
 	 break;
-     
+
+      case OPCODE_TEX:
+	 precalc_tex(c, inst);
+	 break;
+
       case OPCODE_TXP:
 	 precalc_txp(c, inst);
 	 break;
 
+      case OPCODE_TXB:
+	 out = emit_insn(c, inst);
+	 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
+	 break;
+
       case OPCODE_XPD: 
 	 out = emit_insn(c, inst);
 	 /* This should probably be done in the parser. 
@@ -957,8 +1048,16 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
 	  */
 	 out->DstReg.WriteMask = 0;
 	 break;
-
+      case OPCODE_DDX:
+	 emit_ddx(c, inst);
+	 break;
+      case OPCODE_DDY:
+         emit_ddy(c, inst);
+	break;
       case OPCODE_END:
+	 emit_fog(c);
+	 emit_fb_write(c);
+	 break;
       case OPCODE_PRINT:
 	 break;
 	 
@@ -967,15 +1066,11 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
 	 break;
       }
    }
-   
-   emit_fog(c);
-   emit_fb_write(c);
-
 
    if (INTEL_DEBUG & DEBUG_WM) {
-      _mesa_printf("\n\n\npass_fp:\n");
-      print_insns( c->prog_instructions, c->nr_fp_insns );
-      _mesa_printf("\n");
+	   _mesa_printf("\n\n\npass_fp:\n");
+	   print_insns( c->prog_instructions, c->nr_fp_insns );
+	   _mesa_printf("\n");
    }
 }
 
diff --git a/i965/brw_wm_glsl.c b/i965/brw_wm_glsl.c
new file mode 100644
index 0000000..305100f
--- /dev/null
+++ b/i965/brw_wm_glsl.c
@@ -0,0 +1,1375 @@
+#include "macros.h"
+#include "shader/prog_parameter.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+
+/* Only guess, need a flag in gl_fragment_program later */
+GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
+{
+    int i;
+    for (i = 0; i < fp->Base.NumInstructions; i++) {
+	struct prog_instruction *inst = &fp->Base.Instructions[i];
+	switch (inst->Opcode) {
+	    case OPCODE_IF:
+	    case OPCODE_INT:
+	    case OPCODE_ENDIF:
+	    case OPCODE_CAL:
+	    case OPCODE_BRK:
+	    case OPCODE_RET:
+	    case OPCODE_DDX:
+	    case OPCODE_DDY:
+	    case OPCODE_BGNLOOP:
+		return GL_TRUE; 
+	    default:
+		break;
+	}
+    }
+    return GL_FALSE; 
+}
+
+static void set_reg(struct brw_wm_compile *c, int file, int index, 
+	int component, struct brw_reg reg)
+{
+    c->wm_regs[file][index][component].reg = reg;
+    c->wm_regs[file][index][component].inited = GL_TRUE;
+}
+
+static int get_scalar_dst_index(struct prog_instruction *inst)
+{   /* Returns the index of the lowest component enabled in the destination writemask. */
+    int i;
+    for (i = 0; i < 4; i++)
+	if (inst->DstReg.WriteMask & (1<<i))
+	    break;
+    return i; /* NOTE: this is 4 when no writemask bit is set */
+}
+
+static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
+{   /* Hands out the register numbered c->tmp_index and steps the index downward. */
+    struct brw_reg reg;
+    reg = brw_vec8_grf(c->tmp_index--, 0);
+    return reg;
+}
+
+static void release_tmps(struct brw_wm_compile *c)
+{   /* Drops all temporaries by resetting the temporary index to 127. */
+    c->tmp_index = 127;
+}
+
+static struct brw_reg 
+get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GLuint neg, GLuint abs)
+{   /* Returns the register for (file, index, component), assigning one on first use; nr is unused. */
+    struct brw_reg reg;
+    switch (file) {
+	case PROGRAM_STATE_VAR:
+	case PROGRAM_CONSTANT:
+	case PROGRAM_UNIFORM:
+	    file = PROGRAM_STATE_VAR; /* constants and uniforms share the state-var slots */
+	    break;
+	case PROGRAM_UNDEFINED:
+	    return brw_null_reg();	
+	default:
+	    break;
+    }
+
+    if(c->wm_regs[file][index][component].inited)
+	reg = c->wm_regs[file][index][component].reg;
+    else 
+	reg = brw_vec8_grf(c->reg_index, 0); /* not seen before: use the register at c->reg_index */
+
+    if(!c->wm_regs[file][index][component].inited) {
+	set_reg(c, file, index, component, reg);
+	c->reg_index++; /* that register is now taken */
+    }
+
+    if (neg & (1<< component)) { /* neg is tested as a per-component bitmask */
+	reg = negate(reg);
+    }
+    if (abs) /* abs is a single flag, not a per-component mask */
+	reg = brw_abs(reg);
+    return reg;
+}
+
+static void prealloc_reg(struct brw_wm_compile *c)
+{
+    int i, j;
+    struct brw_reg reg;
+    int nr_interp_regs = 0;
+    GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted;
+
+    for (i = 0; i < 4; i++) {
+	reg = (i < c->key.nr_depth_regs) 
+	    ? brw_vec8_grf(i*2, 0) : brw_vec8_grf(0, 0);
+	set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
+    }
+    c->reg_index += 2*c->key.nr_depth_regs;
+    {
+	int nr_params = c->fp->program.Base.Parameters->NumParameters;
+	struct gl_program_parameter_list *plist = 
+	    c->fp->program.Base.Parameters;
+	int index = 0;
+	c->prog_data.nr_params = 4*nr_params;
+	for (i = 0; i < nr_params; i++) {
+	    for (j = 0; j < 4; j++, index++) {
+		reg = brw_vec1_grf(c->reg_index + index/8, 
+			index%8);
+		c->prog_data.param[index] = 
+		    &plist->ParameterValues[i][j];
+		set_reg(c, PROGRAM_STATE_VAR, i, j, reg);
+	    }
+	}
+	c->nr_creg = 2*((4*nr_params+15)/16);
+	c->reg_index += c->nr_creg;
+    }
+    for (i = 0; i < FRAG_ATTRIB_MAX; i++) {
+	if (inputs & (1<<i)) {
+	    nr_interp_regs++;
+	    reg = brw_vec8_grf(c->reg_index, 0);
+	    for (j = 0; j < 4; j++)
+		set_reg(c, PROGRAM_PAYLOAD, i, j, reg);
+	    c->reg_index += 2;
+
+	}
+    }
+    c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
+    c->prog_data.urb_read_length = nr_interp_regs * 2;
+    c->prog_data.curb_read_length = c->nr_creg;
+    c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
+    c->reg_index++;
+    c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
+    c->reg_index += 2;
+}
+
+static struct brw_reg get_dst_reg(struct brw_wm_compile *c, 
+	struct prog_instruction *inst, int component, int nr)
+{
+    return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr,
+	    0, 0);
+}
+
+static struct brw_reg get_src_reg(struct brw_wm_compile *c, 
+	struct prog_src_register *src, int index, int nr)
+{
+    int component = GET_SWZ(src->Swizzle, index);
+    return get_reg(c, src->File, src->Index, component, nr, 
+	    src->NegateBase, src->Abs);
+}
+
+static void emit_abs( struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    int i;
+    struct brw_compile *p = &c->func;
+    brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+    for (i = 0; i < 4; i++) {
+	if (inst->DstReg.WriteMask & (1<<i)) {
+	    struct brw_reg src, dst;
+	    dst = get_dst_reg(c, inst, i, 1);
+	    src = get_src_reg(c, &inst->SrcReg[0], i, 1);
+	    brw_MOV(p, dst, brw_abs(src));
+	}
+    }
+    brw_set_saturate(p, 0);
+}
+
+static void emit_int( struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    int i;
+    struct brw_compile *p = &c->func;
+    GLuint mask = inst->DstReg.WriteMask;
+    brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+    for (i = 0; i < 4; i++) {
+	if (mask & (1<<i)) {
+	    struct brw_reg src, dst;
+	    dst = get_dst_reg(c, inst, i, 1) ;
+	    src = get_src_reg(c, &inst->SrcReg[0], i, 1);
+	    brw_RNDD(p, dst, src);
+	}
+    }
+    brw_set_saturate(p, 0);
+}
+
+static void emit_mov( struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    int i;
+    struct brw_compile *p = &c->func;
+    GLuint mask = inst->DstReg.WriteMask;
+    brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+    for (i = 0; i < 4; i++) {
+	if (mask & (1<<i)) {
+	    struct brw_reg src, dst;
+	    dst = get_dst_reg(c, inst, i, 1);
+	    src = get_src_reg(c, &inst->SrcReg[0], i, 1);
+	    brw_MOV(p, dst, src);
+	}
+    }
+    brw_set_saturate(p, 0);
+}
+
+static void emit_pixel_xy(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    struct brw_reg r1 = brw_vec1_grf(1, 0);
+    struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
+
+    struct brw_reg dst0, dst1;
+    struct brw_compile *p = &c->func;
+    GLuint mask = inst->DstReg.WriteMask;
+
+    dst0 = get_dst_reg(c, inst, 0, 1);
+    dst1 = get_dst_reg(c, inst, 1, 1);
+    /* Calculate pixel centers by adding 1 or 0 to each of the
+     * micro-tile coordinates passed in r1.
+     */
+    if (mask & WRITEMASK_X) {
+	brw_ADD(p,
+		vec8(retype(dst0, BRW_REGISTER_TYPE_UW)),
+		stride(suboffset(r1_uw, 4), 2, 4, 0),
+		brw_imm_v(0x10101010));
+    }
+
+    if (mask & WRITEMASK_Y) {
+	brw_ADD(p,
+		vec8(retype(dst1, BRW_REGISTER_TYPE_UW)),
+		stride(suboffset(r1_uw, 5), 2, 4, 0),
+		brw_imm_v(0x11001100));
+    }
+
+}
+
+static void emit_delta_xy(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    struct brw_reg r1 = brw_vec1_grf(1, 0);
+    struct brw_reg dst0, dst1, src0, src1;
+    struct brw_compile *p = &c->func;
+    GLuint mask = inst->DstReg.WriteMask;
+
+    dst0 = get_dst_reg(c, inst, 0, 1);
+    dst1 = get_dst_reg(c, inst, 1, 1);
+    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+    src1 = get_src_reg(c, &inst->SrcReg[0], 1, 1);
+    /* Calc delta X,Y by subtracting origin in r1 from the pixel
+     * centers.
+     */
+    if (mask & WRITEMASK_X) {
+	brw_ADD(p,
+		dst0,
+		retype(src0, BRW_REGISTER_TYPE_UW),
+		negate(r1));
+    }
+
+    if (mask & WRITEMASK_Y) {
+	brw_ADD(p,
+		dst1,
+		retype(src1, BRW_REGISTER_TYPE_UW),
+		negate(suboffset(r1,1)));
+
+    }
+
+}
+
+
+static void fire_fb_write( struct brw_wm_compile *c,
+                           GLuint base_reg,
+                           GLuint nr,
+                           GLuint target,
+                           GLuint eot)
+{
+    struct brw_compile *p = &c->func;
+    /* Pass through control information:
+     */
+    /*  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask } */
+    {
+	brw_push_insn_state(p);
+	brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
+	brw_MOV(p,
+		brw_message_reg(base_reg + 1),
+		brw_vec8_grf(1, 0));
+	brw_pop_insn_state(p);
+    }
+    /* Send framebuffer write message: */
+    brw_fb_WRITE(p,
+	    retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
+	    base_reg,
+	    retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+	    target,              
+	    nr,
+	    0,
+	    eot);
+}
+
+static void emit_fb_write(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    int nr = 2;
+    int channel;
+    GLuint target, eot;
+    struct brw_reg src0;
+
+    /* Reserve a space for AA - may not be needed:
+     */
+    if (c->key.aa_dest_stencil_reg)
+	nr += 1;
+    {
+	brw_push_insn_state(p);
+	for (channel = 0; channel < 4; channel++) {
+	    src0 = get_src_reg(c,  &inst->SrcReg[0], channel, 1);
+	    /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
+	    /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
+	    brw_MOV(p, brw_message_reg(nr + channel), src0);
+	}
+	/* skip over the regs populated above: */
+	nr += 8;
+	brw_pop_insn_state(p);
+    }
+
+   if (c->key.source_depth_to_render_target)
+   {
+      if (c->key.computes_depth) {
+         src0 = get_src_reg(c, &inst->SrcReg[2], 2, 1);
+         brw_MOV(p, brw_message_reg(nr), src0);
+      } else {
+         src0 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
+         brw_MOV(p, brw_message_reg(nr), src0);
+      }
+
+      nr += 2;
+   }
+    target = inst->Sampler >> 1;
+    eot = inst->Sampler & 1;
+    fire_fb_write(c, 0, nr, target, eot);
+}
+
+static void emit_pixel_w( struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    GLuint mask = inst->DstReg.WriteMask;
+    if (mask & WRITEMASK_W) {
+	struct brw_reg dst, src0, delta0, delta1;
+	struct brw_reg interp3;
+
+	dst = get_dst_reg(c, inst, 3, 1);
+	src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+	delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
+	delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
+
+	interp3 = brw_vec1_grf(src0.nr+1, 4);
+	/* Calc 1/w - just linterp wpos[3] optimized by putting the
+	 * result straight into a message reg.
+	 */
+	brw_LINE(p, brw_null_reg(), interp3, delta0);
+	brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1);
+
+	/* Calc w */
+	brw_math_16( p, dst,
+		BRW_MATH_FUNCTION_INV,
+		BRW_MATH_SATURATE_NONE,
+		2, brw_null_reg(),
+		BRW_MATH_PRECISION_FULL);
+    }
+}
+
+static void emit_linterp(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    GLuint mask = inst->DstReg.WriteMask;
+    struct brw_reg interp[4];
+    struct brw_reg dst, delta0, delta1;
+    struct brw_reg src0;
+
+    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+    delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
+    delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
+    GLuint nr = src0.nr;
+    int i;
+
+    interp[0] = brw_vec1_grf(nr, 0);
+    interp[1] = brw_vec1_grf(nr, 4);
+    interp[2] = brw_vec1_grf(nr+1, 0);
+    interp[3] = brw_vec1_grf(nr+1, 4);
+
+    for(i = 0; i < 4; i++ ) {
+	if (mask & (1<<i)) {
+	    dst = get_dst_reg(c, inst, i, 1);
+	    brw_LINE(p, brw_null_reg(), interp[i], delta0);
+	    brw_MAC(p, dst, suboffset(interp[i],1), delta1);
+	}
+    }
+}
+
+static void emit_cinterp(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    GLuint mask = inst->DstReg.WriteMask;
+
+    struct brw_reg interp[4];
+    struct brw_reg dst, src0;
+
+    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+    GLuint nr = src0.nr;
+    int i;
+
+    interp[0] = brw_vec1_grf(nr, 0);
+    interp[1] = brw_vec1_grf(nr, 4);
+    interp[2] = brw_vec1_grf(nr+1, 0);
+    interp[3] = brw_vec1_grf(nr+1, 4);
+
+    for(i = 0; i < 4; i++ ) {
+	if (mask & (1<<i)) {
+	    dst = get_dst_reg(c, inst, i, 1);
+	    brw_MOV(p, dst, suboffset(interp[i],3));
+	}
+    }
+}
+
+static void emit_pinterp(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    GLuint mask = inst->DstReg.WriteMask;
+
+    struct brw_reg interp[4];
+    struct brw_reg dst, delta0, delta1;
+    struct brw_reg src0, w;
+
+    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+    delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
+    delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
+    w = get_src_reg(c, &inst->SrcReg[2], 3, 1);
+    GLuint nr = src0.nr;
+    int i;
+
+    interp[0] = brw_vec1_grf(nr, 0);
+    interp[1] = brw_vec1_grf(nr, 4);
+    interp[2] = brw_vec1_grf(nr+1, 0);
+    interp[3] = brw_vec1_grf(nr+1, 4);
+
+    for(i = 0; i < 4; i++ ) {
+	if (mask & (1<<i)) {
+	    dst = get_dst_reg(c, inst, i, 1);
+	    brw_LINE(p, brw_null_reg(), interp[i], delta0);
+	    brw_MAC(p, dst, suboffset(interp[i],1), 
+		    delta1);
+	    brw_MUL(p, dst, dst, w);
+	}
+    }
+}
+
+static void emit_xpd(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{   /* XPD: writes the cross product of SrcReg[0] and SrcReg[1] to the enabled x/y/z channels. */
+    int i;
+    struct brw_compile *p = &c->func;
+    GLuint mask = inst->DstReg.WriteMask;
+    for (i = 0; i < 3; i++) { /* a cross product has no W term, so stop after Z */
+	GLuint i2 = (i+2)%3;
+	GLuint i1 = (i+1)%3;
+	if (mask & (1<<i)) {
+	    struct brw_reg src0, src1, dst;
+	    dst = get_dst_reg(c, inst, i, 1);
+	    src0 = negate(get_src_reg(c, &inst->SrcReg[0], i2, 1));
+	    src1 = get_src_reg(c, &inst->SrcReg[1], i1, 1);
+	    brw_MUL(p, brw_null_reg(), src0, src1); /* first term: -src0[i2] * src1[i1] */
+	    src0 = get_src_reg(c, &inst->SrcReg[0], i1, 1);
+	    src1 = get_src_reg(c, &inst->SrcReg[1], i2, 1);
+	    brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+	    brw_MAC(p, dst, src0, src1); /* dst = src0[i1]*src1[i2] - src0[i2]*src1[i1] */
+	    brw_set_saturate(p, 0);
+	}
+    }
+    brw_set_saturate(p, 0);
+}
+
+static void emit_dp3(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    struct brw_reg src0[3], src1[3], dst;
+    int i;
+    struct brw_compile *p = &c->func;
+    for (i = 0; i < 3; i++) {
+	src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
+	src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1);
+    }
+
+    dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
+    brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
+    brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
+    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+    brw_MAC(p, dst, src0[2], src1[2]);
+    brw_set_saturate(p, 0);
+}
+
+static void emit_dp4(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    struct brw_reg src0[4], src1[4], dst;
+    int i;
+    struct brw_compile *p = &c->func;
+    for (i = 0; i < 4; i++) {
+	src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
+	src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1);
+    }
+    dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
+    brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
+    brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
+    brw_MAC(p, brw_null_reg(), src0[2], src1[2]);
+    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+    brw_MAC(p, dst, src0[3], src1[3]);
+    brw_set_saturate(p, 0);
+}
+
+static void emit_dph(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{   /* DPH: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w */
+    struct brw_reg src0[4], src1[4], dst;
+    int i;
+    struct brw_compile *p = &c->func;
+    for (i = 0; i < 4; i++) {
+	src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
+	src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1);
+    }
+    dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
+    brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
+    brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
+    brw_MAC(p, dst, src0[2], src1[2]); /* same MUL/MAC/MAC sequence as emit_dp3 */
+    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+    brw_ADD(p, dst, dst, src1[3]); /* add src1.w to the dot product; src0.w is not used */
+    brw_set_saturate(p, 0);
+}
+
+static void emit_math1(struct brw_wm_compile *c,
+		struct prog_instruction *inst, GLuint func)
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg src0, dst;
+
+    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+    dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
+    brw_MOV(p, brw_message_reg(2), src0);
+    brw_math(p,
+	    dst,
+	    func,
+	    (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+	    2,
+	    brw_null_reg(),
+	    BRW_MATH_DATA_VECTOR,
+	    BRW_MATH_PRECISION_FULL);
+}
+
+static void emit_rcp(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    emit_math1(c, inst, BRW_MATH_FUNCTION_INV);
+}
+
+static void emit_rsq(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ);
+}
+
+static void emit_sin(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    emit_math1(c, inst, BRW_MATH_FUNCTION_SIN);
+}
+
+static void emit_cos(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    emit_math1(c, inst, BRW_MATH_FUNCTION_COS);
+}
+
+static void emit_ex2(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    emit_math1(c, inst, BRW_MATH_FUNCTION_EXP);
+}
+
+static void emit_lg2(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    emit_math1(c, inst, BRW_MATH_FUNCTION_LOG);
+}
+
+static void emit_add(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg src0, src1, dst;
+    GLuint mask = inst->DstReg.WriteMask;
+    int i;
+    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+    for (i = 0 ; i < 4; i++) {
+	if (mask & (1<<i)) {
+	    dst = get_dst_reg(c, inst, i, 1);
+	    src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+	    src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+	    brw_ADD(p, dst, src0, src1);
+	}
+    }
+    brw_set_saturate(p, 0);
+}
+
+static void emit_sub(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg src0, src1, dst;
+    GLuint mask = inst->DstReg.WriteMask;
+    int i;
+    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+    for (i = 0 ; i < 4; i++) {
+	if (mask & (1<<i)) {
+	    dst = get_dst_reg(c, inst, i, 1);
+	    src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+	    src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+	    brw_ADD(p, dst, src0, negate(src1));
+	}
+    }
+    brw_set_saturate(p, 0);
+}
+
+static void emit_mul(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg src0, src1, dst;
+    GLuint mask = inst->DstReg.WriteMask;
+    int i;
+    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+    for (i = 0 ; i < 4; i++) {
+	if (mask & (1<<i)) {
+	    dst = get_dst_reg(c, inst, i, 1);
+	    src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+	    src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+	    brw_MUL(p, dst, src0, src1);
+	}
+    }
+    brw_set_saturate(p, 0);
+}
+
+static void emit_frc(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg src0, dst;
+    GLuint mask = inst->DstReg.WriteMask;
+    int i;
+    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+    for (i = 0 ; i < 4; i++) {
+	if (mask & (1<<i)) {
+	    dst = get_dst_reg(c, inst, i, 1);
+	    src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+	    brw_FRC(p, dst, src0);
+	}
+    }
+    if (inst->SaturateMode != SATURATE_OFF)
+	brw_set_saturate(p, 0);
+}
+
+static void emit_flr(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg src0, dst;
+    GLuint mask = inst->DstReg.WriteMask;
+    int i;
+    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+    for (i = 0 ; i < 4; i++) {
+	if (mask & (1<<i)) {
+	    dst = get_dst_reg(c, inst, i, 1);
+	    src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+	    brw_RNDD(p, dst, src0);
+	}
+    }
+    brw_set_saturate(p, 0);
+}
+
+static void emit_max(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    GLuint mask = inst->DstReg.WriteMask;
+    struct brw_reg src0, src1, dst;
+    int i;
+    brw_push_insn_state(p);
+    for (i = 0; i < 4; i++) {
+	if (mask & (1<<i)) {
+	    dst = get_dst_reg(c, inst, i, 1);
+	    src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+	    src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+	    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+	    brw_MOV(p, dst, src0);
+	    brw_set_saturate(p, 0);
+
+	    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1);
+	    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+	    brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+	    brw_MOV(p, dst, src1);
+	    brw_set_saturate(p, 0);
+	    brw_set_predicate_control_flag_value(p, 0xff);
+	}
+    }
+    brw_pop_insn_state(p);
+}
+
+static void emit_min(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    GLuint mask = inst->DstReg.WriteMask;
+    struct brw_reg src0, src1, dst;
+    int i;
+    brw_push_insn_state(p);
+    for (i = 0; i < 4; i++) {
+	if (mask & (1<<i)) {
+	    dst = get_dst_reg(c, inst, i, 1);
+	    src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+	    src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+	    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+	    brw_MOV(p, dst, src0);
+	    brw_set_saturate(p, 0);
+
+	    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0);
+	    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+	    brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+	    brw_MOV(p, dst, src1);
+	    brw_set_saturate(p, 0);
+	    brw_set_predicate_control_flag_value(p, 0xff);
+	}
+    }
+    brw_pop_insn_state(p);
+}
+
+static void emit_pow(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg dst, src0, src1;
+    dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
+    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+    src1 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
+
+    brw_MOV(p, brw_message_reg(2), src0);
+    brw_MOV(p, brw_message_reg(3), src1);
+
+    brw_math(p,
+	    dst,
+	    BRW_MATH_FUNCTION_POW,
+	    (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+	    2,
+	    brw_null_reg(),
+	    BRW_MATH_DATA_VECTOR,
+	    BRW_MATH_PRECISION_FULL);
+}
+
+static void emit_lrp(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    GLuint mask = inst->DstReg.WriteMask;
+    struct brw_reg dst, tmp1, tmp2, src0, src1, src2;
+    int i;
+    for (i = 0; i < 4; i++) {
+	if (mask & (1<<i)) {
+	    dst = get_dst_reg(c, inst, i, 1);
+	    src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+
+	    src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+
+	    if (src1.nr == dst.nr) {
+		tmp1 = alloc_tmp(c);
+		brw_MOV(p, tmp1, src1);
+	    } else
+		tmp1 = src1;
+
+	    src2 = get_src_reg(c, &inst->SrcReg[2], i, 1);
+	    if (src2.nr == dst.nr) {
+		tmp2 = alloc_tmp(c);
+		brw_MOV(p, tmp2, src2);
+	    } else
+		tmp2 = src2;
+
+	    brw_ADD(p, dst, negate(src0), brw_imm_f(1.0));
+	    brw_MUL(p, brw_null_reg(), dst, tmp2);
+	    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+	    brw_MAC(p, dst, src0, tmp1);
+	    brw_set_saturate(p, 0);
+	}
+	release_tmps(c);
+    }
+}
+
+static void emit_kil(struct brw_wm_compile *c)
+{
+	struct brw_compile *p = &c->func;
+	struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+	brw_push_insn_state(p);
+	brw_set_mask_control(p, BRW_MASK_DISABLE);
+	brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
+	brw_AND(p, depth, c->emit_mask_reg, depth);
+	brw_pop_insn_state(p);
+}
+
+static void emit_mad(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    GLuint mask = inst->DstReg.WriteMask;
+    struct brw_reg dst, src0, src1, src2;
+    int i;
+
+    for (i = 0; i < 4; i++) {
+	if (mask & (1<<i)) {
+	    dst = get_dst_reg(c, inst, i, 1);
+	    src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+	    src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+	    src2 = get_src_reg(c, &inst->SrcReg[2], i, 1);
+	    brw_MUL(p, dst, src0, src1);
+
+	    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+	    brw_ADD(p, dst, dst, src2);
+	    brw_set_saturate(p, 0);
+	}
+    }
+}
+
+static void emit_sop(struct brw_wm_compile *c,
+		struct prog_instruction *inst, GLuint cond)
+{
+    struct brw_compile *p = &c->func;
+    GLuint mask = inst->DstReg.WriteMask;
+    struct brw_reg dst, src0, src1;
+    int i;
+
+    brw_push_insn_state(p);
+    for (i = 0; i < 4; i++) {
+	if (mask & (1<<i)) {
+	    dst = get_dst_reg(c, inst, i, 1);
+	    src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+	    src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+	    brw_CMP(p, brw_null_reg(), cond, src0, src1);
+	    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+	    brw_MOV(p, dst, brw_imm_f(0.0));
+	    brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+	    brw_MOV(p, dst, brw_imm_f(1.0));
+	}
+    }
+    brw_pop_insn_state(p);
+}
+
+static void emit_slt(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    emit_sop(c, inst, BRW_CONDITIONAL_L);
+}
+
+static void emit_sle(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    emit_sop(c, inst, BRW_CONDITIONAL_LE);
+}
+
+static void emit_sgt(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    emit_sop(c, inst, BRW_CONDITIONAL_G);
+}
+
+static void emit_sge(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    emit_sop(c, inst, BRW_CONDITIONAL_GE);
+}
+
+static void emit_seq(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    emit_sop(c, inst, BRW_CONDITIONAL_EQ);
+}
+
+static void emit_sne(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    emit_sop(c, inst, BRW_CONDITIONAL_NEQ);
+}
+
+static void emit_ddx(struct brw_wm_compile *c,
+                struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    GLuint mask = inst->DstReg.WriteMask;
+    struct brw_reg interp[4];
+    struct brw_reg dst;
+    struct brw_reg src0, w;
+    GLuint nr, i;
+    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+    w = get_src_reg(c, &inst->SrcReg[1], 3, 1);
+    nr = src0.nr;
+    interp[0] = brw_vec1_grf(nr, 0);
+    interp[1] = brw_vec1_grf(nr, 4);
+    interp[2] = brw_vec1_grf(nr+1, 0);
+    interp[3] = brw_vec1_grf(nr+1, 4);
+    brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+    for(i = 0; i < 4; i++ ) {
+        if (mask & (1<<i)) {
+            dst = get_dst_reg(c, inst, i, 1);
+            brw_MOV(p, dst, interp[i]);
+            brw_MUL(p, dst, dst, w);
+        }
+    }
+    brw_set_saturate(p, 0);
+}
+
+static void emit_ddy(struct brw_wm_compile *c,
+                struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    GLuint mask = inst->DstReg.WriteMask;
+    struct brw_reg interp[4];
+    struct brw_reg dst;
+    struct brw_reg src0, w;
+    GLuint nr, i;
+
+    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+    nr = src0.nr;
+    w = get_src_reg(c, &inst->SrcReg[1], 3, 1);
+    interp[0] = brw_vec1_grf(nr, 0);
+    interp[1] = brw_vec1_grf(nr, 4);
+    interp[2] = brw_vec1_grf(nr+1, 0);
+    interp[3] = brw_vec1_grf(nr+1, 4);
+    brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+    for(i = 0; i < 4; i++ ) {
+        if (mask & (1<<i)) {
+            dst = get_dst_reg(c, inst, i, 1);
+            brw_MOV(p, dst, suboffset(interp[i], 1));
+            brw_MUL(p, dst, dst, w);
+        }
+    }
+    brw_set_saturate(p, 0);
+}
+
+static void emit_wpos_xy(struct brw_wm_compile *c,
+                struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    GLuint mask = inst->DstReg.WriteMask;
+    struct brw_reg src0[2], dst[2];
+
+    dst[0] = get_dst_reg(c, inst, 0, 1);
+    dst[1] = get_dst_reg(c, inst, 1, 1);
+
+    src0[0] = get_src_reg(c, &inst->SrcReg[0], 0, 1);
+    src0[1] = get_src_reg(c, &inst->SrcReg[0], 1, 1);
+
+    /* Calculate the pixel offset from window bottom left into destination
+     * X and Y channels.
+     */
+    if (mask & WRITEMASK_X) {
+	/* X' = X - origin_x */
+	brw_ADD(p,
+		dst[0],
+		retype(src0[0], BRW_REGISTER_TYPE_W),
+		brw_imm_d(0 - c->key.origin_x));
+    }
+
+    if (mask & WRITEMASK_Y) {
+	/* Y' = height - (Y - origin_y) = height + origin_y - Y */
+	brw_ADD(p,
+		dst[1],
+		negate(retype(src0[1], BRW_REGISTER_TYPE_W)),
+		brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
+    }
+}
+
+/* TODO
+   BIAS on SIMD8 not working yet...
+ */	
+static void emit_txb(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg dst[4], src[4], payload_reg;
+    GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
+
+    GLuint i;
+    payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
+    for (i = 0; i < 4; i++) 
+	dst[i] = get_dst_reg(c, inst, i, 1);
+    for (i = 0; i < 4; i++)
+	src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
+
+    switch (inst->TexSrcTarget) {
+	case TEXTURE_1D_INDEX:
+	    brw_MOV(p, brw_message_reg(2), src[0]);
+	    brw_MOV(p, brw_message_reg(3), brw_imm_f(0));
+	    brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
+	    break;
+	case TEXTURE_2D_INDEX:
+	case TEXTURE_RECT_INDEX:
+	    brw_MOV(p, brw_message_reg(2), src[0]);
+	    brw_MOV(p, brw_message_reg(3), src[1]);
+	    brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
+	    break;
+	default:
+	    brw_MOV(p, brw_message_reg(2), src[0]);
+	    brw_MOV(p, brw_message_reg(3), src[1]);
+	    brw_MOV(p, brw_message_reg(4), src[2]);
+	    break;
+    }
+    brw_MOV(p, brw_message_reg(5), src[3]);
+    brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
+    brw_SAMPLE(p,
+	    retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
+	    1,
+	    retype(payload_reg, BRW_REGISTER_TYPE_UW),
+	    unit + MAX_DRAW_BUFFERS, /* surface */
+	    unit,     /* sampler */
+	    inst->DstReg.WriteMask,
+	    BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
+	    4,
+	    4,
+	    0);
+}
+
+static void emit_tex(struct brw_wm_compile *c,
+		struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg dst[4], src[4], payload_reg;
+    GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
+
+    GLuint msg_len;
+    GLuint i, nr;
+    GLuint emit;
+    GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0;
+
+    payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
+
+    for (i = 0; i < 4; i++) 
+	dst[i] = get_dst_reg(c, inst, i, 1);
+    for (i = 0; i < 4; i++)
+	src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
+
+
+    switch (inst->TexSrcTarget) {
+	case TEXTURE_1D_INDEX:
+	    emit = WRITEMASK_X;
+	    nr = 1;
+	    break;
+	case TEXTURE_2D_INDEX:
+	case TEXTURE_RECT_INDEX:
+	    emit = WRITEMASK_XY;
+	    nr = 2;
+	    break;
+	default:
+	    emit = WRITEMASK_XYZ;
+	    nr = 3;
+	    break;
+    }
+    msg_len = 1;
+
+    for (i = 0; i < nr; i++) {
+	static const GLuint swz[4] = {0,1,2,2};
+	if (emit & (1<<i))
+	    brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]);
+	else
+	    brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0));
+	msg_len += 1;
+    }
+
+    if (shadow) {
+	brw_MOV(p, brw_message_reg(5), brw_imm_f(0));
+	brw_MOV(p, brw_message_reg(6), src[2]);
+    }
+
+    brw_SAMPLE(p,
+	    retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
+	    1,
+	    retype(payload_reg, BRW_REGISTER_TYPE_UW),
+	    unit + MAX_DRAW_BUFFERS, /* surface */
+	    unit,     /* sampler */
+	    inst->DstReg.WriteMask,
+	    BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE,
+	    4,
+	    shadow ? 6 : 4,
+	    0);
+
+    if (shadow)
+	brw_MOV(p, dst[3], brw_imm_f(1.0));
+}
+
+/* Resolve subroutine calls: every CAL gets src1 of its recorded native
+ * instruction set to the distance to the call target, scaled by 16. */
+static void post_wm_emit( struct brw_wm_compile *c )
+{
+    struct prog_instruction *insns = c->fp->program.Base.Instructions;
+    GLuint count = c->fp->program.Base.NumInstructions;
+    GLuint idx, target;
+
+    for (idx = 0; idx < count; idx++) {
+	struct prog_instruction *call = &insns[idx];
+	struct brw_instruction *from, *to;
+	int distance;
+
+	if (call->Opcode != OPCODE_CAL)
+	    continue;
+
+	from = call->Data;
+	target = call->BranchTarget;
+	to = insns[target].Data;
+	distance = to - from;
+	brw_set_src1(from, brw_imm_d(distance*16));
+    }
+}
+
+/* Emit native code for each GLSL fragment program instruction, then
+ * resolve CAL offsets and clear the Data back-pointers. */
+static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
+{
+#define MAX_IFSN 32
+#define MAX_LOOP_DEPTH 32
+    struct brw_instruction *if_inst[MAX_IFSN], *loop_inst[MAX_LOOP_DEPTH];
+    struct brw_instruction *inst0, *inst1;
+    int i, if_insn = 0, loop_insn = 0;
+    struct brw_compile *p = &c->func;
+    struct brw_indirect stack_index = brw_indirect(0, 0);
+
+    c->reg_index = 0;
+    prealloc_reg(c);
+    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+    brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
+
+    for (i = 0; i < c->nr_fp_insns; i++) {
+	struct prog_instruction *inst = &c->prog_instructions[i];
+	struct prog_instruction *orig_inst;
+
+	if ((orig_inst = inst->Data) != 0)
+	    orig_inst->Data = current_insn(p);
+
+	brw_set_conditionalmod(p, inst->CondUpdate ? BRW_CONDITIONAL_NZ
+						    : BRW_CONDITIONAL_NONE);
+
+	switch (inst->Opcode) {
+	    case WM_PIXELXY:
+		emit_pixel_xy(c, inst);
+		break;
+	    case WM_DELTAXY:
+		emit_delta_xy(c, inst);
+		break;
+	    case WM_PIXELW:
+		emit_pixel_w(c, inst);
+		break;
+	    case WM_LINTERP:
+		emit_linterp(c, inst);
+		break;
+	    case WM_PINTERP:
+		emit_pinterp(c, inst);
+		break;
+	    case WM_CINTERP:
+		emit_cinterp(c, inst);
+		break;
+	    case WM_WPOSXY:
+		emit_wpos_xy(c, inst);
+		break;
+	    case WM_FB_WRITE:
+		emit_fb_write(c, inst);
+		break;
+	    case OPCODE_ABS:
+		emit_abs(c, inst);
+		break;
+	    case OPCODE_ADD:
+		emit_add(c, inst);
+		break;
+	    case OPCODE_SUB:
+		emit_sub(c, inst);
+		break;
+	    case OPCODE_FRC:
+		emit_frc(c, inst);
+		break;
+	    case OPCODE_FLR:
+		emit_flr(c, inst);
+		break;
+	    case OPCODE_LRP:
+		emit_lrp(c, inst);
+		break;
+	    case OPCODE_INT:
+		emit_int(c, inst);
+		break;
+	    case OPCODE_MOV:
+		emit_mov(c, inst);
+		break;
+	    case OPCODE_DP3:
+		emit_dp3(c, inst);
+		break;
+	    case OPCODE_DP4:
+		emit_dp4(c, inst);
+		break;
+	    case OPCODE_XPD:
+		emit_xpd(c, inst);
+		break;
+	    case OPCODE_DPH:
+		emit_dph(c, inst);
+		break;
+	    case OPCODE_RCP:
+		emit_rcp(c, inst);
+		break;
+	    case OPCODE_RSQ:
+		emit_rsq(c, inst);
+		break;
+	    case OPCODE_SIN:
+		emit_sin(c, inst);
+		break;
+	    case OPCODE_COS:
+		emit_cos(c, inst);
+		break;
+	    case OPCODE_EX2:
+		emit_ex2(c, inst);
+		break;
+	    case OPCODE_LG2:
+		emit_lg2(c, inst);
+		break;
+	    case OPCODE_MAX:
+		emit_max(c, inst);
+		break;
+	    case OPCODE_MIN:
+		emit_min(c, inst);
+		break;
+	    case OPCODE_DDX:
+		emit_ddx(c, inst);
+		break;
+	    case OPCODE_DDY:
+		emit_ddy(c, inst);
+		break;
+	    case OPCODE_SLT:
+		emit_slt(c, inst);
+		break;
+	    case OPCODE_SLE:
+		emit_sle(c, inst);
+		break;
+	    case OPCODE_SGT:
+		emit_sgt(c, inst);
+		break;
+	    case OPCODE_SGE:
+		emit_sge(c, inst);
+		break;
+	    case OPCODE_SEQ:
+		emit_seq(c, inst);
+		break;
+	    case OPCODE_SNE:
+		emit_sne(c, inst);
+		break;
+	    case OPCODE_MUL:
+		emit_mul(c, inst);
+		break;
+	    case OPCODE_POW:
+		emit_pow(c, inst);
+		break;
+	    case OPCODE_MAD:
+		emit_mad(c, inst);
+		break;
+	    case OPCODE_TEX:
+		emit_tex(c, inst);
+		break;
+	    case OPCODE_TXB:
+		emit_txb(c, inst);
+		break;
+	    case OPCODE_KIL_NV:
+		emit_kil(c);
+		break;
+	    case OPCODE_IF:
+		assert(if_insn < MAX_IFSN);
+		if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
+		break;
+	    case OPCODE_ELSE:
+		assert(if_insn > 0);
+		if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]);
+		break;
+	    case OPCODE_ENDIF:
+		assert(if_insn > 0);
+		brw_ENDIF(p, if_inst[--if_insn]);
+		break;
+	    case OPCODE_BGNSUB:
+	    case OPCODE_ENDSUB:
+		break;
+	    case OPCODE_CAL:
+		brw_push_insn_state(p);
+		brw_set_mask_control(p, BRW_MASK_DISABLE);
+		brw_set_access_mode(p, BRW_ALIGN_1);
+		brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
+		brw_set_access_mode(p, BRW_ALIGN_16);
+		brw_ADD(p, get_addr_reg(stack_index), get_addr_reg(stack_index), brw_imm_d(4));
+		/* post_wm_emit() later rewrites src1 of the insn recorded here */
+		orig_inst = inst->Data;
+		orig_inst->Data = &p->store[p->nr_insn];
+		brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+		brw_pop_insn_state(p);
+		break;
+	    case OPCODE_RET:
+		brw_push_insn_state(p);
+		brw_set_mask_control(p, BRW_MASK_DISABLE);
+		brw_ADD(p, get_addr_reg(stack_index), get_addr_reg(stack_index), brw_imm_d(-4));
+		brw_set_access_mode(p, BRW_ALIGN_1);
+		brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0));
+		brw_set_access_mode(p, BRW_ALIGN_16);
+		brw_pop_insn_state(p);
+		break;
+	    case OPCODE_BGNLOOP:
+		assert(loop_insn < MAX_LOOP_DEPTH);
+		loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8);
+		break;
+	    case OPCODE_BRK:
+		brw_BREAK(p);
+		brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+		break;
+	    case OPCODE_CONT:
+		brw_CONT(p);
+		brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+		break;
+	    case OPCODE_ENDLOOP:
+		assert(loop_insn > 0);
+		loop_insn--;
+		inst0 = inst1 = brw_WHILE(p, loop_inst[loop_insn]);
+		/* Patch this loop's BREAK/CONTINUE jumps.  A non-zero jump_count
+		 * means an inner ENDLOOP already patched the instruction (this
+		 * assumes brw_BREAK()/brw_CONT() emit jump_count 0 -- confirm). */
+		while (inst0 > loop_inst[loop_insn]) {
+		    inst0--;
+		    if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+			inst0->bits3.if_else.jump_count == 0) {
+			inst0->bits3.if_else.jump_count = inst1 - inst0 + 1;
+			inst0->bits3.if_else.pop_count = 0;
+		    } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+			       inst0->bits3.if_else.jump_count == 0) {
+			inst0->bits3.if_else.jump_count = inst1 - inst0;
+			inst0->bits3.if_else.pop_count = 0;
+		    }
+		}
+		break;
+	    default:
+		_mesa_printf("unsupported IR in fragment shader %d\n", inst->Opcode);
+	}
+	brw_set_predicate_control(p, inst->CondUpdate ? BRW_PREDICATE_NORMAL
+						       : BRW_PREDICATE_NONE);
+    }
+    post_wm_emit(c);
+    for (i = 0; i < c->fp->program.Base.NumInstructions; i++)
+	c->fp->program.Base.Instructions[i].Data = NULL;
+}
+
+void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
+{
+    brw_wm_pass_fp(c); /* NOTE(review): presumably the fragment program pre-pass -- confirm */
+    c->tmp_index = 127; /* must be set before the emit pass below runs */
+    brw_wm_emit_glsl(brw, c); /* emits the native code; leaves c->reg_index updated */
+    c->prog_data.total_grf = c->reg_index; /* register index reached during emission */
+    c->prog_data.total_scratch = 0; /* this path requests no scratch space */
+}
diff --git a/i965/brw_wm_pass0.c b/i965/brw_wm_pass0.c
index 00f6f6b..205a716 100644
--- a/i965/brw_wm_pass0.c
+++ b/i965/brw_wm_pass0.c
@@ -168,6 +168,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
       case PROGRAM_PAYLOAD:
       case PROGRAM_TEMPORARY:
       case PROGRAM_OUTPUT:
+      case PROGRAM_VARYING:
 	 break;
 
       case PROGRAM_LOCAL_PARAM:
@@ -179,6 +180,8 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
 	 break;
 
       case PROGRAM_STATE_VAR:
+      case PROGRAM_UNIFORM:
+      case PROGRAM_CONSTANT:
       case PROGRAM_NAMED_PARAM: {
 	 struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters;
 	 
@@ -197,6 +200,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
 	    break;
 	    
 	 case PROGRAM_STATE_VAR:
+	 case PROGRAM_UNIFORM:
 	    /* These may change from run to run:
 	     */
 	    ref = get_param_ref(c, &plist->ParameterValues[idx][component] );
@@ -344,6 +348,8 @@ static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c,
    out->saturate = (inst->SaturateMode != SATURATE_OFF);
    out->tex_unit = inst->TexSrcUnit;
    out->tex_idx = inst->TexSrcTarget;
+   out->eot = inst->Sampler & 1;
+   out->target = inst->Sampler>>1;
 
    /* Args:
     */
diff --git a/i965/brw_wm_pass1.c b/i965/brw_wm_pass1.c
index d668def..f6f3a38 100644
--- a/i965/brw_wm_pass1.c
+++ b/i965/brw_wm_pass1.c
@@ -150,12 +150,17 @@ void brw_wm_pass1( struct brw_wm_compile *c )
       case OPCODE_FLR:
       case OPCODE_FRC:
       case OPCODE_MOV:
+      case OPCODE_SWZ:
 	 read0 = writemask;
 	 break;
 
       case OPCODE_SUB:
       case OPCODE_SLT:
+      case OPCODE_SLE:
       case OPCODE_SGE:
+      case OPCODE_SGT:
+      case OPCODE_SEQ:
+      case OPCODE_SNE:
       case OPCODE_ADD:
       case OPCODE_MAX:
       case OPCODE_MIN:
@@ -253,11 +258,9 @@ void brw_wm_pass1( struct brw_wm_compile *c )
 	 read0 = WRITEMASK_XYW;
 	 break;
 
-      case OPCODE_SWZ:
       case OPCODE_DST:
       case OPCODE_TXP:
       default:
-	 assert(0);
 	 break;
       }
 
diff --git a/i965/brw_wm_pass2.c b/i965/brw_wm_pass2.c
index a1edbd6..6fca9ad 100644
--- a/i965/brw_wm_pass2.c
+++ b/i965/brw_wm_pass2.c
@@ -69,7 +69,8 @@ static void prealloc_reg(struct brw_wm_compile *c,
  */
 static void init_registers( struct brw_wm_compile *c )
 {
-   GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted;
+   struct brw_context *brw = c->func.brw;
+   GLuint inputs = (brw->vs.prog_data->outputs_written & DO_SETUP_BITS);
    GLuint nr_interp_regs = 0;
    GLuint i = 0;
    GLuint j;
@@ -85,8 +86,15 @@ static void init_registers( struct brw_wm_compile *c )
 
    for (j = 0; j < FRAG_ATTRIB_MAX; j++) 
       if (inputs & (1<<j)) {
+	 /* index for vs output and ps input are not the same 
+	    in shader varying */
+	 GLuint index;
+	 if (j > FRAG_ATTRIB_VAR0)
+	     index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
+	 else
+	     index = j;
 	 nr_interp_regs++;
-	 prealloc_reg(c, &c->payload.input_interp[j], i++);
+	 prealloc_reg(c, &c->payload.input_interp[index], i++);
       }
 
    assert(nr_interp_regs >= 1);
@@ -328,7 +336,7 @@ void brw_wm_pass2( struct brw_wm_compile *c )
    c->state = PASS2_DONE;
 
    if (INTEL_DEBUG & DEBUG_WM) {
-      brw_wm_print_program(c, "pass2/done");
+       brw_wm_print_program(c, "pass2/done");
    }
 }
 
diff --git a/i965/brw_wm_sampler_state.c b/i965/brw_wm_sampler_state.c
index 794c7d9..d40332e 100644
--- a/i965/brw_wm_sampler_state.c
+++ b/i965/brw_wm_sampler_state.c
@@ -54,7 +54,7 @@ static GLuint translate_wrap_mode( GLenum wrap )
    case GL_REPEAT: 
       return BRW_TEXCOORDMODE_WRAP;
    case GL_CLAMP:  
-      return BRW_TEXCOORDMODE_CLAMP_BORDER; /* conform likes it this way */
+      return BRW_TEXCOORDMODE_CLAMP;
    case GL_CLAMP_TO_EDGE: 
       return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */
    case GL_CLAMP_TO_BORDER: 
@@ -79,27 +79,43 @@ static GLint S_FIXED(GLfloat value, GLuint frac_bits)
 }
 
 
-static GLuint upload_default_color( struct brw_context *brw,
-				    const GLfloat *color )
+static dri_bo *upload_default_color( struct brw_context *brw,
+				     const GLfloat *color )
 {
    struct brw_sampler_default_color sdc;
 
    COPY_4V(sdc.color, color); 
    
-   return brw_cache_data( &brw->cache[BRW_SAMPLER_DEFAULT_COLOR], &sdc );
+   return brw_cache_data( &brw->cache, BRW_SAMPLER_DEFAULT_COLOR, &sdc,
+			  NULL, 0 );
 }
 
 
-/*
+struct wm_sampler_key { /* cache key for the WM sampler state */
+   int sampler_count; /* index of the highest enabled texture unit, plus one */
+
+   struct wm_sampler_entry {
+      GLenum wrap_r, wrap_s, wrap_t; /* texObj->WrapR / WrapS / WrapT */
+      float maxlod, minlod; /* texObj->MaxLod / MinLod */
+      float lod_bias; /* texUnit->LodBias + texObj->LodBias */
+      float max_aniso; /* texObj->MaxAnisotropy */
+      GLenum minfilter, magfilter; /* texObj->MinFilter / MagFilter */
+      GLenum comparemode, comparefunc; /* texObj->CompareMode / CompareFunc */
+      dri_bo *sdc_bo; /* not written by brw_wm_sampler_populate_key() */
+   } sampler[BRW_MAX_TEX_UNIT]; /* indexed by texture unit */
+};
+
+/**
+ * Sets the sampler state for a single unit based off of the sampler key
+ * entry.
  */
-static void brw_update_sampler_state( struct gl_texture_unit *texUnit,
-				      struct gl_texture_object *texObj,
-				      GLuint sdc_gs_offset,
-				      struct brw_sampler_state *sampler)
-{   
+static void brw_update_sampler_state(struct wm_sampler_entry *key,
+				     dri_bo *sdc_bo,
+				     struct brw_sampler_state *sampler)
+{
    _mesa_memset(sampler, 0, sizeof(*sampler));
 
-   switch (texObj->MinFilter) {
+   switch (key->minfilter) {
    case GL_NEAREST:
       sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
       sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
@@ -130,17 +146,17 @@ static void brw_update_sampler_state( struct gl_texture_unit *texUnit,
 
    /* Set Anisotropy: 
     */
-   if ( texObj->MaxAnisotropy > 1.0 ) {
+   if (key->max_aniso > 1.0) {
       sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; 
       sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC;
 
-      if (texObj->MaxAnisotropy > 2.0) {
-	 sampler->ss3.max_aniso = MAX2((texObj->MaxAnisotropy - 2) / 2,
+      if (key->max_aniso > 2.0) {
+	 sampler->ss3.max_aniso = MAX2((key->max_aniso - 2) / 2,
 				       BRW_ANISORATIO_16);
       }
    }
    else {
-      switch (texObj->MagFilter) {
+      switch (key->magfilter) {
       case GL_NEAREST:
 	 sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
 	 break;
@@ -152,9 +168,9 @@ static void brw_update_sampler_state( struct gl_texture_unit *texUnit,
       }  
    }
 
-   sampler->ss1.r_wrap_mode = translate_wrap_mode(texObj->WrapR);
-   sampler->ss1.s_wrap_mode = translate_wrap_mode(texObj->WrapS);
-   sampler->ss1.t_wrap_mode = translate_wrap_mode(texObj->WrapT);
+   sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r);
+   sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s);
+   sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t);
 
    /* Fulsim complains if I don't do this.  Hardware doesn't mind:
     */
@@ -168,17 +184,18 @@ static void brw_update_sampler_state( struct gl_texture_unit *texUnit,
 
    /* Set shadow function: 
     */
-   if (texObj->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) {
+   if (key->comparemode == GL_COMPARE_R_TO_TEXTURE_ARB) {
       /* Shadowing is "enabled" by emitting a particular sampler
        * message (sample_c).  So need to recompile WM program when
        * shadow comparison is enabled on each/any texture unit.
        */
-      sampler->ss0.shadow_function = intel_translate_shadow_compare_func(texObj->CompareFunc);
+      sampler->ss0.shadow_function =
+	 intel_translate_shadow_compare_func(key->comparefunc);
    }
 
    /* Set LOD bias: 
     */
-   sampler->ss0.lod_bias = S_FIXED(CLAMP(texUnit->LodBias + texObj->LodBias, -16, 15), 6);
+   sampler->ss0.lod_bias = S_FIXED(CLAMP(key->lod_bias, -16, 15), 6);
 
    sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
    sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
@@ -192,62 +209,123 @@ static void brw_update_sampler_state( struct gl_texture_unit *texUnit,
     */
    sampler->ss0.base_level = U_FIXED(0, 1);
 
-   sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(texObj->MaxLod, 0), 13), 6);
-   sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(texObj->MinLod, 0), 13), 6);
+   sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(key->maxlod, 0), 13), 6);
+   sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(key->minlod, 0), 13), 6);
    
-   sampler->ss2.default_color_pointer = sdc_gs_offset >> 5;
+   sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */
 }
 
+/** Fills in the sampler cache key from every enabled texture unit and
+ * re-uploads each such unit's border color as its default-color buffer. */
+static void
+brw_wm_sampler_populate_key(struct brw_context *brw,
+			    struct wm_sampler_key *key)
+{
+   int unit;
+
+   memset(key, 0, sizeof(*key));
+
+   for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
+      struct gl_texture_unit *tex_unit = &brw->attribs.Texture->Unit[unit];
+      struct gl_texture_object *tex_obj = tex_unit->_Current;
+      struct wm_sampler_entry *slot = &key->sampler[unit];
+
+      if (!tex_unit->_ReallyEnabled)
+	 continue;
 
+      slot->wrap_r = tex_obj->WrapR;
+      slot->wrap_s = tex_obj->WrapS;
+      slot->wrap_t = tex_obj->WrapT;
+      slot->minfilter = tex_obj->MinFilter;
+      slot->magfilter = tex_obj->MagFilter;
+      slot->comparemode = tex_obj->CompareMode;
+      slot->comparefunc = tex_obj->CompareFunc;
+      slot->maxlod = tex_obj->MaxLod;
+      slot->minlod = tex_obj->MinLod;
+      slot->max_aniso = tex_obj->MaxAnisotropy;
+      slot->lod_bias = tex_unit->LodBias + tex_obj->LodBias;
+
+      dri_bo_unreference(brw->wm.sdc_bo[unit]);
+      brw->wm.sdc_bo[unit] = upload_default_color(brw, tex_obj->BorderColor);
+      key->sampler_count = unit + 1;
+   }
+}
 
 /* All samplers must be uploaded in a single contiguous array, which
  * complicates various things.  However, this is still too confusing -
  * FIXME: simplify all the different new texture state flags.
  */
-static void upload_wm_samplers( struct brw_context *brw )
+static int upload_wm_samplers( struct brw_context *brw )
 {
-   GLuint unit;
-   GLuint sampler_count = 0;
+   struct wm_sampler_key key;
+   int i;
+   int ret = 0;
 
-   /* _NEW_TEXTURE */
-   for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
-      if (brw->attribs.Texture->Unit[unit]._ReallyEnabled) {	 
-	 struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit];
-	 struct gl_texture_object *texObj = texUnit->_Current;
+   brw_wm_sampler_populate_key(brw, &key);
+
+   if (brw->wm.sampler_count != key.sampler_count) {
+      brw->wm.sampler_count = key.sampler_count;
+      brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
+   }
+
+   dri_bo_unreference(brw->wm.sampler_bo);
+   brw->wm.sampler_bo = NULL;
+   if (brw->wm.sampler_count == 0)
+      return 0;
 
-	 GLuint sdc_gs_offset = upload_default_color(brw, texObj->BorderColor);
+   brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER,
+					 &key, sizeof(key),
+					 brw->wm.sdc_bo, key.sampler_count,
+					 NULL);
 
-	 brw_update_sampler_state(texUnit,
-				  texObj, 
-				  sdc_gs_offset,
-				  &brw->wm.sampler[unit]);
+   /* If we didnt find it in the cache, compute the state and put it in the
+    * cache.
+    */
+   if (brw->wm.sampler_bo == NULL) {
+      struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT];
+
+      memset(sampler, 0, sizeof(sampler));
+      for (i = 0; i < key.sampler_count; i++) {
+	 if (brw->wm.sdc_bo[i] == NULL)
+	    continue;
 
-	 sampler_count = unit + 1;
+	 brw_update_sampler_state(&key.sampler[i], brw->wm.sdc_bo[i],
+				  &sampler[i]);
+      }
+
+      brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER,
+					    &key, sizeof(key),
+					    brw->wm.sdc_bo, key.sampler_count,
+					    &sampler, sizeof(sampler),
+					    NULL, NULL);
+
+      /* Emit SDC relocations */
+      for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+	 if (!brw->attribs.Texture->Unit[i]._ReallyEnabled)
+	    continue;
+
+	 ret |= dri_bufmgr_check_aperture_space(brw->wm.sdc_bo[i]);
+	 dri_emit_reloc(brw->wm.sampler_bo,
+			DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+			0,
+			i * sizeof(struct brw_sampler_state) +
+			offsetof(struct brw_sampler_state, ss2),
+			brw->wm.sdc_bo[i]);
       }
-   }
-   
-   if (brw->wm.sampler_count != sampler_count) {
-      brw->wm.sampler_count = sampler_count;
-      brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
    }
 
-   brw->wm.sampler_gs_offset = 0;
+   ret |= dri_bufmgr_check_aperture_space(brw->wm.sampler_bo);
+   return ret;
 
-   if (brw->wm.sampler_count) 
-      brw->wm.sampler_gs_offset = 
-	 brw_cache_data_sz(&brw->cache[BRW_SAMPLER],
-			   brw->wm.sampler,
-			   sizeof(struct brw_sampler_state) * brw->wm.sampler_count);
 }
 
-
 const struct brw_tracked_state brw_wm_samplers = {
    .dirty = {
       .mesa = _NEW_TEXTURE,
       .brw = 0,
       .cache = 0
    },
-   .update = upload_wm_samplers
+   .prepare = upload_wm_samplers,
 };
 
 
diff --git a/i965/brw_wm_state.c b/i965/brw_wm_state.c
index 5b4f2ab..f4da0f2 100644
--- a/i965/brw_wm_state.c
+++ b/i965/brw_wm_state.c
@@ -34,109 +34,136 @@
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
-#include "bufmgr.h"
+#include "dri_bufmgr.h"
+#include "brw_wm.h"
 
 /***********************************************************************
  * WM unit - fragment programs and rasterization
  */
 
-static void invalidate_scratch_cb( struct intel_context *intel,
-				   void *unused )
-{
-   /* nothing */
-}
+struct brw_wm_unit_key { /* cache key for the WM unit state */
+   unsigned int total_grf, total_scratch; /* from wm.prog_data; scratch is ALIGN()ed to 1024 */
+   unsigned int urb_entry_read_length; /* wm.prog_data->urb_read_length */
+   unsigned int curb_entry_read_length; /* wm.prog_data->curb_read_length */
+   unsigned int dispatch_grf_start_reg; /* wm.prog_data->first_curbe_grf */
+
+   unsigned int curbe_offset; /* brw->curbe.wm_start */
+   unsigned int urb_size; /* brw->urb.vsize */
+
+   unsigned int max_threads; /* 1 under DEBUG_SINGLE_THREAD, otherwise 32 */
 
+   unsigned int nr_surfaces, sampler_count; /* brw->wm.nr_surfaces / sampler_count */
+   GLboolean uses_depth, computes_depth, uses_kill, is_glsl; /* is_glsl selects 8-pixel dispatch */
+   GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable;
+   GLfloat offset_units, offset_factor; /* polygon offset units / factor */
+};
 
-static void upload_wm_unit(struct brw_context *brw )
+static void
+wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
 {
+   const struct gl_fragment_program *fp = brw->fragment_program;
    struct intel_context *intel = &brw->intel;
-   struct brw_wm_unit_state wm;
-   GLuint max_threads;
+
+   memset(key, 0, sizeof(*key));
 
    if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
-      max_threads = 0; 
+      key->max_threads = 1;
    else
-      max_threads = 31;
-
-
-   memset(&wm, 0, sizeof(wm));
+      key->max_threads = 32;
 
    /* CACHE_NEW_WM_PROG */
-   wm.thread0.grf_reg_count = ((brw->wm.prog_data->total_grf-1) & ~15) / 16;
-   wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6;
-   wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
-   wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length;
-   wm.thread3.const_urb_entry_read_length = brw->wm.prog_data->curb_read_length;
-
-   wm.wm5.max_threads = max_threads;      
-
-   if (brw->wm.prog_data->total_scratch) {
-      GLuint per_thread = (brw->wm.prog_data->total_scratch + 1023) / 1024;
-      GLuint total = per_thread * (max_threads + 1);
-
-      /* Scratch space -- just have to make sure there is sufficient
-       * allocated for the active program and current number of threads.
-       */      
-
-      if (!brw->wm.scratch_buffer) {
-	 bmGenBuffers(intel, "wm scratch", 1, &brw->wm.scratch_buffer, 12);
-	 bmBufferSetInvalidateCB(intel,
-				 brw->wm.scratch_buffer,
-				 invalidate_scratch_cb,
-				 NULL,
-				 GL_FALSE);
-      }
+   key->total_grf = brw->wm.prog_data->total_grf;
+   key->urb_entry_read_length = brw->wm.prog_data->urb_read_length;
+   key->curb_entry_read_length = brw->wm.prog_data->curb_read_length;
+   key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
+   key->total_scratch = ALIGN(brw->wm.prog_data->total_scratch, 1024);
 
-      if (total > brw->wm.scratch_buffer_size) {
-	 brw->wm.scratch_buffer_size = total;
-	 bmBufferData(intel,
-		      brw->wm.scratch_buffer,
-		      brw->wm.scratch_buffer_size,
-		      NULL,
-		      0);
-      }
-		   
-      assert(per_thread <= 12 * 1024);
-      wm.thread2.per_thread_scratch_space = (per_thread / 1024) - 1;
+   /* BRW_NEW_URB_FENCE */
+   key->urb_size = brw->urb.vsize;
 
-      /* XXX: could make this dynamic as this is so rarely active:
-       */
-      /* BRW_NEW_LOCK */
-      wm.thread2.scratch_space_base_pointer = 
-	 bmBufferOffset(intel, brw->wm.scratch_buffer) >> 10;
-   }
+   /* BRW_NEW_CURBE_OFFSETS */
+   key->curbe_offset = brw->curbe.wm_start;
 
    /* CACHE_NEW_SURFACE */
-   wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces;
+   key->nr_surfaces = brw->wm.nr_surfaces;
 
-   /* BRW_NEW_CURBE_OFFSETS */
-   wm.thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2;
+   /* CACHE_NEW_SAMPLER */
+   key->sampler_count = brw->wm.sampler_count;
 
-   wm.thread3.urb_entry_read_offset = 0;
+   /* _NEW_POLYGONSTIPPLE */
+   key->polygon_stipple = brw->attribs.Polygon->StippleFlag;
+
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   key->uses_depth = (fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
+
+   /* as far as we can tell */
+   key->computes_depth =
+      (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) != 0;
+
+   /* _NEW_COLOR */
+   key->uses_kill = fp->UsesKill || brw->attribs.Color->AlphaEnabled;
+   key->is_glsl = brw_wm_is_glsl(fp);
+
+   /* XXX: This needs a flag to indicate when it changes. */
+   key->stats_wm = intel->stats_wm;
+
+   /* _NEW_LINE */
+   key->line_stipple = brw->attribs.Line->StippleFlag;
+
+   /* _NEW_POLYGON */
+   key->offset_enable = brw->attribs.Polygon->OffsetFill;
+   key->offset_units = brw->attribs.Polygon->OffsetUnits;
+   key->offset_factor = brw->attribs.Polygon->OffsetFactor;
+}
+
+static dri_bo *
+wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
+			dri_bo **reloc_bufs)
+{
+   struct brw_wm_unit_state wm;
+   dri_bo *bo;
+
+   memset(&wm, 0, sizeof(wm));
+
+   wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+   wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
    wm.thread1.depth_coef_urb_read_offset = 1;
    wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+   wm.thread1.binding_table_entry_count = key->nr_surfaces;
+
+   if (key->total_scratch != 0) {
+      wm.thread2.scratch_space_base_pointer =
+	 brw->wm.scratch_buffer->offset >> 10; /* reloc */
+      wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1;
+   } else {
+      wm.thread2.scratch_space_base_pointer = 0;
+      wm.thread2.per_thread_scratch_space = 0;
+   }
 
-   /* CACHE_NEW_SAMPLER */
-   wm.wm4.sampler_count = (brw->wm.sampler_count + 1) / 4;
-   wm.wm4.sampler_state_pointer = brw->wm.sampler_gs_offset >> 5;
+   wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg;
+   wm.thread3.urb_entry_read_length = key->urb_entry_read_length;
+   wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
+   wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
+   wm.thread3.urb_entry_read_offset = 0;
 
-   /* BRW_NEW_FRAGMENT_PROGRAM */
-   {
-      const struct gl_fragment_program *fp = brw->fragment_program; 
-
-      if (fp->Base.InputsRead & (1<<FRAG_ATTRIB_WPOS)) 
-	 wm.wm5.program_uses_depth = 1; /* as far as we can tell */
-   
-      if (fp->Base.OutputsWritten & (1<<FRAG_RESULT_DEPR)) 
-	 wm.wm5.program_computes_depth = 1;
-   
-      /* _NEW_COLOR */
-      if (fp->UsesKill || 
-	  brw->attribs.Color->AlphaEnabled) 
-	 wm.wm5.program_uses_killpixel = 1; 
+   wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
+   if (brw->wm.sampler_bo != NULL) {
+      /* reloc */
+      wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5;
+   } else {
+      wm.wm4.sampler_state_pointer = 0;
    }
 
-   wm.wm5.enable_16_pix = 1;
+   wm.wm5.program_uses_depth = key->uses_depth;
+   wm.wm5.program_computes_depth = key->computes_depth;
+   wm.wm5.program_uses_killpixel = key->uses_kill;
+
+   if (key->is_glsl)
+      wm.wm5.enable_8_pix = 1;
+   else
+      wm.wm5.enable_16_pix = 1;
+
+   wm.wm5.max_threads = key->max_threads - 1;
    wm.wm5.thread_dispatch_enable = 1;	/* AKA: color_write */
    wm.wm5.legacy_line_rast = 0;
    wm.wm5.legacy_global_depth_bias = 0;
@@ -144,34 +171,108 @@ static void upload_wm_unit(struct brw_context *brw )
    wm.wm5.line_aa_region_width = 0;
    wm.wm5.line_endcap_aa_region_width = 1;
 
-   /* _NEW_POLYGONSTIPPLE */
-   if (brw->attribs.Polygon->StippleFlag) 
-      wm.wm5.polygon_stipple = 1;
+   wm.wm5.polygon_stipple = key->polygon_stipple;
 
-   /* _NEW_POLYGON */
-   if (brw->attribs.Polygon->OffsetFill) {
+   if (key->offset_enable) {
       wm.wm5.depth_offset = 1;
       /* Something wierd going on with legacy_global_depth_bias,
        * offset_constant, scaling and MRD.  This value passes glean
        * but gives some odd results elsewere (eg. the
        * quad-offset-units test).
        */
-      wm.global_depth_offset_constant = brw->attribs.Polygon->OffsetUnits * 2;
+      wm.global_depth_offset_constant = key->offset_units * 2;
 
       /* This is the only value that passes glean:
        */
-      wm.global_depth_offset_scale = brw->attribs.Polygon->OffsetFactor;
+      wm.global_depth_offset_scale = key->offset_factor;
    }
 
-   /* _NEW_LINE */
-   if (brw->attribs.Line->StippleFlag) {
-      wm.wm5.line_stipple = 1;
-   }
+   wm.wm5.line_stipple = key->line_stipple;
 
-   if (INTEL_DEBUG & DEBUG_STATS || intel->stats_wm)
+   if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm)
       wm.wm4.stats_enable = 1;
 
-   brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm );
+   bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
+			 key, sizeof(*key),
+			 reloc_bufs, 3,
+			 &wm, sizeof(wm),
+			 NULL, NULL);
+
+   /* Emit WM program relocation */
+   dri_emit_reloc(bo,
+		  DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		  wm.thread0.grf_reg_count << 1,
+		  offsetof(struct brw_wm_unit_state, thread0),
+		  brw->wm.prog_bo);
+
+   /* Emit scratch space relocation */
+   if (key->total_scratch != 0) {
+      dri_emit_reloc(bo,
+		     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE,
+		     wm.thread2.per_thread_scratch_space,
+		     offsetof(struct brw_wm_unit_state, thread2),
+		     brw->wm.scratch_buffer);
+   }
+
+   /* Emit sampler state relocation */
+   if (key->sampler_count != 0) {
+      dri_emit_reloc(bo,
+		     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		     wm.wm4.stats_enable | (wm.wm4.sampler_count << 2),
+		     offsetof(struct brw_wm_unit_state, wm4),
+		     brw->wm.sampler_bo);
+   }
+
+   return bo;
+}
+
+
+/**
+ * Prepares the WM unit state: (re)allocates scratch space if the program
+ * needs it, then fetches the unit state from the cache, building it on a
+ * miss.  Returns the OR of the aperture-space checks on the buffers used.
+ */
+static int upload_wm_unit( struct brw_context *brw )
+{
+   struct intel_context *intel = &brw->intel;
+   struct brw_wm_unit_key key;
+   dri_bo *reloc_bufs[3];
+   int status = 0;
+   int n;
+
+   wm_unit_populate_key(brw, &key);
+
+   /* The scratch buffer is only ever replaced by a larger one; scratch is
+    * used so rarely that shrinking it again is not worth the effort.
+    */
+   assert(key.total_scratch <= 12 * 1024);
+   if (key.total_scratch) {
+      GLuint needed = key.total_scratch * key.max_threads;
+
+      if (brw->wm.scratch_buffer && needed > brw->wm.scratch_buffer->size) {
+	 dri_bo_unreference(brw->wm.scratch_buffer);
+	 brw->wm.scratch_buffer = NULL;
+      }
+      if (brw->wm.scratch_buffer == NULL)
+	 brw->wm.scratch_buffer = dri_bo_alloc(intel->bufmgr, "wm scratch",
+					       needed, 4096, DRM_BO_FLAG_MEM_TT);
+   }
+
+   reloc_bufs[0] = brw->wm.prog_bo;
+   reloc_bufs[1] = brw->wm.scratch_buffer;
+   reloc_bufs[2] = brw->wm.sampler_bo;
+
+   dri_bo_unreference(brw->wm.state_bo);
+   brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT, &key,
+				       sizeof(key), reloc_bufs, 3, NULL);
+   if (brw->wm.state_bo == NULL)
+      brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs);
+
+   for (n = 0; n < 3; n++)
+      if (reloc_bufs[n] != NULL)
+	 status |= dri_bufmgr_check_aperture_space(reloc_bufs[n]);
+   status |= dri_bufmgr_check_aperture_space(brw->wm.state_bo);
+   return status;
+}
 
 const struct brw_tracked_state brw_wm_unit = {
@@ -189,6 +290,6 @@ const struct brw_tracked_state brw_wm_unit = {
 		CACHE_NEW_WM_PROG | 
 		CACHE_NEW_SAMPLER)
    },
-   .update = upload_wm_unit
+   .prepare = upload_wm_unit,
 };
 
diff --git a/i965/brw_wm_surface_state.c b/i965/brw_wm_surface_state.c
index d24c618..2ba3eb4 100644
--- a/i965/brw_wm_surface_state.c
+++ b/i965/brw_wm_surface_state.c
@@ -69,7 +69,7 @@ static GLuint translate_tex_target( GLenum target )
 }
 
 
-static GLuint translate_tex_format( GLuint mesa_format )
+static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode )
 {
    switch( mesa_format ) {
    case MESA_FORMAT_L8:
@@ -114,11 +114,32 @@ static GLuint translate_tex_format( GLuint mesa_format )
       return BRW_SURFACEFORMAT_FXT1;
 
    case MESA_FORMAT_Z16:
-      return BRW_SURFACEFORMAT_L16_UNORM;
+      if (depth_mode == GL_INTENSITY) 
+	  return BRW_SURFACEFORMAT_I16_UNORM;
+      else if (depth_mode == GL_ALPHA)
+	  return BRW_SURFACEFORMAT_A16_UNORM;
+      else
+	  return BRW_SURFACEFORMAT_L16_UNORM;
 
-   case MESA_FORMAT_RGBA_DXT1:
    case MESA_FORMAT_RGB_DXT1:
-      return BRW_SURFACEFORMAT_DXT1_RGB;
+       return BRW_SURFACEFORMAT_DXT1_RGB;
+
+   case MESA_FORMAT_RGBA_DXT1:
+       return BRW_SURFACEFORMAT_BC1_UNORM;
+       
+   case MESA_FORMAT_RGBA_DXT3:
+       return BRW_SURFACEFORMAT_BC2_UNORM;
+       
+   case MESA_FORMAT_RGBA_DXT5:
+       return BRW_SURFACEFORMAT_BC3_UNORM;
+
+   case MESA_FORMAT_SRGBA8:
+      return BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB;
+   case MESA_FORMAT_SRGB_DXT1:
+      return BRW_SURFACEFORMAT_BC1_UNORM_SRGB;
+
+   case MESA_FORMAT_Z24_S8:
+      return BRW_SURFACEFORMAT_I24X8_UNORM;
 
    default:
       assert(0);
@@ -126,142 +147,343 @@ static GLuint translate_tex_format( GLuint mesa_format )
    }
 }
 
-static
-void brw_update_texture_surface( GLcontext *ctx, 
-				 GLuint unit,
-				 struct brw_surface_state *surf )
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct brw_context *brw = brw_context(ctx);
-   struct gl_texture_object *tObj = brw->attribs.Texture->Unit[unit]._Current;
-   struct intel_texture_object *intelObj = intel_texture_object(tObj);
-   struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
-
-   memset(surf, 0, sizeof(*surf));
+struct brw_wm_surface_key {
+   GLenum target, depthmode;
+   dri_bo *bo;
+   GLint format;
+   GLint first_level, last_level;
+   GLint width, height, depth;
+   GLint pitch, cpp;
+   GLboolean tiled;
+   GLuint offset;
+};
 
-   surf->ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;   
-   surf->ss0.surface_type = translate_tex_target(tObj->Target);
-   surf->ss0.surface_format = translate_tex_format(firstImage->TexFormat->MesaFormat);
+static dri_bo *
+brw_create_texture_surface( struct brw_context *brw,
+			    struct brw_wm_surface_key *key )
+{
+   struct brw_surface_state surf;
+   dri_bo *bo;
+
+   memset(&surf, 0, sizeof(surf));
+
+   surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+   surf.ss0.surface_type = translate_tex_target(key->target);
+
+   if (key->bo) 
+      surf.ss0.surface_format = translate_tex_format(key->format, key->depthmode);
+   else {
+     switch(key->depth) {
+     case 32: surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; break;
+     default:
+     case 24: surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8X8_UNORM; break;
+     case 16: surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; break;
+     }
+   }
 
    /* This is ok for all textures with channel width 8bit or less:
     */
-/*    surf->ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
-
-   /* BRW_NEW_LOCK */
-   surf->ss1.base_addr = bmBufferOffset(intel,
-					intelObj->mt->region->buffer);
-
-   surf->ss2.mip_count = intelObj->lastLevel - intelObj->firstLevel;
-   surf->ss2.width = firstImage->Width - 1;
-   surf->ss2.height = firstImage->Height - 1;
-
-   surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR;
-   surf->ss3.tiled_surface = intelObj->mt->region->tiled; /* always zero */
-   surf->ss3.pitch = (intelObj->mt->pitch * intelObj->mt->cpp) - 1;
-   surf->ss3.depth = firstImage->Depth - 1;
-
-   surf->ss4.min_lod = 0;
+/*    surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
+   if (key->bo)
+     surf.ss1.base_addr = key->bo->offset; /* reloc */
+   else
+     surf.ss1.base_addr = key->offset;
+
+   surf.ss2.mip_count = key->last_level - key->first_level;
+   surf.ss2.width = key->width - 1;
+   surf.ss2.height = key->height - 1;
+
+   surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+   surf.ss3.tiled_surface = key->tiled;
+   surf.ss3.pitch = (key->pitch * key->cpp) - 1;
+   surf.ss3.depth = key->depth - 1;
+
+   surf.ss4.min_lod = 0;
  
-   if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
-      surf->ss0.cube_pos_x = 1;
-      surf->ss0.cube_pos_y = 1;
-      surf->ss0.cube_pos_z = 1;
-      surf->ss0.cube_neg_x = 1;
-      surf->ss0.cube_neg_y = 1;
-      surf->ss0.cube_neg_z = 1;
+   if (key->target == GL_TEXTURE_CUBE_MAP) {
+      surf.ss0.cube_pos_x = 1;
+      surf.ss0.cube_pos_y = 1;
+      surf.ss0.cube_pos_z = 1;
+      surf.ss0.cube_neg_x = 1;
+      surf.ss0.cube_neg_y = 1;
+      surf.ss0.cube_neg_z = 1;
    }
-}
 
+   bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
+			 key, sizeof(*key),
+			 &key->bo, key->bo ? 1 : 0,
+			 &surf, sizeof(surf),
+			 NULL, NULL);
+   if (key->bo) {
+      /* Emit relocation to surface contents */
+      dri_emit_reloc(bo,
+		     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		     0,
+		     offsetof(struct brw_surface_state, ss1),
+		     key->bo);
+   }
+   return bo;
+}
 
+static int
+brw_update_texture_surface( GLcontext *ctx, GLuint unit )
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct gl_texture_object *tObj = brw->attribs.Texture->Unit[unit]._Current;
+   struct intel_texture_object *intelObj = intel_texture_object(tObj);
+   struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
+   struct brw_wm_surface_key key;
+   int ret = 0;
+
+   memset(&key, 0, sizeof(key));
+
+   if (intelObj->imageOverride) {
+      key.pitch = intelObj->pitchOverride / intelObj->mt->cpp;
+      key.depth = intelObj->depthOverride;
+      key.bo = NULL;
+      key.offset = intelObj->textureOffset;
+   } else {
+      key.format = firstImage->TexFormat->MesaFormat;
+      key.pitch = intelObj->mt->pitch;
+      key.depth = firstImage->Depth;
+      key.bo = intelObj->mt->region->buffer;
+      key.offset = 0;
+      ret |= dri_bufmgr_check_aperture_space(key.bo);
+   }
 
-#define OFFSET(TYPE, FIELD) ( (GLuint)&(((TYPE *)0)->FIELD) )
+   key.target = tObj->Target;
+   key.depthmode = tObj->DepthMode;
+   key.first_level = intelObj->firstLevel;
+   key.last_level = intelObj->lastLevel;
+   key.width = firstImage->Width;
+   key.height = firstImage->Height;
+   key.cpp = intelObj->mt->cpp;
+   key.tiled = intelObj->mt->region->tiled;
+
+   dri_bo_unreference(brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS]);
+   brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
+							       &key, sizeof(key),
+							       &key.bo, key.bo ? 1 : 0,
+							       NULL);
+   if (brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] == NULL) {
+      brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_create_texture_surface(brw, &key);
+   }
 
+   ret |= dri_bufmgr_check_aperture_space(brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS]);
+   return ret;
+}
 
-static void upload_wm_surfaces(struct brw_context *brw )
+/**
+ * Sets up a surface state structure to point at the given region.
+ * While it is only used for the front/back buffer currently, it should be
+ * usable for further buffers when doing ARB_draw_buffer support.
+ */
+static int
+brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
+			  unsigned int unit, GLboolean cached)
 {
-   GLcontext *ctx = &brw->intel.ctx;
-   struct intel_context *intel = &brw->intel;
-   struct brw_surface_binding_table bind;
-   GLuint i;
-
-   memcpy(&bind, &brw->wm.bind, sizeof(bind));
-      
-   {
+   dri_bo *region_bo = NULL;
+   int ret = 0;
+   struct {
+      unsigned int surface_type;
+      unsigned int surface_format;
+      unsigned int width, height, cpp;
+      GLubyte color_mask[4];
+      GLboolean tiled, color_blend;
+   } key;
+
+   memset(&key, 0, sizeof(key));
+
+   if (region != NULL) {
+      region_bo = region->buffer;
+
+      key.surface_type = BRW_SURFACE_2D;
+      if (region->cpp == 4)
+	 key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+      else
+	 key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
+      key.tiled = region->tiled;
+      key.width = region->pitch; /* XXX: not really! */
+      key.height = region->height;
+      key.cpp = region->cpp;
+
+      ret |= dri_bufmgr_check_aperture_space(region->buffer);
+   } else {
+      key.surface_type = BRW_SURFACE_NULL;
+      key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+      key.tiled = 0;
+      key.width = 1;
+      key.height = 1;
+      key.cpp = 4;
+   }
+   memcpy(key.color_mask, brw->attribs.Color->ColorMask,
+	  sizeof(key.color_mask));
+   key.color_blend = (!brw->attribs.Color->_LogicOpEnabled &&
+		      brw->attribs.Color->BlendEnabled);
+
+   dri_bo_unreference(brw->wm.surf_bo[unit]);
+   brw->wm.surf_bo[unit] = NULL;
+   if (cached) 
+       brw->wm.surf_bo[unit] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
+	       &key, sizeof(key),
+	       &region_bo, 1,
+	       NULL);
+
+   if (brw->wm.surf_bo[unit] == NULL) {
       struct brw_surface_state surf;
-      struct intel_region *region = brw->state.draw_region;
 
       memset(&surf, 0, sizeof(surf));
 
-      if (region->cpp == 4)
-	 surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
-      else 
-	 surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
+      surf.ss0.surface_format = key.surface_format;
+      surf.ss0.surface_type = key.surface_type;
+      if (region_bo != NULL)
+	 surf.ss1.base_addr = region_bo->offset; /* reloc */
 
-      surf.ss0.surface_type = BRW_SURFACE_2D;
+      surf.ss2.width = key.width - 1;
+      surf.ss2.height = key.height - 1;
+      surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+      surf.ss3.tiled_surface = key.tiled;
+      surf.ss3.pitch = (key.width * key.cpp) - 1;
 
       /* _NEW_COLOR */
-      surf.ss0.color_blend = (!brw->attribs.Color->_LogicOpEnabled &&
-			      brw->attribs.Color->BlendEnabled);
+      surf.ss0.color_blend = key.color_blend;
+      surf.ss0.writedisable_red =   !key.color_mask[0];
+      surf.ss0.writedisable_green = !key.color_mask[1];
+      surf.ss0.writedisable_blue =  !key.color_mask[2];
+      surf.ss0.writedisable_alpha = !key.color_mask[3];
+
+      /* Key size will never match key size for textures, so we're safe. */
+      brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
+					      &key, sizeof(key),
+					       &region_bo, 1,
+					       &surf, sizeof(surf),
+					       NULL, NULL);
+      if (region_bo != NULL) {
+	 dri_emit_reloc(brw->wm.surf_bo[unit],
+			DRM_BO_FLAG_MEM_TT |
+			DRM_BO_FLAG_READ |
+			DRM_BO_FLAG_WRITE,
+			0,
+			offsetof(struct brw_surface_state, ss1),
+			region_bo);
+      }
+   }
 
+   ret |= dri_bufmgr_check_aperture_space(brw->wm.surf_bo[unit]);
 
-      surf.ss0.writedisable_red =   !brw->attribs.Color->ColorMask[0];
-      surf.ss0.writedisable_green = !brw->attribs.Color->ColorMask[1];
-      surf.ss0.writedisable_blue =  !brw->attribs.Color->ColorMask[2];
-      surf.ss0.writedisable_alpha = !brw->attribs.Color->ColorMask[3];
+   return ret;
+}
 
-      /* BRW_NEW_LOCK */
-      surf.ss1.base_addr = bmBufferOffset(&brw->intel, region->buffer);
 
+/**
+ * Constructs the binding table for the WM surface state, which maps unit
+ * numbers to surface state objects.
+ */
+static dri_bo *
+brw_wm_get_binding_table(struct brw_context *brw)
+{
+   dri_bo *bind_bo;
+
+   bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND,
+			      NULL, 0,
+			      brw->wm.surf_bo, brw->wm.nr_surfaces,
+			      NULL);
+
+   if (bind_bo == NULL) {
+      GLuint data_size = brw->wm.nr_surfaces * sizeof(GLuint);
+      uint32_t *data = malloc(data_size);
+      int i;
+
+      for (i = 0; i < brw->wm.nr_surfaces; i++)
+         if (brw->wm.surf_bo[i])
+            data[i] = brw->wm.surf_bo[i]->offset;
+         else
+            data[i] = 0;
+
+      bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND,
+				  NULL, 0,
+				  brw->wm.surf_bo, brw->wm.nr_surfaces,
+				  data, data_size,
+				  NULL, NULL);
+
+      /* Emit binding table relocations to surface state */
+      for (i = 0; i < BRW_WM_MAX_SURF; i++) {
+	 if (brw->wm.surf_bo[i] != NULL) {
+	    dri_emit_reloc(bind_bo,
+			   DRM_BO_FLAG_MEM_TT |
+			   DRM_BO_FLAG_READ |
+			   DRM_BO_FLAG_WRITE,
+			   0,
+			   i * sizeof(GLuint),
+			   brw->wm.surf_bo[i]);
+	 }
+      }
 
-      surf.ss2.width = region->pitch - 1; /* XXX: not really! */
-      surf.ss2.height = region->height - 1;
-      surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
-      surf.ss3.tiled_surface = region->tiled;
-      surf.ss3.pitch = (region->pitch * region->cpp) - 1;
+      free(data);
+   }
 
-      brw->wm.bind.surf_ss_offset[0] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf );
-      brw->wm.nr_surfaces = 1;
+   return bind_bo;
+}
+
+static int prepare_wm_surfaces(struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct intel_context *intel = &brw->intel;
+   GLuint i, ret;
+
+   if (brw->state.nr_draw_regions  > 1) {
+      for (i = 0; i < brw->state.nr_draw_regions; i++) {
+         ret = brw_update_region_surface(brw, brw->state.draw_regions[i], i,
+                                         GL_FALSE);
+         if (ret)
+            return ret;
+      }
+   }else {
+      ret = brw_update_region_surface(brw, brw->state.draw_regions[0], 0, GL_TRUE);
+      if (ret)
+         return ret;
    }
 
+   brw->wm.nr_surfaces = MAX_DRAW_BUFFERS;
 
    for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
       struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[i];
 
-      /* _NEW_TEXTURE, BRW_NEW_TEXDATA 
-       */
-      if (texUnit->_ReallyEnabled &&
-	  intel_finalize_mipmap_tree(intel,texUnit->_Current)) {
-
-	 struct brw_surface_state surf;
-
-	 brw_update_texture_surface(ctx, i, &surf);
-
-	 brw->wm.bind.surf_ss_offset[i+1] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf );
-	 brw->wm.nr_surfaces = i+2;
-      }
-      else if( texUnit->_ReallyEnabled &&
-	       texUnit->_Current == intel->frame_buffer_texobj )
-      {
-	 brw->wm.bind.surf_ss_offset[i+1] = brw->wm.bind.surf_ss_offset[0];
-	 brw->wm.nr_surfaces = i+2;
-      }    
-      else {
-	 brw->wm.bind.surf_ss_offset[i+1] = 0;
+      /* _NEW_TEXTURE, BRW_NEW_TEXDATA */
+      if(texUnit->_ReallyEnabled) {
+         if (texUnit->_Current == intel->frame_buffer_texobj) {
+            dri_bo_unreference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]);
+            brw->wm.surf_bo[i+MAX_DRAW_BUFFERS] = brw->wm.surf_bo[0];
+            dri_bo_reference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]);
+            brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1;
+         } else {
+            ret = brw_update_texture_surface(ctx, i);
+            brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1;
+
+            if (ret)
+               return ret;
+         }
+      } else {
+         dri_bo_unreference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]);
+         brw->wm.surf_bo[i+MAX_DRAW_BUFFERS] = NULL;
       }
+
    }
 
-   brw->wm.bind_ss_offset = brw_cache_data( &brw->cache[BRW_SS_SURF_BIND],
-					    &brw->wm.bind );
+   dri_bo_unreference(brw->wm.bind_bo);
+   brw->wm.bind_bo = brw_wm_get_binding_table(brw);
+
+   return dri_bufmgr_check_aperture_space(brw->wm.bind_bo);
 }
 
+
 const struct brw_tracked_state brw_wm_surfaces = {
    .dirty = {
       .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS,
-      .brw = (BRW_NEW_CONTEXT | 
-	      BRW_NEW_LOCK),	/* required for bmBufferOffset */
+      .brw = BRW_NEW_CONTEXT,
       .cache = 0
    },
-   .update = upload_wm_surfaces
+   .prepare = prepare_wm_surfaces,
 };
 
 
diff --git a/i965/bufmgr.h b/i965/bufmgr.h
deleted file mode 100644
index e748c0d..0000000
--- a/i965/bufmgr.h
+++ /dev/null
@@ -1,215 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef BUFMGR_H
-#define BUFMGR_H
-
-#include "intel_context.h"
-
-
-/* The buffer manager context.  Opaque.
- */
-struct bufmgr;
-struct buffer;
-
-
-struct bufmgr *bm_fake_intel_Attach( struct intel_context *intel ); 
-
-/* Flags for validate and other calls.  If both NO_UPLOAD and NO_EVICT
- * are specified, ValidateBuffers is essentially a query.
- */
-#define BM_MEM_LOCAL   0x1
-#define BM_MEM_AGP     0x2
-#define BM_MEM_VRAM    0x4	/* not yet used */
-#define BM_WRITE       0x8	/* not yet used */
-#define BM_READ        0x10	/* not yet used */
-#define BM_NO_UPLOAD   0x20
-#define BM_NO_EVICT    0x40
-#define BM_NO_MOVE     0x80	/* not yet used */
-#define BM_NO_ALLOC    0x100	/* legacy "fixed" buffers only */
-#define BM_CLIENT      0x200	/* for map - pointer will be accessed
-				 * without dri lock */
-
-#define BM_MEM_MASK (BM_MEM_LOCAL|BM_MEM_AGP|BM_MEM_VRAM)
-
-
-
-
-/* Create a pool of a given memory type, from a certain offset and a
- * certain size.  
- *
- * Also passed in is a virtual pointer to the start of the pool.  This
- * is useful in the faked-out version in i915 so that MapBuffer can
- * return a pointer to a buffer residing in AGP space.  
- *
- * Flags passed into a pool are inherited by all buffers allocated in
- * that pool.  So pools representing the static front,back,depth
- * buffer allocations should have MEM_AGP|NO_UPLOAD|NO_EVICT|NO_MOVE to match
- * the behaviour of the legacy allocations.
- *
- * Returns -1 for failure, pool number for success.
- */
-int bmInitPool( struct intel_context *, 
-		unsigned long low_offset,
-		void *low_virtual,
-		unsigned long size,
-		unsigned flags);
-
-
-/* Stick closely to ARB_vbo semantics - they're well defined and
- * understood, and drivers can just pass the calls through without too
- * much thunking.
- */
-void bmGenBuffers(struct intel_context *, const char *, unsigned n, struct buffer **buffers,
-		  int align );
-void bmDeleteBuffers(struct intel_context *, unsigned n, struct buffer **buffers);
-
-
-/* Hook to inform faked buffer manager about fixed-position
- * front,depth,back buffers.  These may move to a fully memory-managed
- * scheme, or they may continue to be managed as is.
- */
-struct buffer *bmGenBufferStatic(struct intel_context *,
-				 unsigned pool);
-
-/* On evict, buffer manager will call invalidate_cb() to note that the
- * buffer needs to be reloaded.
- *
- * Buffer is uploaded by calling bmMapBuffer() and copying data into
- * the returned pointer.
- *
- * This is basically a big hack to get some more performance by
- * turning off backing store for buffers where we either have it
- * already (textures) or don't need it (batch buffers, temporary
- * vbo's).
- */
-void bmBufferSetInvalidateCB(struct intel_context *,
-			     struct buffer *buf,
-			     void (*invalidate_cb)( struct intel_context *, void *ptr ),
-			     void *ptr,
-			     GLboolean dont_fence_subdata);
-
-
-/* The driver has more intimate knowledge of the hardare than a GL
- * client would, so flags here is more proscriptive than the usage
- * values in the ARB_vbo interface:
- */
-int bmBufferData(struct intel_context *, 
-		  struct buffer *buf, 
-		  unsigned size, 
-		  const void *data, 
-		  unsigned flags );
-
-int bmBufferSubData(struct intel_context *, 
-		     struct buffer *buf, 
-		     unsigned offset, 
-		     unsigned size, 
-		     const void *data );
-
-
-int bmBufferDataAUB(struct intel_context *, 
-		     struct buffer *buf, 
-		     unsigned size, 
-		     const void *data, 
-		     unsigned flags,
-		     unsigned aubtype,
-		     unsigned aubsubtype );
-
-int bmBufferSubDataAUB(struct intel_context *, 
-			struct buffer *buf, 
-			unsigned offset, 
-			unsigned size, 
-			const void *data,
-			unsigned aubtype,
-			unsigned aubsubtype );
-
-
-/* In this version, taking the offset will provoke an upload on
- * buffers not already resident in AGP:
- */
-unsigned bmBufferOffset(struct intel_context *, 
-			struct buffer *buf);
-
-
-/* Extract data from the buffer:
- */
-void bmBufferGetSubData(struct intel_context *, 
-			struct buffer *buf, 
-			unsigned offset, 
-			unsigned size, 
-			void *data );
-
-void *bmMapBuffer( struct intel_context *,
-		   struct buffer *buf, 
-		   unsigned access );
-
-void bmUnmapBuffer( struct intel_context *,
-		    struct buffer *buf );
-
-void bmUnmapBufferAUB( struct intel_context *,
-		       struct buffer *buf,
-		       unsigned aubtype,
-		       unsigned aubsubtype );
-
-
-/* Pertains to all buffers who's offset has been taken since the last
- * fence or release.
- */
-int bmValidateBuffers( struct intel_context * );
-void bmReleaseBuffers( struct intel_context * );
-
-GLuint bmCtxId( struct intel_context *intel );
-
-
-GLboolean bmError( struct intel_context * );
-void bmEvictAll( struct intel_context * );
-
-void *bmFindVirtual( struct intel_context *intel,
-		     unsigned int offset,
-		     size_t sz );
-
-/* This functionality is used by the buffer manager, not really sure
- * if we need to be exposing it in this way, probably libdrm will
- * offer equivalent calls.
- *
- * For now they can stay, but will likely change/move before final:
- */
-unsigned bmSetFence( struct intel_context * );
-unsigned bmSetFenceLock( struct intel_context * );
-unsigned bmLockAndFence( struct intel_context *intel );
-int bmTestFence( struct intel_context *, unsigned fence );
-void bmFinishFence( struct intel_context *, unsigned fence );
-void bmFinishFenceLock( struct intel_context *, unsigned fence );
-
-void bm_fake_NotifyContendedLockTake( struct intel_context * );
-
-extern int INTEL_DEBUG;
-#define DEBUG_BUFMGR 0x10000000
-
-#define DBG(...)  do { if (INTEL_DEBUG & DEBUG_BUFMGR) _mesa_printf(__VA_ARGS__); } while(0)
-
-#endif
diff --git a/i965/bufmgr_fake.c b/i965/bufmgr_fake.c
deleted file mode 100644
index fb4903d..0000000
--- a/i965/bufmgr_fake.c
+++ /dev/null
@@ -1,1463 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/* Originally a fake version of the buffer manager so that we can
- * prototype the changes in a driver fairly quickly, has been fleshed
- * out to a fully functional interim solution.
- *
- * Basically wraps the old style memory management in the new
- * programming interface, but is more expressive and avoids many of
- * the bugs in the old texture manager.
- */
-#include "bufmgr.h"
-
-#include "intel_context.h"
-#include "intel_ioctl.h"
-#include "intel_batchbuffer.h"
-
-#include "simple_list.h"
-#include "mm.h"
-#include "imports.h"
-
-#define BM_POOL_MAX 8
-
-/* Internal flags:
- */
-#define BM_NO_BACKING_STORE   0x2000
-#define BM_NO_FENCE_SUBDATA   0x4000
-
-
-static int check_fenced( struct intel_context *intel );
-
-static int nr_attach = 0;
-
-/* Wrapper around mm.c's mem_block, which understands that you must
- * wait for fences to expire before memory can be freed.  This is
- * specific to our use of memcpy for uploads - an upload that was
- * processed through the command queue wouldn't need to care about
- * fences.
- */
-struct block {
-   struct block *next, *prev;
-   struct pool *pool;		/* BM_MEM_AGP */
-   struct mem_block *mem;	/* BM_MEM_AGP */
-
-   unsigned referenced:1;
-   unsigned on_hardware:1;
-   unsigned fenced:1;	
-   
-
-   unsigned fence;		/* BM_MEM_AGP, Split to read_fence, write_fence */
-
-   struct buffer *buf;
-   void *virtual;
-};
-
-
-struct buffer {
-   unsigned id;			/* debug only */
-   const char *name;
-   unsigned size;
-
-   unsigned mapped:1;		
-   unsigned dirty:1;		
-   unsigned aub_dirty:1;	
-   unsigned alignment:13;
-   unsigned flags:16;
-
-   struct block *block;
-   void *backing_store;
-   void (*invalidate_cb)( struct intel_context *, void * );
-   void *invalidate_ptr;
-};
-
-struct pool {
-   unsigned size;
-   unsigned low_offset;
-   struct buffer *static_buffer;
-   unsigned flags;
-   struct mem_block *heap;
-   void *virtual;
-   struct block lru;		/* only allocated, non-fence-pending blocks here */
-};
-
-struct bufmgr {
-   _glthread_Mutex mutex;	/**< for thread safety */
-   struct pool pool[BM_POOL_MAX];
-   unsigned nr_pools;
-
-   unsigned buf_nr;		/* for generating ids */
-
-   struct block referenced;	/* after bmBufferOffset */
-   struct block on_hardware;	/* after bmValidateBuffers */
-   struct block fenced;		/* after bmFenceBuffers (mi_flush, emit irq, write dword) */
-                                /* then to pool->lru or free() */
-
-   unsigned ctxId;
-   unsigned last_fence;
-   unsigned free_on_hardware;
-
-   unsigned fail:1;
-   unsigned need_fence:1;
-};
-
-#define MAXFENCE 0x7fffffff
-
-static GLboolean FENCE_LTE( unsigned a, unsigned b )
-{
-   if (a == b)
-      return GL_TRUE;
-
-   if (a < b && b - a < (1<<24))
-      return GL_TRUE;
-
-   if (a > b && MAXFENCE - a + b < (1<<24))
-      return GL_TRUE;
-
-   return GL_FALSE;
-}
-
-int bmTestFence( struct intel_context *intel, unsigned fence )
-{
-   /* Slight problem with wrap-around:
-    */
-   return fence == 0 || FENCE_LTE(fence, intel->sarea->last_dispatch);
-}
-
-#define LOCK(bm) \
-  int dolock = nr_attach > 1; \
-  if (dolock) _glthread_LOCK_MUTEX(bm->mutex)
-
-#define UNLOCK(bm) \
-  if (dolock) _glthread_UNLOCK_MUTEX(bm->mutex)
-
-
-
-static GLboolean alloc_from_pool( struct intel_context *intel,				
-				  unsigned pool_nr,
-				  struct buffer *buf )
-{
-   struct bufmgr *bm = intel->bm;
-   struct pool *pool = &bm->pool[pool_nr];
-   struct block *block = (struct block *)calloc(sizeof *block, 1);
-   GLuint sz, align = (1<<buf->alignment);
-
-   if (!block)
-      return GL_FALSE;
-
-   sz = (buf->size + align-1) & ~(align-1);
-
-   block->mem = mmAllocMem(pool->heap, 
-			   sz, 
-			   buf->alignment, 0);
-   if (!block->mem) {
-      free(block);
-      return GL_FALSE;
-   }
-
-   make_empty_list(block);
-
-   /* Insert at head or at tail???   
-    */
-   insert_at_tail(&pool->lru, block);
-
-   block->pool = pool;
-   block->virtual = pool->virtual + block->mem->ofs;
-   block->buf = buf;
-
-   buf->block = block;
-
-   return GL_TRUE;
-}
-
-
-
-
-
-
-
-
-/* Release the card storage associated with buf:
- */
-static void free_block( struct intel_context *intel, struct block *block )
-{
-   DBG("free block %p\n", block);
-
-   if (!block) 
-      return;
-
-   check_fenced(intel);
-
-   if (block->referenced) {
-      _mesa_printf("tried to free block on referenced list\n");
-      assert(0);
-   }
-   else if (block->on_hardware) {
-      block->buf = NULL;
-      intel->bm->free_on_hardware += block->mem->size;
-   }
-   else if (block->fenced) {
-      block->buf = NULL;
-   }
-   else {
-      DBG("    - free immediately\n");
-      remove_from_list(block);
-
-      mmFreeMem(block->mem);
-      free(block);
-   }
-}
-
-
-static void alloc_backing_store( struct intel_context *intel, struct buffer *buf )
-{
-   assert(!buf->backing_store);
-   assert(!(buf->flags & (BM_NO_EVICT|BM_NO_BACKING_STORE)));
-
-   buf->backing_store = ALIGN_MALLOC(buf->size, 64);
-}
-
-static void free_backing_store( struct intel_context *intel, struct buffer *buf )
-{
-   assert(!(buf->flags & (BM_NO_EVICT|BM_NO_BACKING_STORE)));
-	  
-   if (buf->backing_store) {
-      ALIGN_FREE(buf->backing_store);
-      buf->backing_store = NULL;
-   }
-}
-
-
-
-
-
-
-static void set_dirty( struct intel_context *intel,
-			      struct buffer *buf )
-{
-   if (buf->flags & BM_NO_BACKING_STORE)
-      buf->invalidate_cb(intel, buf->invalidate_ptr);
-
-   assert(!(buf->flags & BM_NO_EVICT));
-
-   DBG("set_dirty - buf %d\n", buf->id);
-   buf->dirty = 1;
-}
-
-
-static int evict_lru( struct intel_context *intel, GLuint max_fence, GLuint *pool )
-{
-   struct bufmgr *bm = intel->bm;
-   struct block *block, *tmp;
-   int i;
-
-   DBG("%s\n", __FUNCTION__);
-
-   for (i = 0; i < bm->nr_pools; i++) {
-      if (!(bm->pool[i].flags & BM_NO_EVICT)) {
-	 foreach_s(block, tmp, &bm->pool[i].lru) {
-
-	    if (block->buf &&
-		(block->buf->flags & BM_NO_FENCE_SUBDATA))
-	       continue;
-
-	    if (block->fence && max_fence &&
-		!FENCE_LTE(block->fence, max_fence))
-	       return 0;
-
-	    set_dirty(intel, block->buf);
-	    block->buf->block = NULL;
-
-	    free_block(intel, block);
-	    *pool = i;
-	    return 1;
-	 }
-      }
-   }
-
-
-   return 0;
-}
-
-
-#define foreach_s_rev(ptr, t, list)   \
-        for(ptr=(list)->prev,t=(ptr)->prev; list != ptr; ptr=t, t=(t)->prev)
-
-static int evict_mru( struct intel_context *intel, GLuint *pool )
-{
-   struct bufmgr *bm = intel->bm;
-   struct block *block, *tmp;
-   int i;
-
-   DBG("%s\n", __FUNCTION__);
-
-   for (i = 0; i < bm->nr_pools; i++) {
-      if (!(bm->pool[i].flags & BM_NO_EVICT)) {
-	 foreach_s_rev(block, tmp, &bm->pool[i].lru) {
-
-	    if (block->buf &&
-		(block->buf->flags & BM_NO_FENCE_SUBDATA))
-	       continue;
-
-	    set_dirty(intel, block->buf);
-	    block->buf->block = NULL;
-
-	    free_block(intel, block);
-	    *pool = i;
-	    return 1;
-	 }
-      }
-   }
-
-
-   return 0;
-}
-
-
-static int check_fenced( struct intel_context *intel )
-{
-   struct bufmgr *bm = intel->bm;
-   struct block *block, *tmp;
-   int ret = 0;
-
-   foreach_s(block, tmp, &bm->fenced ) {
-      assert(block->fenced);
-
-      if (bmTestFence(intel, block->fence)) {
-
-	 block->fenced = 0;
-
-	 if (!block->buf) {
-	    DBG("delayed free: offset %x sz %x\n", block->mem->ofs, block->mem->size);
-	    remove_from_list(block);
-	    mmFreeMem(block->mem);
-	    free(block);
-	 }
-	 else {
-	    DBG("return to lru: offset %x sz %x\n", block->mem->ofs, block->mem->size);
-	    move_to_tail(&block->pool->lru, block);
-	 }
-
-	 ret = 1;
-      }
-      else {
-	 /* Blocks are ordered by fence, so if one fails, all from
-	  * here will fail also:
-	  */
-	 break;
-      }
-   }
-
-   /* Also check the referenced list: 
-    */
-   foreach_s(block, tmp, &bm->referenced ) {
-      if (block->fenced &&
-	  bmTestFence(intel, block->fence)) {
-	 block->fenced = 0;
-      }
-   }
-
-   
-   DBG("%s: %d\n", __FUNCTION__, ret);
-   return ret;
-}
-
-
-
-static void fence_blocks( struct intel_context *intel,
-			  unsigned fence )
-{
-   struct bufmgr *bm = intel->bm;
-   struct block *block, *tmp;
-
-   foreach_s (block, tmp, &bm->on_hardware) {
-      DBG("Fence block %p (sz 0x%x buf %p) with fence %d\n", block, 
-	  block->mem->size, block->buf, fence);
-      block->fence = fence;
-
-      block->on_hardware = 0;
-      block->fenced = 1;
-
-      /* Move to tail of pending list here
-       */
-      move_to_tail(&bm->fenced, block);
-   }
-
-   /* Also check the referenced list:
-    */  
-   foreach_s (block, tmp, &bm->referenced) {
-      if (block->on_hardware) {
-	 DBG("Fence block %p (sz 0x%x buf %p) with fence %d\n", block, 
-	     block->mem->size, block->buf, fence);
-	 
-	 block->fence = fence;
-	 block->on_hardware = 0;
-	 block->fenced = 1;
-      }
-   }
-
-
-   bm->last_fence = fence;
-   assert(is_empty_list(&bm->on_hardware));
-}
-
-
-
-
-static GLboolean alloc_block( struct intel_context *intel,
-			      struct buffer *buf )
-{
-   struct bufmgr *bm = intel->bm;
-   int i;
-
-   assert(intel->locked);
-
-   DBG("%s 0x%x bytes (%s)\n", __FUNCTION__, buf->size, buf->name);
-
-   for (i = 0; i < bm->nr_pools; i++) {
-      if (!(bm->pool[i].flags & BM_NO_ALLOC) &&
-	  alloc_from_pool(intel, i, buf)) {
-
-	 DBG("%s --> 0x%x (sz %x)\n", __FUNCTION__, 
-	     buf->block->mem->ofs, buf->block->mem->size);
-	 
-	 return GL_TRUE;
-      }
-   }
-
-   DBG("%s --> fail\n", __FUNCTION__);
-   return GL_FALSE;   
-}
-
-
-static GLboolean evict_and_alloc_block( struct intel_context *intel,
-					struct buffer *buf )
-{
-   GLuint pool;
-   struct bufmgr *bm = intel->bm;
-
-   assert(buf->block == NULL);
-
-   /* Put a cap on the amount of free memory we'll allow to accumulate
-    * before emitting a fence.
-    */
-   if (bm->free_on_hardware > 1 * 1024 * 1024) {
-      DBG("fence for free space: %x\n", bm->free_on_hardware);
-      bmSetFence(intel);
-   }
-
-   /* Search for already free memory:
-    */
-   if (alloc_block(intel, buf))
-      return GL_TRUE;
-
-   /* Look for memory that may have become free: 
-    */
-   if (check_fenced(intel) &&
-       alloc_block(intel, buf))
-      return GL_TRUE;
-
-   /* Look for memory blocks not used for >1 frame:
-    */
-   while (evict_lru(intel, intel->second_last_swap_fence, &pool))
-      if (alloc_from_pool(intel, pool, buf))
-	 return GL_TRUE;
-
-   /* If we're not thrashing, allow lru eviction to dig deeper into
-    * recently used textures.  We'll probably be thrashing soon:
-    */
-   if (!intel->thrashing) {
-      while (evict_lru(intel, 0, &pool))
-	 if (alloc_from_pool(intel, pool, buf))
-	    return GL_TRUE;
-   }
-
-   /* Keep thrashing counter alive?
-    */
-   if (intel->thrashing)
-      intel->thrashing = 20;
-
-   /* Wait on any already pending fences - here we are waiting for any
-    * freed memory that has been submitted to hardware and fenced to
-    * become available:
-    */
-   while (!is_empty_list(&bm->fenced)) {
-      GLuint fence = bm->fenced.next->fence;
-      bmFinishFence(intel, fence);
-
-      if (alloc_block(intel, buf))
-	 return GL_TRUE;
-   }
-
-
-   /* 
-    */
-   if (!is_empty_list(&bm->on_hardware)) {
-      bmSetFence(intel);
-
-      while (!is_empty_list(&bm->fenced)) {
-	 GLuint fence = bm->fenced.next->fence;
-	 bmFinishFence(intel, fence);
-      }
-
-      if (!intel->thrashing) {	 
-	 DBG("thrashing\n");
-      }
-      intel->thrashing = 20; 
-
-      if (alloc_block(intel, buf))
-	 return GL_TRUE;
-   }
-
-   while (evict_mru(intel, &pool))
-      if (alloc_from_pool(intel, pool, buf))
-	 return GL_TRUE;
-
-   DBG("%s 0x%x bytes failed\n", __FUNCTION__, buf->size);
-
-   assert(is_empty_list(&bm->on_hardware));
-   assert(is_empty_list(&bm->fenced));
-
-   return GL_FALSE;
-}
-
-
-
-
-
-
-
-
-
-
-/***********************************************************************
- * Public functions
- */
-
-
-/* The initialization functions are skewed in the fake implementation.
- * This call would be to attach to an existing manager, rather than to
- * create a local one.
- */
-struct bufmgr *bm_fake_intel_Attach( struct intel_context *intel )
-{
-   _glthread_DECLARE_STATIC_MUTEX(initMutex);   
-   static struct bufmgr bm;
-   
-   /* This function needs a mutex of its own...
-    */
-   _glthread_LOCK_MUTEX(initMutex);
-
-   if (nr_attach == 0) {
-      _glthread_INIT_MUTEX(bm.mutex);
-
-      make_empty_list(&bm.referenced);
-      make_empty_list(&bm.fenced);
-      make_empty_list(&bm.on_hardware);
-      
-      /* The context id of any of the share group.  This won't be used
-       * in communication with the kernel, so it doesn't matter if
-       * this context is eventually deleted.
-       */
-      bm.ctxId = intel->hHWContext;
-   }
-
-   nr_attach++;
-
-   _glthread_UNLOCK_MUTEX(initMutex);
-
-   return &bm;
-}
-
-
-
-/* The virtual pointer would go away in a true implementation.
- */
-int bmInitPool( struct intel_context *intel, 
-		unsigned long low_offset,
-		void *low_virtual,
-		unsigned long size,
-		unsigned flags)
-{
-   struct bufmgr *bm = intel->bm;
-   int retval = 0;
-
-   LOCK(bm);
-   {
-      GLuint i;
-
-      for (i = 0; i < bm->nr_pools; i++) {
-	 if (bm->pool[i].low_offset == low_offset &&
-	     bm->pool[i].size == size) {
-	    retval = i;
-	    goto out;
-	 }
-      }
-
-
-      if (bm->nr_pools >= BM_POOL_MAX)
-	 retval = -1;
-      else {
-	 i = bm->nr_pools++;
-   
-	 DBG("bmInitPool %d low_offset %x sz %x\n",
-	     i, low_offset, size);
-   
-	 bm->pool[i].low_offset = low_offset;
-	 bm->pool[i].size = size;
-	 bm->pool[i].heap = mmInit( low_offset, size );
-	 bm->pool[i].virtual = low_virtual - low_offset;
-	 bm->pool[i].flags = flags;
-   
-	 make_empty_list(&bm->pool[i].lru);
-	 
-	 retval = i;
-      }
-   }
- out:
-   UNLOCK(bm);
-   return retval;
-}
-
-static struct buffer *do_GenBuffer(struct intel_context *intel, const char *name, int align)
-{
-   struct bufmgr *bm = intel->bm;
-   struct buffer *buf = calloc(sizeof(*buf), 1);
-
-   buf->id = ++bm->buf_nr;
-   buf->name = name;
-   buf->alignment = align;	
-   buf->flags = BM_MEM_AGP|BM_MEM_VRAM|BM_MEM_LOCAL;
-
-   return buf;
-}
-
-
-void *bmFindVirtual( struct intel_context *intel,
-		     unsigned int offset,
-		     size_t sz )
-{
-   struct bufmgr *bm = intel->bm;
-   int i;
-
-   for (i = 0; i < bm->nr_pools; i++)
-      if (offset >= bm->pool[i].low_offset &&
-	  offset + sz <= bm->pool[i].low_offset + bm->pool[i].size)
-	 return bm->pool[i].virtual + offset;
-
-   return NULL;
-}
- 
-
-void bmGenBuffers(struct intel_context *intel, 
-		  const char *name, unsigned n, 
-		  struct buffer **buffers,
-		  int align )
-{
-   struct bufmgr *bm = intel->bm;
-   LOCK(bm);
-   {
-      int i;
-
-      for (i = 0; i < n; i++)
-	 buffers[i] = do_GenBuffer(intel, name, align);
-   }
-   UNLOCK(bm);
-}
-
-
-void bmDeleteBuffers(struct intel_context *intel, unsigned n, struct buffer **buffers)
-{
-   struct bufmgr *bm = intel->bm;
-
-   LOCK(bm);
-   {
-      unsigned i;
-   
-      for (i = 0; i < n; i++) {
-	 struct buffer *buf = buffers[i];
-
-	 if (buf && buf->block)
-	    free_block(intel, buf->block);
-
-	 if (buf && buf->backing_store)
-	    free_backing_store(intel, buf);
-
-	 if (buf) 
-	    free(buf);	 
-      }
-   }
-   UNLOCK(bm);
-}
-
-
-
-
-/* Hook to inform faked buffer manager about fixed-position
- * front,depth,back buffers.  These may move to a fully memory-managed
- * scheme, or they may continue to be managed as is.  It will probably
- * be useful to pass a fixed offset here one day.
- */
-struct buffer *bmGenBufferStatic(struct intel_context *intel,
-				 unsigned pool )
-{
-   struct bufmgr *bm = intel->bm;
-   struct buffer *buf;
-   LOCK(bm);
-   {
-      assert(bm->pool[pool].flags & BM_NO_EVICT);
-      assert(bm->pool[pool].flags & BM_NO_MOVE);
-
-      if (bm->pool[pool].static_buffer)
-	 buf = bm->pool[pool].static_buffer;
-      else {
-	 buf = do_GenBuffer(intel, "static", 12);
-   
-	 bm->pool[pool].static_buffer = buf;
-	 assert(!buf->block);
-
-	 buf->size = bm->pool[pool].size;
-	 buf->flags = bm->pool[pool].flags;
-	 buf->alignment = 12;
-	 
-	 if (!alloc_from_pool(intel, pool, buf))
-	    assert(0);
-      }
-   }
-   UNLOCK(bm);
-   return buf;
-}
-
-
-static void wait_quiescent(struct intel_context *intel,
-			   struct block *block)
-{
-   if (block->on_hardware) {
-      assert(intel->bm->need_fence);
-      bmSetFence(intel);
-      assert(!block->on_hardware);
-   }
-
-
-   if (block->fenced) {
-      bmFinishFence(intel, block->fence);
-   }
-
-   assert(!block->on_hardware);
-   assert(!block->fenced);
-}
-
-
-
-/* If buffer size changes, free and reallocate.  Otherwise update in
- * place.
- */
-int bmBufferData(struct intel_context *intel, 
-		 struct buffer *buf, 
-		 unsigned size, 
-		 const void *data, 
-		 unsigned flags )
-{
-   struct bufmgr *bm = intel->bm;
-   int retval = 0;
-
-   LOCK(bm);
-   {
-      DBG("bmBufferData %d sz 0x%x data: %p\n", buf->id, size, data);
-
-      assert(!buf->mapped);
-
-      if (buf->block) {
-	 struct block *block = buf->block;
-
-	 /* Optimistic check to see if we can reuse the block -- not
-	  * required for correctness:
-	  */
-	 if (block->fenced)
-	    check_fenced(intel);
-
-	 if (block->on_hardware ||
-	     block->fenced ||
-	     (buf->size && buf->size != size) || 
-	     (data == NULL)) {
-
-	    assert(!block->referenced);
-
-	    free_block(intel, block);
-	    buf->block = NULL;
-	    buf->dirty = 1;
-	 }
-      }
-
-      buf->size = size;
-      if (buf->block) {
-	 assert (buf->block->mem->size >= size);
-      }
-
-      if (buf->flags & (BM_NO_BACKING_STORE|BM_NO_EVICT)) {
-
-	 assert(intel->locked || data == NULL);
-
-	 if (data != NULL) {
-	    if (!buf->block && !evict_and_alloc_block(intel, buf)) {
-	       bm->fail = 1;
-	       retval = -1;
-	       goto out;
-	    }
-
-	    wait_quiescent(intel, buf->block);
-
-	    DBG("bmBufferData %d offset 0x%x sz 0x%x\n", 
-		buf->id, buf->block->mem->ofs, size);
-
-	    assert(buf->block->virtual == buf->block->pool->virtual + buf->block->mem->ofs);
-
-	    do_memcpy(buf->block->virtual, data, size);
-	 }
-	 buf->dirty = 0;
-      }
-      else {
-	       DBG("%s - set buf %d dirty\n", __FUNCTION__, buf->id);
-	 set_dirty(intel, buf);
-	 free_backing_store(intel, buf);
-   
-	 if (data != NULL) {      
-	    alloc_backing_store(intel, buf);
-	    do_memcpy(buf->backing_store, data, size);
-	 }
-      }
-   }
- out:
-   UNLOCK(bm);
-   return retval;
-}
-
-
-/* Update the buffer in place, in whatever space it is currently resident:
- */
-int bmBufferSubData(struct intel_context *intel, 
-		     struct buffer *buf, 
-		     unsigned offset, 
-		     unsigned size, 
-		     const void *data )
-{
-   struct bufmgr *bm = intel->bm;
-   int retval = 0;
-
-   if (size == 0) 
-      return 0;
-
-   LOCK(bm); 
-   {
-      DBG("bmBufferSubdata %d offset 0x%x sz 0x%x\n", buf->id, offset, size);
-      
-      assert(offset+size <= buf->size);
-
-      if (buf->flags & (BM_NO_EVICT|BM_NO_BACKING_STORE)) {
-
-	 assert(intel->locked);
-
-	 if (!buf->block && !evict_and_alloc_block(intel, buf)) {
-	    bm->fail = 1;
-	    retval = -1;
-	    goto out;
-	 }
-	 
-	 if (!(buf->flags & BM_NO_FENCE_SUBDATA))
-	    wait_quiescent(intel, buf->block);
-
-	 buf->dirty = 0;
-
-	 do_memcpy(buf->block->virtual + offset, data, size);
-      }
-      else {
-	 DBG("%s - set buf %d dirty\n", __FUNCTION__, buf->id);
-	 set_dirty(intel, buf);
-
-	 if (buf->backing_store == NULL)
-	    alloc_backing_store(intel, buf);
-
-	 do_memcpy(buf->backing_store + offset, data, size); 
-      }
-   }
- out:
-   UNLOCK(bm);
-   return retval;
-}
-
-
-
-int bmBufferDataAUB(struct intel_context *intel, 
-		     struct buffer *buf, 
-		     unsigned size, 
-		     const void *data, 
-		     unsigned flags,
-		     unsigned aubtype,
-		     unsigned aubsubtype )
-{
-   int retval = bmBufferData(intel, buf, size, data, flags);
-   
-
-   /* This only works because in this version of the buffer manager we
-    * allocate all buffers statically in agp space and so can emit the
-    * uploads to the aub file with the correct offsets as they happen.
-    */
-   if (retval == 0 && data && intel->aub_file) {
-
-      if (buf->block && !buf->dirty) {
-	 intel->vtbl.aub_gtt_data(intel,
-				      buf->block->mem->ofs,
-				      buf->block->virtual,
-				      size,
-				      aubtype,
-				      aubsubtype);
-	 buf->aub_dirty = 0;
-      }
-   }
-   
-   return retval;
-}
-		       
-
-int bmBufferSubDataAUB(struct intel_context *intel, 
-			struct buffer *buf, 
-			unsigned offset, 
-			unsigned size, 
-			const void *data,
-			unsigned aubtype,
-			unsigned aubsubtype )
-{
-   int retval = bmBufferSubData(intel, buf, offset, size, data);
-   
-
-   /* This only works because in this version of the buffer manager we
-    * allocate all buffers statically in agp space and so can emit the
-    * uploads to the aub file with the correct offsets as they happen.
-    */
-   if (intel->aub_file) {
-      if (retval == 0 && buf->block && !buf->dirty)
-	 intel->vtbl.aub_gtt_data(intel,
-				      buf->block->mem->ofs + offset,
-				      ((const char *)buf->block->virtual) + offset,
-				      size,
-				      aubtype,
-				      aubsubtype);
-   }
-
-   return retval;
-}
-
-void bmUnmapBufferAUB( struct intel_context *intel, 
-		       struct buffer *buf,
-		       unsigned aubtype,
-		       unsigned aubsubtype )
-{
-   bmUnmapBuffer(intel, buf);
-
-   if (intel->aub_file) {
-      /* Hack - exclude the framebuffer mappings.  If you removed
-       * this, you'd get very big aubfiles, but you *would* be able to
-       * see fallback rendering.
-       */
-      if (buf->block  && !buf->dirty && buf->block->pool == &intel->bm->pool[0]) {
-	 buf->aub_dirty = 1;
-      }
-   }
-}
-
-unsigned bmBufferOffset(struct intel_context *intel, 
-			struct buffer *buf)
-{
-   struct bufmgr *bm = intel->bm;
-   unsigned retval = 0;
-
-   LOCK(bm);
-   {
-      assert(intel->locked);
-
-      if (!buf->block &&
-	  !evict_and_alloc_block(intel, buf)) {
-	 bm->fail = 1;
-	 retval = ~0;
-      }
-      else {
-	 assert(buf->block);
-	 assert(buf->block->buf == buf);
-
-	 DBG("Add buf %d (block %p, dirty %d) to referenced list\n", buf->id, buf->block,
-	     buf->dirty);
-
-	 move_to_tail(&bm->referenced, buf->block);
-	 buf->block->referenced = 1;
-
-	 retval = buf->block->mem->ofs;
-      }
-   }
-   UNLOCK(bm);
-
-   return retval;
-}
-
-
-
-/* Extract data from the buffer:
- */
-void bmBufferGetSubData(struct intel_context *intel, 
-			struct buffer *buf, 
-			unsigned offset, 
-			unsigned size, 
-			void *data )
-{
-   struct bufmgr *bm = intel->bm;
-
-   LOCK(bm);
-   {
-      DBG("bmBufferSubdata %d offset 0x%x sz 0x%x\n", buf->id, offset, size);
-
-      if (buf->flags & (BM_NO_EVICT|BM_NO_BACKING_STORE)) {
-	 if (buf->block && size) {
-	    wait_quiescent(intel, buf->block);
-	    do_memcpy(data, buf->block->virtual + offset, size); 
-	 }
-      }
-      else {
-	 if (buf->backing_store && size) {
-	    do_memcpy(data, buf->backing_store + offset, size); 
-	 }
-      }
-   }
-   UNLOCK(bm);
-}
-
-
-/* Return a pointer to whatever space the buffer is currently resident in:
- */
-void *bmMapBuffer( struct intel_context *intel,
-		   struct buffer *buf, 
-		   unsigned flags )
-{
-   struct bufmgr *bm = intel->bm;
-   void *retval = NULL;
-
-   LOCK(bm);
-   {
-      DBG("bmMapBuffer %d\n", buf->id);
-
-      if (buf->mapped) {
-	 _mesa_printf("%s: already mapped\n", __FUNCTION__);
-	 retval = NULL;
-      }
-      else if (buf->flags & (BM_NO_BACKING_STORE|BM_NO_EVICT)) {
-
-	 assert(intel->locked);
-
-	 if (!buf->block && !evict_and_alloc_block(intel, buf)) {
-	    DBG("%s: alloc failed\n", __FUNCTION__);
-	    bm->fail = 1;
-	    retval = NULL;
-	 }
-	 else {
-	    assert(buf->block);
-	    buf->dirty = 0;
-
-	    if (!(buf->flags & BM_NO_FENCE_SUBDATA)) 
-	       wait_quiescent(intel, buf->block);
-
-	    buf->mapped = 1;
-	    retval = buf->block->virtual;
-	 }
-      }
-      else {
-	 DBG("%s - set buf %d dirty\n", __FUNCTION__, buf->id);
-	 set_dirty(intel, buf);
-
-	 if (buf->backing_store == 0)
-	    alloc_backing_store(intel, buf);
-
-	 buf->mapped = 1;
-	 retval = buf->backing_store;
-      }
-   }
-   UNLOCK(bm);
-   return retval;
-}
-
-void bmUnmapBuffer( struct intel_context *intel, struct buffer *buf )
-{
-   struct bufmgr *bm = intel->bm;
-
-   LOCK(bm);
-   {
-      DBG("bmUnmapBuffer %d\n", buf->id);
-      buf->mapped = 0;
-   }
-   UNLOCK(bm);
-}
-
-
-
-
-/* This is the big hack that turns on BM_NO_BACKING_STORE.  Basically
- * says that an external party will maintain the backing store, eg
- * Mesa's local copy of texture data.
- */
-void bmBufferSetInvalidateCB(struct intel_context *intel,
-			     struct buffer *buf,
-			     void (*invalidate_cb)( struct intel_context *, void *ptr ),
-			     void *ptr,
-			     GLboolean dont_fence_subdata)
-{
-   struct bufmgr *bm = intel->bm;
-
-   LOCK(bm);
-   {
-      if (buf->backing_store)
-	 free_backing_store(intel, buf);
-
-      buf->flags |= BM_NO_BACKING_STORE;
-      
-      if (dont_fence_subdata)
-	 buf->flags |= BM_NO_FENCE_SUBDATA;
-
-      DBG("bmBufferSetInvalidateCB set buf %d dirty\n", buf->id);
-      buf->dirty = 1;
-      buf->invalidate_cb = invalidate_cb;
-      buf->invalidate_ptr = ptr;
-
-      /* Note that it is invalid right from the start.  Also note
-       * invalidate_cb is called with the bufmgr locked, so cannot
-       * itself make bufmgr calls.
-       */
-      invalidate_cb( intel, ptr );
-   }
-   UNLOCK(bm);
-}
-
-
-
-
-
-
-
-/* This is only protected against thread interactions by the DRI lock
- * and the policy of ensuring that all dma is flushed prior to
- * releasing that lock.  Otherwise you might have two threads building
- * up a list of buffers to validate at once.
- */
-int bmValidateBuffers( struct intel_context *intel )
-{
-   struct bufmgr *bm = intel->bm;
-   int retval = 0;
-
-   LOCK(bm);
-   {
-      DBG("%s fail %d\n", __FUNCTION__, bm->fail);
-      assert(intel->locked);
-
-      if (!bm->fail) {
-	 struct block *block, *tmp;
-
-	 foreach_s(block, tmp, &bm->referenced) {
-	    struct buffer *buf = block->buf;
-
-	    DBG("Validate buf %d / block %p / dirty %d\n", buf->id, block, buf->dirty);
-
-	    /* Upload the buffer contents if necessary:
-	     */
-	    if (buf->dirty) {
-	       DBG("Upload dirty buf %d (%s) sz %d offset 0x%x\n", buf->id, 
-		   buf->name, buf->size, block->mem->ofs);
-
-	       assert(!(buf->flags & (BM_NO_BACKING_STORE|BM_NO_EVICT)));
-
-	       wait_quiescent(intel, buf->block);
-
-	       do_memcpy(buf->block->virtual,
-			 buf->backing_store, 
-			 buf->size);
-
-	       if (intel->aub_file) {
-		  intel->vtbl.aub_gtt_data(intel,
-					       buf->block->mem->ofs,
-					       buf->backing_store,
-					       buf->size,
-					       0,
-					       0);
-	       }
-
-	       buf->dirty = 0;
-	       buf->aub_dirty = 0;
-	    }
-	    else if (buf->aub_dirty) {
-	       intel->vtbl.aub_gtt_data(intel,
-					    buf->block->mem->ofs,
-					    buf->block->virtual,
-					    buf->size,
-					    0,
-					    0);
-	       buf->aub_dirty = 0;
-	    }
-
-	    block->referenced = 0;
-	    block->on_hardware = 1;
-	    move_to_tail(&bm->on_hardware, block);
-	 }
-
-	 bm->need_fence = 1;
-      }
-
-      retval = bm->fail ? -1 : 0;
-   }
-   UNLOCK(bm);
-
-
-   if (retval != 0)
-      DBG("%s failed\n", __FUNCTION__);
-
-   return retval;
-}
-
-
-
-
-void bmReleaseBuffers( struct intel_context *intel )
-{
-   struct bufmgr *bm = intel->bm;
-
-   LOCK(bm);
-   {
-      struct block *block, *tmp;
-
-      foreach_s (block, tmp, &bm->referenced) {
-
-	 DBG("remove block %p from referenced list\n", block);
-
-	 if (block->on_hardware) {
-	    /* Return to the on-hardware list.
-	     */
-	    move_to_tail(&bm->on_hardware, block);	    
-	 }
-	 else if (block->fenced) {
-	    struct block *s;
-
-	    /* Hmm - have to scan the fenced list to insert the
-	     * buffers in order.  This is O(nm), but rare and the
-	     * numbers are low.
-	     */
-	    foreach (s, &bm->fenced) {
-	       if (FENCE_LTE(block->fence, s->fence))
-		  break;
-	    }
-	    
-	    move_to_tail(s, block);
-	 }
-	 else {			
-	    /* Return to the lru list:
-	     */
-	    move_to_tail(&block->pool->lru, block);
-	 }
-
-	 block->referenced = 0;
-      }
-   }
-   UNLOCK(bm);
-}
-
-
-/* This functionality is used by the buffer manager, not really sure
- * if we need to be exposing it in this way, probably libdrm will
- * offer equivalent calls.
- *
- * For now they can stay, but will likely change/move before final:
- */
-unsigned bmSetFence( struct intel_context *intel )
-{
-   assert(intel->locked);
-
-   /* Emit MI_FLUSH here:
-    */
-   if (intel->bm->need_fence) {
-
-      /* Emit a flush without using a batchbuffer.  Can't rely on the
-       * batchbuffer at this level really.  Would really prefer that
-       * the IRQ ioctly emitted the flush at the same time.
-       */
-      GLuint dword[2];
-      dword[0] = intel->vtbl.flush_cmd();
-      dword[1] = 0;
-      intel_cmd_ioctl(intel, (char *)&dword, sizeof(dword));
-      
-      intel->bm->last_fence = intelEmitIrqLocked( intel );
-      
-      fence_blocks(intel, intel->bm->last_fence);
-
-      intel->vtbl.note_fence(intel, intel->bm->last_fence);
-      intel->bm->need_fence = 0;
-
-      if (intel->thrashing) {
-	 intel->thrashing--;
-	 if (!intel->thrashing)
-	    DBG("not thrashing\n");
-      }
-      
-      intel->bm->free_on_hardware = 0;
-   }
-   
-   return intel->bm->last_fence;
-}
-
-unsigned bmSetFenceLock( struct intel_context *intel )
-{
-  unsigned last;
-  LOCK(intel->bm);
-  last = bmSetFence(intel);
-  UNLOCK(intel->bm);
-  return last;
-}
-unsigned bmLockAndFence( struct intel_context *intel )
-{
-   if (intel->bm->need_fence) {
-      LOCK_HARDWARE(intel);
-      LOCK(intel->bm);
-      bmSetFence(intel);
-      UNLOCK(intel->bm);
-      UNLOCK_HARDWARE(intel);
-   }
-
-   return intel->bm->last_fence;
-}
-
-
-void bmFinishFence( struct intel_context *intel, unsigned fence )
-{
-   if (!bmTestFence(intel, fence)) {
-      DBG("...wait on fence %d\n", fence);
-      intelWaitIrq( intel, fence );
-   }
-   assert(bmTestFence(intel, fence));
-   check_fenced(intel);
-}
-
-void bmFinishFenceLock( struct intel_context *intel, unsigned fence )
-{
-   LOCK(intel->bm);
-   bmFinishFence(intel, fence);
-   UNLOCK(intel->bm);
-}
-
-
-/* Specifically ignore texture memory sharing.
- *  -- just evict everything
- *  -- and wait for idle
- */
-void bm_fake_NotifyContendedLockTake( struct intel_context *intel )
-{
-   struct bufmgr *bm = intel->bm;
-
-   LOCK(bm);
-   {
-      struct block *block, *tmp;
-      GLuint i;
-
-      assert(is_empty_list(&bm->referenced));
-
-      bm->need_fence = 1;
-      bm->fail = 0;
-      bmFinishFence(intel, bmSetFence(intel));
-
-      assert(is_empty_list(&bm->fenced));
-      assert(is_empty_list(&bm->on_hardware));
-
-      for (i = 0; i < bm->nr_pools; i++) {
-	 if (!(bm->pool[i].flags & BM_NO_EVICT)) {
-	    foreach_s(block, tmp, &bm->pool[i].lru) {
-	       assert(bmTestFence(intel, block->fence));
-	       set_dirty(intel, block->buf);
-	    }
-	 }
-      }
-   }
-   UNLOCK(bm);
-}
-
-
-
-void bmEvictAll( struct intel_context *intel )
-{
-   struct bufmgr *bm = intel->bm;
-
-   LOCK(bm);
-   {
-      struct block *block, *tmp;
-      GLuint i;
-
-      DBG("%s\n", __FUNCTION__);
-
-      assert(is_empty_list(&bm->referenced));
-
-      bm->need_fence = 1;
-      bm->fail = 0;
-      bmFinishFence(intel, bmSetFence(intel));
-
-      assert(is_empty_list(&bm->fenced));
-      assert(is_empty_list(&bm->on_hardware));
-
-      for (i = 0; i < bm->nr_pools; i++) {
-	 if (!(bm->pool[i].flags & BM_NO_EVICT)) {
-	    foreach_s(block, tmp, &bm->pool[i].lru) {
-	       assert(bmTestFence(intel, block->fence));
-	       set_dirty(intel, block->buf);
-	       block->buf->block = NULL;
-
-	       free_block(intel, block);
-	    }
-	 }
-      }
-   }
-   UNLOCK(bm);
-}
-
-
-GLboolean bmError( struct intel_context *intel )
-{
-   struct bufmgr *bm = intel->bm;
-   GLboolean retval;
-
-   LOCK(bm);
-   {
-      retval = bm->fail;
-   }
-   UNLOCK(bm);
-
-   return retval;
-}
-
-
-GLuint bmCtxId( struct intel_context *intel )
-{
-   return intel->bm->ctxId;
-}
diff --git a/i965/intel_batchbuffer.c b/i965/intel_batchbuffer.c
deleted file mode 100644
index 64885ed..0000000
--- a/i965/intel_batchbuffer.c
+++ /dev/null
@@ -1,243 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "imports.h"
-#include "intel_batchbuffer.h"
-#include "intel_ioctl.h"
-#include "bufmgr.h"
-
-
-static void intel_batchbuffer_reset( struct intel_batchbuffer *batch )
-{
-   assert(batch->map == NULL);
-
-   batch->offset = (unsigned long)batch->ptr;
-   batch->offset = (batch->offset + 63) & ~63;
-   batch->ptr = (unsigned char *) batch->offset;
-
-   if (BATCH_SZ - batch->offset < BATCH_REFILL) {
-      bmBufferData(batch->intel, 
-		   batch->buffer,
-		   BATCH_SZ, 
-		   NULL, 
-		   0); 
-      batch->offset = 0;
-      batch->ptr = NULL;
-   }
-		
-   batch->flags = 0;
-}
-
-static void intel_batchbuffer_reset_cb( struct intel_context *intel,
-					void *ptr )
-{
-   struct intel_batchbuffer *batch = (struct intel_batchbuffer *)ptr;
-   assert(batch->map == NULL);
-   batch->flags = 0;
-   batch->offset = 0;
-   batch->ptr = NULL;
-}
-
-GLubyte *intel_batchbuffer_map( struct intel_batchbuffer *batch )
-{
-   if (!batch->map) {
-      batch->map = bmMapBuffer(batch->intel, batch->buffer, 
-			       BM_MEM_AGP|BM_MEM_LOCAL|BM_CLIENT|BM_WRITE);
-      batch->ptr += (unsigned long)batch->map;
-   }
-
-   return batch->map;
-}
-
-void intel_batchbuffer_unmap( struct intel_batchbuffer *batch )
-{
-   if (batch->map) {
-      batch->ptr -= (unsigned long)batch->map;
-      batch->map = NULL;
-      bmUnmapBuffer(batch->intel, batch->buffer);
-   }
-}
-
-
-
-/*======================================================================
- * Public functions
- */
-struct intel_batchbuffer *intel_batchbuffer_alloc( struct intel_context *intel )
-{
-   struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1);
-
-   batch->intel = intel;
-
-   bmGenBuffers(intel, "batch", 1, &batch->buffer, 12);
-
-   bmBufferSetInvalidateCB(intel, batch->buffer,
-			   intel_batchbuffer_reset_cb,
-			   batch,
-			   GL_TRUE);
-
-   bmBufferData(batch->intel,
-		batch->buffer,
-		BATCH_SZ,
-		NULL,
-		0);
-
-
-   return batch;
-}
-
-void intel_batchbuffer_free( struct intel_batchbuffer *batch )
-{
-   if (batch->map) 
-      bmUnmapBuffer(batch->intel, batch->buffer);
-   
-   bmDeleteBuffers(batch->intel, 1, &batch->buffer);
-   free(batch);
-}
-
-
-#define MI_BATCH_BUFFER_END 	(0xA<<23)
-
-
-GLboolean intel_batchbuffer_flush( struct intel_batchbuffer *batch )
-{
-   struct intel_context *intel = batch->intel;
-   GLuint used = batch->ptr - (batch->map + batch->offset);
-   GLuint offset;
-   GLint retval = GL_TRUE;
-
-   assert(intel->locked);
-
-   if (used == 0) {
-      bmReleaseBuffers( batch->intel );
-      return GL_TRUE;
-   }
-
-   /* Add the MI_BATCH_BUFFER_END.  Always add an MI_FLUSH - this is a
-    * performance drain that we would like to avoid.
-    */
-   if (used & 4) {
-      ((int *)batch->ptr)[0] = MI_BATCH_BUFFER_END;
-      batch->ptr += 4;
-      used += 4;
-   }
-   else {
-      ((int *)batch->ptr)[0] = 0;
-      ((int *)batch->ptr)[1] = MI_BATCH_BUFFER_END;
-
-      batch->ptr += 8;
-      used += 8;
-   }
-
-   intel_batchbuffer_unmap(batch);
-
-   /* Get the batch buffer offset: Must call bmBufferOffset() before
-    * bmValidateBuffers(), otherwise the buffer won't be on the inuse
-    * list.
-    */
-   offset = bmBufferOffset(batch->intel, batch->buffer);
-
-   if (bmValidateBuffers( batch->intel ) != 0) {
-      assert(intel->locked);
-      bmReleaseBuffers( batch->intel );
-      retval = GL_FALSE;
-      goto out;
-   }
-
-
-   if (intel->aub_file) {
-      /* Send buffered commands to aubfile as a single packet. 
-       */
-      intel_batchbuffer_map(batch);
-      ((int *)batch->ptr)[-1] = intel->vtbl.flush_cmd();
-      intel->vtbl.aub_commands(intel,
-			       offset, /* Fulsim wierdness - don't adjust */
-			       batch->map + batch->offset,
-			       used);
-      ((int *)batch->ptr)[-1] = MI_BATCH_BUFFER_END;
-      intel_batchbuffer_unmap(batch);
-   }
-
-
-   /* Fire the batch buffer, which was uploaded above:
-    */
-   intel_batch_ioctl(batch->intel, 
-		     offset + batch->offset,
-		     used);
-
-   if (intel->aub_file && 
-       intel->ctx.DrawBuffer->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT)
-      intel->vtbl.aub_dump_bmp( intel, 0 );
-
-   /* Reset the buffer:
-    */
- out:
-   intel_batchbuffer_reset( batch );
-   intel_batchbuffer_map( batch );
-
-   if (!retval)
-      DBG("%s failed\n", __FUNCTION__);
-
-   return retval;
-}
-
-
-
-
-
-
-
-void intel_batchbuffer_align( struct intel_batchbuffer *batch,
-			      GLuint align,
-			      GLuint sz )
-{
-   unsigned long ptr = (unsigned long) batch->ptr;
-   unsigned long aptr = (ptr + align) & ~((unsigned long)align-1);
-   GLuint fixup = aptr - ptr;
-
-   if (intel_batchbuffer_space(batch) < fixup + sz)
-      intel_batchbuffer_flush(batch);
-   else {
-      memset(batch->ptr, 0, fixup);      
-      batch->ptr += fixup;
-   }
-}
-
-
-
-
-void intel_batchbuffer_data(struct intel_batchbuffer *batch,
-			    const void *data,
-			    GLuint bytes,
-			    GLuint flags)
-{
-   assert((bytes & 3) == 0);
-   intel_batchbuffer_require_space(batch, bytes, flags);
-   __memcpy(batch->ptr, data, bytes);
-   batch->ptr += bytes;
-}
-
diff --git a/i965/intel_batchbuffer.h b/i965/intel_batchbuffer.h
deleted file mode 100644
index 25e0a65..0000000
--- a/i965/intel_batchbuffer.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef INTEL_BATCHBUFFER_H
-#define INTEL_BATCHBUFFER_H
-
-#include "mtypes.h"
-#include "bufmgr.h"
-
-struct intel_context;
-
-#define BATCH_SZ (16 * 1024)
-#define BATCH_REFILL 4096
-#define BATCH_RESERVED 16
-
-#define INTEL_BATCH_NO_CLIPRECTS 0x1
-#define INTEL_BATCH_CLIPRECTS    0x2
-
-struct intel_batchbuffer {
-   struct intel_context *intel;
-
-   struct buffer *buffer;
-
-   GLuint flags;
-   unsigned long offset;
-
-   GLubyte *map;
-   GLubyte *ptr; 
-};
-
-struct intel_batchbuffer *intel_batchbuffer_alloc( struct intel_context *intel );
-
-void intel_batchbuffer_free( struct intel_batchbuffer *batch );
-
-
-GLboolean intel_batchbuffer_flush( struct intel_batchbuffer *batch );
-
-void intel_batchbuffer_unmap( struct intel_batchbuffer *batch );
-GLubyte *intel_batchbuffer_map( struct intel_batchbuffer *batch );
-
-
-/* Unlike bmBufferData, this currently requires the buffer be mapped.
- * Consider it a convenience function wrapping multple
- * intel_buffer_dword() calls.
- */
-void intel_batchbuffer_data(struct intel_batchbuffer *batch,
-			    const void *data,
-			    GLuint bytes,
-			    GLuint flags);
-
-void intel_batchbuffer_release_space(struct intel_batchbuffer *batch,
-				   GLuint bytes);
-
-
-/* Inline functions - might actually be better off with these
- * non-inlined.  Certainly better off switching all command packets to
- * be passed as structs rather than dwords, but that's a little bit of
- * work...
- */
-static inline GLuint 
-intel_batchbuffer_space( struct intel_batchbuffer *batch )
-{
-   return (BATCH_SZ - BATCH_RESERVED) - (batch->ptr - (batch->map + batch->offset));
-}
-
-
-static inline void 
-intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch,
-			     GLuint dword)
-{
-   assert(batch->map);
-   assert(intel_batchbuffer_space(batch) >= 4);
-   *(GLuint *)(batch->ptr) = dword;
-   batch->ptr += 4;
-}
-
-static inline void 
-intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
-				GLuint sz,
-				GLuint flags)
-{
-   assert(sz < BATCH_SZ - 8);
-   if (intel_batchbuffer_space(batch) < sz ||
-       (batch->flags != 0 && flags != 0 && batch->flags != flags))
-      intel_batchbuffer_flush(batch);
-   
-   batch->flags |= flags;
-}
-
-void intel_batchbuffer_align( struct intel_batchbuffer *batch,
-			      GLuint align,
-			      GLuint sz );
-
-
-/* Here are the crusty old macros, to be removed:
- */
-#define BATCH_LOCALS 
-#define BEGIN_BATCH(n, flags) intel_batchbuffer_require_space(intel->batch, n*4, flags)
-#define OUT_BATCH(d)  intel_batchbuffer_emit_dword(intel->batch, d)
-#define ADVANCE_BATCH() do { } while(0)
-
-
-#endif
diff --git a/i965/intel_blit.c b/i965/intel_blit.c
deleted file mode 100644
index f88cbb2..0000000
--- a/i965/intel_blit.c
+++ /dev/null
@@ -1,617 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#include <stdio.h>
-#include <errno.h>
-
-#include "mtypes.h"
-#include "context.h"
-#include "enums.h"
-#include "vblank.h"
-
-#include "intel_reg.h"
-#include "intel_batchbuffer.h"
-#include "intel_context.h"
-#include "intel_blit.h"
-#include "intel_regions.h"
-#include "intel_structs.h"
-
-#include "bufmgr.h"
-
-
-
-
-/*
- * Copy the back buffer to the front buffer. 
- */
-void intelCopyBuffer( const __DRIdrawablePrivate *dPriv,
-		      const drm_clip_rect_t *rect ) 
-{
-   struct intel_context *intel;
-   GLboolean   missed_target;
-   int64_t ust;
-
-   DBG("%s\n", __FUNCTION__);
-
-   assert(dPriv);
-   assert(dPriv->driContextPriv);
-   assert(dPriv->driContextPriv->driverPrivate);
-
-   intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate;
-   intelFlush( &intel->ctx );
-
-
-   bmFinishFenceLock(intel, intel->last_swap_fence);
-
-   /* The LOCK_HARDWARE is required for the cliprects.  Buffer offsets
-    * should work regardless.
-    */
-   LOCK_HARDWARE( intel );
-
-   if (!rect)
-   {
-       UNLOCK_HARDWARE( intel );
-       driWaitForVBlank( dPriv, &intel->vbl_seq, intel->vblank_flags, & missed_target );
-       LOCK_HARDWARE( intel );
-   }
-
-   {
-      intelScreenPrivate *intelScreen = intel->intelScreen;
-      __DRIdrawablePrivate *dPriv = intel->driDrawable;
-      int nbox = dPriv->numClipRects;
-      drm_clip_rect_t *pbox = dPriv->pClipRects;
-      int cpp = intelScreen->cpp;
-      struct intel_region *src, *dst;
-      int BR13, CMD;
-      int i;
-      int src_pitch, dst_pitch;
-
-      if (intel->sarea->pf_current_page == 0) {
-	 dst = intel->front_region;
-	 src = intel->back_region;
-      }
-      else {
-	 assert(0);
-	 src = intel->front_region;
-	 dst = intel->back_region;
-      }
-
-      src_pitch = src->pitch * src->cpp;
-      dst_pitch = dst->pitch * dst->cpp;
-
-      if (cpp == 2) {
-	 BR13 = (0xCC << 16) | (1<<24);
-	 CMD = XY_SRC_COPY_BLT_CMD;
-      } 
-      else {
-	 BR13 = (0xCC << 16) | (1<<24) | (1<<25);
-	 CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA |
-		XY_SRC_COPY_BLT_WRITE_RGB);
-      }
-
-      if (src->tiled) {
-	 CMD |= XY_SRC_TILED;
-	 src_pitch /= 4;
-      }
-      
-      if (dst->tiled) {
-	 CMD |= XY_DST_TILED;
- 	 dst_pitch /= 4;
-      }
-  
-      for (i = 0 ; i < nbox; i++, pbox++) 
-      {
-	 drm_clip_rect_t tmp = *pbox;
-
-	 if (rect) {
-	    if (!intel_intersect_cliprects(&tmp, &tmp, rect))
-	       continue;
-	 }
-
-
-	 if (tmp.x1 > tmp.x2 ||
-	     tmp.y1 > tmp.y2 ||
-	     tmp.x2 > intelScreen->width ||
-	     tmp.y2 > intelScreen->height)
-	    continue;
- 
-	 BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS);
-	 OUT_BATCH( CMD );
-	 OUT_BATCH( dst_pitch | BR13 );
-	 OUT_BATCH( (tmp.y1 << 16) | tmp.x1 );
-	 OUT_BATCH( (tmp.y2 << 16) | tmp.x2 );
-	 OUT_BATCH( bmBufferOffset(intel, dst->buffer) );
-	 OUT_BATCH( (tmp.y1 << 16) | tmp.x1 );
-	 OUT_BATCH( src_pitch );
-	 OUT_BATCH( bmBufferOffset(intel, src->buffer) ); 
-	 ADVANCE_BATCH();
-      }
-   }
-
-   intel_batchbuffer_flush( intel->batch );
-   intel->second_last_swap_fence = intel->last_swap_fence;
-   intel->last_swap_fence = bmSetFenceLock( intel );
-   UNLOCK_HARDWARE( intel );
-
-   if (!rect)
-   {
-       intel->swap_count++;
-       (*dri_interface->getUST)(&ust);
-       if (missed_target) {
-	   intel->swap_missed_count++;
-	   intel->swap_missed_ust = ust -  intel->swap_ust;
-       }
-   
-       intel->swap_ust = ust;
-   }
-
-}
-
-
-
-
-void intelEmitFillBlit( struct intel_context *intel,
-			GLuint cpp,
-			GLshort dst_pitch,
-			struct buffer *dst_buffer,
-			GLuint dst_offset,
-			GLboolean dst_tiled,
-			GLshort x, GLshort y, 
-			GLshort w, GLshort h,
-			GLuint color )
-{
-   GLuint BR13, CMD;
-   BATCH_LOCALS;
-
-   dst_pitch *= cpp;
-
-   switch(cpp) {
-   case 1: 
-   case 2: 
-   case 3: 
-      BR13 = (0xF0 << 16) | (1<<24);
-      CMD = XY_COLOR_BLT_CMD;
-      break;
-   case 4:
-      BR13 = (0xF0 << 16) | (1<<24) | (1<<25);
-      CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA |
-	     XY_COLOR_BLT_WRITE_RGB);
-      break;
-   default:
-      return;
-   }
-
-   if (dst_tiled) {
-      CMD |= XY_DST_TILED;
-      dst_pitch /= 4;
-   }
-
-   BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
-   OUT_BATCH( CMD );
-   OUT_BATCH( dst_pitch | BR13 );
-   OUT_BATCH( (y << 16) | x );
-   OUT_BATCH( ((y+h) << 16) | (x+w) );
-   OUT_BATCH( bmBufferOffset(intel, dst_buffer) + dst_offset );
-   OUT_BATCH( color );
-   ADVANCE_BATCH();
-}
-
-static GLuint translate_raster_op(GLenum logicop)
-{
-   switch(logicop) {
-   case GL_CLEAR: return 0x00;
-   case GL_AND: return 0x88;
-   case GL_AND_REVERSE: return 0x44;
-   case GL_COPY: return 0xCC;
-   case GL_AND_INVERTED: return 0x22;
-   case GL_NOOP: return 0xAA;
-   case GL_XOR: return 0x66;
-   case GL_OR: return 0xEE;
-   case GL_NOR: return 0x11;
-   case GL_EQUIV: return 0x99;
-   case GL_INVERT: return 0x55;
-   case GL_OR_REVERSE: return 0xDD;
-   case GL_COPY_INVERTED: return 0x33;
-   case GL_OR_INVERTED: return 0xBB;
-   case GL_NAND: return 0x77;
-   case GL_SET: return 0xFF;
-   default: return 0;
-   }
-}
-
-
-/* Copy BitBlt
- */
-void intelEmitCopyBlit( struct intel_context *intel,
-			GLuint cpp,
-			GLshort src_pitch,
-			struct buffer *src_buffer,
-			GLuint  src_offset,
-			GLboolean src_tiled,
-			GLshort dst_pitch,
-			struct buffer *dst_buffer,
-			GLuint  dst_offset,
-			GLboolean dst_tiled,
-			GLshort src_x, GLshort src_y,
-			GLshort dst_x, GLshort dst_y,
-			GLshort w, GLshort h,
-			GLenum logic_op )
-{
-   GLuint CMD, BR13;
-   int dst_y2 = dst_y + h;
-   int dst_x2 = dst_x + w;
-   BATCH_LOCALS;
-
-
-   DBG("%s src:buf(%d)/%d %d,%d dst:buf(%d)/%d %d,%d sz:%dx%d op:%d\n",
-       __FUNCTION__,
-       src_buffer, src_pitch, src_x, src_y,
-       dst_buffer, dst_pitch, dst_x, dst_y,
-       w,h,logic_op);
-
-   assert( logic_op - GL_CLEAR >= 0 );
-   assert( logic_op - GL_CLEAR < 0x10 );
-      
-   src_pitch *= cpp;
-   dst_pitch *= cpp;
-
-   switch(cpp) {
-   case 1: 
-   case 2: 
-   case 3: 
-      BR13 = (translate_raster_op(logic_op) << 16) | (1<<24);
-      CMD = XY_SRC_COPY_BLT_CMD;
-      break;
-   case 4:
-      BR13 = (translate_raster_op(logic_op) << 16) | (1<<24) |
-	  (1<<25);
-      CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA |
-	     XY_SRC_COPY_BLT_WRITE_RGB);
-      break;
-   default:
-      return;
-   }
-
-   if (src_tiled) {
-      CMD |= XY_SRC_TILED;
-      src_pitch /= 4;
-   }
-   
-   if (dst_tiled) {
-      CMD |= XY_DST_TILED;
-      dst_pitch /= 4;
-   }
-
-   if (dst_y2 < dst_y ||
-       dst_x2 < dst_x) {
-      return;
-   }
-
-   dst_pitch &= 0xffff;
-   src_pitch &= 0xffff;
-
-   /* Initial y values don't seem to work with negative pitches.  If
-    * we adjust the offsets manually (below), it seems to work fine.
-    *
-    * On the other hand, if we always adjust, the hardware doesn't
-    * know which blit directions to use, so overlapping copypixels get
-    * the wrong result.
-    */
-   if (dst_pitch > 0 && src_pitch > 0) {
-      BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS);
-      OUT_BATCH( CMD );
-      OUT_BATCH( dst_pitch | BR13 );
-      OUT_BATCH( (dst_y << 16) | dst_x );
-      OUT_BATCH( (dst_y2 << 16) | dst_x2 );
-      OUT_BATCH( bmBufferOffset(intel, dst_buffer) + dst_offset );	
-      OUT_BATCH( (src_y << 16) | src_x );
-      OUT_BATCH( src_pitch );
-      OUT_BATCH( bmBufferOffset(intel, src_buffer) + src_offset ); 
-      ADVANCE_BATCH();
-   }
-   else {
-      BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS);
-      OUT_BATCH( CMD );
-      OUT_BATCH( (dst_pitch & 0xffff) | BR13 );
-      OUT_BATCH( (0 << 16) | dst_x );
-      OUT_BATCH( (h << 16) | dst_x2 );
-      OUT_BATCH( bmBufferOffset(intel, dst_buffer) + dst_offset + dst_y * dst_pitch );	
-      OUT_BATCH( (0 << 16) | src_x );
-      OUT_BATCH( (src_pitch & 0xffff) );
-      OUT_BATCH( bmBufferOffset(intel, src_buffer) + src_offset + src_y * src_pitch ); 
-      ADVANCE_BATCH();
-   }
-}
-
-
-
-void intelClearWithBlit(GLcontext *ctx, GLbitfield flags)
-{
-   struct intel_context *intel = intel_context( ctx );
-   intelScreenPrivate *intelScreen = intel->intelScreen;
-   GLuint clear_depth, clear_color;
-   GLint cx, cy, cw, ch;
-   GLint cpp = intelScreen->cpp;
-   GLboolean all;
-   GLint i;
-   struct intel_region *front = intel->front_region;
-   struct intel_region *back = intel->back_region;
-   struct intel_region *depth = intel->depth_region;
-   GLuint BR13, FRONT_CMD, BACK_CMD, DEPTH_CMD;
-   GLuint front_pitch;
-   GLuint back_pitch;
-   GLuint depth_pitch;
-   BATCH_LOCALS;
-
-   
-   clear_color = intel->ClearColor;
-   clear_depth = 0;
-
-   if (flags & BUFFER_BIT_DEPTH) {
-      clear_depth = (GLuint)(ctx->Depth.Clear * intel->ClearDepth);
-   }
-
-   if (flags & BUFFER_BIT_STENCIL) {
-      clear_depth |= (ctx->Stencil.Clear & 0xff) << 24;
-   }
-
-   switch(cpp) {
-   case 2: 
-      BR13 = (0xF0 << 16) | (1<<24);
-      BACK_CMD  = FRONT_CMD = XY_COLOR_BLT_CMD;
-      DEPTH_CMD = XY_COLOR_BLT_CMD;
-      break;
-   case 4:
-      BR13 = (0xF0 << 16) | (1<<24) | (1<<25);
-      BACK_CMD = FRONT_CMD = (XY_COLOR_BLT_CMD |
-			      XY_COLOR_BLT_WRITE_ALPHA | 
-			      XY_COLOR_BLT_WRITE_RGB);
-      DEPTH_CMD = XY_COLOR_BLT_CMD;
-      if (flags & BUFFER_BIT_DEPTH) DEPTH_CMD |= XY_COLOR_BLT_WRITE_RGB;
-      if (flags & BUFFER_BIT_STENCIL) DEPTH_CMD |= XY_COLOR_BLT_WRITE_ALPHA;
-      break;
-   default:
-      return;
-   }
-
-
-
-   intelFlush( &intel->ctx );
-   LOCK_HARDWARE( intel );
-   {
-      /* get clear bounds after locking */
-      cx = ctx->DrawBuffer->_Xmin;
-      cy = ctx->DrawBuffer->_Ymin;
-      ch = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin;
-      cw = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin;
-      all = (cw == ctx->DrawBuffer->Width && ch == ctx->DrawBuffer->Height);
-
-      /* flip top to bottom */
-      cy = intel->driDrawable->h - cy - ch;
-      cx = cx + intel->drawX;
-      cy += intel->drawY;
-
-      /* adjust for page flipping */
-      if ( intel->sarea->pf_current_page == 0 ) {
-	 front = intel->front_region;
-	 back = intel->back_region;
-      } 
-      else {
-	 back = intel->front_region;
-	 front = intel->back_region;
-      }
-      
-      front_pitch = front->pitch * front->cpp;
-      back_pitch = back->pitch * back->cpp;
-      depth_pitch = depth->pitch * depth->cpp;
-      
-      if (front->tiled) {
-	 FRONT_CMD |= XY_DST_TILED;
-	 front_pitch /= 4;
-      }
-
-      if (back->tiled) {
-	 BACK_CMD |= XY_DST_TILED;
-	 back_pitch /= 4;
-      }
-
-      if (depth->tiled) {
-	 DEPTH_CMD |= XY_DST_TILED;
-	 depth_pitch /= 4;
-      }
-
-      for (i = 0 ; i < intel->numClipRects ; i++) 
-      { 	 
-	 drm_clip_rect_t *box = &intel->pClipRects[i];	 
-	 drm_clip_rect_t b;
-
-	 if (!all) {
-	    GLint x = box->x1;
-	    GLint y = box->y1;
-	    GLint w = box->x2 - x;
-	    GLint h = box->y2 - y;
-
-	    if (x < cx) w -= cx - x, x = cx; 
-	    if (y < cy) h -= cy - y, y = cy;
-	    if (x + w > cx + cw) w = cx + cw - x;
-	    if (y + h > cy + ch) h = cy + ch - y;
-	    if (w <= 0) continue;
-	    if (h <= 0) continue;
-
-	    b.x1 = x;
-	    b.y1 = y;
-	    b.x2 = x + w;
-	    b.y2 = y + h;      
-	 } else {
-	    b = *box;
-	 }
-
-
-	 if (b.x1 > b.x2 ||
-	     b.y1 > b.y2 ||
-	     b.x2 > intelScreen->width ||
-	     b.y2 > intelScreen->height)
-	    continue;
-
-	 if ( flags & BUFFER_BIT_FRONT_LEFT ) {	    
-	    BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
-	    OUT_BATCH( FRONT_CMD );
-	    OUT_BATCH( front_pitch | BR13 );
-	    OUT_BATCH( (b.y1 << 16) | b.x1 );
-	    OUT_BATCH( (b.y2 << 16) | b.x2 );
-	    OUT_BATCH( bmBufferOffset(intel, front->buffer) );
-	    OUT_BATCH( clear_color );
-	    ADVANCE_BATCH();
-	 }
-
-	 if ( flags & BUFFER_BIT_BACK_LEFT ) {
-	    BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); 
-	    OUT_BATCH( BACK_CMD );
-	    OUT_BATCH( back_pitch | BR13 );
-	    OUT_BATCH( (b.y1 << 16) | b.x1 );
-	    OUT_BATCH( (b.y2 << 16) | b.x2 );
-	    OUT_BATCH( bmBufferOffset(intel, back->buffer) );
-	    OUT_BATCH( clear_color );
-	    ADVANCE_BATCH();
-	 }
-
-	 if ( flags & (BUFFER_BIT_STENCIL | BUFFER_BIT_DEPTH) ) {
-	    BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
-	    OUT_BATCH( DEPTH_CMD );
-	    OUT_BATCH( depth_pitch | BR13 );
-	    OUT_BATCH( (b.y1 << 16) | b.x1 );
-	    OUT_BATCH( (b.y2 << 16) | b.x2 );
-	    OUT_BATCH( bmBufferOffset(intel, depth->buffer) );
-	    OUT_BATCH( clear_depth );
-	    ADVANCE_BATCH();
-	 }      
-      }
-   }
-   intel_batchbuffer_flush( intel->batch );
-   UNLOCK_HARDWARE( intel );
-}
-
-
-
-#define BR13_565  0x1
-#define BR13_8888 0x3
-
-
-void
-intelEmitImmediateColorExpandBlit(struct intel_context *intel,
-				  GLuint cpp,
-				  GLubyte *src_bits, GLuint src_size,
-				  GLuint fg_color,
-				  GLshort dst_pitch,
-				  struct buffer *dst_buffer,
-				  GLuint dst_offset,
-				  GLboolean dst_tiled,
-				  GLshort x, GLshort y, 
-				  GLshort w, GLshort h,
-				  GLenum logic_op)
-{
-   struct xy_setup_blit setup;
-   struct xy_text_immediate_blit text;
-   int dwords = ((src_size + 7) & ~7) / 4;
-
-   assert( logic_op - GL_CLEAR >= 0 );
-   assert( logic_op - GL_CLEAR < 0x10 );
-
-   if (w < 0 || h < 0) 
-      return;
-
-   dst_pitch *= cpp;
-
-   if (dst_tiled) 
-      dst_pitch /= 4;
-
-   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
-       __FUNCTION__,
-       dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);
-
-   memset(&setup, 0, sizeof(setup));
-   
-   setup.br0.client = CLIENT_2D;
-   setup.br0.opcode = OPCODE_XY_SETUP_BLT;
-   setup.br0.write_alpha = (cpp == 4);
-   setup.br0.write_rgb = (cpp == 4);
-   setup.br0.dst_tiled = dst_tiled;
-   setup.br0.length = (sizeof(setup) / sizeof(int)) - 2;
-      
-   setup.br13.dest_pitch = dst_pitch;
-   setup.br13.rop = translate_raster_op(logic_op);
-   setup.br13.color_depth = (cpp == 4) ? BR13_8888 : BR13_565;
-   setup.br13.clipping_enable = 0;
-   setup.br13.mono_source_transparency = 1;
-
-   setup.dw2.clip_y1 = 0;
-   setup.dw2.clip_x1 = 0;
-   setup.dw3.clip_y2 = 100;
-   setup.dw3.clip_x2 = 100;
-
-   setup.dest_base_addr = bmBufferOffset(intel, dst_buffer) + dst_offset;
-   setup.background_color = 0;
-   setup.foreground_color = fg_color;
-   setup.pattern_base_addr = 0;
-
-   memset(&text, 0, sizeof(text));
-   text.dw0.client = CLIENT_2D;
-   text.dw0.opcode = OPCODE_XY_TEXT_IMMEDIATE_BLT;
-   text.dw0.pad0 = 0;
-   text.dw0.byte_packed = 1;	/* ?maybe? */
-   text.dw0.pad1 = 0;
-   text.dw0.dst_tiled = dst_tiled;
-   text.dw0.pad2 = 0;
-   text.dw0.length = (sizeof(text)/sizeof(int)) - 2 + dwords;
-   text.dw1.dest_y1 = y;	/* duplicates info in setup blit */
-   text.dw1.dest_x1 = x;
-   text.dw2.dest_y2 = y + h;
-   text.dw2.dest_x2 = x + w;
-
-   intel_batchbuffer_require_space( intel->batch,
-				    sizeof(setup) + 
-				    sizeof(text) + 
-				    dwords,
-				    INTEL_BATCH_NO_CLIPRECTS );
-
-   intel_batchbuffer_data( intel->batch,
-			   &setup,
-			   sizeof(setup),
-			   INTEL_BATCH_NO_CLIPRECTS );
-
-   intel_batchbuffer_data( intel->batch,
-			   &text,
-			   sizeof(text),
-			   INTEL_BATCH_NO_CLIPRECTS );
-
-   intel_batchbuffer_data( intel->batch,
-			   src_bits,
-			   dwords * 4,
-			   INTEL_BATCH_NO_CLIPRECTS );
-}
-
diff --git a/i965/intel_buffer_objects.c b/i965/intel_buffer_objects.c
deleted file mode 100644
index 015e433..0000000
--- a/i965/intel_buffer_objects.c
+++ /dev/null
@@ -1,207 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#include "imports.h"
-#include "mtypes.h"
-#include "bufferobj.h"
-
-#include "intel_context.h"
-#include "intel_buffer_objects.h"
-#include "bufmgr.h"
-
-
-/**
- * There is some duplication between mesa's bufferobjects and our
- * bufmgr buffers.  Both have an integer handle and a hashtable to
- * lookup an opaque structure.  It would be nice if the handles and
- * internal structure where somehow shared.
- */
-static struct gl_buffer_object *intel_bufferobj_alloc( GLcontext *ctx, 
-						       GLuint name, 
-						       GLenum target )
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_buffer_object *obj = MALLOC_STRUCT(intel_buffer_object);
-
-   _mesa_initialize_buffer_object(&obj->Base, name, target);
-
-   /* XXX:  We generate our own handle, which is different to 'name' above.
-    */
-   bmGenBuffers(intel, "bufferobj", 1, &obj->buffer, 6);
-   assert(obj->buffer);
-
-   return &obj->Base;
-}
-
-
-/**
- * Deallocate/free a vertex/pixel buffer object.
- * Called via glDeleteBuffersARB().
- */
-static void intel_bufferobj_free( GLcontext *ctx, 
-				  struct gl_buffer_object *obj )
-{ 
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
-
-   assert(intel_obj);
-
-   if (intel_obj->buffer) 
-      bmDeleteBuffers( intel, 1, &intel_obj->buffer );
-  
-   _mesa_free(intel_obj);
-}
-
-
-
-/**
- * Allocate space for and store data in a buffer object.  Any data that was
- * previously stored in the buffer object is lost.  If data is NULL,
- * memory will be allocated, but no copy will occur.
- * Called via glBufferDataARB().
- */
-static void intel_bufferobj_data( GLcontext *ctx, 
-				  GLenum target, 
-				  GLsizeiptrARB size,
-				  const GLvoid *data, 
-				  GLenum usage,
-				  struct gl_buffer_object *obj )
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
-
-   /* XXX: do something useful with 'usage' (eg. populate flags
-    * argument below)
-    */
-   assert(intel_obj);
-
-   obj->Size = size;
-   obj->Usage = usage;
-
-   bmBufferDataAUB(intel, intel_obj->buffer, size, data, 0,
-		   0, 0);
-}
-
-
-/**
- * Replace data in a subrange of buffer object.  If the data range
- * specified by size + offset extends beyond the end of the buffer or
- * if data is NULL, no copy is performed.
- * Called via glBufferSubDataARB().
- */
-static void intel_bufferobj_subdata( GLcontext *ctx, 
-				     GLenum target, 
-				     GLintptrARB offset,
-				     GLsizeiptrARB size, 
-				     const GLvoid * data,
-				     struct gl_buffer_object * obj )
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
-
-   assert(intel_obj);
-   bmBufferSubDataAUB(intel, intel_obj->buffer, offset, size, data, 0, 0);
-}
-
-
-/**
- * Called via glGetBufferSubDataARB().
- */
-static void intel_bufferobj_get_subdata( GLcontext *ctx, 
-					 GLenum target, 
-					 GLintptrARB offset,
-					 GLsizeiptrARB size, 
-					 GLvoid * data,
-					 struct gl_buffer_object * obj )
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
-
-   assert(intel_obj);
-   bmBufferGetSubData(intel, intel_obj->buffer, offset, size, data);
-}
-
-
-
-/**
- * Called via glMapBufferARB().
- */
-static void *intel_bufferobj_map( GLcontext *ctx, 
-				  GLenum target, 
-				  GLenum access,
-				  struct gl_buffer_object *obj )
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
-
-   /* XXX: Translate access to flags arg below:
-    */
-   assert(intel_obj);
-   assert(intel_obj->buffer);
-   obj->Pointer = bmMapBuffer(intel, intel_obj->buffer, 0);
-   return obj->Pointer;
-}
-
-
-/**
- * Called via glMapBufferARB().
- */
-static GLboolean intel_bufferobj_unmap( GLcontext *ctx,
-					GLenum target,
-					struct gl_buffer_object *obj )
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
-
-   assert(intel_obj);
-   assert(intel_obj->buffer);
-   assert(obj->Pointer);
-   bmUnmapBufferAUB(intel, intel_obj->buffer, 0, 0);
-   obj->Pointer = NULL;
-   return GL_TRUE;
-}
-
-struct buffer *intel_bufferobj_buffer( const struct intel_buffer_object *intel_obj )
-{
-   assert(intel_obj->Base.Name);
-   assert(intel_obj->buffer);
-   return intel_obj->buffer;
-}  
-
-void intel_bufferobj_init( struct intel_context *intel )
-{
-   GLcontext *ctx = &intel->ctx;
-
-   ctx->Driver.NewBufferObject = intel_bufferobj_alloc;
-   ctx->Driver.DeleteBuffer = intel_bufferobj_free;
-   ctx->Driver.BufferData = intel_bufferobj_data;
-   ctx->Driver.BufferSubData = intel_bufferobj_subdata;
-   ctx->Driver.GetBufferSubData = intel_bufferobj_get_subdata;
-   ctx->Driver.MapBuffer = intel_bufferobj_map;
-   ctx->Driver.UnmapBuffer = intel_bufferobj_unmap;
-}
diff --git a/i965/intel_buffers.c b/i965/intel_buffers.c
deleted file mode 100644
index de6a867..0000000
--- a/i965/intel_buffers.c
+++ /dev/null
@@ -1,581 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "intel_screen.h"
-#include "intel_context.h"
-#include "intel_blit.h"
-#include "intel_regions.h"
-#include "intel_batchbuffer.h"
-#include "context.h"
-#include "utils.h"
-#include "framebuffer.h"
-#include "vblank.h"
-#include "macros.h"
-#include "swrast/swrast.h"
-
-GLboolean intel_intersect_cliprects( drm_clip_rect_t *dst,
-				     const drm_clip_rect_t *a,
-				     const drm_clip_rect_t *b )
-{
-   dst->x1 = MAX2(a->x1, b->x1);
-   dst->x2 = MIN2(a->x2, b->x2);
-   dst->y1 = MAX2(a->y1, b->y1);
-   dst->y2 = MIN2(a->y2, b->y2);
-
-   return (dst->x1 <= dst->x2 &&
-	   dst->y1 <= dst->y2);
-}
-
-struct intel_region *intel_drawbuf_region( struct intel_context *intel )
-{
-   switch (intel->ctx.DrawBuffer->_ColorDrawBufferMask[0]) {
-   case BUFFER_BIT_FRONT_LEFT:
-      return intel->front_region;
-   case BUFFER_BIT_BACK_LEFT:
-      return intel->back_region;
-   default:
-      /* Not necessary to fallback - could handle either NONE or
-       * FRONT_AND_BACK cases below.
-       */
-      return NULL;		
-   }
-}
-
-struct intel_region *intel_readbuf_region( struct intel_context *intel )
-{
-   GLcontext *ctx = &intel->ctx;
-
-   /* This will have to change to support EXT_fbo's, but is correct
-    * for now:
-    */
-   switch (ctx->ReadBuffer->_ColorReadBufferIndex) {
-   case BUFFER_FRONT_LEFT:
-      return intel->front_region;
-   case BUFFER_BACK_LEFT:
-      return intel->back_region;
-   default:
-      assert(0);
-      return NULL;
-   }
-}
-
-
-
-static void intelBufferSize(GLframebuffer *buffer,
-			    GLuint *width, 
-			    GLuint *height)
-{
-   GET_CURRENT_CONTEXT(ctx);
-   struct intel_context *intel = intel_context(ctx);
-   /* Need to lock to make sure the driDrawable is uptodate.  This
-    * information is used to resize Mesa's software buffers, so it has
-    * to be correct.
-    */
-   LOCK_HARDWARE(intel);
-   if (intel->driDrawable) {
-      *width = intel->driDrawable->w;
-      *height = intel->driDrawable->h;
-   }
-   else {
-      *width = 0;
-      *height = 0;
-   }
-   UNLOCK_HARDWARE(intel);
-}
-
-
-static void intelSetFrontClipRects( struct intel_context *intel )
-{
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;
-
-   if (!dPriv) return;
-
-   intel->numClipRects = dPriv->numClipRects;
-   intel->pClipRects = dPriv->pClipRects;
-   intel->drawX = dPriv->x;
-   intel->drawY = dPriv->y;
-}
-
-
-static void intelSetBackClipRects( struct intel_context *intel )
-{
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;
-
-   if (!dPriv) return;
-
-   if (intel->sarea->pf_enabled == 0 && dPriv->numBackClipRects == 0) {
-      intel->numClipRects = dPriv->numClipRects;
-      intel->pClipRects = dPriv->pClipRects;
-      intel->drawX = dPriv->x;
-      intel->drawY = dPriv->y;
-   } else {
-      intel->numClipRects = dPriv->numBackClipRects;
-      intel->pClipRects = dPriv->pBackClipRects;
-      intel->drawX = dPriv->backX;
-      intel->drawY = dPriv->backY;
-      
-      if (dPriv->numBackClipRects == 1 &&
-	  dPriv->x == dPriv->backX &&
-	  dPriv->y == dPriv->backY) {
-      
-	 /* Repeat the calculation of the back cliprect dimensions here
-	  * as early versions of dri.a in the Xserver are incorrect.  Try
-	  * very hard not to restrict future versions of dri.a which
-	  * might eg. allocate truly private back buffers.
-	  */
-	 int x1, y1;
-	 int x2, y2;
-	 
-	 x1 = dPriv->x;
-	 y1 = dPriv->y;      
-	 x2 = dPriv->x + dPriv->w;
-	 y2 = dPriv->y + dPriv->h;
-	 
-	 if (x1 < 0) x1 = 0;
-	 if (y1 < 0) y1 = 0;
-	 if (x2 > intel->intelScreen->width) x2 = intel->intelScreen->width;
-	 if (y2 > intel->intelScreen->height) y2 = intel->intelScreen->height;
-
-	 if (x1 == dPriv->pBackClipRects[0].x1 &&
-	     y1 == dPriv->pBackClipRects[0].y1) {
-
-	    dPriv->pBackClipRects[0].x2 = x2;
-	    dPriv->pBackClipRects[0].y2 = y2;
-	 }
-      }
-   }
-}
-
-
-void intelWindowMoved( struct intel_context *intel )
-{
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;
-
-   if (!intel->ctx.DrawBuffer) {
-      intelSetFrontClipRects( intel );
-   }
-   else {
-      switch (intel->ctx.DrawBuffer->_ColorDrawBufferMask[0]) {
-      case BUFFER_BIT_FRONT_LEFT:
-	 intelSetFrontClipRects( intel );
-	 break;
-      case BUFFER_BIT_BACK_LEFT:
-	 intelSetBackClipRects( intel );
-	 break;
-      default:
-	 /* glDrawBuffer(GL_NONE or GL_FRONT_AND_BACK): software fallback */
-	 intelSetFrontClipRects( intel );
-      }
-   }
-
-   {
-      if (intel->intelScreen->driScrnPriv->ddxMinor >= 7) {
-	 volatile drmI830Sarea *sarea = intel->sarea;
-	 drm_clip_rect_t drw_rect = { .x1 = dPriv->x, .x2 = dPriv->x + dPriv->w,
-				      .y1 = dPriv->y, .y2 = dPriv->y + dPriv->h 
-	 };
-	 drm_clip_rect_t pipeA_rect = { .x1 = sarea->pipeA_x,
-					.x2 = sarea->pipeA_x + sarea->pipeA_w,
-					.y1 = sarea->pipeA_y,
-                                        .y2 = sarea->pipeA_y + sarea->pipeA_h };
-         drm_clip_rect_t pipeB_rect = { .x1 = sarea->pipeB_x,
-                                        .x2 = sarea->pipeB_x + sarea->pipeB_w,
-                                        .y1 = sarea->pipeB_y,
-                                        .y2 = sarea->pipeB_y + sarea->pipeB_h };
-         GLint areaA = driIntersectArea( drw_rect, pipeA_rect );
-         GLint areaB = driIntersectArea( drw_rect, pipeB_rect );
-         GLuint flags = intel->vblank_flags;
-	 
-         if (areaB > areaA || (areaA > 0 && areaB > 0)) {
-            flags = intel->vblank_flags | VBLANK_FLAG_SECONDARY;
-         } else {
-            flags = intel->vblank_flags & ~VBLANK_FLAG_SECONDARY;
-         }
-	 
-         if (flags != intel->vblank_flags) {
-            intel->vblank_flags = flags;
-            driGetCurrentVBlank(dPriv, intel->vblank_flags, &intel->vbl_seq);
-         }
-      } else {
-         intel->vblank_flags &= ~VBLANK_FLAG_SECONDARY;
-      }
-   }
-   _mesa_resize_framebuffer(&intel->ctx,
-   			    (GLframebuffer*)dPriv->driverPrivate,
-			    dPriv->w, dPriv->h);
-
-   /* Set state we know depends on drawable parameters:
-    */
-   {
-      GLcontext *ctx = &intel->ctx;
-
-      if (ctx->Driver.Scissor)
-	 ctx->Driver.Scissor( ctx, ctx->Scissor.X, ctx->Scissor.Y,
-			      ctx->Scissor.Width, ctx->Scissor.Height );
-      
-      if (ctx->Driver.DepthRange)
-	 ctx->Driver.DepthRange( ctx, 
-				 ctx->Viewport.Near,
-				 ctx->Viewport.Far );
-
-      intel->NewGLState |= _NEW_SCISSOR;
-   }
-
-   /* This works because the lock is always grabbed before emitting
-    * commands and commands are always flushed prior to releasing
-    * the lock.
-    */
-   intel->NewGLState |= _NEW_WINDOW_POS; 
-}
-
-
-
-/* A true meta version of this would be very simple and additionally
- * machine independent.  Maybe we'll get there one day.
- */
-static void intelClearWithTris(struct intel_context *intel, 
-			       GLbitfield mask)
-{
-   GLcontext *ctx = &intel->ctx;
-   drm_clip_rect_t clear;
-   GLint cx, cy, cw, ch;
-
-   if (INTEL_DEBUG & DEBUG_DRI)
-      _mesa_printf("%s %x\n", __FUNCTION__, mask);
-
-   {
-
-      intel->vtbl.install_meta_state(intel);
-
-      /* Get clear bounds after locking */
-      cx = ctx->DrawBuffer->_Xmin;
-      cy = ctx->DrawBuffer->_Ymin;
-      cw = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin;
-      ch = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin;
-
-      clear.x1 = cx;
-      clear.y1 = cy;
-      clear.x2 = cx + cw;
-      clear.y2 = cy + ch;
-
-      /* Back and stencil cliprects are the same.  Try and do both
-       * buffers at once:
-       */
-      if (mask & (BUFFER_BIT_BACK_LEFT|BUFFER_BIT_STENCIL|BUFFER_BIT_DEPTH)) { 
-	 intel->vtbl.meta_draw_region(intel, 
-				      intel->back_region,
-				      intel->depth_region );
-
-	 if (mask & BUFFER_BIT_BACK_LEFT)
-	    intel->vtbl.meta_color_mask(intel, GL_TRUE );
-	 else
-	    intel->vtbl.meta_color_mask(intel, GL_FALSE );
-
-	 if (mask & BUFFER_BIT_STENCIL) 
-	    intel->vtbl.meta_stencil_replace( intel, 
-					      intel->ctx.Stencil.WriteMask[0], 
-					      intel->ctx.Stencil.Clear);
-	 else
-	    intel->vtbl.meta_no_stencil_write(intel);
-
-	 if (mask & BUFFER_BIT_DEPTH) 
-	    intel->vtbl.meta_depth_replace( intel );
-	 else
-	    intel->vtbl.meta_no_depth_write(intel);
-      
-	 /* XXX: Using INTEL_BATCH_NO_CLIPRECTS here is dangerous as the
-	  * drawing origin may not be correctly emitted.
-	  */
-	 intel->vtbl.meta_draw_quad(intel, 
-				    clear.x1, clear.x2, 
-				    clear.y1, clear.y2, 
-				    intel->ctx.Depth.Clear,
-				    intel->clear_chan[0], 
-				    intel->clear_chan[1], 
-				    intel->clear_chan[2], 
-				    intel->clear_chan[3], 
-				    0, 0, 0, 0);
-      }
-
-      /* Front may have different cliprects: 
-       */
-      if (mask & BUFFER_BIT_FRONT_LEFT) {
-	 intel->vtbl.meta_no_depth_write(intel);
-	 intel->vtbl.meta_no_stencil_write(intel);
-	 intel->vtbl.meta_color_mask(intel, GL_TRUE );
-	 intel->vtbl.meta_draw_region(intel, 
-				      intel->front_region,
-				      intel->depth_region);
-
-	 /* XXX: Using INTEL_BATCH_NO_CLIPRECTS here is dangerous as the
-	  * drawing origin may not be correctly emitted.
-	  */
-	 intel->vtbl.meta_draw_quad(intel, 
-				    clear.x1, clear.x2, 
-				    clear.y1, clear.y2, 
-				    0,
-				    intel->clear_chan[0], 
-				    intel->clear_chan[1], 
-				    intel->clear_chan[2], 
-				    intel->clear_chan[3], 
-				    0, 0, 0, 0);
-      }
-
-      intel->vtbl.leave_meta_state( intel );
-   }
-}
-
-
-
-
-
-static void intelClear(GLcontext *ctx, GLbitfield mask)
-{
-   struct intel_context *intel = intel_context( ctx );
-   const GLuint colorMask = *((GLuint *) &ctx->Color.ColorMask);
-   GLbitfield tri_mask = 0;
-   GLbitfield blit_mask = 0;
-   GLbitfield swrast_mask = 0;
-
-   if (INTEL_DEBUG & DEBUG_DRI)
-      fprintf(stderr, "%s %x\n", __FUNCTION__, mask);
-
-
-   if (mask & BUFFER_BIT_FRONT_LEFT) {
-      if (colorMask == ~0) {
-	 blit_mask |= BUFFER_BIT_FRONT_LEFT;
-      } 
-      else {
-	 tri_mask |= BUFFER_BIT_FRONT_LEFT;
-      }
-   }
-
-   if (mask & BUFFER_BIT_BACK_LEFT) {
-      if (colorMask == ~0) {
-	 blit_mask |= BUFFER_BIT_BACK_LEFT;
-      } 
-      else {
-	 tri_mask |= BUFFER_BIT_BACK_LEFT;
-      }
-   }
-
-
-   if (mask & BUFFER_BIT_STENCIL) {
-      if (!intel->hw_stencil) {
-	 swrast_mask |= BUFFER_BIT_STENCIL;
-      }
-      else if ((ctx->Stencil.WriteMask[0] & 0xff) != 0xff ||
-	       intel->depth_region->tiled) {
-	 tri_mask |= BUFFER_BIT_STENCIL;
-      } 
-      else {
-	 blit_mask |= BUFFER_BIT_STENCIL;
-      }
-   }
-
-   /* Do depth with stencil if possible to avoid 2nd pass over the
-    * same buffer.
-    */
-   if (mask & BUFFER_BIT_DEPTH) {
-      if ((tri_mask & BUFFER_BIT_STENCIL) ||
-	  intel->depth_region->tiled)
-	 tri_mask |= BUFFER_BIT_DEPTH;
-      else 
-	 blit_mask |= BUFFER_BIT_DEPTH;
-   }
-
-   swrast_mask |= (mask & BUFFER_BIT_ACCUM);
-
-   intelFlush( ctx );
-
-   if (blit_mask)
-      intelClearWithBlit( ctx, blit_mask );
-
-   if (tri_mask) 
-      intelClearWithTris( intel, tri_mask );
-
-   if (swrast_mask)
-      _swrast_Clear( ctx, swrast_mask );
-}
-
-
-
-
-
-
-
-/* Flip the front & back buffers
- */
-static void intelPageFlip( const __DRIdrawablePrivate *dPriv )
-{
-#if 0
-   struct intel_context *intel;
-   int tmp, ret;
-
-   if (INTEL_DEBUG & DEBUG_IOCTL)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-   assert(dPriv);
-   assert(dPriv->driContextPriv);
-   assert(dPriv->driContextPriv->driverPrivate);
-
-   intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate;
-
-   intelFlush( &intel->ctx );
-   LOCK_HARDWARE( intel );
-
-   if (dPriv->pClipRects) {
-      *(drm_clip_rect_t *)intel->sarea->boxes = dPriv->pClipRects[0];
-      intel->sarea->nbox = 1;
-   }
-
-   ret = drmCommandNone(intel->driFd, DRM_I830_FLIP); 
-   if (ret) {
-      fprintf(stderr, "%s: %d\n", __FUNCTION__, ret);
-      UNLOCK_HARDWARE( intel );
-      exit(1);
-   }
-
-   tmp = intel->sarea->last_enqueue;
-   intelRefillBatchLocked( intel );
-   UNLOCK_HARDWARE( intel );
-
-
-   intelSetDrawBuffer( &intel->ctx, intel->ctx.Color.DriverDrawBuffer );
-#endif
-}
-
-
-void intelSwapBuffers( __DRIdrawablePrivate *dPriv )
-{
-   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
-      struct intel_context *intel;
-      GLcontext *ctx;
-      intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate;
-      ctx = &intel->ctx;
-      if (ctx->Visual.doubleBufferMode) {
-	 _mesa_notifySwapBuffers( ctx );  /* flush pending rendering comands */
-	 if ( 0 /*intel->doPageFlip*/ ) { /* doPageFlip is never set !!! */
-	    intelPageFlip( dPriv );
-	 } else {
-	    intelCopyBuffer( dPriv, NULL );
-	 }
-	 if (intel->aub_file) {
-	    intelFlush(ctx);
-	    intel->vtbl.aub_dump_bmp( intel, 1 );
-
-	    intel->aub_wrap = 1;
-	 }
-      }
-   } else {
-      /* XXX this shouldn't be an error but we can't handle it for now */
-      fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__);
-   }
-}
-
-void intelCopySubBuffer( __DRIdrawablePrivate *dPriv,
-			 int x, int y, int w, int h )
-{
-   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
-      struct intel_context *intel = dPriv->driContextPriv->driverPrivate;
-      GLcontext *ctx = &intel->ctx;
-
-      if (ctx->Visual.doubleBufferMode) {
-	 drm_clip_rect_t rect;
-	 rect.x1 = x + dPriv->x;
-	 rect.y1 = (dPriv->h - y - h) + dPriv->y;
-	 rect.x2 = rect.x1 + w;
-	 rect.y2 = rect.y1 + h;
-	 _mesa_notifySwapBuffers( ctx );  /* flush pending rendering comands */
-	 intelCopyBuffer( dPriv, &rect );
-      }
-   } else {
-      /* XXX this shouldn't be an error but we can't handle it for now */
-      fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__);
-   }
-}
-
-
-static void intelDrawBuffer(GLcontext *ctx, GLenum mode )
-{
-   struct intel_context *intel = intel_context(ctx);
-   int front = 0;
- 
-   if (!ctx->DrawBuffer)
-      return;
-
-   switch ( ctx->DrawBuffer->_ColorDrawBufferMask[0] ) {
-   case BUFFER_BIT_FRONT_LEFT:
-      front = 1;
-      FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      break;
-   case BUFFER_BIT_BACK_LEFT:
-      front = 0;
-      FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE );
-      break;
-   default:
-      FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_TRUE );
-      return;
-   }
-
-   if ( intel->sarea->pf_current_page == 1 ) 
-      front ^= 1;
-   
-   intelSetFrontClipRects( intel );
-
-
-   if (front) {
-      if (intel->draw_region != intel->front_region) {
-	 intel_region_release(intel, &intel->draw_region);
-	 intel_region_reference(&intel->draw_region, intel->front_region);
-      }
-   } else {
-      if (intel->draw_region != intel->back_region) {
-	 intel_region_release(intel, &intel->draw_region);
-	 intel_region_reference(&intel->draw_region, intel->back_region);
-      }
-   }
-
-   intel->vtbl.set_draw_region( intel, 
-				intel->draw_region,
-				intel->depth_region);
-}
-
-static void intelReadBuffer( GLcontext *ctx, GLenum mode )
-{
-   /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */
-}
-
-
-
-void intelInitBufferFuncs( struct dd_function_table *functions )
-{
-   functions->Clear = intelClear;
-   functions->GetBufferSize = intelBufferSize;
-   functions->DrawBuffer = intelDrawBuffer;
-   functions->ReadBuffer = intelReadBuffer;
-}
diff --git a/i965/intel_context.c b/i965/intel_context.c
deleted file mode 100644
index 5ee5282..0000000
--- a/i965/intel_context.c
+++ /dev/null
@@ -1,716 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#include "glheader.h"
-#include "context.h"
-#include "matrix.h"
-#include "simple_list.h"
-#include "extensions.h"
-#include "framebuffer.h"
-#include "imports.h"
-#include "points.h"
-
-#include "swrast/swrast.h"
-#include "swrast_setup/swrast_setup.h"
-#include "tnl/tnl.h"
-#include "vbo/vbo.h"
-
-#include "tnl/t_pipeline.h"
-#include "tnl/t_vertex.h"
-
-#include "drivers/common/driverfuncs.h"
-
-#include "intel_screen.h"
-
-#include "i830_dri.h"
-#include "i830_common.h"
-
-#include "intel_tex.h"
-#include "intel_span.h"
-#include "intel_ioctl.h"
-#include "intel_batchbuffer.h"
-#include "intel_blit.h"
-#include "intel_regions.h"
-#include "intel_buffer_objects.h"
-
-#include "bufmgr.h"
-
-#include "utils.h"
-#include "vblank.h"
-#ifndef INTEL_DEBUG
-int INTEL_DEBUG = (0);
-#endif
-
-#define need_GL_ARB_multisample
-#define need_GL_ARB_point_parameters
-#define need_GL_ARB_texture_compression
-#define need_GL_ARB_vertex_buffer_object
-#define need_GL_ARB_vertex_program
-#define need_GL_ARB_window_pos
-#define need_GL_ARB_occlusion_query
-#define need_GL_EXT_blend_color
-#define need_GL_EXT_blend_equation_separate
-#define need_GL_EXT_blend_func_separate
-#define need_GL_EXT_blend_minmax
-#define need_GL_EXT_cull_vertex
-#define need_GL_EXT_fog_coord
-#define need_GL_EXT_multi_draw_arrays
-#define need_GL_EXT_secondary_color
-#include "extension_helper.h"
-
-#ifndef VERBOSE
-int VERBOSE = 0;
-#endif
-
-/***************************************
- * Mesa's Driver Functions
- ***************************************/
-
-#define DRIVER_VERSION                     "4.1.3002"
-
-static const GLubyte *intelGetString( GLcontext *ctx, GLenum name )
-{
-   const char * chipset;
-   static char buffer[128];
-
-   switch (name) {
-   case GL_VENDOR:
-      return (GLubyte *)"Tungsten Graphics, Inc";
-      break;
-      
-   case GL_RENDERER:
-      switch (intel_context(ctx)->intelScreen->deviceID) {
-      case PCI_CHIP_I965_Q:
-	 chipset = "Intel(R) 965Q"; break;
-         break;
-      case PCI_CHIP_I965_G:
-      case PCI_CHIP_I965_G_1:
-	 chipset = "Intel(R) 965G"; break;
-         break;
-      case PCI_CHIP_I946_GZ:
-	 chipset = "Intel(R) 946GZ"; break;
-         break;
-      case PCI_CHIP_I965_GM:
-	 chipset = "Intel(R) 965GM"; break;
-         break;
-      case PCI_CHIP_IGD_GM:
-	 chipset = "Intel(R) Integrated Graphics Device";
-         break;
-      default:
-	 chipset = "Unknown Intel Chipset"; break;
-      }
-
-      (void) driGetRendererString( buffer, chipset, DRIVER_VERSION, 0 );
-      return (GLubyte *) buffer;
-
-   default:
-      return NULL;
-   }
-}
-
-
-/**
- * Extension strings exported by the intel driver.
- *
- * \note
- * It appears that ARB_texture_env_crossbar has "disappeared" compared to the
- * old i830-specific driver.
- */
-const struct dri_extension card_extensions[] =
-{
-    { "GL_ARB_multisample",                GL_ARB_multisample_functions },
-    { "GL_ARB_multitexture",               NULL },
-    { "GL_ARB_point_parameters",           GL_ARB_point_parameters_functions },
-    { "GL_ARB_texture_border_clamp",       NULL },
-    { "GL_ARB_texture_compression",        GL_ARB_texture_compression_functions },
-    { "GL_ARB_texture_cube_map",           NULL },
-    { "GL_ARB_texture_env_add",            NULL },
-    { "GL_ARB_texture_env_combine",        NULL },
-    { "GL_ARB_texture_env_dot3",           NULL },
-    { "GL_ARB_texture_mirrored_repeat",    NULL },
-    { "GL_ARB_texture_non_power_of_two",   NULL },
-    { "GL_ARB_texture_rectangle",          NULL },
-    { "GL_NV_texture_rectangle",           NULL },
-    { "GL_EXT_texture_rectangle",          NULL },
-    { "GL_ARB_texture_rectangle",          NULL },
-    { "GL_ARB_vertex_buffer_object",       GL_ARB_vertex_buffer_object_functions },
-    { "GL_ARB_vertex_program",             GL_ARB_vertex_program_functions },
-    { "GL_ARB_window_pos",                 GL_ARB_window_pos_functions },
-    { "GL_EXT_blend_color",                GL_EXT_blend_color_functions },
-    { "GL_EXT_blend_equation_separate",    GL_EXT_blend_equation_separate_functions },
-    { "GL_EXT_blend_func_separate",        GL_EXT_blend_func_separate_functions },
-    { "GL_EXT_blend_minmax",               GL_EXT_blend_minmax_functions },
-    { "GL_EXT_blend_logic_op",             NULL },
-    { "GL_EXT_blend_subtract",             NULL },
-    { "GL_EXT_cull_vertex",                GL_EXT_cull_vertex_functions },
-    { "GL_EXT_fog_coord",                  GL_EXT_fog_coord_functions },
-    { "GL_EXT_multi_draw_arrays",          GL_EXT_multi_draw_arrays_functions },
-    { "GL_EXT_secondary_color",            GL_EXT_secondary_color_functions },
-    { "GL_EXT_stencil_wrap",               NULL },
-    { "GL_EXT_texture_edge_clamp",         NULL },
-    { "GL_EXT_texture_env_combine",        NULL },
-    { "GL_EXT_texture_env_dot3",           NULL },
-    { "GL_EXT_texture_filter_anisotropic", NULL },
-    { "GL_EXT_texture_lod_bias",           NULL },
-    { "GL_3DFX_texture_compression_FXT1",  NULL },
-    { "GL_APPLE_client_storage",           NULL },
-    { "GL_MESA_pack_invert",               NULL },
-    { "GL_MESA_ycbcr_texture",             NULL },
-    { "GL_NV_blend_square",                NULL },
-    { "GL_SGIS_generate_mipmap",           NULL },
-    { NULL,                                NULL }
-};
-
-const struct dri_extension arb_oc_extension = 
-    { "GL_ARB_occlusion_query",            GL_ARB_occlusion_query_functions};
-
-void intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging)
-{	     
-	struct intel_context *intel = ctx?intel_context(ctx):NULL;
-	driInitExtensions(ctx, card_extensions, enable_imaging);
-	if (!ctx || intel->intelScreen->drmMinor >= 8)
-		driInitSingleExtension (ctx, &arb_oc_extension);
-}
-
-static const struct dri_debug_control debug_control[] =
-{
-    { "fall",  DEBUG_FALLBACKS },
-    { "tex",   DEBUG_TEXTURE },
-    { "ioctl", DEBUG_IOCTL },
-    { "prim",  DEBUG_PRIMS },
-    { "vert",  DEBUG_VERTS },
-    { "state", DEBUG_STATE },
-    { "verb",  DEBUG_VERBOSE },
-    { "dri",   DEBUG_DRI },
-    { "dma",   DEBUG_DMA },
-    { "san",   DEBUG_SANITY },
-    { "sync",  DEBUG_SYNC },
-    { "sleep", DEBUG_SLEEP },
-    { "pix",   DEBUG_PIXEL },
-    { "buf",   DEBUG_BUFMGR },
-    { "stats", DEBUG_STATS },
-    { "tile",  DEBUG_TILE },
-    { "sing",  DEBUG_SINGLE_THREAD },
-    { "thre",  DEBUG_SINGLE_THREAD },
-    { "wm",    DEBUG_WM },
-    { "vs",    DEBUG_VS },
-    { NULL,    0 }
-};
-
-
-static void intelInvalidateState( GLcontext *ctx, GLuint new_state )
-{
-   struct intel_context *intel = intel_context(ctx);
-
-   _swrast_InvalidateState( ctx, new_state );
-   _swsetup_InvalidateState( ctx, new_state );
-   _vbo_InvalidateState( ctx, new_state );
-   _tnl_InvalidateState( ctx, new_state );
-   _tnl_invalidate_vertex_state( ctx, new_state );
-   
-   intel->NewGLState |= new_state;
-
-   if (intel->vtbl.invalidate_state)
-      intel->vtbl.invalidate_state( intel, new_state );
-}
-
-
-void intelFlush( GLcontext *ctx )
-{
-   struct intel_context *intel = intel_context( ctx );
-
-   bmLockAndFence(intel);
-}
-
-void intelFinish( GLcontext *ctx ) 
-{
-   struct intel_context *intel = intel_context( ctx );
-
-   bmFinishFence(intel, bmLockAndFence(intel));
-}
-
-static void
-intelBeginQuery(GLcontext *ctx, GLenum target, struct gl_query_object *q)
-{
-	struct intel_context *intel = intel_context( ctx );
-	drmI830MMIO io = {
-		.read_write = MMIO_READ,
-		.reg = MMIO_REGS_PS_DEPTH_COUNT,
-		.data = &q->Result 
-	};
-	intel->stats_wm++;
-	intelFinish(&intel->ctx);
-	drmCommandWrite(intel->driFd, DRM_I830_MMIO, &io, sizeof(io));
-}
-
-static void
-intelEndQuery(GLcontext *ctx, GLenum target, struct gl_query_object *q)
-{
-	struct intel_context *intel = intel_context( ctx );
-	GLuint64EXT tmp;	
-	drmI830MMIO io = {
-		.read_write = MMIO_READ,
-		.reg = MMIO_REGS_PS_DEPTH_COUNT,
-		.data = &tmp
-	};
-	intelFinish(&intel->ctx);
-	drmCommandWrite(intel->driFd, DRM_I830_MMIO, &io, sizeof(io));
-	q->Result = tmp - q->Result;
-	q->Ready = GL_TRUE;
-	intel->stats_wm--;
-}
-
-
-void intelInitDriverFunctions( struct dd_function_table *functions )
-{
-   _mesa_init_driver_functions( functions );
-
-   functions->Flush = intelFlush;
-   functions->Finish = intelFinish;
-   functions->GetString = intelGetString;
-   functions->UpdateState = intelInvalidateState;
-   functions->BeginQuery = intelBeginQuery;
-   functions->EndQuery = intelEndQuery;
-
-   /* CopyPixels can be accelerated even with the current memory
-    * manager:
-    */
-   if (!getenv("INTEL_NO_BLIT")) {
-      functions->CopyPixels = intelCopyPixels;
-      functions->Bitmap = intelBitmap;
-   }
-
-   intelInitTextureFuncs( functions );
-   intelInitStateFuncs( functions );
-   intelInitBufferFuncs( functions );
-}
-
-
-
-GLboolean intelInitContext( struct intel_context *intel,
-			    const __GLcontextModes *mesaVis,
-			    __DRIcontextPrivate *driContextPriv,
-			    void *sharedContextPrivate,
-			    struct dd_function_table *functions )
-{
-   GLcontext *ctx = &intel->ctx;
-   GLcontext *shareCtx = (GLcontext *) sharedContextPrivate;
-   __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
-   intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private;
-   volatile drmI830Sarea *saPriv = (volatile drmI830Sarea *)
-      (((GLubyte *)sPriv->pSAREA)+intelScreen->sarea_priv_offset);
-
-   if (!_mesa_initialize_context(&intel->ctx,
-				 mesaVis, shareCtx, 
-				 functions,
-				 (void*) intel)) {
-      _mesa_printf("%s: failed to init mesa context\n", __FUNCTION__);
-      return GL_FALSE;
-   }
-
-   driContextPriv->driverPrivate = intel;
-   intel->intelScreen = intelScreen;
-   intel->driScreen = sPriv;
-   intel->sarea = saPriv;
-
-   driParseConfigFiles (&intel->optionCache, &intelScreen->optionCache,
-		   intel->driScreen->myNum, "i965");
-
-   intel->vblank_flags = (intel->intelScreen->irq_active != 0)
-	   ? driGetDefaultVBlankFlags(&intel->optionCache) : VBLANK_FLAG_NO_IRQ;
-
-   ctx->Const.MaxTextureMaxAnisotropy = 2.0;
-
-   if (getenv("INTEL_STRICT_CONFORMANCE")) {
-      intel->strict_conformance = 1;
-   }
-
-   if (intel->strict_conformance) {
-      ctx->Const.MinLineWidth = 1.0;
-      ctx->Const.MinLineWidthAA = 1.0;
-      ctx->Const.MaxLineWidth = 1.0;
-      ctx->Const.MaxLineWidthAA = 1.0;
-      ctx->Const.LineWidthGranularity = 1.0;
-   }
-   else {
-      ctx->Const.MinLineWidth = 1.0;
-      ctx->Const.MinLineWidthAA = 1.0;
-      ctx->Const.MaxLineWidth = 5.0;
-      ctx->Const.MaxLineWidthAA = 5.0;
-      ctx->Const.LineWidthGranularity = 0.5;
-   }
-
-   ctx->Const.MinPointSize = 1.0;
-   ctx->Const.MinPointSizeAA = 1.0;
-   ctx->Const.MaxPointSize = 255.0;
-   ctx->Const.MaxPointSizeAA = 3.0;
-   ctx->Const.PointSizeGranularity = 1.0;
-
-   /* reinitialize the context point state.
-    * It depend on constants in __GLcontextRec::Const
-    */
-   _mesa_init_point(ctx);
-
-   /* Initialize the software rasterizer and helper modules. */
-   _swrast_CreateContext( ctx );
-   _vbo_CreateContext( ctx );
-   _tnl_CreateContext( ctx );
-   _swsetup_CreateContext( ctx );
-
-   TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
-
-   /* Configure swrast to match hardware characteristics: */
-   _swrast_allow_pixel_fog( ctx, GL_FALSE );
-   _swrast_allow_vertex_fog( ctx, GL_TRUE );
-
-   /* Dri stuff */
-   intel->hHWContext = driContextPriv->hHWContext;
-   intel->driFd = sPriv->fd;
-   intel->driHwLock = (drmLock *) &sPriv->pSAREA->lock;
-
-   intel->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24;
-   intel->hw_stipple = 1;
-
-   switch(mesaVis->depthBits) {
-   case 0:			/* what to do in this case? */
-   case 16:
-      intel->depth_scale = 1.0/0xffff;
-      intel->polygon_offset_scale = 1.0/0xffff;
-      intel->depth_clear_mask = ~0;
-      intel->ClearDepth = 0xffff;
-      break;
-   case 24:
-      intel->depth_scale = 1.0/0xffffff;
-      intel->polygon_offset_scale = 2.0/0xffffff; /* req'd to pass glean */
-      intel->depth_clear_mask = 0x00ffffff;
-      intel->stencil_clear_mask = 0xff000000;
-      intel->ClearDepth = 0x00ffffff;
-      break;
-   default:
-      assert(0); 
-      break;
-   }
-
-   /* Initialize swrast, tnl driver tables: */
-   intelInitSpanFuncs( ctx );
-
-   intel->no_hw = getenv("INTEL_NO_HW") != NULL;
-
-   if (!intel->intelScreen->irq_active) {
-      _mesa_printf("IRQs not active.  Exiting\n");
-      exit(1);
-   }
-   intelInitExtensions(ctx, GL_TRUE); 
-
-   INTEL_DEBUG  = driParseDebugString( getenv( "INTEL_DEBUG" ),
-				       debug_control );
-
-
-   /* Buffer manager: 
-    */
-   intel->bm = bm_fake_intel_Attach( intel );
-
-
-   bmInitPool(intel,
-	      intel->intelScreen->tex.offset, /* low offset */
-	      intel->intelScreen->tex.map, /* low virtual */
-	      intel->intelScreen->tex.size,
-	      BM_MEM_AGP);
-
-   /* These are still static, but create regions for them.  
-    */
-   intel->front_region = 
-      intel_region_create_static(intel,
-				 BM_MEM_AGP,
-				 intelScreen->front.offset,
-				 intelScreen->front.map,
-				 intelScreen->cpp,
-				 intelScreen->front.pitch / intelScreen->cpp,
-				 intelScreen->height,
-				 intelScreen->front.size,
-				 intelScreen->front.tiled != 0);
-
-   intel->back_region = 
-      intel_region_create_static(intel,
-				 BM_MEM_AGP,
-				 intelScreen->back.offset,
-				 intelScreen->back.map,
-				 intelScreen->cpp,
-				 intelScreen->back.pitch / intelScreen->cpp,
-				 intelScreen->height,
-				 intelScreen->back.size,
-                                 intelScreen->back.tiled != 0);
-
-   /* Still assuming front.cpp == depth.cpp
-    *
-    * XXX: Setting tiling to false because Depth tiling only supports
-    * YMAJOR but the blitter only supports XMAJOR tiling.  Have to
-    * resolve later.
-    */
-   intel->depth_region = 
-      intel_region_create_static(intel,
-				 BM_MEM_AGP,
-				 intelScreen->depth.offset,
-				 intelScreen->depth.map,
-				 intelScreen->cpp,
-				 intelScreen->depth.pitch / intelScreen->cpp,
-				 intelScreen->height,
-				 intelScreen->depth.size,
-                                 intelScreen->depth.tiled != 0);
-   
-   intel_bufferobj_init( intel );
-   intel->batch = intel_batchbuffer_alloc( intel );
-
-   if (intel->ctx.Mesa_DXTn) {
-      _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
-      _mesa_enable_extension( ctx, "GL_S3_s3tc" );
-   }
-   else if (driQueryOptionb (&intel->optionCache, "force_s3tc_enable")) {
-      _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
-   }
-
-/*    driInitTextureObjects( ctx, & intel->swapped, */
-/* 			  DRI_TEXMGR_DO_TEXTURE_1D | */
-/* 			  DRI_TEXMGR_DO_TEXTURE_2D |  */
-/* 			  DRI_TEXMGR_DO_TEXTURE_RECT ); */
-
-
-   if (getenv("INTEL_NO_RAST")) {
-      fprintf(stderr, "disabling 3D rasterization\n");
-      intel->no_rast = 1;
-   }
-
-
-   return GL_TRUE;
-}
-
-void intelDestroyContext(__DRIcontextPrivate *driContextPriv)
-{
-   struct intel_context *intel = (struct intel_context *) driContextPriv->driverPrivate;
-
-   assert(intel); /* should never be null */
-   if (intel) {
-      GLboolean   release_texture_heaps;
-
-
-      intel->vtbl.destroy( intel );
-
-      release_texture_heaps = (intel->ctx.Shared->RefCount == 1);
-      _swsetup_DestroyContext (&intel->ctx);
-      _tnl_DestroyContext (&intel->ctx);
-      _vbo_DestroyContext (&intel->ctx);
-
-      _swrast_DestroyContext (&intel->ctx);
-      intel->Fallback = 0;	/* don't call _swrast_Flush later */
-      intel_batchbuffer_free(intel->batch);
-      intel->batch = NULL;
-      
-
-      if ( release_texture_heaps ) {
-         /* This share group is about to go away, free our private
-          * texture object data.
-          */
-
-	 /* XXX: destroy the shared bufmgr struct here?
-	  */
-      }
-
-      /* Free the regions created to describe front/back/depth
-       * buffers:
-       */
-#if 0
-      intel_region_release(intel, &intel->front_region);
-      intel_region_release(intel, &intel->back_region);
-      intel_region_release(intel, &intel->depth_region);
-      intel_region_release(intel, &intel->draw_region);
-#endif
-
-      /* free the Mesa context */
-      _mesa_destroy_context(&intel->ctx);
-   }
-
-   driContextPriv->driverPrivate = NULL;
-}
-
-GLboolean intelUnbindContext(__DRIcontextPrivate *driContextPriv)
-{
-   return GL_TRUE;
-}
-
-GLboolean intelMakeCurrent(__DRIcontextPrivate *driContextPriv,
-			  __DRIdrawablePrivate *driDrawPriv,
-			  __DRIdrawablePrivate *driReadPriv)
-{
-
-   if (driContextPriv) {
-      struct intel_context *intel = (struct intel_context *) driContextPriv->driverPrivate;
-
-      if (intel->driReadDrawable != driReadPriv) {
-          intel->driReadDrawable = driReadPriv;
-      }
-
-      if ( intel->driDrawable != driDrawPriv ) {
-	 /* Shouldn't the readbuffer be stored also? */
-	 driDrawableInitVBlank( driDrawPriv, intel->vblank_flags,
-		      &intel->vbl_seq );
-
-	 intel->driDrawable = driDrawPriv;
-	 intelWindowMoved( intel );
-      }
-
-      _mesa_make_current(&intel->ctx,
-			 (GLframebuffer *) driDrawPriv->driverPrivate,
-			 (GLframebuffer *) driReadPriv->driverPrivate);
-
-      intel->ctx.Driver.DrawBuffer( &intel->ctx, intel->ctx.Color.DrawBuffer[0] );
-   } else {
-      _mesa_make_current(NULL, NULL, NULL);
-   }
-
-   return GL_TRUE;
-}
-
-
-static void intelContendedLock( struct intel_context *intel, GLuint flags )
-{
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;
-   __DRIscreenPrivate *sPriv = intel->driScreen;
-   volatile drmI830Sarea * sarea = intel->sarea;
-   int me = intel->hHWContext;
-   int my_bufmgr = bmCtxId(intel);
-
-   drmGetLock(intel->driFd, intel->hHWContext, flags);
-
-   /* If the window moved, may need to set a new cliprect now.
-    *
-    * NOTE: This releases and regains the hw lock, so all state
-    * checking must be done *after* this call:
-    */
-   if (dPriv)
-      DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv);
-
-
-   intel->locked = 1;
-   intel->need_flush = 1;
-
-   /* Lost context?
-    */
-   if (sarea->ctxOwner != me) {
-      DBG("Lost Context: sarea->ctxOwner %x me %x\n", sarea->ctxOwner, me);
-      sarea->ctxOwner = me;
-      intel->vtbl.lost_hardware( intel );
-   }
-
-   /* As above, but don't evict the texture data on transitions
-    * between contexts which all share a local buffer manager.
-    */
-   if (sarea->texAge != my_bufmgr) {
-      DBG("Lost Textures: sarea->texAge %x my_bufmgr %x\n", sarea->ctxOwner, my_bufmgr);
-      sarea->texAge = my_bufmgr;
-      bm_fake_NotifyContendedLockTake( intel ); 
-   }
-
-   /* Drawable changed?
-    */
-   if (dPriv && intel->lastStamp != dPriv->lastStamp) {
-      intelWindowMoved( intel );
-      intel->lastStamp = dPriv->lastStamp;
-   }
-}
-
-_glthread_DECLARE_STATIC_MUTEX(lockMutex);
-
-/* Lock the hardware and validate our state.  
- */
-void LOCK_HARDWARE( struct intel_context *intel )
-{
-    char __ret=0;
-
-    _glthread_LOCK_MUTEX(lockMutex);
-    assert(!intel->locked);
-
-
-    DRM_CAS(intel->driHwLock, intel->hHWContext,
-	    (DRM_LOCK_HELD|intel->hHWContext), __ret);
-    if (__ret)
-        intelContendedLock( intel, 0 );
-
-   intel->locked = 1;
-
-   if (intel->aub_wrap) {
-      bm_fake_NotifyContendedLockTake( intel ); 
-      intel->vtbl.lost_hardware( intel );
-      intel->vtbl.aub_wrap(intel);
-      intel->aub_wrap = 0;
-   }
-
-   if (bmError(intel)) {
-      bmEvictAll(intel);
-      intel->vtbl.lost_hardware( intel );
-   }
-
-   /* Make sure nothing has been emitted prior to getting the lock: 
-    */
-   assert(intel->batch->map == 0);
-
-   /* XXX: postpone, may not be needed:
-    */
-   if (!intel_batchbuffer_map(intel->batch)) {
-      bmEvictAll(intel);
-      intel->vtbl.lost_hardware( intel );
-
-      /* This could only fail if the batchbuffer was greater in size
-       * than the available texture memory:
-       */
-      if (!intel_batchbuffer_map(intel->batch)) {
-	 _mesa_printf("double failure to map batchbuffer\n");
-	 assert(0);
-      }
-   }
-}
- 
-  
-/* Unlock the hardware using the global current context 
- */
-void UNLOCK_HARDWARE( struct intel_context *intel )
-{
-   /* Make sure everything has been released: 
-    */
-   assert(intel->batch->ptr == intel->batch->map + intel->batch->offset);
-
-   intel_batchbuffer_unmap(intel->batch);
-   intel->vtbl.note_unlock( intel );
-   intel->locked = 0;
-
-
-
-   DRM_UNLOCK(intel->driFd, intel->driHwLock, intel->hHWContext);
-   _glthread_UNLOCK_MUTEX(lockMutex); 
-}
-
-
diff --git a/i965/intel_context.h b/i965/intel_context.h
deleted file mode 100644
index a244757..0000000
--- a/i965/intel_context.h
+++ /dev/null
@@ -1,531 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef INTELCONTEXT_INC
-#define INTELCONTEXT_INC
-
-
-
-#include "mtypes.h"
-#include "drm.h"
-#include "texmem.h"
-
-#include "intel_screen.h"
-#include "i830_common.h"
-#include "tnl/t_vertex.h"
-
-#define TAG(x) intel##x
-#include "tnl_dd/t_dd_vertex.h"
-#undef TAG
-
-#define DV_PF_555  (1<<8)
-#define DV_PF_565  (2<<8)
-#define DV_PF_8888 (3<<8)
-
-struct intel_region;
-struct intel_context;
-
-typedef void (*intel_tri_func)(struct intel_context *, intelVertex *, intelVertex *,
-							  intelVertex *);
-typedef void (*intel_line_func)(struct intel_context *, intelVertex *, intelVertex *);
-typedef void (*intel_point_func)(struct intel_context *, intelVertex *);
-
-#define INTEL_FALLBACK_DRAW_BUFFER	 0x1
-#define INTEL_FALLBACK_READ_BUFFER	 0x2
-#define INTEL_FALLBACK_USER		 0x4
-#define INTEL_FALLBACK_RENDERMODE	 0x8
-#define INTEL_FALLBACK_TEXTURE   	 0x10
-
-extern void intelFallback( struct intel_context *intel, GLuint bit, GLboolean mode );
-#define FALLBACK( intel, bit, mode ) intelFallback( intel, bit, mode )
-
-
-
-struct intel_texture_object
-{
-   struct gl_texture_object base; /* The "parent" object */
-
-   /* The mipmap tree must include at least these levels once
-    * validated:
-    */
-   GLuint firstLevel;
-   GLuint lastLevel;
-
-   GLuint dirty_images[6];
-   GLuint dirty;
-
-   /* On validation any active images held in main memory or in other
-    * regions will be copied to this region and the old storage freed.
-    */
-   struct intel_mipmap_tree *mt;
-};
-
-
-
-struct intel_context
-{
-   GLcontext ctx;		/* the parent class */
-
-   struct {
-      void (*destroy)( struct intel_context *intel ); 
-      void (*emit_state)( struct intel_context *intel );
-      void (*emit_invarient_state)( struct intel_context *intel );
-      void (*lost_hardware)( struct intel_context *intel );
-      void (*note_fence)( struct intel_context *intel, GLuint fence );
-      void (*note_unlock)( struct intel_context *intel );
-      void (*update_texture_state)( struct intel_context *intel );
-
-      void (*render_start)( struct intel_context *intel );
-      void (*set_draw_region)( struct intel_context *intel, 
-			       struct intel_region *draw_region,
-			       struct intel_region *depth_region );
-
-      GLuint (*flush_cmd)( void );
-
-      void (*emit_flush)( struct intel_context *intel,
-			  GLuint unused );
-
-      void (*aub_commands)( struct intel_context *intel, 
-			    GLuint offset,
-			    const void *buf,
-			    GLuint sz );
-      void (*aub_dump_bmp)( struct intel_context *intel, GLuint buffer );
-      void (*aub_wrap)( struct intel_context *intel );
-      void (*aub_gtt_data)( struct intel_context *intel, 
-			    GLuint offset,
-			    const void *src,
-			    GLuint size,
-			    GLuint aubtype, 
-			    GLuint aubsubtype);
-
-
-      void (*reduced_primitive_state)( struct intel_context *intel, GLenum rprim );
-
-      GLboolean (*check_vertex_size)( struct intel_context *intel, GLuint expected );
-
-      void (*invalidate_state)( struct intel_context *intel, GLuint new_state );
-
-      /* Metaops: 
-       */
-      void (*install_meta_state)( struct intel_context *intel );
-      void (*leave_meta_state)( struct intel_context *intel );
-
-      void (*meta_draw_region)( struct intel_context *intel,
-				struct intel_region *draw_region,
-				struct intel_region *depth_region );
-
-      void (*meta_color_mask)( struct intel_context *intel,
-			       GLboolean );
-      
-      void (*meta_stencil_replace)( struct intel_context *intel,
-				    GLuint mask,
-				    GLuint clear );
-
-      void (*meta_depth_replace)( struct intel_context *intel );
-
-      void (*meta_texture_blend_replace) (struct intel_context * intel);
-      
-      void (*meta_no_stencil_write)( struct intel_context *intel );
-      void (*meta_no_depth_write)( struct intel_context *intel );
-      void (*meta_no_texture)( struct intel_context *intel );
-      void (*meta_import_pixel_state) (struct intel_context * intel);
-      void (*meta_frame_buffer_texture)( struct intel_context *intel,
-					 GLint xoff, GLint yoff );
-
-      void (*meta_draw_quad)(struct intel_context *intel, 
-			     GLfloat x0, GLfloat x1,
-			     GLfloat y0, GLfloat y1, 
-			     GLfloat z,
-			     GLubyte red, GLubyte green,
-			     GLubyte blue, GLubyte alpha,
-			     GLfloat s0, GLfloat s1,
-			     GLfloat t0, GLfloat t1);
-
-
-
-   } vtbl;
-
-   GLint refcount;   
-   GLuint Fallback;
-   GLuint NewGLState;
-   
-   GLuint last_swap_fence;
-   GLuint second_last_swap_fence;
-   
-   GLboolean aub_wrap;
-   GLuint stats_wm;
-
-   struct intel_batchbuffer *batch;
-
-   GLubyte clear_chan[4];
-   GLuint ClearColor;
-   GLuint ClearDepth;
-
-   GLfloat depth_scale;
-   GLfloat polygon_offset_scale; /* dependent on depth_scale, bpp */
-   GLuint depth_clear_mask;
-   GLuint stencil_clear_mask;
-
-   GLboolean hw_stencil;
-   GLboolean hw_stipple;
-   GLboolean depth_buffer_is_float;
-   GLboolean no_hw;
-   GLboolean no_rast;
-   GLboolean thrashing;
-   GLboolean locked;
-   GLboolean strict_conformance;
-   GLboolean need_flush;
-
-
-   
-   /* AGP memory buffer manager:
-    */
-   struct bufmgr *bm;
-
-
-   /* State for intelvb.c and inteltris.c.
-    */
-   GLenum render_primitive;
-   GLenum reduced_primitive;
-
-   struct intel_region *front_region;
-   struct intel_region *back_region;
-   struct intel_region *draw_region;
-   struct intel_region *depth_region;
-
-   /* These refer to the current draw (front vs. back) buffer:
-    */
-   int drawX;			/* origin of drawable in draw buffer */
-   int drawY;
-   GLuint numClipRects;		/* cliprects for that buffer */
-   drm_clip_rect_t *pClipRects;
-   struct gl_texture_object *frame_buffer_texobj;
-
-   GLboolean scissor;
-   drm_clip_rect_t draw_rect;
-   drm_clip_rect_t scissor_rect;
-
-   drm_context_t hHWContext;
-   drmLock *driHwLock;
-   int driFd;
-
-   __DRIdrawablePrivate *driDrawable;
-   __DRIdrawablePrivate *driReadDrawable;
-   __DRIscreenPrivate *driScreen;
-   intelScreenPrivate *intelScreen; 
-   volatile drmI830Sarea *sarea; 
-   
-   FILE *aub_file;
-
-   GLuint lastStamp;
-
-   /**
-    * Configuration cache
-    */
-   driOptionCache optionCache;
-
-   /* VBI
-    */
-   GLuint vbl_seq;
-   GLuint vblank_flags;
-
-   int64_t swap_ust;
-   int64_t swap_missed_ust;
-
-   GLuint swap_count;
-   GLuint swap_missed_count;
-};
-
-/* These are functions now:
- */
-void LOCK_HARDWARE( struct intel_context *intel );
-void UNLOCK_HARDWARE( struct intel_context *intel );
-
-
-#define SUBPIXEL_X 0.125
-#define SUBPIXEL_Y 0.125
-
-/* ================================================================
- * Color packing:
- */
-
-#define INTEL_PACKCOLOR4444(r,g,b,a) \
-  ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4))
-
-#define INTEL_PACKCOLOR1555(r,g,b,a) \
-  ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \
-    ((a) ? 0x8000 : 0))
-
-#define INTEL_PACKCOLOR565(r,g,b) \
-  ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3))
-
-#define INTEL_PACKCOLOR8888(r,g,b,a) \
-  ((a<<24) | (r<<16) | (g<<8) | b)
-
-
-#define INTEL_PACKCOLOR(format, r,  g,  b, a)		\
-(format == DV_PF_555 ? INTEL_PACKCOLOR1555(r,g,b,a) :	\
- (format == DV_PF_565 ? INTEL_PACKCOLOR565(r,g,b) :	\
-  (format == DV_PF_8888 ? INTEL_PACKCOLOR8888(r,g,b,a) :	\
-   0)))
-
-
-
-/* ================================================================
- * From linux kernel i386 header files, copes with odd sizes better
- * than COPY_DWORDS would:
- */
-#if defined(i386) || defined(__i386__)
-static inline void * __memcpy(void * to, const void * from, size_t n)
-{
-   int d0, d1, d2;
-   __asm__ __volatile__(
-      "rep ; movsl\n\t"
-      "testb $2,%b4\n\t"
-      "je 1f\n\t"
-      "movsw\n"
-      "1:\ttestb $1,%b4\n\t"
-      "je 2f\n\t"
-      "movsb\n"
-      "2:"
-      : "=&c" (d0), "=&D" (d1), "=&S" (d2)
-      :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
-      : "memory");
-   return (to);
-}
-#else
-#define __memcpy(a,b,c) memcpy(a,b,c)
-#endif
-
-
-/* The system memcpy (at least on ubuntu 5.10) has problems copying
- * to agp (writecombined) memory from a source which isn't 64-byte
- * aligned - there is a 4x performance falloff.
- *
- * The x86 __memcpy is immune to this but is slightly slower
- * (10%-ish) than the system memcpy.
- *
- * The sse_memcpy seems to have a slight cliff at 64/32 bytes, but
- * isn't much faster than x86_memcpy for agp copies.
- * 
- * TODO: switch dynamically.
- */
-static inline void *do_memcpy( void *dest, const void *src, size_t n )
-{
-   if ( (((unsigned long)src) & 63) ||
-	(((unsigned long)dest) & 63)) {
-      return  __memcpy(dest, src, n);	
-   }
-   else
-      return memcpy(dest, src, n);
-}
-
-
-
-
-
-/* ================================================================
- * Debugging:
- */
-extern int INTEL_DEBUG;
-
-#define DEBUG_TEXTURE	0x1
-#define DEBUG_STATE	0x2
-#define DEBUG_IOCTL	0x4
-#define DEBUG_PRIMS	0x8
-#define DEBUG_VERTS	0x10
-#define DEBUG_FALLBACKS	0x20
-#define DEBUG_VERBOSE	0x40
-#define DEBUG_DRI       0x80
-#define DEBUG_DMA       0x100
-#define DEBUG_SANITY    0x200
-#define DEBUG_SYNC      0x400
-#define DEBUG_SLEEP     0x800
-#define DEBUG_PIXEL     0x1000
-#define DEBUG_STATS     0x2000
-#define DEBUG_TILE      0x4000
-#define DEBUG_SINGLE_THREAD   0x8000
-#define DEBUG_WM        0x10000
-#define DEBUG_URB       0x20000
-#define DEBUG_VS        0x40000
-
-
-#define PCI_CHIP_845_G			0x2562
-#define PCI_CHIP_I830_M			0x3577
-#define PCI_CHIP_I855_GM		0x3582
-#define PCI_CHIP_I865_G			0x2572
-#define PCI_CHIP_I915_G			0x2582
-#define PCI_CHIP_I915_GM		0x2592
-#define PCI_CHIP_I945_G			0x2772
-#define PCI_CHIP_I965_G			0x29A2
-#define PCI_CHIP_I965_Q			0x2992
-#define PCI_CHIP_I965_G_1		0x2982
-#define PCI_CHIP_I946_GZ		0x2972
-#define PCI_CHIP_I965_GM                0x2A02
-
-#define PCI_CHIP_IGD_GM       0x2A42
-
-
-/* ================================================================
- * intel_context.c:
- */
-
-extern GLboolean intelInitContext( struct intel_context *intel, 
-				   const __GLcontextModes *mesaVis,
-				   __DRIcontextPrivate *driContextPriv,
-				   void *sharedContextPrivate,
-				   struct dd_function_table *functions );
-
-extern void intelGetLock(struct intel_context *intel, GLuint flags);
-
-extern void intelInitState( GLcontext *ctx );
-extern void intelFinish( GLcontext *ctx );
-extern void intelFlush( GLcontext *ctx );
-
-extern void intelInitDriverFunctions( struct dd_function_table *functions );
-
-
-/* ================================================================
- * intel_state.c:
- */
-extern void intelInitStateFuncs( struct dd_function_table *functions );
-
-#define COMPAREFUNC_ALWAYS		0
-#define COMPAREFUNC_NEVER		0x1
-#define COMPAREFUNC_LESS		0x2
-#define COMPAREFUNC_EQUAL		0x3
-#define COMPAREFUNC_LEQUAL		0x4
-#define COMPAREFUNC_GREATER		0x5
-#define COMPAREFUNC_NOTEQUAL		0x6
-#define COMPAREFUNC_GEQUAL		0x7
-
-#define STENCILOP_KEEP			0
-#define STENCILOP_ZERO			0x1
-#define STENCILOP_REPLACE		0x2
-#define STENCILOP_INCRSAT		0x3
-#define STENCILOP_DECRSAT		0x4
-#define STENCILOP_INCR			0x5
-#define STENCILOP_DECR			0x6
-#define STENCILOP_INVERT		0x7
-
-#define LOGICOP_CLEAR			0
-#define LOGICOP_NOR			0x1
-#define LOGICOP_AND_INV 		0x2
-#define LOGICOP_COPY_INV		0x3
-#define LOGICOP_AND_RVRSE		0x4
-#define LOGICOP_INV			0x5
-#define LOGICOP_XOR			0x6
-#define LOGICOP_NAND			0x7
-#define LOGICOP_AND			0x8
-#define LOGICOP_EQUIV			0x9
-#define LOGICOP_NOOP			0xa
-#define LOGICOP_OR_INV			0xb
-#define LOGICOP_COPY			0xc
-#define LOGICOP_OR_RVRSE		0xd
-#define LOGICOP_OR			0xe
-#define LOGICOP_SET			0xf
-
-#define BLENDFACT_ZERO			0x01
-#define BLENDFACT_ONE			0x02
-#define BLENDFACT_SRC_COLR		0x03
-#define BLENDFACT_INV_SRC_COLR 		0x04
-#define BLENDFACT_SRC_ALPHA		0x05
-#define BLENDFACT_INV_SRC_ALPHA 	0x06
-#define BLENDFACT_DST_ALPHA		0x07
-#define BLENDFACT_INV_DST_ALPHA 	0x08
-#define BLENDFACT_DST_COLR		0x09
-#define BLENDFACT_INV_DST_COLR		0x0a
-#define BLENDFACT_SRC_ALPHA_SATURATE	0x0b
-#define BLENDFACT_CONST_COLOR		0x0c
-#define BLENDFACT_INV_CONST_COLOR	0x0d
-#define BLENDFACT_CONST_ALPHA		0x0e
-#define BLENDFACT_INV_CONST_ALPHA	0x0f
-#define BLENDFACT_MASK          	0x0f
-
-extern int intel_translate_shadow_compare_func( GLenum func );
-extern int intel_translate_compare_func( GLenum func );
-extern int intel_translate_stencil_op( GLenum op );
-extern int intel_translate_blend_factor( GLenum factor );
-extern int intel_translate_logic_op( GLenum opcode );
-
-
-/* ================================================================
- * intel_buffers.c:
- */
-void intelInitBufferFuncs( struct dd_function_table *functions );
-
-struct intel_region *intel_readbuf_region( struct intel_context *intel );
-struct intel_region *intel_drawbuf_region( struct intel_context *intel );
-
-extern void intelWindowMoved( struct intel_context *intel );
-
-extern GLboolean intel_intersect_cliprects( drm_clip_rect_t *dest,
-					    const drm_clip_rect_t *a,
-					    const drm_clip_rect_t *b );
-
-
-/* ================================================================
- * intel_pixel_copy.c:
- */
-void intelCopyPixels(GLcontext * ctx,
-                     GLint srcx, GLint srcy,
-                     GLsizei width, GLsizei height,
-                     GLint destx, GLint desty, GLenum type);
-
-GLboolean intel_check_blit_fragment_ops(GLcontext * ctx);
-
-void intelBitmap(GLcontext * ctx,
-		 GLint x, GLint y,
-		 GLsizei width, GLsizei height,
-		 const struct gl_pixelstore_attrib *unpack,
-		 const GLubyte * pixels);
-
-void intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging);
-#define _NEW_WINDOW_POS 0x40000000
-
-
-/*======================================================================
- * Inline conversion functions.  
- * These are better-typed than the macros used previously:
- */
-static inline struct intel_context *intel_context( GLcontext *ctx )
-{
-   return (struct intel_context *)ctx;
-}
-
-static inline struct intel_texture_object *intel_texture_object( struct gl_texture_object *obj )
-{
-   return (struct intel_texture_object *)obj;
-}
-
-static inline struct intel_texture_image *intel_texture_image( struct gl_texture_image *img )
-{
-   return (struct intel_texture_image *)img;
-}
-
-#endif
-
diff --git a/i965/intel_ioctl.c b/i965/intel_ioctl.c
deleted file mode 100644
index 0a8e976..0000000
--- a/i965/intel_ioctl.c
+++ /dev/null
@@ -1,205 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#include <stdio.h>
-#include <unistd.h>
-#include <errno.h>
-#include <sched.h>
-
-#include "mtypes.h"
-#include "context.h"
-#include "swrast/swrast.h"
-
-#include "intel_context.h"
-#include "intel_ioctl.h"
-#include "intel_batchbuffer.h"
-#include "intel_blit.h"
-#include "intel_regions.h"
-#include "drm.h"
-#include "bufmgr.h"
-
-static int intelWaitIdleLocked( struct intel_context *intel )
-{
-   static int in_wait_idle = 0;
-   unsigned int fence;
-
-   if (!in_wait_idle) {
-      if (INTEL_DEBUG & DEBUG_SYNC) {
-	 fprintf(stderr, "waiting for idle\n");
-      }
-
-      in_wait_idle = 1;
-      fence = bmSetFence(intel);
-      intelWaitIrq(intel, fence);
-      in_wait_idle = 0;
-
-      return bmTestFence(intel, fence);
-   } else {
-      return 1;
-   }
-}
-
-int intelEmitIrqLocked( struct intel_context *intel )
-{
-   int seq = 1;
-
-   if (!intel->no_hw) {
-      drmI830IrqEmit ie;
-      int ret;
-      
-      assert(((*(int *)intel->driHwLock) & ~DRM_LOCK_CONT) == 
-	     (DRM_LOCK_HELD|intel->hHWContext));
-
-      ie.irq_seq = &seq;
-
-      ret = drmCommandWriteRead( intel->driFd, DRM_I830_IRQ_EMIT, 
-				 &ie, sizeof(ie) );
-      if ( ret ) {
-	 fprintf( stderr, "%s: drmI830IrqEmit: %d\n", __FUNCTION__, ret );
-	 exit(1);
-      }   
-
-      if (0)
-	 fprintf(stderr, "%s -->  %d\n", __FUNCTION__, seq );
-   }
-
-   return seq;
-}
-
-void intelWaitIrq( struct intel_context *intel, int seq )
-{
-   if (!intel->no_hw) {
-      drmI830IrqWait iw;
-      int ret, lastdispatch;
-      
-      if (0)
-	 fprintf(stderr, "%s %d\n", __FUNCTION__, seq );
-
-      iw.irq_seq = seq;
-	
-      do {
-	 lastdispatch = intel->sarea->last_dispatch;
-	 ret = drmCommandWrite( intel->driFd, DRM_I830_IRQ_WAIT, &iw, sizeof(iw) );
-
-	 /* This seems quite often to return before it should!?! 
-	  */
-      } while (ret == -EAGAIN || ret == -EINTR || (ret == -EBUSY && lastdispatch != intel->sarea->last_dispatch) || (ret == 0 && seq > intel->sarea->last_dispatch)
-	       || (ret == 0 && intel->sarea->last_dispatch - seq >= (1 << 24)));
-      
-
-      if ( ret ) {
-	 fprintf( stderr, "%s: drmI830IrqWait: %d\n", __FUNCTION__, ret );
-
-	 if (intel->aub_file) {
-	    intel->vtbl.aub_dump_bmp( intel, intel->ctx.Visual.doubleBufferMode ? 1 : 0 );
-	 }
-
-	 exit(1);
-      }
-   }
-}
-
-
-void intel_batch_ioctl( struct intel_context *intel, 
-			GLuint start_offset,
-			GLuint used)
-{
-   drmI830BatchBuffer batch;
-
-   assert(intel->locked);
-   assert(used);
-
-   if (0)
-      fprintf(stderr, "%s used %d offset %x..%x\n",
-	      __FUNCTION__, 
-	      used, 
-	      start_offset,
-	      start_offset + used);
-
-   batch.start = start_offset;
-   batch.used = used;
-   batch.cliprects = NULL;
-   batch.num_cliprects = 0;
-   batch.DR1 = 0;
-   batch.DR4 = 0;
-      
-   if (INTEL_DEBUG & DEBUG_DMA)
-      fprintf(stderr, "%s: 0x%x..0x%x\n",
-	      __FUNCTION__, 
-	      batch.start, 
-	      batch.start + batch.used * 4);
-
-   if (!intel->no_hw) {
-      if (drmCommandWrite (intel->driFd, DRM_I830_BATCHBUFFER, &batch, 
-			   sizeof(batch))) {
-	 fprintf(stderr, "DRM_I830_BATCHBUFFER: %d\n",  -errno);
-	 UNLOCK_HARDWARE(intel);
-	 exit(1);
-      }
-
-      if (INTEL_DEBUG & DEBUG_SYNC) {
-	intelWaitIdleLocked(intel);
-      }
-   }
-}
-
-void intel_cmd_ioctl( struct intel_context *intel, 
-		      char *buf,
-		      GLuint used)
-{
-   drmI830CmdBuffer cmd;
-
-   assert(intel->locked);
-   assert(used);
-
-   cmd.buf = buf;
-   cmd.sz = used;
-   cmd.cliprects = intel->pClipRects;
-   cmd.num_cliprects = 0;
-   cmd.DR1 = 0;
-   cmd.DR4 = 0;
-      
-   if (INTEL_DEBUG & DEBUG_DMA)
-      fprintf(stderr, "%s: 0x%x..0x%x\n",
-	      __FUNCTION__, 
-	      0, 
-	      0 + cmd.sz);
-
-   if (!intel->no_hw) {
-      if (drmCommandWrite (intel->driFd, DRM_I830_CMDBUFFER, &cmd, 
-			   sizeof(cmd))) {
-	 fprintf(stderr, "DRM_I830_CMDBUFFER: %d\n",  -errno);
-	 UNLOCK_HARDWARE(intel);
-	 exit(1);
-      }
-
-      if (INTEL_DEBUG & DEBUG_SYNC) {
-	intelWaitIdleLocked(intel);
-      }
-   }
-}
diff --git a/i965/intel_mipmap_tree.c b/i965/intel_mipmap_tree.c
deleted file mode 100644
index 8548bc8..0000000
--- a/i965/intel_mipmap_tree.c
+++ /dev/null
@@ -1,270 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "intel_context.h"
-#include "intel_mipmap_tree.h"
-#include "intel_regions.h"
-#include "bufmgr.h"
-#include "enums.h"
-#include "imports.h"
-
-static GLenum target_to_target( GLenum target )
-{
-   switch (target) {
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
-      return GL_TEXTURE_CUBE_MAP_ARB;
-   default:
-      return target;
-   }
-}
-
-struct intel_mipmap_tree *intel_miptree_create( struct intel_context *intel,
-						GLenum target,
-						GLenum internal_format,
-						GLuint first_level,
-						GLuint last_level,
-						GLuint width0,
-						GLuint height0,
-						GLuint depth0,
-						GLuint cpp,
-						GLboolean compressed)
-{
-   GLboolean ok;
-   struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
-
-   if (INTEL_DEBUG & DEBUG_TEXTURE)
-      _mesa_printf("%s target %s format %s level %d..%d\n", __FUNCTION__,
-		   _mesa_lookup_enum_by_nr(target),
-		   _mesa_lookup_enum_by_nr(internal_format),
-		   first_level,
-		   last_level);
-
-   mt->target = target_to_target(target);
-   mt->internal_format = internal_format;
-   mt->first_level = first_level;
-   mt->last_level = last_level;
-   mt->width0 = width0;
-   mt->height0 = height0;
-   mt->depth0 = depth0;
-   mt->cpp = compressed ? 2 : cpp;
-   mt->compressed = compressed;
-
-   switch (intel->intelScreen->deviceID) {
-#if 0
-   case PCI_CHIP_I945_G:
-      ok = i945_miptree_layout( mt );
-      break;
-   case PCI_CHIP_I915_G:
-   case PCI_CHIP_I915_GM:
-      ok = i915_miptree_layout( mt );
-      break;
-#endif
-   default:
-      if (INTEL_DEBUG & DEBUG_TEXTURE)
-	 _mesa_printf("assuming BRW texture layouts\n");
-      ok = brw_miptree_layout( intel, mt );
-      break;
-   }
-
-   if (ok)
-      mt->region = intel_region_alloc( intel, 
-				       mt->cpp,
-				       mt->pitch, 
-				       mt->total_height );
-
-   if (!mt->region) {
-      free(mt);
-      return NULL;
-   }
-
-   return mt;
-}
-
-
-/**
- * intel_miptree_pitch_align:
- *
- * @intel: intel context pointer
- *
- * @mt: the miptree to compute pitch alignment for
- *
- * @pitch: the natural pitch value
- *
- * Given @pitch, compute a larger value which accounts for
- * any necessary alignment required by the device
- */
-
-int intel_miptree_pitch_align (struct intel_context *intel,
-			       struct intel_mipmap_tree *mt,
-			       int pitch)
-{
-   if (!mt->compressed)
-      pitch = ((pitch * mt->cpp + 3) & ~3) / mt->cpp;
-
-   return pitch;
-}
-
-
-void intel_miptree_destroy( struct intel_context *intel,
-			    struct intel_mipmap_tree *mt )
-{
-   if (mt) {
-      GLuint i;
-
-      intel_region_release(intel, &(mt->region));
-
-      for (i = 0; i < MAX_TEXTURE_LEVELS; i++)
-	 if (mt->level[i].image_offset)
-	    free(mt->level[i].image_offset);
-
-      free(mt);
-   }
-}
-
-
-
-
-void intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
-				  GLuint level,
-				  GLuint nr_images,
-				  GLuint x, GLuint y,
-				  GLuint w, GLuint h, GLuint d)
-{
-   mt->level[level].width = w;
-   mt->level[level].height = h;
-   mt->level[level].depth = d;
-   mt->level[level].level_offset = (x + y * mt->pitch) * mt->cpp;
-   mt->level[level].nr_images = nr_images;
-
-   if (INTEL_DEBUG & DEBUG_TEXTURE)
-      _mesa_printf("%s level %d img size: %d,%d level_offset 0x%x\n", __FUNCTION__, level, w, h, 
-		   mt->level[level].level_offset);
-
-   /* Not sure when this would happen, but anyway: 
-    */
-   if (mt->level[level].image_offset) {
-      free(mt->level[level].image_offset);
-      mt->level[level].image_offset = NULL;
-   }
-
-   if (nr_images > 1) {
-      mt->level[level].image_offset = malloc(nr_images * sizeof(GLuint));
-      mt->level[level].image_offset[0] = 0;
-   }
-}
-
-
-
-void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
-				    GLuint level,
-				    GLuint img,
-				    GLuint x, GLuint y)
-{
-   if (INTEL_DEBUG & DEBUG_TEXTURE)
-      _mesa_printf("%s level %d img %d pos %d,%d\n", __FUNCTION__, level, img, x, y);
-
-   if (img == 0)
-      assert(x == 0 && y == 0);
-
-   if (img > 0)
-      mt->level[level].image_offset[img] = (x + y * mt->pitch) * mt->cpp;
-}
-
-
-/* Although we use the image_offset[] array to store relative offsets
- * to cube faces, Mesa doesn't know anything about this and expects
- * each cube face to be treated as a separate image.
- *
- * These functions present that view to mesa:
- */
-const GLuint *intel_miptree_depth_offsets(struct intel_mipmap_tree *mt,
-					  GLuint level)
-{
-   static const GLuint zero = 0;
-
-   if (mt->target != GL_TEXTURE_3D ||
-       mt->level[level].nr_images == 1)
-      return &zero;
-   else
-      return mt->level[level].image_offset;
-}
-
-
-GLuint intel_miptree_image_offset(struct intel_mipmap_tree *mt,
-				  GLuint face,
-				  GLuint level)
-{
-   if (mt->target == GL_TEXTURE_CUBE_MAP_ARB)
-      return (mt->level[level].level_offset +
-	      mt->level[level].image_offset[face]);
-   else
-      return mt->level[level].level_offset;
-}
-
-
-
-
-
-
-/* Upload data for a particular image.
- */
-GLboolean intel_miptree_image_data(struct intel_context *intel, 
-				   struct intel_mipmap_tree *dst,
-				   GLuint face,
-				   GLuint level,
-				   const void *src, 
-				   GLuint src_row_pitch,
-				   GLuint src_image_pitch)
-{
-   GLuint depth = dst->level[level].depth;
-   GLuint dst_offset = intel_miptree_image_offset(dst, face, level);
-   const GLuint *dst_depth_offset = intel_miptree_depth_offsets(dst, level);
-   GLuint i;
-
-   DBG("%s\n", __FUNCTION__);
-   for (i = 0; i < depth; i++) {
-      if (!intel_region_data(intel,
-			     dst->region, 
-			     dst_offset + dst_depth_offset[i],
-			     0,
-			     0,
-			     src,
-			     src_row_pitch,
-			     0, 0,	/* source x,y */
-			     dst->level[level].width,
-			     dst->level[level].height))
-	 return GL_FALSE;
-      src += src_image_pitch;
-   }
-   return GL_TRUE;
-}
-
diff --git a/i965/intel_mipmap_tree.h b/i965/intel_mipmap_tree.h
deleted file mode 100644
index c67c726..0000000
--- a/i965/intel_mipmap_tree.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef INTEL_MIPMAP_TREE_H
-#define INTEL_MIPMAP_TREE_H
-
-#include "intel_regions.h"
-
-/* A layer on top of the intel_regions code which adds:
- *
- * - Code to size and layout a region to hold a set of mipmaps.
- * - Query to determine if a new image fits in an existing tree.
- *
- * The fixed mipmap layout of intel hardware where one offset
- * specifies the position of all images in a mipmap hierachy
- * complicates the implementation of GL texture image commands,
- * compared to hardware where each image is specified with an
- * independent offset.
- *
- * In an ideal world, each texture object would be associated with a
- * single bufmgr buffer or 2d intel_region, and all the images within
- * the texture object would slot into the tree as they arrive.  The
- * reality can be a little messier, as images can arrive from the user
- * with sizes that don't fit in the existing tree, or in an order
- * where the tree layout cannot be guessed immediately.  
- * 
- * This structure encodes an idealized mipmap tree.  The GL image
- * commands build these where possible, otherwise store the images in
- * temporary system buffers.
- */
-
-
-struct intel_mipmap_level {
-   GLuint level_offset;
-   GLuint width;
-   GLuint height;
-   GLuint depth;
-   GLuint nr_images;
-
-   /* Explicitly store the offset of each image for each cube face or
-    * depth value.  Pretty much have to accept that hardware formats
-    * are going to be so diverse that there is no unified way to
-    * compute the offsets of depth/cube images within a mipmap level,
-    * so have to store them as a lookup table:
-    */
-   GLuint *image_offset;
-};
-
-struct intel_mipmap_tree {
-   /* Effectively the key:
-    */
-   GLenum target;
-   GLenum internal_format;
-
-   GLuint first_level;
-   GLuint last_level;
-
-   GLuint width0, height0, depth0;
-   GLuint cpp;
-   GLboolean compressed;
-
-   /* Derived from the above:
-    */   
-   GLuint pitch;
-   GLuint depth_pitch;		/* per-image on i945? */
-   GLuint total_height;
-   
-   /* Includes image offset tables:
-    */
-   struct intel_mipmap_level level[MAX_TEXTURE_LEVELS];
-
-   /* The data is held here:
-    */
-   struct intel_region *region;
-
-   /* These are also refcounted:
-    */
-   GLuint refcount;
-};
-
-
-
-struct intel_mipmap_tree *intel_miptree_create( struct intel_context *intel,
-						GLenum target,
-						GLenum internal_format,
-						GLuint first_level,
-						GLuint last_level,
-						GLuint width0,
-						GLuint height0,
-						GLuint depth0,
-						GLuint cpp,
-						GLboolean compressed);
-
-int intel_miptree_pitch_align (struct intel_context *intel,
-			       struct intel_mipmap_tree *mt,
-			       int pitch);
-
-void intel_miptree_destroy( struct intel_context *intel,
-			    struct intel_mipmap_tree *mt );
-
-
-/* Return the linear offset of an image relative to the start of the
- * tree:
- */
-GLuint intel_miptree_image_offset( struct intel_mipmap_tree *mt,
-				   GLuint face,
-				   GLuint level );
-
-/* Return pointers to each 2d slice within an image.  Indexed by depth
- * value.
- */
-const GLuint *intel_miptree_depth_offsets(struct intel_mipmap_tree *mt,
-					  GLuint level);
-
-
-void intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
-				  GLuint level,
-				  GLuint nr_images,
-				  GLuint x, GLuint y,
-				  GLuint w, GLuint h, GLuint d);
-
-void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
-				    GLuint level,
-				    GLuint img,
-				    GLuint x, GLuint y);
-
-
-/* Upload an image into a tree
- */
-GLboolean intel_miptree_image_data(struct intel_context *intel, 
-				   struct intel_mipmap_tree *dst,
-				   GLuint face,
-				   GLuint level,
-				   const void *src, 
-				   GLuint src_row_pitch,
-				   GLuint src_image_pitch);
-
-/* i915_mipmap_tree.c:
- */
-GLboolean i915_miptree_layout( struct intel_mipmap_tree *mt );
-GLboolean i945_miptree_layout( struct intel_mipmap_tree *mt );
-GLboolean brw_miptree_layout( struct intel_context *intel,
-			      struct intel_mipmap_tree *mt );
-
-
-
-#endif
diff --git a/i965/intel_reg.h b/i965/intel_reg.h
deleted file mode 100644
index 3c448b3..0000000
--- a/i965/intel_reg.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#ifndef _INTEL_REG_H_
-#define _INTEL_REG_H_
-
-
-
-#define CMD_3D (0x3<<29)
-
-
-#define _3DPRIMITIVE         ((0x3<<29)|(0x1f<<24))
-#define PRIM_INDIRECT            (1<<23)
-#define PRIM_INLINE              (0<<23)
-#define PRIM_INDIRECT_SEQUENTIAL (0<<17)
-#define PRIM_INDIRECT_ELTS       (1<<17)
-
-#define PRIM3D_TRILIST		(0x0<<18)
-#define PRIM3D_TRISTRIP 	(0x1<<18)
-#define PRIM3D_TRISTRIP_RVRSE	(0x2<<18)
-#define PRIM3D_TRIFAN		(0x3<<18)
-#define PRIM3D_POLY		(0x4<<18)
-#define PRIM3D_LINELIST 	(0x5<<18)
-#define PRIM3D_LINESTRIP	(0x6<<18)
-#define PRIM3D_RECTLIST 	(0x7<<18)
-#define PRIM3D_POINTLIST	(0x8<<18)
-#define PRIM3D_DIB		(0x9<<18)
-#define PRIM3D_MASK		(0x1f<<18)
-
-#define I915PACKCOLOR4444(r,g,b,a) \
-  ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4))
-
-#define I915PACKCOLOR1555(r,g,b,a) \
-  ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \
-    ((a) ? 0x8000 : 0))
-
-#define I915PACKCOLOR565(r,g,b) \
-  ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3))
-
-#define I915PACKCOLOR8888(r,g,b,a) \
-  ((a<<24) | (r<<16) | (g<<8) | b)
-
-
-
-
-#define BR00_BITBLT_CLIENT   0x40000000
-#define BR00_OP_COLOR_BLT    0x10000000
-#define BR00_OP_SRC_COPY_BLT 0x10C00000
-#define BR13_SOLID_PATTERN   0x80000000
-
-#define XY_COLOR_BLT_CMD		((2<<29)|(0x50<<22)|0x4)
-#define XY_COLOR_BLT_WRITE_ALPHA	(1<<21)
-#define XY_COLOR_BLT_WRITE_RGB		(1<<20)
-
-#define XY_SRC_COPY_BLT_CMD             ((2<<29)|(0x53<<22)|6)
-#define XY_SRC_COPY_BLT_WRITE_ALPHA     (1<<21)
-#define XY_SRC_COPY_BLT_WRITE_RGB       (1<<20)
-
-#define XY_SRC_TILED  (1<<15)
-#define XY_DST_TILED  (1<<11)
-
-#define FENCE_LINEAR 0
-#define FENCE_XMAJOR 1
-#define FENCE_YMAJOR 2
-
-#endif
diff --git a/i965/intel_regions.c b/i965/intel_regions.c
deleted file mode 100644
index 835ecdd..0000000
--- a/i965/intel_regions.c
+++ /dev/null
@@ -1,295 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/* Provide additional functionality on top of bufmgr buffers:
- *   - 2d semantics and blit operations
- *   - refcounting of buffers for multiple images in a buffer.
- *   - refcounting of buffer mappings.
- *   - some logic for moving the buffers to the best memory pools for
- *     given operations.
- *
- * Most of this is to make it easier to implement the fixed-layout
- * mipmap tree required by intel hardware in the face of GL's
- * programming interface where each image can be specifed in random
- * order and it isn't clear what layout the tree should have until the
- * last moment.
- */
-
-#include "intel_context.h"
-#include "intel_regions.h"
-#include "intel_blit.h"
-#include "bufmgr.h"
-#include "imports.h"
-
-/* XXX: Thread safety?
- */
-GLubyte *intel_region_map(struct intel_context *intel, struct intel_region *region)
-{
-   DBG("%s\n", __FUNCTION__);
-   if (!region->map_refcount++) {
-      region->map = bmMapBuffer(intel, region->buffer, 0);
-      if (!region->map)
-	 region->map_refcount--;
-   }
-
-   return region->map;
-}
-
-void intel_region_unmap(struct intel_context *intel, 
-			struct intel_region *region)
-{
-   DBG("%s\n", __FUNCTION__);
-   if (!--region->map_refcount) {
-      bmUnmapBufferAUB(intel, region->buffer, 0, 0);
-      region->map = NULL;
-   }
-}
-
-struct intel_region *intel_region_alloc( struct intel_context *intel, 
-					 GLuint cpp,
-					 GLuint pitch, 
-					 GLuint height )
-{
-   struct intel_region *region = calloc(sizeof(*region), 1);
-
-   DBG("%s %dx%dx%d == 0x%x bytes\n", __FUNCTION__,
-       cpp, pitch, height, cpp*pitch*height);
-
-   region->cpp = cpp;
-   region->pitch = pitch;
-   region->height = height; 	/* needed? */
-   region->refcount = 1;
-
-   bmGenBuffers(intel, "tex", 1, &region->buffer, 6);
-   bmBufferData(intel, region->buffer, pitch * cpp * height, NULL, 0);
-
-   return region;
-}
-
-void intel_region_reference( struct intel_region **dst,
-			     struct intel_region *src)
-{
-   src->refcount++;
-   assert(*dst == NULL);
-   *dst = src;
-}
-
-void intel_region_release( struct intel_context *intel,
-			   struct intel_region **region )
-{
-   if (!*region)
-      return;
-
-   DBG("%s %d\n", __FUNCTION__, (*region)->refcount-1);
-   
-   if (--(*region)->refcount == 0) {
-      assert((*region)->map_refcount == 0);
-      bmDeleteBuffers(intel, 1, &(*region)->buffer);
-      free(*region);
-   }
-   *region = NULL;
-}
-
-
-struct intel_region *intel_region_create_static( struct intel_context *intel, 
-						 GLuint mem_type,
-						 GLuint offset,
-						 void *virtual,
-						 GLuint cpp,
-						 GLuint pitch, 
-						 GLuint height,
-						 GLuint size,
-						 GLboolean tiled )
-{
-   struct intel_region *region = calloc(sizeof(*region), 1);
-   GLint pool;
-
-   DBG("%s\n", __FUNCTION__);
-
-   region->cpp = cpp;
-   region->pitch = pitch;
-   region->height = height; 	/* needed? */
-   region->refcount = 1;
-   region->tiled = tiled;
-
-   /* Recipe for creating a static buffer - create a static pool with
-    * the right offset and size, generate a buffer and use a special
-    * call to bind it to all of the memory in that pool.
-    */
-   pool = bmInitPool(intel, offset, virtual, size, 
-		     (BM_MEM_AGP |
-		      BM_NO_UPLOAD | 
-		      BM_NO_EVICT | 
-		      BM_NO_MOVE));
-   if (pool < 0) {
-      _mesa_printf("bmInitPool failed for static region\n");
-      exit(1);
-   }
-
-   region->buffer = bmGenBufferStatic(intel, pool);
-
-   return region;
-}
-
-
-
-
-void _mesa_copy_rect( GLubyte *dst,
-		      GLuint cpp,
-		      GLuint dst_pitch,
-		      GLuint dst_x, 
-		      GLuint dst_y,
-		      GLuint width,
-		      GLuint height,
-		      const GLubyte *src,
-		      GLuint src_pitch,
-		      GLuint src_x,
-		      GLuint src_y )
-{
-   GLuint i;
-
-   dst_pitch *= cpp;
-   src_pitch *= cpp;
-   dst += dst_x * cpp;
-   src += src_x * cpp;
-   dst += dst_y * dst_pitch;
-   src += src_y * dst_pitch;
-   width *= cpp;
-
-   if (width == dst_pitch && 
-       width == src_pitch)
-      do_memcpy(dst, src, height * width);
-   else {
-      for (i = 0; i < height; i++) {
-	 do_memcpy(dst, src, width);
-	 dst += dst_pitch;
-	 src += src_pitch;
-      }
-   }
-}
-
-
-/* Upload data to a rectangular sub-region.  Lots of choices how to do this:
- *
- * - memcpy by span to current destination
- * - upload data as new buffer and blit
- *
- * Currently always memcpy.
- */
-GLboolean intel_region_data(struct intel_context *intel, 
-			    struct intel_region *dst,
-			    GLuint dst_offset,
-			    GLuint dstx, GLuint dsty,
-			    const void *src, GLuint src_pitch,
-			    GLuint srcx, GLuint srcy,
-			    GLuint width, GLuint height)
-{
-   DBG("%s\n", __FUNCTION__);
-
-   if (width == dst->pitch && 
-       width == src_pitch &&
-       dst_offset == 0 &&
-       height == dst->height &&
-       srcx == 0 &&
-       srcy == 0) 
-   {
-      return (bmBufferDataAUB(intel,
-			      dst->buffer,
-			      dst->cpp * width * dst->height,
-			      src, 0, 0, 0) == 0);
-   }
-   else {
-      GLubyte *map = intel_region_map(intel, dst);
-
-      if (map) {
-	 assert (dst_offset + dstx + width + 
-		 (dsty + height - 1) * dst->pitch * dst->cpp <= 
-		 dst->pitch * dst->cpp * dst->height);
-	 
-	 _mesa_copy_rect(map + dst_offset,
-			 dst->cpp,
-			 dst->pitch,
-			 dstx, dsty,
-			 width, height,
-			 src,
-			 src_pitch,
-			 srcx, srcy);      
-	 
-	 intel_region_unmap(intel, dst);
-	 return GL_TRUE;
-      }
-      else 
-	 return GL_FALSE;
-   }
-}
-			  
-/* Copy rectangular sub-regions. Need better logic about when to
- * push buffers into AGP - will currently do so whenever possible.
- */
-void intel_region_copy( struct intel_context *intel,
-			struct intel_region *dst,
-			GLuint dst_offset,
-			GLuint dstx, GLuint dsty,
-			struct intel_region *src,
-			GLuint src_offset,
-			GLuint srcx, GLuint srcy,
-			GLuint width, GLuint height )
-{
-   DBG("%s\n", __FUNCTION__);
-
-   assert(src->cpp == dst->cpp);
-
-   intelEmitCopyBlit(intel,
-		     dst->cpp,
-		     src->pitch, src->buffer, src_offset, src->tiled,
-		     dst->pitch, dst->buffer, dst_offset, dst->tiled,
-		     srcx, srcy,
-		     dstx, dsty,
-		     width, height,
-		     GL_COPY );
-}
-
-/* Fill a rectangular sub-region.  Need better logic about when to
- * push buffers into AGP - will currently do so whenever possible.
- */
-void intel_region_fill( struct intel_context *intel,
-			struct intel_region *dst,
-			GLuint dst_offset,
-			GLuint dstx, GLuint dsty,
-			GLuint width, GLuint height,
-			GLuint color )
-{
-   DBG("%s\n", __FUNCTION__);
-   
-   intelEmitFillBlit(intel,
-		     dst->cpp,
-		     dst->pitch, dst->buffer, dst_offset, dst->tiled,
-		     dstx, dsty,
-		     width, height,
-		     color );
-}
-
diff --git a/i965/intel_regions.h b/i965/intel_regions.h
deleted file mode 100644
index d2235f1..0000000
--- a/i965/intel_regions.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef INTEL_REGIONS_H
-#define INTEL_REGIONS_H
-
-#include "mtypes.h"
-#include "bufmgr.h"		/* for DBG! */
-struct intel_context;
-
-/* A layer on top of the bufmgr buffers that adds a few useful things:
- *
- * - Refcounting for local buffer references.
- * - Refcounting for buffer maps
- * - Buffer dimensions - pitch and height.
- * - Blitter commands for copying 2D regions between buffers.
- */
-struct intel_region {
-   struct buffer *buffer;
-   GLuint refcount;
-   GLuint cpp;
-   GLuint pitch;
-   GLuint height;
-   GLboolean tiled;
-   GLubyte *map;
-   GLuint map_refcount;
-};
-
-/* Allocate a refcounted region.  Pointers to regions should only be
- * copied by calling intel_reference_region().
- *
- * No support for dynamically allocating tiled regions at this point.
- */
-struct intel_region *intel_region_alloc( struct intel_context *intel,
-					 GLuint cpp,
-					 GLuint pitch, 
-					 GLuint height );
-
-void intel_region_reference( struct intel_region **dst, 
-			     struct intel_region *src );
-
-void intel_region_release(struct intel_context *intel,
-			  struct intel_region **ib );
-
-/* Static regions may be tiled.  The assumption is that the X server
- * has set up fence registers to define tiled zones in agp and these
- * buffers are within those zones.  Tiling regions without fence
- * registers is more work.
- */
-struct intel_region *intel_region_create_static( struct intel_context *intel,
-						 GLuint mem_type,
-						 GLuint offset,
-						 void *virtual,
-						 GLuint cpp,
-						 GLuint pitch,
-						 GLuint height,
-						 GLuint size,
-						 GLboolean tiled );
-
-/* Map/unmap regions.  This is refcounted also: 
- */
-GLubyte *intel_region_map(struct intel_context *intel, 
-		       struct intel_region *ib);
-
-void intel_region_unmap(struct intel_context *intel,
-			struct intel_region *ib);
-
-
-/* Upload data to a rectangular sub-region
- */
-GLboolean intel_region_data(struct intel_context *intel, 
-			    struct intel_region *dest,
-			    GLuint dest_offset,
-			    GLuint destx, GLuint desty,
-			    const void *src, GLuint src_stride,
-			    GLuint srcx, GLuint srcy,
-			    GLuint width, GLuint height);
-			  
-/* Copy rectangular sub-regions
- */
-void intel_region_copy( struct intel_context *intel,
-			struct intel_region *dest,
-			GLuint dest_offset,
-			GLuint destx, GLuint desty,
-			struct intel_region *src,
-			GLuint src_offset,
-			GLuint srcx, GLuint srcy,
-			GLuint width, GLuint height );
-
-/* Fill a rectangular sub-region
- */
-void intel_region_fill( struct intel_context *intel,
-			struct intel_region *dest,
-			GLuint dest_offset,
-			GLuint destx, GLuint desty,
-			GLuint width, GLuint height,
-			GLuint color );
-
-
-/***********************************************************************
- * Misc utilities: move to somewhere generic
- */
-void _mesa_copy_rect( GLubyte *dst,
-		      GLuint cpp,
-		      GLuint dst_pitch,
-		      GLuint dst_x, 
-		      GLuint dst_y,
-		      GLuint width,
-		      GLuint height,
-		      const GLubyte *src,
-		      GLuint src_pitch,
-		      GLuint src_x,
-		      GLuint src_y );
-
-
-#endif
diff --git a/i965/intel_screen.c b/i965/intel_screen.c
deleted file mode 100644
index 5dac50d..0000000
--- a/i965/intel_screen.c
+++ /dev/null
@@ -1,701 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "glheader.h"
-#include "context.h"
-#include "framebuffer.h"
-#include "matrix.h"
-#include "renderbuffer.h"
-#include "simple_list.h"
-#include "utils.h"
-#include "vblank.h"
-#include "xmlpool.h"
-
-
-#include "intel_screen.h"
-
-#include "intel_context.h"
-#include "intel_tex.h"
-#include "intel_span.h"
-#include "intel_ioctl.h"
-
-#include "i830_dri.h"
-
-PUBLIC const char __driConfigOptions[] =
-DRI_CONF_BEGIN
-    DRI_CONF_SECTION_PERFORMANCE
-       DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) 
-       DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
-    DRI_CONF_SECTION_END
-    DRI_CONF_SECTION_QUALITY
-       DRI_CONF_FORCE_S3TC_ENABLE(false)
-       DRI_CONF_ALLOW_LARGE_TEXTURES(1)
-      DRI_CONF_SECTION_END
-DRI_CONF_END;
-const GLuint __driNConfigOptions = 4;
-
-#ifdef USE_NEW_INTERFACE
-static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;
-#endif /*USE_NEW_INTERFACE*/
-
-/**
- * Map all the memory regions described by the screen.
- * \return GL_TRUE if success, GL_FALSE if error.
- */
-GLboolean
-intelMapScreenRegions(__DRIscreenPrivate *sPriv)
-{
-   intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private;
-
-   if (intelScreen->front.handle) {
-      if (drmMap(sPriv->fd,
-                 intelScreen->front.handle,
-                 intelScreen->front.size,
-                 (drmAddress *)&intelScreen->front.map) != 0) {
-         _mesa_problem(NULL, "drmMap(frontbuffer) failed!");
-         return GL_FALSE;
-      }
-   } else {
-      /* Use the old static allocation method if the server isn't setting up
-       * a movable handle for us.  Add in the front buffer offset from
-       * framebuffer start, as our span routines (unlike other drivers) expect
-       * the renderbuffer address to point to the beginning of the
-       * renderbuffer.
-       */
-      intelScreen->front.map = (char *)sPriv->pFB;
-      if (intelScreen->front.map == NULL) {
-	 fprintf(stderr, "Failed to find framebuffer mapping\n");
-	 return GL_FALSE;
-      }
-   }
-
-   if (drmMap(sPriv->fd,
-              intelScreen->back.handle,
-              intelScreen->back.size,
-              (drmAddress *)&intelScreen->back.map) != 0) {
-      intelUnmapScreenRegions(intelScreen);
-      return GL_FALSE;
-   }
-
-   if (drmMap(sPriv->fd,
-              intelScreen->depth.handle,
-              intelScreen->depth.size,
-              (drmAddress *)&intelScreen->depth.map) != 0) {
-      intelUnmapScreenRegions(intelScreen);
-      return GL_FALSE;
-   }
-
-   if (drmMap(sPriv->fd,
-              intelScreen->tex.handle,
-              intelScreen->tex.size,
-              (drmAddress *)&intelScreen->tex.map) != 0) {
-      intelUnmapScreenRegions(intelScreen);
-      return GL_FALSE;
-   }
-
-   if (0)
-      printf("Mappings:  front: %p  back: %p  depth: %p  tex: %p\n",
-          intelScreen->front.map,
-          intelScreen->back.map,
-          intelScreen->depth.map,
-          intelScreen->tex.map);
-   return GL_TRUE;
-}
-
-
-void
-intelUnmapScreenRegions(intelScreenPrivate *intelScreen)
-{
-#define REALLY_UNMAP 1
-   /* If front.handle is present, we're doing the dynamic front buffer mapping,
-    * but if we've fallen back to static allocation then we shouldn't try to
-    * unmap here.
-    */
-   if (intelScreen->front.handle) {
-#if REALLY_UNMAP
-      if (drmUnmap(intelScreen->front.map, intelScreen->front.size) != 0)
-         printf("drmUnmap front failed!\n");
-#endif
-      intelScreen->front.map = NULL;
-   }
-   if (intelScreen->back.map) {
-#if REALLY_UNMAP
-      if (drmUnmap(intelScreen->back.map, intelScreen->back.size) != 0)
-         printf("drmUnmap back failed!\n");
-#endif
-      intelScreen->back.map = NULL;
-   }
-   if (intelScreen->depth.map) {
-#if REALLY_UNMAP
-      drmUnmap(intelScreen->depth.map, intelScreen->depth.size);
-      intelScreen->depth.map = NULL;
-#endif
-   }
-   if (intelScreen->tex.map) {
-#if REALLY_UNMAP
-      drmUnmap(intelScreen->tex.map, intelScreen->tex.size);
-      intelScreen->tex.map = NULL;
-#endif
-   }
-}
-
-
-static void
-intelPrintDRIInfo(intelScreenPrivate *intelScreen,
-                  __DRIscreenPrivate *sPriv,
-                  I830DRIPtr gDRIPriv)
-{
-   fprintf(stderr, "*** Front size:   0x%x  offset: 0x%x  pitch: %d\n",
-           intelScreen->front.size, intelScreen->front.offset,
-           intelScreen->front.pitch);
-   fprintf(stderr, "*** Back size:    0x%x  offset: 0x%x  pitch: %d\n",
-           intelScreen->back.size, intelScreen->back.offset,
-           intelScreen->back.pitch);
-   fprintf(stderr, "*** Depth size:   0x%x  offset: 0x%x  pitch: %d\n",
-           intelScreen->depth.size, intelScreen->depth.offset,
-           intelScreen->depth.pitch);
-   fprintf(stderr, "*** Rotated size: 0x%x  offset: 0x%x  pitch: %d\n",
-           intelScreen->rotated.size, intelScreen->rotated.offset,
-           intelScreen->rotated.pitch);
-   fprintf(stderr, "*** Texture size: 0x%x  offset: 0x%x\n",
-           intelScreen->tex.size, intelScreen->tex.offset);
-   fprintf(stderr, "*** Memory : 0x%x\n", gDRIPriv->mem);
-}
-
-
-static void
-intelPrintSAREA(volatile drmI830Sarea *sarea)
-{
-   fprintf(stderr, "SAREA: sarea width %d  height %d\n", sarea->width, sarea->height);
-   fprintf(stderr, "SAREA: pitch: %d\n", sarea->pitch);
-   fprintf(stderr,
-           "SAREA: front offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
-           sarea->front_offset, sarea->front_size,
-           (unsigned) sarea->front_handle);
-   fprintf(stderr,
-           "SAREA: back  offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
-           sarea->back_offset, sarea->back_size,
-           (unsigned) sarea->back_handle);
-   fprintf(stderr, "SAREA: depth offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
-           sarea->depth_offset, sarea->depth_size,
-           (unsigned) sarea->depth_handle);
-   fprintf(stderr, "SAREA: tex   offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
-           sarea->tex_offset, sarea->tex_size,
-           (unsigned) sarea->tex_handle);
-   fprintf(stderr, "SAREA: rotation: %d\n", sarea->rotation);
-   fprintf(stderr,
-           "SAREA: rotated offset: 0x%08x  size: 0x%x\n",
-           sarea->rotated_offset, sarea->rotated_size);
-   fprintf(stderr, "SAREA: rotated pitch: %d\n", sarea->rotated_pitch);
-}
-
-
-/**
- * A number of the screen parameters are obtained/computed from
- * information in the SAREA.  This function updates those parameters.
- */
-void
-intelUpdateScreenFromSAREA(intelScreenPrivate *intelScreen,
-                           volatile drmI830Sarea *sarea)
-{
-   intelScreen->width = sarea->width;
-   intelScreen->height = sarea->height;
-
-   intelScreen->front.offset = sarea->front_offset;
-   intelScreen->front.pitch = sarea->pitch * intelScreen->cpp;
-   intelScreen->front.handle = sarea->front_handle;
-   intelScreen->front.size = sarea->front_size;
-   intelScreen->front.tiled = sarea->front_tiled;
-
-   intelScreen->back.offset = sarea->back_offset;
-   intelScreen->back.pitch = sarea->pitch * intelScreen->cpp;
-   intelScreen->back.handle = sarea->back_handle;
-   intelScreen->back.size = sarea->back_size;
-   intelScreen->back.tiled = sarea->back_tiled;
-
-   intelScreen->depth.offset = sarea->depth_offset;
-   intelScreen->depth.pitch = sarea->pitch * intelScreen->cpp;
-   intelScreen->depth.handle = sarea->depth_handle;
-   intelScreen->depth.size = sarea->depth_size;
-   intelScreen->depth.tiled = sarea->depth_tiled;
-
-   intelScreen->tex.offset = sarea->tex_offset;
-   intelScreen->logTextureGranularity = sarea->log_tex_granularity;
-   intelScreen->tex.handle = sarea->tex_handle;
-   intelScreen->tex.size = sarea->tex_size;
-
-   intelScreen->rotated.offset = sarea->rotated_offset;
-   intelScreen->rotated.pitch = sarea->rotated_pitch * intelScreen->cpp;
-   intelScreen->rotated.size = sarea->rotated_size;
-   intelScreen->rotated.tiled = sarea->rotated_tiled;
-   intelScreen->current_rotation = sarea->rotation;
-#if 0
-   matrix23Rotate(&intelScreen->rotMatrix,
-                  sarea->width, sarea->height, sarea->rotation);
-#endif
-   intelScreen->rotatedWidth = sarea->virtualX;
-   intelScreen->rotatedHeight = sarea->virtualY;
-
-   if (0)
-      intelPrintSAREA(sarea);
-}
-
-
-static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv)
-{
-   intelScreenPrivate *intelScreen;
-   I830DRIPtr         gDRIPriv = (I830DRIPtr)sPriv->pDevPriv;
-   PFNGLXSCRENABLEEXTENSIONPROC glx_enable_extension =
-     (PFNGLXSCRENABLEEXTENSIONPROC) (*dri_interface->getProcAddress("glxEnableExtension"));
-   void * const psc = sPriv->psc->screenConfigs;
-   volatile drmI830Sarea *sarea;
-
-   if (sPriv->devPrivSize != sizeof(I830DRIRec)) {
-      fprintf(stderr,"\nERROR!  sizeof(I830DRIRec) (%ld) does not match passed size from device driver (%d)\n", (unsigned long)sizeof(I830DRIRec), sPriv->devPrivSize);
-      return GL_FALSE;
-   }
-
-   /* Allocate the private area */
-   intelScreen = (intelScreenPrivate *)CALLOC(sizeof(intelScreenPrivate));
-   if (!intelScreen) {
-      fprintf(stderr,"\nERROR!  Allocating private area failed\n");
-      return GL_FALSE;
-   }
-   /* parse information in __driConfigOptions */
-   driParseOptionInfo (&intelScreen->optionCache,
-		       __driConfigOptions, __driNConfigOptions);
-
-   intelScreen->driScrnPriv = sPriv;
-   sPriv->private = (void *)intelScreen;
-   intelScreen->sarea_priv_offset = gDRIPriv->sarea_priv_offset;
-   sarea = (volatile drmI830Sarea *)
-         (((GLubyte *)sPriv->pSAREA)+intelScreen->sarea_priv_offset);
-
-   intelScreen->deviceID = gDRIPriv->deviceID;
-   intelScreen->mem = gDRIPriv->mem;
-   intelScreen->cpp = gDRIPriv->cpp;
-
-   switch (gDRIPriv->bitsPerPixel) {
-   case 15: intelScreen->fbFormat = DV_PF_555; break;
-   case 16: intelScreen->fbFormat = DV_PF_565; break;
-   case 32: intelScreen->fbFormat = DV_PF_8888; break;
-   }
-			 
-   intelUpdateScreenFromSAREA(intelScreen, sarea);
-
-   if (0)
-      intelPrintDRIInfo(intelScreen, sPriv, gDRIPriv);
-
-   if (!intelMapScreenRegions(sPriv)) {
-      fprintf(stderr,"\nERROR!  mapping regions\n");
-      _mesa_free(intelScreen);
-      sPriv->private = NULL;
-      return GL_FALSE;
-   }
-
-   intelScreen->drmMinor = sPriv->drmMinor;
-
-   /* Determine if IRQs are active? */
-   {
-      int ret;
-      drmI830GetParam gp;
-
-      gp.param = I830_PARAM_IRQ_ACTIVE;
-      gp.value = &intelScreen->irq_active;
-
-      ret = drmCommandWriteRead( sPriv->fd, DRM_I830_GETPARAM,
-				 &gp, sizeof(gp));
-      if (ret) {
-	 fprintf(stderr, "drmI830GetParam: %d\n", ret);
-	 return GL_FALSE;
-      }
-   }
-
-   /* Determine if batchbuffers are allowed */
-   {
-      int ret;
-      drmI830GetParam gp;
-
-      gp.param = I830_PARAM_ALLOW_BATCHBUFFER;
-      gp.value = &intelScreen->allow_batchbuffer;
-
-      ret = drmCommandWriteRead( sPriv->fd, DRM_I830_GETPARAM,
-				 &gp, sizeof(gp));
-      if (ret) {
-	 fprintf(stderr, "drmI830GetParam: (%d) %d\n", gp.param, ret);
-	 return GL_FALSE;
-      }
-   }
-
-   if (glx_enable_extension != NULL) {
-      (*glx_enable_extension)( psc, "GLX_SGI_swap_control" );
-      (*glx_enable_extension)( psc, "GLX_SGI_video_sync" );
-      (*glx_enable_extension)( psc, "GLX_MESA_swap_control" );
-      (*glx_enable_extension)( psc, "GLX_MESA_swap_frame_usage" );
-      (*glx_enable_extension)( psc, "GLX_SGI_make_current_read" );
-      (*glx_enable_extension)( psc, "GLX_MESA_copy_sub_buffer" );
-   }
-   
-   return GL_TRUE;
-}
-
-
-static void intelDestroyScreen(__DRIscreenPrivate *sPriv)
-{
-   intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private;
-
-   intelUnmapScreenRegions(intelScreen);
-   FREE(intelScreen);
-   sPriv->private = NULL;
-}
-
-static GLboolean intelCreateBuffer( __DRIscreenPrivate *driScrnPriv,
-				    __DRIdrawablePrivate *driDrawPriv,
-				    const __GLcontextModes *mesaVis,
-				    GLboolean isPixmap )
-{
-   intelScreenPrivate *screen = (intelScreenPrivate *) driScrnPriv->private;
-
-   if (isPixmap) {
-      return GL_FALSE; /* not implemented */
-   } else {
-      GLboolean swStencil = (mesaVis->stencilBits > 0 && 
-			     mesaVis->depthBits != 24);
-
-      struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis);
-
-      {
-         driRenderbuffer *frontRb
-            = driNewRenderbuffer(GL_RGBA,
-                                 screen->front.map,
-                                 screen->cpp,
-                                 screen->front.offset, screen->front.pitch,
-                                 driDrawPriv);
-         intelSetSpanFunctions(frontRb, mesaVis);
-         _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontRb->Base);
-      }
-
-      if (mesaVis->doubleBufferMode) {
-         driRenderbuffer *backRb
-            = driNewRenderbuffer(GL_RGBA,
-                                 screen->back.map,
-                                 screen->cpp,
-                                 screen->back.offset, screen->back.pitch,
-                                 driDrawPriv);
-         intelSetSpanFunctions(backRb, mesaVis);
-         _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backRb->Base);
-      }
-
-      if (mesaVis->depthBits == 16) {
-         driRenderbuffer *depthRb
-            = driNewRenderbuffer(GL_DEPTH_COMPONENT16,
-                                 screen->depth.map,
-                                 screen->cpp,
-                                 screen->depth.offset, screen->depth.pitch,
-                                 driDrawPriv);
-         intelSetSpanFunctions(depthRb, mesaVis);
-         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
-      }
-      else if (mesaVis->depthBits == 24) {
-         driRenderbuffer *depthRb
-            = driNewRenderbuffer(GL_DEPTH_COMPONENT24,
-                                 screen->depth.map,
-                                 screen->cpp,
-                                 screen->depth.offset, screen->depth.pitch,
-                                 driDrawPriv);
-         intelSetSpanFunctions(depthRb, mesaVis);
-         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
-      }
-
-      if (mesaVis->stencilBits > 0 && !swStencil) {
-         driRenderbuffer *stencilRb
-            = driNewRenderbuffer(GL_STENCIL_INDEX8_EXT,
-                                 screen->depth.map,
-                                 screen->cpp,
-                                 screen->depth.offset, screen->depth.pitch,
-                                 driDrawPriv);
-         intelSetSpanFunctions(stencilRb, mesaVis);
-         _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencilRb->Base);
-      }
-
-      _mesa_add_soft_renderbuffers(fb,
-                                   GL_FALSE, /* color */
-                                   GL_FALSE, /* depth */
-                                   swStencil,
-                                   mesaVis->accumRedBits > 0,
-                                   GL_FALSE, /* alpha */
-                                   GL_FALSE /* aux */);
-      driDrawPriv->driverPrivate = (void *) fb;
-
-      return (driDrawPriv->driverPrivate != NULL);
-   }
-}
-
-static void intelDestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
-{
-   _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)));
-}
-
-
-/**
- * Get information about previous buffer swaps.
- */
-static int
-intelGetSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo )
-{
-   struct intel_context *intel;
-
-   if ( (dPriv == NULL) || (dPriv->driContextPriv == NULL)
-	|| (dPriv->driContextPriv->driverPrivate == NULL)
-	|| (sInfo == NULL) ) {
-      return -1;
-   }
-
-   intel = dPriv->driContextPriv->driverPrivate;
-   sInfo->swap_count = intel->swap_count;
-   sInfo->swap_ust = intel->swap_ust;
-   sInfo->swap_missed_count = intel->swap_missed_count;
-
-   sInfo->swap_missed_usage = (sInfo->swap_missed_count != 0)
-       ? driCalculateSwapUsage( dPriv, 0, intel->swap_missed_ust )
-       : 0.0;
-
-   return 0;
-}
-
-
-/* There are probably better ways to do this, such as an
- * init-designated function to register chipids and createcontext
- * functions.
- */
-extern GLboolean i830CreateContext( const __GLcontextModes *mesaVis,
-				    __DRIcontextPrivate *driContextPriv,
-				    void *sharedContextPrivate);
-
-extern GLboolean i915CreateContext( const __GLcontextModes *mesaVis,
-				   __DRIcontextPrivate *driContextPriv,
-				   void *sharedContextPrivate);
-
-extern GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
-				   __DRIcontextPrivate *driContextPriv,
-				   void *sharedContextPrivate);
-
-
-
-
-static GLboolean intelCreateContext( const __GLcontextModes *mesaVis,
-				   __DRIcontextPrivate *driContextPriv,
-				   void *sharedContextPrivate)
-{
-#if 0
-   __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
-   intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private;
-   switch (intelScreen->deviceID) {
-   case PCI_CHIP_845_G:
-   case PCI_CHIP_I830_M:
-   case PCI_CHIP_I855_GM:
-   case PCI_CHIP_I865_G:
-      return i830CreateContext( mesaVis, driContextPriv, 
-				sharedContextPrivate );
-
-   case PCI_CHIP_I915_G:
-   case PCI_CHIP_I915_GM:
-   case PCI_CHIP_I945_G:
-   case PCI_CHIP_I945_GM:
-      return i915CreateContext( mesaVis, driContextPriv, 
-			       sharedContextPrivate );
- 
-   default:
-      fprintf(stderr, "Unrecognized deviceID %x\n", intelScreen->deviceID);
-      return GL_FALSE;
-   }
-#else
-   return brwCreateContext( mesaVis, driContextPriv, 
-			    sharedContextPrivate );
-#endif
-}
-
-
-static const struct __DriverAPIRec intelAPI = {
-   .InitDriver      = intelInitDriver,
-   .DestroyScreen   = intelDestroyScreen,
-   .CreateContext   = intelCreateContext,
-   .DestroyContext  = intelDestroyContext,
-   .CreateBuffer    = intelCreateBuffer,
-   .DestroyBuffer   = intelDestroyBuffer,
-   .SwapBuffers     = intelSwapBuffers,
-   .MakeCurrent     = intelMakeCurrent,
-   .UnbindContext   = intelUnbindContext,
-   .GetSwapInfo     = intelGetSwapInfo,
-   .GetMSC          = driGetMSC32,
-   .WaitForMSC      = driWaitForMSC32,
-   .WaitForSBC      = NULL,
-   .SwapBuffersMSC  = NULL,
-   .CopySubBuffer   = intelCopySubBuffer
-};
-
-
-static __GLcontextModes *
-intelFillInModes( unsigned pixel_bits, unsigned depth_bits,
-		 unsigned stencil_bits, GLboolean have_back_buffer )
-{
-   __GLcontextModes * modes;
-   __GLcontextModes * m;
-   unsigned num_modes;
-   unsigned depth_buffer_factor;
-   unsigned back_buffer_factor;
-   GLenum fb_format;
-   GLenum fb_type;
-
-   /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't
-    * support pageflipping at all.
-    */
-   static const GLenum back_buffer_modes[] = {
-      GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML
-   };
-
-   u_int8_t depth_bits_array[3];
-   u_int8_t stencil_bits_array[3];
-
-
-   depth_bits_array[0] = 0;
-   depth_bits_array[1] = depth_bits;
-   depth_bits_array[2] = depth_bits;
-
-   /* Just like with the accumulation buffer, always provide some modes
-    * with a stencil buffer.  It will be a sw fallback, but some apps won't
-    * care about that.
-    */
-   stencil_bits_array[0] = 0;
-   stencil_bits_array[1] = 0;
-   stencil_bits_array[2] = (stencil_bits == 0) ? 8 : stencil_bits;
-
-   depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 3 : 1;
-   back_buffer_factor  = (have_back_buffer) ? 3 : 1;
-
-   num_modes = depth_buffer_factor * back_buffer_factor * 4;
-
-    if ( pixel_bits == 16 ) {
-        fb_format = GL_RGB;
-        fb_type = GL_UNSIGNED_SHORT_5_6_5;
-    }
-    else {
-        fb_format = GL_BGRA;
-        fb_type = GL_UNSIGNED_INT_8_8_8_8_REV;
-    }
-
-   modes = (*dri_interface->createContextModes)( num_modes, sizeof( __GLcontextModes ) );
-   m = modes;
-   if ( ! driFillInModes( & m, fb_format, fb_type,
-			  depth_bits_array, stencil_bits_array, depth_buffer_factor,
-			  back_buffer_modes, back_buffer_factor,
-			  GLX_TRUE_COLOR ) ) {
-	fprintf( stderr, "[%s:%u] Error creating FBConfig!\n",
-		 __func__, __LINE__ );
-	return NULL;
-   }
-   if ( ! driFillInModes( & m, fb_format, fb_type,
-			  depth_bits_array, stencil_bits_array, depth_buffer_factor,
-			  back_buffer_modes, back_buffer_factor,
-			  GLX_DIRECT_COLOR ) ) {
-	fprintf( stderr, "[%s:%u] Error creating FBConfig!\n",
-		 __func__, __LINE__ );
-	return NULL;
-   }
-
-   /* Mark the visual as slow if there are "fake" stencil bits.
-    */
-   for ( m = modes ; m != NULL ; m = m->next ) {
-      if ( (m->stencilBits != 0) && (m->stencilBits != stencil_bits) ) {
-	 m->visualRating = GLX_SLOW_CONFIG;
-      }
-   }
-
-   return modes;
-}
-
-
-/**
- * This is the bootstrap function for the driver.  libGL supplies all of the
- * requisite information about the system, and the driver initializes itself.
- * This routine also fills in the linked list pointed to by \c driver_modes
- * with the \c __GLcontextModes that the driver can support for windows or
- * pbuffers.
- * 
- * \return A pointer to a \c __DRIscreenPrivate on success, or \c NULL on 
- *         failure.
- */
-PUBLIC
-void * __driCreateNewScreen_20050727( __DRInativeDisplay *dpy, int scrn, __DRIscreen *psc,
-			     const __GLcontextModes * modes,
-			     const __DRIversion * ddx_version,
-			     const __DRIversion * dri_version,
-			     const __DRIversion * drm_version,
-			     const __DRIframebuffer * frame_buffer,
-			     drmAddress pSAREA, int fd, 
-			     int internal_api_version,
-			     const __DRIinterfaceMethods * interface,
-			     __GLcontextModes ** driver_modes )
-			     
-{
-   __DRIscreenPrivate *psp;
-   static const __DRIversion ddx_expected = { 1, 6, 0 };
-   static const __DRIversion dri_expected = { 4, 0, 0 };
-   static const __DRIversion drm_expected = { 1, 3, 0 };
-
-   dri_interface = interface;
-
-   if ( ! driCheckDriDdxDrmVersions2( "i915",
-				      dri_version, & dri_expected,
-				      ddx_version, & ddx_expected,
-				      drm_version, & drm_expected ) ) {
-      return NULL;
-   }
-
-   psp = __driUtilCreateNewScreen(dpy, scrn, psc, NULL,
-				  ddx_version, dri_version, drm_version,
-				  frame_buffer, pSAREA, fd,
-				  internal_api_version, &intelAPI);
-   if ( psp != NULL ) {
-      I830DRIPtr dri_priv = (I830DRIPtr) psp->pDevPriv;
-      *driver_modes = intelFillInModes( dri_priv->cpp * 8,
-					(dri_priv->cpp == 2) ? 16 : 24,
-					(dri_priv->cpp == 2) ? 0  : 8,
-					GL_TRUE );
-      /* Calling driInitExtensions here, with a NULL context pointer, does not actually
-       * enable the extensions.  It just makes sure that all the dispatch offsets for all
-       * the extensions that *might* be enables are known.  This is needed because the
-       * dispatch offsets need to be known when _mesa_context_create is called, but we can't
-       * enable the extensions until we have a context pointer.
-       *
-       * Hello chicken.  Hello egg.  How are you two today?
-       */
-      intelInitExtensions(NULL, GL_FALSE);
-   }
-
-   return (void *) psp;
-}
diff --git a/i965/intel_span.c b/i965/intel_span.c
deleted file mode 100644
index 60fbecc..0000000
--- a/i965/intel_span.c
+++ /dev/null
@@ -1,283 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "glheader.h"
-#include "macros.h"
-#include "mtypes.h"
-#include "colormac.h"
-
-#include "intel_screen.h"
-#include "intel_regions.h"
-#include "intel_span.h"
-#include "intel_ioctl.h"
-#include "intel_tex.h"
-#include "intel_batchbuffer.h"
-#include "swrast/swrast.h"
-
-#undef DBG
-#define DBG 0
-
-#define LOCAL_VARS						\
-   struct intel_context *intel = intel_context(ctx);                    \
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;		\
-   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
-   GLuint pitch = drb->pitch;					\
-   GLuint height = dPriv->h;					\
-   char *buf = (char *) drb->Base.Data +			\
-			dPriv->x * drb->cpp +			\
-			dPriv->y * pitch;			\
-   GLushort p;							\
-   (void) buf; (void) p
-
-#define LOCAL_DEPTH_VARS					\
-   struct intel_context *intel = intel_context(ctx);                    \
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;		\
-   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
-   GLuint pitch = drb->pitch;					\
-   GLuint height = dPriv->h;					\
-   char *buf = (char *) drb->Base.Data +			\
-			dPriv->x * drb->cpp +			\
-			dPriv->y * pitch
-
-#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS 
-
-#define INIT_MONO_PIXEL(p,color)\
-	 p = INTEL_PACKCOLOR565(color[0],color[1],color[2])
-
-#define Y_FLIP(_y) (height - _y - 1)
-
-#define HW_LOCK()
-
-#define HW_UNLOCK()
-
-/* 16 bit, 565 rgb color spanline and pixel functions
- */
-#define WRITE_RGBA( _x, _y, r, g, b, a )				\
-   *(GLushort *)(buf + _x*2 + _y*pitch)  = ( (((int)r & 0xf8) << 8) |	\
-		                             (((int)g & 0xfc) << 3) |	\
-		                             (((int)b & 0xf8) >> 3))
-#define WRITE_PIXEL( _x, _y, p )  \
-   *(GLushort *)(buf + _x*2 + _y*pitch) = p
-
-#define READ_RGBA( rgba, _x, _y )				\
-do {								\
-   GLushort p = *(GLushort *)(buf + _x*2 + _y*pitch);		\
-   rgba[0] = (((p >> 11) & 0x1f) * 255) / 31;			\
-   rgba[1] = (((p >>  5) & 0x3f) * 255) / 63;			\
-   rgba[2] = (((p >>  0) & 0x1f) * 255) / 31;			\
-   rgba[3] = 255;						\
-} while(0)
-
-#define TAG(x) intel##x##_565
-#include "spantmp.h"
-
-/* 15 bit, 555 rgb color spanline and pixel functions
- */
-#define WRITE_RGBA( _x, _y, r, g, b, a )			\
-   *(GLushort *)(buf + _x*2 + _y*pitch)  = (((r & 0xf8) << 7) |	\
-		                            ((g & 0xf8) << 3) |	\
-                         		    ((b & 0xf8) >> 3))
-
-#define WRITE_PIXEL( _x, _y, p )  \
-   *(GLushort *)(buf + _x*2 + _y*pitch)  = p
-
-#define READ_RGBA( rgba, _x, _y )				\
-do {								\
-   GLushort p = *(GLushort *)(buf + _x*2 + _y*pitch);		\
-   rgba[0] = (p >> 7) & 0xf8;					\
-   rgba[1] = (p >> 3) & 0xf8;					\
-   rgba[2] = (p << 3) & 0xf8;					\
-   rgba[3] = 255;						\
-} while(0)
-
-#define TAG(x) intel##x##_555
-#include "spantmp.h"
-
-/* 16 bit depthbuffer functions.
- */
-#define WRITE_DEPTH( _x, _y, d ) \
-   *(GLushort *)(buf + (_x)*2 + (_y)*pitch)  = d;
-
-#define READ_DEPTH( d, _x, _y )	\
-   d = *(GLushort *)(buf + (_x)*2 + (_y)*pitch);	 
-
-
-#define TAG(x) intel##x##_z16
-#include "depthtmp.h"
-
-
-#undef LOCAL_VARS
-#define LOCAL_VARS						\
-   struct intel_context *intel = intel_context(ctx);			\
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;		\
-   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
-   GLuint pitch = drb->pitch;					\
-   GLuint height = dPriv->h;					\
-   char *buf = (char *)drb->Base.Data +				\
-			dPriv->x * drb->cpp +			\
-			dPriv->y * pitch;			\
-   GLuint p;							\
-   (void) buf; (void) p
-
-#undef INIT_MONO_PIXEL
-#define INIT_MONO_PIXEL(p,color)\
-	 p = INTEL_PACKCOLOR8888(color[0],color[1],color[2],color[3])
-
-/* 32 bit, 8888 argb color spanline and pixel functions
- */
-#define WRITE_RGBA(_x, _y, r, g, b, a)			\
-    *(GLuint *)(buf + _x*4 + _y*pitch) = ((r << 16) |	\
-					  (g << 8)  |	\
-					  (b << 0)  |	\
-					  (a << 24) )
-
-#define WRITE_PIXEL(_x, _y, p)			\
-    *(GLuint *)(buf + _x*4 + _y*pitch) = p
-
-
-#define READ_RGBA(rgba, _x, _y)					\
-    do {							\
-	GLuint p = *(GLuint *)(buf + _x*4 + _y*pitch);		\
-	rgba[0] = (p >> 16) & 0xff;				\
-	rgba[1] = (p >> 8)  & 0xff;				\
-	rgba[2] = (p >> 0)  & 0xff;				\
-	rgba[3] = (p >> 24) & 0xff;				\
-    } while (0)
-
-#define TAG(x) intel##x##_8888
-#include "spantmp.h"
-
-
-/* 24/8 bit interleaved depth/stencil functions
- */
-#define WRITE_DEPTH( _x, _y, d ) {			\
-   GLuint tmp = *(GLuint *)(buf + (_x)*4 + (_y)*pitch);	\
-   tmp &= 0xff000000;					\
-   tmp |= (d) & 0xffffff;				\
-   *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = tmp;		\
-}
-
-#define READ_DEPTH( d, _x, _y )		\
-   d = *(GLuint *)(buf + (_x)*4 + (_y)*pitch) & 0xffffff;
-
-
-#define TAG(x) intel##x##_z24_s8
-#include "depthtmp.h"
-
-#define WRITE_STENCIL( _x, _y, d ) {			\
-   GLuint tmp = *(GLuint *)(buf + (_x)*4 + (_y)*pitch);	\
-   tmp &= 0xffffff;					\
-   tmp |= ((d)<<24);					\
-   *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = tmp;		\
-}
-
-#define READ_STENCIL( d, _x, _y )			\
-   d = *(GLuint *)(buf + (_x)*4 + (_y)*pitch) >> 24;
-
-#define TAG(x) intel##x##_z24_s8
-#include "stenciltmp.h"
-
-
-/* Move locking out to get reasonable span performance.
- */
-void intelSpanRenderStart( GLcontext *ctx )
-{
-   struct intel_context *intel = intel_context(ctx);
-
-   if (intel->need_flush) {
-      LOCK_HARDWARE(intel);
-      intel->vtbl.emit_flush(intel, 0);
-      intel_batchbuffer_flush(intel->batch);
-      intel->need_flush = 0;
-      UNLOCK_HARDWARE(intel);
-      intelFinish(&intel->ctx);
-   }
-
-
-   LOCK_HARDWARE(intel);
-
-   /* Just map the framebuffer and all textures.  Bufmgr code will
-    * take care of waiting on the necessary fences:
-    */
-   intel_region_map(intel, intel->front_region);
-   intel_region_map(intel, intel->back_region);
-   intel_region_map(intel, intel->depth_region);
-}
-
-void intelSpanRenderFinish( GLcontext *ctx )
-{
-   struct intel_context *intel = intel_context( ctx );
-
-   _swrast_flush( ctx );
-
-   /* Now unmap the framebuffer:
-    */
-   intel_region_unmap(intel, intel->front_region);
-   intel_region_unmap(intel, intel->back_region);
-   intel_region_unmap(intel, intel->depth_region);
-
-   UNLOCK_HARDWARE( intel );
-}
-
-void intelInitSpanFuncs( GLcontext *ctx )
-{
-   struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx);
-   swdd->SpanRenderStart = intelSpanRenderStart;
-   swdd->SpanRenderFinish = intelSpanRenderFinish; 
-}
-
-
-/**
- * Plug in the Get/Put routines for the given driRenderbuffer.
- */
-void
-intelSetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis)
-{
-   if (drb->Base.InternalFormat == GL_RGBA) {
-      if (vis->redBits == 5 && vis->greenBits == 5 && vis->blueBits == 5) {
-         intelInitPointers_555(&drb->Base);
-      }
-      else if (vis->redBits == 5 && vis->greenBits == 6 && vis->blueBits == 5) {
-         intelInitPointers_565(&drb->Base);
-      }
-      else {
-         assert(vis->redBits == 8);
-         assert(vis->greenBits == 8);
-         assert(vis->blueBits == 8);
-         intelInitPointers_8888(&drb->Base);
-      }
-   }
-   else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
-      intelInitDepthPointers_z16(&drb->Base);
-   }
-   else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
-      intelInitDepthPointers_z24_s8(&drb->Base);
-   }
-   else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
-      intelInitStencilPointers_z24_s8(&drb->Base);
-   }
-}
diff --git a/i965/intel_state.c b/i965/intel_state.c
index 701b30c..0fba5a7 100644
--- a/i965/intel_state.c
+++ b/i965/intel_state.c
@@ -195,15 +195,16 @@ int intel_translate_logic_op( GLenum opcode )
 static void intelClearColor(GLcontext *ctx, const GLfloat color[4])
 {
    struct intel_context *intel = intel_context(ctx);
-   intelScreenPrivate *screen = intel->intelScreen;
 
    UNCLAMPED_FLOAT_TO_RGBA_CHAN(intel->clear_chan, color);
 
-   intel->ClearColor = INTEL_PACKCOLOR(screen->fbFormat,
-				       intel->clear_chan[0], 
-				       intel->clear_chan[1], 
-				       intel->clear_chan[2], 
-				       intel->clear_chan[3]);
+   intel->ClearColor8888 = INTEL_PACKCOLOR8888(intel->clear_chan[0],
+					       intel->clear_chan[1],
+					       intel->clear_chan[2],
+					       intel->clear_chan[3]);
+   intel->ClearColor565 = INTEL_PACKCOLOR565(intel->clear_chan[0],
+					     intel->clear_chan[1],
+					     intel->clear_chan[2]);
 }
 
 
@@ -222,99 +223,3 @@ void intelInitStateFuncs( struct dd_function_table *functions )
    functions->RenderMode = intelRenderMode;
    functions->ClearColor = intelClearColor;
 }
-
-
-
-
-void intelInitState( GLcontext *ctx )
-{
-   /* Mesa should do this for us:
-    */
-   ctx->Driver.AlphaFunc( ctx, 
-			  ctx->Color.AlphaFunc,
-			  ctx->Color.AlphaRef);
-
-   ctx->Driver.BlendColor( ctx,
-			   ctx->Color.BlendColor );
-
-   ctx->Driver.BlendEquationSeparate( ctx, 
-				      ctx->Color.BlendEquationRGB,
-				      ctx->Color.BlendEquationA);
-
-   ctx->Driver.BlendFuncSeparate( ctx,
-				  ctx->Color.BlendSrcRGB,
-				  ctx->Color.BlendDstRGB,
-				  ctx->Color.BlendSrcA,
-				  ctx->Color.BlendDstA);
-
-   ctx->Driver.ColorMask( ctx, 
-			  ctx->Color.ColorMask[RCOMP],
-			  ctx->Color.ColorMask[GCOMP],
-			  ctx->Color.ColorMask[BCOMP],
-			  ctx->Color.ColorMask[ACOMP]);
-
-   ctx->Driver.CullFace( ctx, ctx->Polygon.CullFaceMode );
-   ctx->Driver.DepthFunc( ctx, ctx->Depth.Func );
-   ctx->Driver.DepthMask( ctx, ctx->Depth.Mask );
-
-   ctx->Driver.Enable( ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled );
-   ctx->Driver.Enable( ctx, GL_BLEND, ctx->Color.BlendEnabled );
-   ctx->Driver.Enable( ctx, GL_COLOR_LOGIC_OP, ctx->Color.ColorLogicOpEnabled );
-   ctx->Driver.Enable( ctx, GL_COLOR_SUM, ctx->Fog.ColorSumEnabled );
-   ctx->Driver.Enable( ctx, GL_CULL_FACE, ctx->Polygon.CullFlag );
-   ctx->Driver.Enable( ctx, GL_DEPTH_TEST, ctx->Depth.Test );
-   ctx->Driver.Enable( ctx, GL_DITHER, ctx->Color.DitherFlag );
-   ctx->Driver.Enable( ctx, GL_FOG, ctx->Fog.Enabled );
-   ctx->Driver.Enable( ctx, GL_LIGHTING, ctx->Light.Enabled );
-   ctx->Driver.Enable( ctx, GL_LINE_SMOOTH, ctx->Line.SmoothFlag );
-   ctx->Driver.Enable( ctx, GL_POLYGON_STIPPLE, ctx->Polygon.StippleFlag );
-   ctx->Driver.Enable( ctx, GL_SCISSOR_TEST, ctx->Scissor.Enabled );
-   ctx->Driver.Enable( ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled );
-   ctx->Driver.Enable( ctx, GL_TEXTURE_1D, GL_FALSE );
-   ctx->Driver.Enable( ctx, GL_TEXTURE_2D, GL_FALSE );
-   ctx->Driver.Enable( ctx, GL_TEXTURE_RECTANGLE_NV, GL_FALSE );
-   ctx->Driver.Enable( ctx, GL_TEXTURE_3D, GL_FALSE );
-   ctx->Driver.Enable( ctx, GL_TEXTURE_CUBE_MAP, GL_FALSE );
-
-   ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color );
-   ctx->Driver.Fogfv( ctx, GL_FOG_MODE, 0 );
-   ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density );
-   ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start );
-   ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End );
-
-   ctx->Driver.FrontFace( ctx, ctx->Polygon.FrontFace );
-
-   {
-      GLfloat f = (GLfloat)ctx->Light.Model.ColorControl;
-      ctx->Driver.LightModelfv( ctx, GL_LIGHT_MODEL_COLOR_CONTROL, &f );
-   }
-
-   ctx->Driver.LineWidth( ctx, ctx->Line.Width );
-   ctx->Driver.LogicOpcode( ctx, ctx->Color.LogicOp );
-   ctx->Driver.PointSize( ctx, ctx->Point.Size );
-   ctx->Driver.PolygonStipple( ctx, (const GLubyte *)ctx->PolygonStipple );
-   ctx->Driver.Scissor( ctx, ctx->Scissor.X, ctx->Scissor.Y,
-			ctx->Scissor.Width, ctx->Scissor.Height );
-   ctx->Driver.ShadeModel( ctx, ctx->Light.ShadeModel );
-   ctx->Driver.StencilFuncSeparate( ctx, GL_FRONT,
-                                    ctx->Stencil.Function[0],
-                                    ctx->Stencil.Ref[0],
-                                    ctx->Stencil.ValueMask[0] );
-   ctx->Driver.StencilFuncSeparate( ctx, GL_BACK,
-                                    ctx->Stencil.Function[1],
-                                    ctx->Stencil.Ref[1],
-                                    ctx->Stencil.ValueMask[1] );
-   ctx->Driver.StencilMaskSeparate( ctx, GL_FRONT, ctx->Stencil.WriteMask[0] );
-   ctx->Driver.StencilMaskSeparate( ctx, GL_BACK, ctx->Stencil.WriteMask[1] );
-   ctx->Driver.StencilOpSeparate( ctx, GL_FRONT,
-                                  ctx->Stencil.FailFunc[0],
-                                  ctx->Stencil.ZFailFunc[0],
-                                  ctx->Stencil.ZPassFunc[0]);
-   ctx->Driver.StencilOpSeparate( ctx, GL_BACK,
-                                  ctx->Stencil.FailFunc[1],
-                                  ctx->Stencil.ZFailFunc[1],
-                                  ctx->Stencil.ZPassFunc[1]);
-
-
-   ctx->Driver.DrawBuffer( ctx, ctx->Color.DrawBuffer[0] );
-}
diff --git a/i965/intel_tex.c b/i965/intel_tex.c
deleted file mode 100644
index 4523969..0000000
--- a/i965/intel_tex.c
+++ /dev/null
@@ -1,315 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "mtypes.h"
-#include "image.h"
-#include "texstore.h"
-#include "texformat.h"
-#include "teximage.h"
-#include "texobj.h"
-#include "swrast/swrast.h"
-
-
-#include "intel_context.h"
-#include "intel_tex.h"
-#include "intel_mipmap_tree.h"
-
-
-static GLuint target_to_face( GLenum target )
-{
-   switch (target) {
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
-      return ((GLuint) target - 
-	      (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X);
-   default:
-      return 0;
-   }
-}
-
-static void intelTexImage1D( GLcontext *ctx, GLenum target, GLint level,
-			    GLint internalFormat,
-			    GLint width, GLint border,
-			    GLenum format, GLenum type, const GLvoid *pixels,
-			    const struct gl_pixelstore_attrib *packing,
-			    struct gl_texture_object *texObj,
-			    struct gl_texture_image *texImage )
-{
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-
-   _mesa_store_teximage1d( ctx, target, level, internalFormat,
-			   width, border, format, type,
-			   pixels, packing, texObj, texImage );
-
-   intelObj->dirty_images[0] |= (1 << level);
-   intelObj->dirty |= 1;
-}
-
-static void intelTexSubImage1D( GLcontext *ctx, 
-			       GLenum target,
-			       GLint level,	
-			       GLint xoffset,
-				GLsizei width,
-			       GLenum format, GLenum type,
-			       const GLvoid *pixels,
-			       const struct gl_pixelstore_attrib *packing,
-			       struct gl_texture_object *texObj,
-			       struct gl_texture_image *texImage )
-{
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-
-   _mesa_store_texsubimage1d(ctx, target, level, xoffset, width, 
-			     format, type, pixels, packing, texObj,
-			     texImage);
-
-   intelObj->dirty_images[0] |= (1 << level);
-   intelObj->dirty |= 1;
-}
-
-
-/* Handles 2D, CUBE, RECT:
- */
-static void intelTexImage2D( GLcontext *ctx, GLenum target, GLint level,
-			    GLint internalFormat,
-			    GLint width, GLint height, GLint border,
-			    GLenum format, GLenum type, const GLvoid *pixels,
-			    const struct gl_pixelstore_attrib *packing,
-			    struct gl_texture_object *texObj,
-			    struct gl_texture_image *texImage )
-{
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-   GLuint face = target_to_face(target);
-
-   _mesa_store_teximage2d( ctx, target, level, internalFormat,
-			   width, height, border, format, type,
-			   pixels, packing, texObj, texImage );
-
-   intelObj->dirty_images[face] |= (1 << level);
-   intelObj->dirty |= 1 << face;
-}
-
-static void intelTexSubImage2D( GLcontext *ctx, 
-			       GLenum target,
-			       GLint level,	
-			       GLint xoffset, GLint yoffset,
-			       GLsizei width, GLsizei height,
-			       GLenum format, GLenum type,
-			       const GLvoid *pixels,
-			       const struct gl_pixelstore_attrib *packing,
-			       struct gl_texture_object *texObj,
-			       struct gl_texture_image *texImage )
-{
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-   GLuint face = target_to_face(target);
-
-   _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width, 
-			     height, format, type, pixels, packing, texObj,
-			     texImage);
-
-   intelObj->dirty_images[face] |= (1 << level);
-   intelObj->dirty |= 1 << face;
-}
-
-static void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
-                              GLint internalFormat,
-                              GLint width, GLint height, GLint border,
-                              GLsizei imageSize, const GLvoid *data,
-                              struct gl_texture_object *texObj,
-                              struct gl_texture_image *texImage )
-{
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-   GLuint face = target_to_face(target);
-
-   _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width,
-				     height, border, imageSize, data, texObj, texImage);
-   
-   intelObj->dirty_images[face] |= (1 << level);
-   intelObj->dirty |= 1 << face;
-}
-
-
-static void intelCompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
-                                 GLint xoffset, GLint yoffset,
-                                 GLsizei width, GLsizei height,
-                                 GLenum format,
-                                 GLsizei imageSize, const GLvoid *data,
-                                 struct gl_texture_object *texObj,
-                                 struct gl_texture_image *texImage )
-{
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-   GLuint face = target_to_face(target);
-
-   _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
-					height, format, imageSize, data, texObj, texImage);
-   
-   intelObj->dirty_images[face] |= (1 << level);
-   intelObj->dirty |= 1 << face;
-}
-
-
-static void intelTexImage3D( GLcontext *ctx, GLenum target, GLint level,
-                            GLint internalFormat,
-                            GLint width, GLint height, GLint depth,
-                            GLint border,
-                            GLenum format, GLenum type, const GLvoid *pixels,
-                            const struct gl_pixelstore_attrib *packing,
-                            struct gl_texture_object *texObj,
-                            struct gl_texture_image *texImage )
-{
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-
-   _mesa_store_teximage3d(ctx, target, level, internalFormat,
-			  width, height, depth, border,
-			  format, type, pixels,
-			  &ctx->Unpack, texObj, texImage);
-   
-   intelObj->dirty_images[0] |= (1 << level);
-   intelObj->dirty |= 1 << 0;
-}
-
-
-static void
-intelTexSubImage3D( GLcontext *ctx, GLenum target, GLint level,
-                   GLint xoffset, GLint yoffset, GLint zoffset,
-                   GLsizei width, GLsizei height, GLsizei depth,
-                   GLenum format, GLenum type,
-                   const GLvoid *pixels,
-                   const struct gl_pixelstore_attrib *packing,
-                   struct gl_texture_object *texObj,
-                   struct gl_texture_image *texImage )
-{
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-
-   _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset,
-                             width, height, depth,
-                             format, type, pixels, packing, texObj, texImage);
-
-   intelObj->dirty_images[0] |= (1 << level);
-   intelObj->dirty |= 1 << 0;
-}
-
-
-
-
-static struct gl_texture_object *intelNewTextureObject( GLcontext *ctx, 
-							GLuint name, 
-							GLenum target )
-{
-   struct intel_texture_object *obj = CALLOC_STRUCT(intel_texture_object);
-
-   _mesa_initialize_texture_object(&obj->base, name, target);
-
-   return &obj->base;
-}
-
-static GLboolean intelIsTextureResident(GLcontext *ctx,
-                                      struct gl_texture_object *texObj)
-{
-#if 0
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-   
-   return 
-      intelObj->mt && 
-      intelObj->mt->region && 
-      intel_is_region_resident(intel, intelObj->mt->region);
-#endif
-   return 1;
-}
-
-
-
-static void intelTexParameter( GLcontext *ctx, 
-			       GLenum target,
-			       struct gl_texture_object *texObj,
-			       GLenum pname, 
-			       const GLfloat *params )
-{
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
- 
-   switch (pname) {
-      /* Anything which can affect the calculation of firstLevel and
-       * lastLevel, as changes to these may invalidate the miptree.
-       */
-   case GL_TEXTURE_MIN_FILTER:
-   case GL_TEXTURE_MAG_FILTER:
-   case GL_TEXTURE_BASE_LEVEL:
-   case GL_TEXTURE_MAX_LEVEL:
-   case GL_TEXTURE_MIN_LOD:
-   case GL_TEXTURE_MAX_LOD:
-      intelObj->dirty |= 1;
-      break;
-
-   default:
-      break;
-   }
-}
-
-
-static void
-intel_delete_texture_object( GLcontext *ctx, struct gl_texture_object *texObj )
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_texture_object *intelObj = intel_texture_object(texObj);
-
-   if (intelObj->mt)
-      intel_miptree_destroy(intel, intelObj->mt);
-
-   _mesa_delete_texture_object( ctx, texObj );
-}
-
-void intelInitTextureFuncs( struct dd_function_table *functions )
-{
-   functions->NewTextureObject          = intelNewTextureObject;
-   functions->TexImage1D                = intelTexImage1D;
-   functions->TexImage2D                = intelTexImage2D;
-   functions->TexImage3D                = intelTexImage3D;
-   functions->TexSubImage1D             = intelTexSubImage1D;
-   functions->TexSubImage2D             = intelTexSubImage2D;
-   functions->TexSubImage3D             = intelTexSubImage3D;
-   functions->CopyTexImage1D            = _swrast_copy_teximage1d;
-   functions->CopyTexImage2D            = _swrast_copy_teximage2d;
-   functions->CopyTexSubImage1D         = _swrast_copy_texsubimage1d;
-   functions->CopyTexSubImage2D         = _swrast_copy_texsubimage2d;
-   functions->CopyTexSubImage3D         = _swrast_copy_texsubimage3d;
-   functions->DeleteTexture             = intel_delete_texture_object;
-   functions->UpdateTexturePalette      = NULL;
-   functions->IsTextureResident = intelIsTextureResident;
-   functions->TestProxyTexImage         = _mesa_test_proxy_teximage;
-   functions->CompressedTexImage2D      = intelCompressedTexImage2D;
-   functions->CompressedTexSubImage2D   = intelCompressedTexSubImage2D;
-   functions->TexParameter              = intelTexParameter;
-}
-
-
-
-
-
diff --git a/i965/intel_tex_validate.c b/i965/intel_tex_validate.c
deleted file mode 100644
index cb23b9d..0000000
--- a/i965/intel_tex_validate.c
+++ /dev/null
@@ -1,256 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "mtypes.h"
-#include "macros.h"
-
-#include "intel_context.h"
-#include "intel_mipmap_tree.h"
-#include "intel_tex.h"
-#include "bufmgr.h"
-
-/**
- * Compute which mipmap levels that really need to be sent to the hardware.
- * This depends on the base image size, GL_TEXTURE_MIN_LOD,
- * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, and GL_TEXTURE_MAX_LEVEL.
- */
-static void intel_calculate_first_last_level( struct intel_texture_object *intelObj )
-{
-   struct gl_texture_object *tObj = &intelObj->base;
-   const struct gl_texture_image * const baseImage =
-       tObj->Image[0][tObj->BaseLevel];
-
-   /* These must be signed values.  MinLod and MaxLod can be negative numbers,
-    * and having firstLevel and lastLevel as signed prevents the need for
-    * extra sign checks.
-    */
-   int   firstLevel;
-   int   lastLevel;
-
-   /* Yes, this looks overly complicated, but it's all needed.
-    */
-   switch (tObj->Target) {
-   case GL_TEXTURE_1D:
-   case GL_TEXTURE_2D:
-   case GL_TEXTURE_3D:
-   case GL_TEXTURE_CUBE_MAP:
-      if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) {
-         /* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL.
-          */
-         firstLevel = lastLevel = tObj->BaseLevel;
-      }
-      else {
-	 /* Currently not taking min/max lod into account here, those
-	  * values are programmed as sampler state elsewhere and we
-	  * upload the same mipmap levels regardless.  Not sure if
-	  * this makes sense as it means it isn't possible for the app
-	  * to use min/max lod to reduce texture memory pressure:
-	  */
-	 firstLevel = tObj->BaseLevel;
-	 lastLevel = MIN2(tObj->BaseLevel + baseImage->MaxLog2, 
-			  tObj->MaxLevel);
-	 lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
-      }
-      break;
-   case GL_TEXTURE_RECTANGLE_NV:
-   case GL_TEXTURE_4D_SGIS:
-      firstLevel = lastLevel = 0;
-      break;
-   default:
-      return;
-   }
-
-   /* save these values */
-   intelObj->firstLevel = firstLevel;
-   intelObj->lastLevel = lastLevel;
-}
-
-static GLboolean copy_image_data_to_tree( struct intel_context *intel,
-					  struct intel_texture_object *intelObj,
-					  struct gl_texture_image *texImage,
-					  GLuint face,
-					  GLuint level)
-{
-   return intel_miptree_image_data(intel,
-				   intelObj->mt,
-				   face,
-				   level,
-				   texImage->Data,
-				   texImage->RowStride,
-				   (texImage->RowStride * 
-				    texImage->Height * 
-				    texImage->TexFormat->TexelBytes));
-}
-
-static void intel_texture_invalidate( struct intel_texture_object *intelObj )
-{
-   GLint nr_faces, face;
-   intelObj->dirty = ~0;
-
-   nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
-   for (face = 0; face < nr_faces; face++) 
-      intelObj->dirty_images[face] = ~0;
-}
-
-static void intel_texture_invalidate_cb( struct intel_context *intel,
-					 void *ptr )
-{
-   intel_texture_invalidate( (struct intel_texture_object *) ptr );
-}
-
-
-/*  
- */
-GLuint intel_finalize_mipmap_tree( struct intel_context *intel,
-				   struct gl_texture_object *tObj )
-{
-   struct intel_texture_object *intelObj = intel_texture_object(tObj);
-   GLuint face, i;
-   GLuint nr_faces = 0;
-   struct gl_texture_image *firstImage;
-
-   if( tObj == intel->frame_buffer_texobj )
-      return GL_FALSE;
-   
-   /* We know/require this is true by now: 
-    */
-   assert(intelObj->base.Complete);
-
-   /* What levels must the tree include at a minimum?
-    */
-   if (intelObj->dirty) {
-      intel_calculate_first_last_level( intelObj );
-/*       intel_miptree_destroy(intel, intelObj->mt); */
-/*       intelObj->mt = NULL; */
-   }
-
-   firstImage = intelObj->base.Image[0][intelObj->firstLevel];
-
-   /* Fallback case:
-    */
-   if (firstImage->Border) {
-      if (intelObj->mt) {
-	 intel_miptree_destroy(intel, intelObj->mt);
-	 intelObj->mt = NULL;
-	 /* Set all images dirty:
-	  */
-	 intel_texture_invalidate(intelObj);
-      }
-      return GL_FALSE;
-   }
-
-
-
-   /* Check tree can hold all active levels.  Check tree matches
-    * target, imageFormat, etc.
-    */
-   if (intelObj->mt &&
-       (intelObj->mt->target != intelObj->base.Target ||
-	intelObj->mt->internal_format != firstImage->InternalFormat ||
-	intelObj->mt->first_level != intelObj->firstLevel ||
-	intelObj->mt->last_level != intelObj->lastLevel ||
-	intelObj->mt->width0 != firstImage->Width ||
-	intelObj->mt->height0 != firstImage->Height ||
-	intelObj->mt->depth0 != firstImage->Depth ||
-	intelObj->mt->cpp != firstImage->TexFormat->TexelBytes ||
-	intelObj->mt->compressed != firstImage->IsCompressed)) 
-   {
-      intel_miptree_destroy(intel, intelObj->mt);
-      intelObj->mt = NULL;
-      
-      /* Set all images dirty:
-       */
-      intel_texture_invalidate(intelObj);
-   }
-      
-
-   /* May need to create a new tree:
-    */
-   if (!intelObj->mt) {
-      intelObj->mt = intel_miptree_create(intel,
-					  intelObj->base.Target,
-					  firstImage->InternalFormat,
-					  intelObj->firstLevel,
-					  intelObj->lastLevel,
-					  firstImage->Width,
-					  firstImage->Height,
-					  firstImage->Depth,
-					  firstImage->TexFormat->TexelBytes,
-					  firstImage->IsCompressed);
-
-      /* Tell the buffer manager that we will manage the backing
-       * store, but we still want it to do fencing for us.
-       */
-      bmBufferSetInvalidateCB(intel, 
-			      intelObj->mt->region->buffer,
-			      intel_texture_invalidate_cb,
-			      intelObj,
-			      GL_FALSE);
-   }
-
-   /* Pull in any images not in the object's tree:
-    */
-   if (intelObj->dirty) {
-      nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
-      for (face = 0; face < nr_faces; face++) {
-	 if (intelObj->dirty_images[face]) {
-	    for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) {
-	       struct gl_texture_image *texImage = intelObj->base.Image[face][i];
-
-	       /* Need to import images in main memory or held in other trees.
-		*/
-	       if (intelObj->dirty_images[face] & (1<<i) &&
-		   texImage) {
-
-		  if (INTEL_DEBUG & DEBUG_TEXTURE)
-		     _mesa_printf("copy data from image %d (%p) into object miptree\n",
-				  i,
-				  texImage->Data);
-
-		  if (!copy_image_data_to_tree(intel,
-					       intelObj,
-					       texImage,
-					       face,
-					       i))
-		     return GL_FALSE;
-
-	       }
-	    }
-	 }
-      }
-
-      /* Only clear the dirty flags if everything went ok:
-       */
-      for (face = 0; face < nr_faces; face++) {
-	 intelObj->dirty_images[face] = 0;
-      }
-
-      intelObj->dirty = 0;
-   }
-
-   return GL_TRUE;
-}
diff --git a/i965/server/i830_common.h b/i965/server/i830_common.h
deleted file mode 100644
index fe2b8e8..0000000
--- a/i965/server/i830_common.h
+++ /dev/null
@@ -1,232 +0,0 @@
-/**************************************************************************
-
-Copyright 2001 VA Linux Systems Inc., Fremont, California.
-Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas.
-
-All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a
-copy of this software and associated documentation files (the "Software"),
-to deal in the Software without restriction, including without limitation
-on the rights to use, copy, modify, merge, publish, distribute, sub
-license, and/or sell copies of the Software, and to permit persons to whom
-the Software is furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice (including the next
-paragraph) shall be included in all copies or substantial portions of the
-Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
-ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
-DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
-OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
-USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/i810/i830_common.h,v 1.1 2002/09/11 00:29:32 dawes Exp $ */
-
-#ifndef _I830_COMMON_H_
-#define _I830_COMMON_H_
-
-
-#define I830_NR_TEX_REGIONS 255	/* maximum due to use of chars for next/prev */
-#define I830_LOG_MIN_TEX_REGION_SIZE 14
-
-
-/* Driver specific DRM command indices
- * NOTE: these are not OS specific, but they are driver specific
- */
-#define DRM_I830_INIT                     0x00
-#define DRM_I830_FLUSH                    0x01
-#define DRM_I830_FLIP                     0x02
-#define DRM_I830_BATCHBUFFER              0x03
-#define DRM_I830_IRQ_EMIT                 0x04
-#define DRM_I830_IRQ_WAIT                 0x05
-#define DRM_I830_GETPARAM                 0x06
-#define DRM_I830_SETPARAM                 0x07
-#define DRM_I830_ALLOC                    0x08
-#define DRM_I830_FREE                     0x09
-#define DRM_I830_INIT_HEAP                0x0a
-#define DRM_I830_CMDBUFFER                0x0b
-#define DRM_I830_DESTROY_HEAP             0x0c
-#define DRM_I830_MMIO		       	  0x10
-
-typedef struct {
-   enum {
-      I830_INIT_DMA = 0x01,
-      I830_CLEANUP_DMA = 0x02,
-      I830_RESUME_DMA = 0x03
-   } func;
-   unsigned int mmio_offset;
-   int sarea_priv_offset;
-   unsigned int ring_start;
-   unsigned int ring_end;
-   unsigned int ring_size;
-   unsigned int front_offset;
-   unsigned int back_offset;
-   unsigned int depth_offset;
-   unsigned int w;
-   unsigned int h;
-   unsigned int pitch;
-   unsigned int pitch_bits;
-   unsigned int back_pitch;
-   unsigned int depth_pitch;
-   unsigned int cpp;
-   unsigned int chipset;
-} drmI830Init;
-
-typedef struct {
-	drmTextureRegion texList[I830_NR_TEX_REGIONS+1];
-        int last_upload;	/* last time texture was uploaded */
-        int last_enqueue;	/* last time a buffer was enqueued */
-        volatile int last_dispatch;	/* age of the most recently dispatched buffer */
-	int ctxOwner;		/* last context to upload state */
-	int texAge;
-        int pf_enabled;		/* is pageflipping allowed? */
-        int pf_active;               
-        int pf_current_page;	/* which buffer is being displayed? */
-        int perf_boxes;	        /* performance boxes to be displayed */   
-	int width, height;      /* screen size in pixels */
-
-	drm_handle_t front_handle;
-	int front_offset;
-	int front_size;
-
-	drm_handle_t back_handle;
-	int back_offset;
-	int back_size;
-
-	drm_handle_t depth_handle;
-	int depth_offset;
-	int depth_size;
-
-	drm_handle_t tex_handle;
-	int tex_offset;
-	int tex_size;
-	int log_tex_granularity;
-	int pitch;
-	int rotation;           /* 0, 90, 180 or 270 */
-	int rotated_offset;
-	int rotated_size;
-	int rotated_pitch;
-	int virtualX, virtualY;
-
-	unsigned int front_tiled;
-        unsigned int back_tiled;
-        unsigned int depth_tiled;
-        unsigned int rotated_tiled;
-        unsigned int rotated2_tiled;
-
-	int pipeA_x;
-	int pipeA_y;
-	int pipeA_w;
-	int pipeA_h;
-	int pipeB_x;
-	int pipeB_y;
-	int pipeB_w;
-	int pipeB_h;
-
-} drmI830Sarea;
-
-/* Flags for perf_boxes
- */
-#define I830_BOX_RING_EMPTY    0x1 /* populated by kernel */
-#define I830_BOX_FLIP          0x2 /* populated by kernel */
-#define I830_BOX_WAIT          0x4 /* populated by kernel & client */
-#define I830_BOX_TEXTURE_LOAD  0x8 /* populated by kernel */
-#define I830_BOX_LOST_CONTEXT  0x10 /* populated by client */
-
-
-typedef struct {
-   	int start;		/* agp offset */
-	int used;		/* nr bytes in use */
-	int DR1;		/* hw flags for GFX_OP_DRAWRECT_INFO */
-        int DR4;		/* window origin for GFX_OP_DRAWRECT_INFO*/
-	int num_cliprects;	/* mulitpass with multiple cliprects? */
-        drm_clip_rect_t *cliprects; /* pointer to userspace cliprects */
-} drmI830BatchBuffer;
-
-typedef struct {
-   	char *buf;		/* agp offset */
-	int sz; 		/* nr bytes in use */
-	int DR1;		/* hw flags for GFX_OP_DRAWRECT_INFO */
-        int DR4;		/* window origin for GFX_OP_DRAWRECT_INFO*/
-	int num_cliprects;	/* mulitpass with multiple cliprects? */
-        drm_clip_rect_t *cliprects; /* pointer to userspace cliprects */
-} drmI830CmdBuffer;
- 
-typedef struct {
-	int *irq_seq;
-} drmI830IrqEmit;
-
-typedef struct {
-	int irq_seq;
-} drmI830IrqWait;
-
-typedef struct {
-	int param;
-	int *value;
-} drmI830GetParam;
-
-#define I830_PARAM_IRQ_ACTIVE     1
-#define I830_PARAM_ALLOW_BATCHBUFFER   2 
-
-typedef struct {
-	int param;
-	int value;
-} drmI830SetParam;
-
-#define I830_SETPARAM_USE_MI_BATCHBUFFER_START  1
-#define I830_SETPARAM_TEX_LRU_LOG_GRANULARITY   2
-#define I830_SETPARAM_ALLOW_BATCHBUFFER         3
-
-
-/* A memory manager for regions of shared memory:
- */
-#define I830_MEM_REGION_AGP 1
-
-typedef struct {
-	int region;
-	int alignment;
-	int size;
-	int *region_offset;	/* offset from start of fb or agp */
-} drmI830MemAlloc;
-
-typedef struct {
-	int region;
-	int region_offset;
-} drmI830MemFree;
-
-typedef struct {
-	int region;
-	int size;
-	int start;	
-} drmI830MemInitHeap;
-
-typedef struct {
-	int region;
-} drmI830MemDestroyHeap;
-
-#define MMIO_READ  0
-#define MMIO_WRITE 1
-
-#define MMIO_REGS_IA_PRIMATIVES_COUNT           0
-#define MMIO_REGS_IA_VERTICES_COUNT             1
-#define MMIO_REGS_VS_INVOCATION_COUNT           2
-#define MMIO_REGS_GS_PRIMITIVES_COUNT           3
-#define MMIO_REGS_GS_INVOCATION_COUNT           4
-#define MMIO_REGS_CL_PRIMITIVES_COUNT           5
-#define MMIO_REGS_CL_INVOCATION_COUNT           6
-#define MMIO_REGS_PS_INVOCATION_COUNT           7
-#define MMIO_REGS_PS_DEPTH_COUNT                8
-
-typedef struct {
-        unsigned int read_write:1;
-        unsigned int reg:31;
-        void __user *data;
-} drmI830MMIO;
-
-#endif /* _I830_DRM_H_ */
diff --git a/i965/server/intel.h b/i965/server/intel.h
deleted file mode 100644
index d7858a2..0000000
--- a/i965/server/intel.h
+++ /dev/null
@@ -1,328 +0,0 @@
-#ifndef _INTEL_H_
-#define _INTEL_H_
-
-#include "xf86drm.h"		/* drm_handle_t, etc */
-
-/* Intel */
-#ifndef PCI_CHIP_I810
-#define PCI_CHIP_I810              0x7121
-#define PCI_CHIP_I810_DC100        0x7123
-#define PCI_CHIP_I810_E            0x7125
-#define PCI_CHIP_I815              0x1132
-#define PCI_CHIP_I810_BRIDGE       0x7120
-#define PCI_CHIP_I810_DC100_BRIDGE 0x7122
-#define PCI_CHIP_I810_E_BRIDGE     0x7124
-#define PCI_CHIP_I815_BRIDGE       0x1130
-#endif
-
-#define PCI_CHIP_845_G			0x2562
-#define PCI_CHIP_I830_M			0x3577
-
-#ifndef PCI_CHIP_I855_GM
-#define PCI_CHIP_I855_GM	   0x3582
-#define PCI_CHIP_I855_GM_BRIDGE	   0x3580
-#endif
-
-#ifndef PCI_CHIP_I865_G
-#define PCI_CHIP_I865_G		   0x2572
-#define PCI_CHIP_I865_G_BRIDGE	   0x2570
-#endif
-
-#ifndef PCI_CHIP_I915_G
-#define PCI_CHIP_I915_G		   0x2582
-#define PCI_CHIP_I915_G_BRIDGE	   0x2580
-#endif
-
-#ifndef PCI_CHIP_I915_GM
-#define PCI_CHIP_I915_GM	   0x2592
-#define PCI_CHIP_I915_GM_BRIDGE	   0x2590
-#endif
-
-#ifndef PCI_CHIP_E7221_G
-#define PCI_CHIP_E7221_G	   0x258A
-/* Same as I915_G_BRIDGE */
-#define PCI_CHIP_E7221_G_BRIDGE	   0x2580
-#endif
-
-#ifndef PCI_CHIP_I945_G
-#define PCI_CHIP_I945_G        0x2772
-#define PCI_CHIP_I945_G_BRIDGE 0x2770
-#endif
-
-#ifndef PCI_CHIP_I945_GM
-#define PCI_CHIP_I945_GM        0x27A2
-#define PCI_CHIP_I945_GM_BRIDGE 0x27A0
-#endif
-
-#define IS_I810(pI810) (pI810->Chipset == PCI_CHIP_I810 ||	\
-			pI810->Chipset == PCI_CHIP_I810_DC100 || \
-			pI810->Chipset == PCI_CHIP_I810_E)
-#define IS_I815(pI810) (pI810->Chipset == PCI_CHIP_I815)
-#define IS_I830(pI810) (pI810->Chipset == PCI_CHIP_I830_M)
-#define IS_845G(pI810) (pI810->Chipset == PCI_CHIP_845_G)
-#define IS_I85X(pI810)  (pI810->Chipset == PCI_CHIP_I855_GM)
-#define IS_I852(pI810)  (pI810->Chipset == PCI_CHIP_I855_GM && (pI810->variant == I852_GM || pI810->variant == I852_GME))
-#define IS_I855(pI810)  (pI810->Chipset == PCI_CHIP_I855_GM && (pI810->variant == I855_GM || pI810->variant == I855_GME))
-#define IS_I865G(pI810) (pI810->Chipset == PCI_CHIP_I865_G)
-
-#define IS_I915G(pI810) (pI810->Chipset == PCI_CHIP_I915_G || pI810->Chipset == PCI_CHIP_E7221_G)
-#define IS_I915GM(pI810) (pI810->Chipset == PCI_CHIP_I915_GM)
-#define IS_I945G(pI810) (pI810->Chipset == PCI_CHIP_I945_G)
-#define IS_I945GM(pI810) (pI810->Chipset == PCI_CHIP_I945_GM)
-#define IS_I9XX(pI810) (IS_I915G(pI810) || IS_I915GM(pI810) || IS_I945G(pI810) || IS_I945GM(pI810))
-
-#define IS_MOBILE(pI810) (IS_I830(pI810) || IS_I85X(pI810) || IS_I915GM(pI810) || IS_I945GM(pI810))
-
-#define I830_GMCH_CTRL		0x52
-
-
-#define I830_GMCH_GMS_MASK			0x70
-#define I830_GMCH_GMS_DISABLED		0x00
-#define I830_GMCH_GMS_LOCAL			0x10
-#define I830_GMCH_GMS_STOLEN_512	0x20
-#define I830_GMCH_GMS_STOLEN_1024	0x30
-#define I830_GMCH_GMS_STOLEN_8192	0x40
-
-#define I855_GMCH_GMS_MASK			(0x7 << 4)
-#define I855_GMCH_GMS_DISABLED			0x00
-#define I855_GMCH_GMS_STOLEN_1M			(0x1 << 4)
-#define I855_GMCH_GMS_STOLEN_4M			(0x2 << 4)
-#define I855_GMCH_GMS_STOLEN_8M			(0x3 << 4)
-#define I855_GMCH_GMS_STOLEN_16M		(0x4 << 4)
-#define I855_GMCH_GMS_STOLEN_32M		(0x5 << 4)
-#define I915G_GMCH_GMS_STOLEN_48M		(0x6 << 4)
-#define I915G_GMCH_GMS_STOLEN_64M		(0x7 << 4)
-
-typedef unsigned char Bool;
-#define TRUE 1
-#define FALSE 0
-
-#define PIPE_NONE	0<<0
-#define PIPE_CRT	1<<0
-#define PIPE_TV		1<<1
-#define PIPE_DFP	1<<2
-#define PIPE_LFP	1<<3
-#define PIPE_CRT2	1<<4
-#define PIPE_TV2	1<<5
-#define PIPE_DFP2	1<<6
-#define PIPE_LFP2	1<<7
-
-typedef struct _I830MemPool *I830MemPoolPtr;
-typedef struct _I830MemRange *I830MemRangePtr;
-typedef struct _I830MemRange {
-   long Start;
-   long End;
-   long Size;
-   unsigned long Physical;
-   unsigned long Offset;		/* Offset of AGP-allocated portion */
-   unsigned long Alignment;
-   drm_handle_t Key;
-   unsigned long Pitch; // add pitch
-   I830MemPoolPtr Pool;
-} I830MemRange;
-
-typedef struct _I830MemPool {
-   I830MemRange Total;
-   I830MemRange Free;
-   I830MemRange Fixed;
-   I830MemRange Allocated;
-} I830MemPool;
-
-typedef struct {
-   int tail_mask;
-   I830MemRange mem;
-   unsigned char *virtual_start;
-   int head;
-   int tail;
-   int space;
-} I830RingBuffer;
-
-typedef struct _I830Rec {
-   unsigned char *MMIOBase;
-   unsigned char *FbBase;
-   int cpp;
-
-   unsigned int bios_version;
-
-   /* These are set in PreInit and never changed. */
-   long FbMapSize;
-   long TotalVideoRam;
-   I830MemRange StolenMemory;		/* pre-allocated memory */
-   long BIOSMemorySize;			/* min stolen pool size */
-   int BIOSMemSizeLoc;
-
-   /* These change according to what has been allocated. */
-   long FreeMemory;
-   I830MemRange MemoryAperture;
-   I830MemPool StolenPool;
-   long allocatedMemory;
-
-   /* Regions allocated either from the above pools, or from agpgart. */
-   /* for single and dual head configurations */
-   I830MemRange FrontBuffer;
-   I830MemRange FrontBuffer2;
-   I830MemRange Scratch;
-   I830MemRange Scratch2;
-
-   I830RingBuffer *LpRing;
-
-   I830MemRange BackBuffer;
-   I830MemRange DepthBuffer;
-   I830MemRange TexMem;
-   int TexGranularity;
-   I830MemRange ContextMem;
-   int drmMinor;
-   Bool have3DWindows;
-
-   Bool NeedRingBufferLow;
-   Bool allowPageFlip;
-   Bool disableTiling;
-
-   int Chipset;
-   unsigned long LinearAddr;
-   unsigned long MMIOAddr;
-
-   drmSize           registerSize;     /**< \brief MMIO register map size */
-   drm_handle_t         registerHandle;   /**< \brief MMIO register map handle */
-  //   IOADDRESS ioBase;
-   int               irq;              /**< \brief IRQ number */
-   int GttBound;
-
-   drm_handle_t ring_map;
-   unsigned int Fence[8];
-
-} I830Rec;
-
-/*
- * 12288 is set as the maximum, chosen because it is enough for
- * 1920x1440@32bpp with a 2048 pixel line pitch with some to spare.
- */
-#define I830_MAXIMUM_VBIOS_MEM		12288
-#define I830_DEFAULT_VIDEOMEM_2D	(MB(32) / 1024)
-#define I830_DEFAULT_VIDEOMEM_3D	(MB(64) / 1024)
-
-/* Flags for memory allocation function */
-#define FROM_ANYWHERE			0x00000000
-#define FROM_POOL_ONLY			0x00000001
-#define FROM_NEW_ONLY			0x00000002
-#define FROM_MASK			0x0000000f
-
-#define ALLOCATE_AT_TOP			0x00000010
-#define ALLOCATE_AT_BOTTOM		0x00000020
-#define FORCE_GAPS			0x00000040
-
-#define NEED_PHYSICAL_ADDR		0x00000100
-#define ALIGN_BOTH_ENDS			0x00000200
-#define FORCE_LOW			0x00000400
-
-#define ALLOC_NO_TILING			0x00001000
-#define ALLOC_INITIAL			0x00002000
-
-#define ALLOCATE_DRY_RUN		0x80000000
-
-/* Chipset registers for VIDEO BIOS memory RW access */
-#define _855_DRAM_RW_CONTROL 0x58
-#define _845_DRAM_RW_CONTROL 0x90
-#define DRAM_WRITE    0x33330000
-
-#define KB(x) ((x) * 1024)
-#define MB(x) ((x) * KB(1024))
-
-#define GTT_PAGE_SIZE			KB(4)
-#define ROUND_TO(x, y)			(((x) + (y) - 1) / (y) * (y))
-#define ROUND_DOWN_TO(x, y)		((x) / (y) * (y))
-#define ROUND_TO_PAGE(x)		ROUND_TO((x), GTT_PAGE_SIZE)
-#define ROUND_TO_MB(x)			ROUND_TO((x), MB(1))
-#define PRIMARY_RINGBUFFER_SIZE		KB(128)
-
-
-/* Ring buffer registers, p277, overview p19
- */
-#define LP_RING     0x2030
-#define HP_RING     0x2040
-
-#define RING_TAIL      0x00
-#define TAIL_ADDR           0x000FFFF8
-#define I830_TAIL_MASK	    0x001FFFF8
-
-#define RING_HEAD      0x04
-#define HEAD_WRAP_COUNT     0xFFE00000
-#define HEAD_WRAP_ONE       0x00200000
-#define HEAD_ADDR           0x001FFFFC
-#define I830_HEAD_MASK      0x001FFFFC
-
-#define RING_START     0x08
-#define START_ADDR          0x03FFFFF8
-#define I830_RING_START_MASK	0xFFFFF000
-
-#define RING_LEN       0x0C
-#define RING_NR_PAGES       0x001FF000 
-#define I830_RING_NR_PAGES	0x001FF000
-#define RING_REPORT_MASK    0x00000006
-#define RING_REPORT_64K     0x00000002
-#define RING_REPORT_128K    0x00000004
-#define RING_NO_REPORT      0x00000000
-#define RING_VALID_MASK     0x00000001
-#define RING_VALID          0x00000001
-#define RING_INVALID        0x00000000
-
-
-/* Fence/Tiling ranges [0..7]
- */
-#define FENCE            0x2000
-#define FENCE_NR         8
-
-#define I915G_FENCE_START_MASK	0x0ff00000
-
-#define I830_FENCE_START_MASK	0x07f80000
-
-#define FENCE_START_MASK    0x03F80000
-#define FENCE_X_MAJOR       0x00000000
-#define FENCE_Y_MAJOR       0x00001000
-#define FENCE_SIZE_MASK     0x00000700
-#define FENCE_SIZE_512K     0x00000000
-#define FENCE_SIZE_1M       0x00000100
-#define FENCE_SIZE_2M       0x00000200
-#define FENCE_SIZE_4M       0x00000300
-#define FENCE_SIZE_8M       0x00000400
-#define FENCE_SIZE_16M      0x00000500
-#define FENCE_SIZE_32M      0x00000600
-#define FENCE_SIZE_64M	    0x00000700
-#define I915G_FENCE_SIZE_1M       0x00000000
-#define I915G_FENCE_SIZE_2M       0x00000100
-#define I915G_FENCE_SIZE_4M       0x00000200
-#define I915G_FENCE_SIZE_8M       0x00000300
-#define I915G_FENCE_SIZE_16M      0x00000400
-#define I915G_FENCE_SIZE_32M      0x00000500
-#define I915G_FENCE_SIZE_64M	0x00000600
-#define I915G_FENCE_SIZE_128M	0x00000700
-#define FENCE_PITCH_1       0x00000000
-#define FENCE_PITCH_2       0x00000010
-#define FENCE_PITCH_4       0x00000020
-#define FENCE_PITCH_8       0x00000030
-#define FENCE_PITCH_16      0x00000040
-#define FENCE_PITCH_32      0x00000050
-#define FENCE_PITCH_64	    0x00000060
-#define FENCE_VALID         0x00000001
-
-#include <mmio.h>
-
-#  define MMIO_IN8(base, offset) \
-	*(volatile unsigned char *)(((unsigned char*)(base)) + (offset))
-#  define MMIO_IN32(base, offset) \
-	read_MMIO_LE32(base, offset)
-#  define MMIO_OUT8(base, offset, val) \
-	*(volatile unsigned char *)(((unsigned char*)(base)) + (offset)) = (val)
-#  define MMIO_OUT32(base, offset, val) \
-	*(volatile unsigned int *)(void *)(((unsigned char*)(base)) + (offset)) = CPU_TO_LE32(val)
-
-
-				/* Memory mapped register access macros */
-#define INREG8(addr)        MMIO_IN8(MMIO, addr)
-#define INREG(addr)         MMIO_IN32(MMIO, addr)
-#define OUTREG8(addr, val)  MMIO_OUT8(MMIO, addr, val)
-#define OUTREG(addr, val)   MMIO_OUT32(MMIO, addr, val)
-
-#define DSPABASE		0x70184
-
-#endif
diff --git a/i965/server/intel_dri.c b/i965/server/intel_dri.c
deleted file mode 100644
index 169fdbe..0000000
--- a/i965/server/intel_dri.c
+++ /dev/null
@@ -1,1282 +0,0 @@
-/**
- * \file server/intel_dri.c
- * \brief File to perform the device-specific initialization tasks typically
- * done in the X server.
- *
- * Here they are converted to run in the client (or perhaps a standalone
- * process), and to work with the frame buffer device rather than the X
- * server infrastructure.
- * 
- * Copyright (C) 2006 Dave Airlie (airlied@linux.ie)
-
- Permission is hereby granted, free of charge, to any person obtaining a
- copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sub license, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
- 
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial portions
- of the Software.
- 
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND/OR THEIR SUPPLIERS BE LIABLE FOR
- ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <unistd.h>
-
-#include "driver.h"
-#include "drm.h"
-
-#include "intel.h"
-#include "i830_dri.h"
-
-#include "memops.h"
-#include "pciaccess.h"
-
-static size_t drm_page_size;
-static int nextTile = 0;
-#define xf86DrvMsg(...) do {} while(0)
-
-static const int pitches[] = {
-  128 * 8,
-  128 * 16,
-  128 * 32,
-  128 * 64,
-  0
-};
-
-static Bool I830DRIDoMappings(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea);
-
-static unsigned long
-GetBestTileAlignment(unsigned long size)
-{
-   unsigned long i;
-
-   for (i = KB(512); i < size; i <<= 1)
-      ;
-
-   if (i > MB(64))
-      i = MB(64);
-
-   return i;
-}
-
-static void SetFenceRegs(const DRIDriverContext *ctx, I830Rec *pI830)
-{
-  int i;
-  unsigned char *MMIO = ctx->MMIOAddress;
-
-  for (i = 0; i < 8; i++) {
-    OUTREG(FENCE + i * 4, pI830->Fence[i]);
-    //    if (I810_DEBUG & DEBUG_VERBOSE_VGA)
-    fprintf(stderr,"Fence Register : %x\n", pI830->Fence[i]);
-  }
-}
-
-/* Tiled memory is good... really, really good...
- *
- * Need to make it less likely that we miss out on this - probably
- * need to move the frontbuffer away from the 'guarenteed' alignment
- * of the first memory segment, or perhaps allocate a discontigous
- * framebuffer to get more alignment 'sweet spots'.
- */
-static void
-SetFence(const DRIDriverContext *ctx, I830Rec *pI830,
-	 int nr, unsigned int start, unsigned int pitch,
-         unsigned int size)
-{
-   unsigned int val;
-   unsigned int fence_mask = 0;
-   unsigned int fence_pitch;
-
-   if (nr < 0 || nr > 7) {
-      fprintf(stderr,
-		 "SetFence: fence %d out of range\n",nr);
-      return;
-   }
-
-   pI830->Fence[nr] = 0;
-
-   if (IS_I9XX(pI830))
-   	fence_mask = ~I915G_FENCE_START_MASK;
-   else
-   	fence_mask = ~I830_FENCE_START_MASK;
-
-   if (start & fence_mask) {
-      fprintf(stderr,
-		 "SetFence: %d: start (0x%08x) is not %s aligned\n",
-		 nr, start, (IS_I9XX(pI830)) ? "1MB" : "512k");
-      return;
-   }
-
-   if (start % size) {
-      fprintf(stderr,
-		 "SetFence: %d: start (0x%08x) is not size (%dk) aligned\n",
-		 nr, start, size / 1024);
-      return;
-   }
-
-   if (pitch & 127) {
-      fprintf(stderr,
-		 "SetFence: %d: pitch (%d) not a multiple of 128 bytes\n",
-		 nr, pitch);
-      return;
-   }
-
-   val = (start | FENCE_X_MAJOR | FENCE_VALID);
-
-   if (IS_I9XX(pI830)) {
-   	switch (size) {
-	   case MB(1):
-      		val |= I915G_FENCE_SIZE_1M;
-      		break;
-   	   case MB(2):
-      		val |= I915G_FENCE_SIZE_2M;
-      		break;
-   	   case MB(4):
-      		val |= I915G_FENCE_SIZE_4M;
-      		break;
-   	   case MB(8):
-      		val |= I915G_FENCE_SIZE_8M;
-      		break;
-   	   case MB(16):
-      		val |= I915G_FENCE_SIZE_16M;
-      		break;
-   	   case MB(32):
-      		val |= I915G_FENCE_SIZE_32M;
-      		break;
-   	   case MB(64):
-      		val |= I915G_FENCE_SIZE_64M;
-      		break;
-   	   default:
-      		fprintf(stderr,
-		 "SetFence: %d: illegal size (%d kByte)\n", nr, size / 1024);
-      		return;
-   	}
-    } else {
-   	switch (size) {
-	   case KB(512):
-      		val |= FENCE_SIZE_512K;
-      		break;
-	   case MB(1):
-      		val |= FENCE_SIZE_1M;
-      		break;
-   	   case MB(2):
-      		val |= FENCE_SIZE_2M;
-      		break;
-   	   case MB(4):
-      		val |= FENCE_SIZE_4M;
-      		break;
-   	   case MB(8):
-      		val |= FENCE_SIZE_8M;
-      		break;
-   	   case MB(16):
-      		val |= FENCE_SIZE_16M;
-      		break;
-   	   case MB(32):
-      		val |= FENCE_SIZE_32M;
-      		break;
-   	   case MB(64):
-      		val |= FENCE_SIZE_64M;
-      		break;
-   	   default:
-      		fprintf(stderr,
-		 "SetFence: %d: illegal size (%d kByte)\n", nr, size / 1024);
-      		return;
-   	}
-   }
-
-   if (IS_I9XX(pI830))
-	fence_pitch = pitch / 512;
-   else
-	fence_pitch = pitch / 128;
-
-   switch (fence_pitch) {
-   case 1:
-      val |= FENCE_PITCH_1;
-      break;
-   case 2:
-      val |= FENCE_PITCH_2;
-      break;
-   case 4:
-      val |= FENCE_PITCH_4;
-      break;
-   case 8:
-      val |= FENCE_PITCH_8;
-      break;
-   case 16:
-      val |= FENCE_PITCH_16;
-      break;
-   case 32:
-      val |= FENCE_PITCH_32;
-      break;
-   case 64:
-      val |= FENCE_PITCH_64;
-      break;
-   default:
-      fprintf(stderr,
-		 "SetFence: %d: illegal pitch (%d)\n", nr, pitch);
-      return;
-   }
-
-   pI830->Fence[nr] = val;
-}
-
-static Bool
-MakeTiles(const DRIDriverContext *ctx, I830Rec *pI830, I830MemRange *pMem)
-{
-   int pitch, ntiles, i;
-
-   pitch = pMem->Pitch * ctx->cpp;
-   /*
-    * Simply try to break the region up into at most four pieces of size
-    * equal to the alignment.
-    */
-   ntiles = ROUND_TO(pMem->Size, pMem->Alignment) / pMem->Alignment;
-   if (ntiles >= 4) {
-      return FALSE;
-   }
-
-   for (i = 0; i < ntiles; i++, nextTile++) {
-     SetFence(ctx, pI830, nextTile, pMem->Start + i * pMem->Alignment,
-	       pitch, pMem->Alignment);
-   }
-   return TRUE;
-}
-
-static void I830SetupMemoryTiling(const DRIDriverContext *ctx, I830Rec *pI830)
-{
-  int i;
-
-  /* Clear out */
-  for (i = 0; i < 8; i++)
-    pI830->Fence[i] = 0;
-  
-  nextTile = 0;
-
-  if (pI830->BackBuffer.Alignment >= KB(512)) {
-    if (MakeTiles(ctx, pI830, &(pI830->BackBuffer))) {
-      fprintf(stderr,
-		 "Activating tiled memory for the back buffer.\n");
-    } else {
-      fprintf(stderr,
-		 "MakeTiles failed for the back buffer.\n");
-      pI830->allowPageFlip = FALSE;
-    }
-  }
-  
-  if (pI830->DepthBuffer.Alignment >= KB(512)) {
-    if (MakeTiles(ctx, pI830, &(pI830->DepthBuffer))) {
-      fprintf(stderr,
-		 "Activating tiled memory for the depth buffer.\n");
-    } else {
-      fprintf(stderr,
-		 "MakeTiles failed for the depth buffer.\n");
-    }
-  }
-
-  return;
-}
-
-static int I830DetectMemory(const DRIDriverContext *ctx, I830Rec *pI830)
-{
-  struct pci_device host_bridge;
-  uint32_t gmch_ctrl;
-  int memsize = 0;
-  int range;
-
-  memset(&host_bridge, 0, sizeof(host_bridge));
-
-  pci_device_cfg_read_u32(&host_bridge, &gmch_ctrl, I830_GMCH_CTRL);
-  
-  /* We need to reduce the stolen size, by the GTT and the popup.
-   * The GTT varying according the the FbMapSize and the popup is 4KB */
-  range = (ctx->shared.fbSize / (1024*1024)) + 4;
-
-   if (IS_I85X(pI830) || IS_I865G(pI830) || IS_I9XX(pI830)) {
-      switch (gmch_ctrl & I830_GMCH_GMS_MASK) {
-      case I855_GMCH_GMS_STOLEN_1M:
-	 memsize = MB(1) - KB(range);
-	 break;
-      case I855_GMCH_GMS_STOLEN_4M:
-	 memsize = MB(4) - KB(range);
-	 break;
-      case I855_GMCH_GMS_STOLEN_8M:
-	 memsize = MB(8) - KB(range);
-	 break;
-      case I855_GMCH_GMS_STOLEN_16M:
-	 memsize = MB(16) - KB(range);
-	 break;
-      case I855_GMCH_GMS_STOLEN_32M:
-	 memsize = MB(32) - KB(range);
-	 break;
-      case I915G_GMCH_GMS_STOLEN_48M:
-	 if (IS_I9XX(pI830))
-	    memsize = MB(48) - KB(range);
-	 break;
-      case I915G_GMCH_GMS_STOLEN_64M:
-	 if (IS_I9XX(pI830))
-	    memsize = MB(64) - KB(range);
-	 break;
-      }
-   } else {
-      switch (gmch_ctrl & I830_GMCH_GMS_MASK) {
-      case I830_GMCH_GMS_STOLEN_512:
-	 memsize = KB(512) - KB(range);
-	 break;
-      case I830_GMCH_GMS_STOLEN_1024:
-	 memsize = MB(1) - KB(range);
-	 break;
-      case I830_GMCH_GMS_STOLEN_8192:
-	 memsize = MB(8) - KB(range);
-	 break;
-      case I830_GMCH_GMS_LOCAL:
-	 memsize = 0;
-	 xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
-		    "Local memory found, but won't be used.\n");
-	 break;
-      }
-   }
-   if (memsize > 0) {
-     fprintf(stderr,
-		 "detected %d kB stolen memory.\n", memsize / 1024);
-   } else {
-     fprintf(stderr,
-		 "no video memory detected.\n");
-   }
-   return memsize;
-}
-
-static int AgpInit(const DRIDriverContext *ctx, I830Rec *info)
-{
-  unsigned long mode = 0x4;
-
-  if (drmAgpAcquire(ctx->drmFD) < 0) {
-    fprintf(stderr, "[gart] AGP not available\n");
-    return 0;
-  }
-  
-  if (drmAgpEnable(ctx->drmFD, mode) < 0) {
-    fprintf(stderr, "[gart] AGP not enabled\n");
-    drmAgpRelease(ctx->drmFD);
-    return 0;
-  }
-  else
-    fprintf(stderr, "[gart] AGP enabled at %dx\n", ctx->agpmode);
-
-  return 1;
-}
-
-/*
- * Allocate memory from the given pool.  Grow the pool if needed and if
- * possible.
- */
-static unsigned long
-AllocFromPool(const DRIDriverContext *ctx, I830Rec *pI830, 
-	      I830MemRange *result, I830MemPool *pool,
-	      long size, unsigned long alignment, int flags)
-{
-   long needed, start, end;
-
-   if (!result || !pool || !size)
-      return 0;
-
-   /* Calculate how much space is needed. */
-   if (alignment <= GTT_PAGE_SIZE)
-      needed = size;
-   else {
-	 start = ROUND_TO(pool->Free.Start, alignment);
-	 end = ROUND_TO(start + size, alignment);
-	 needed = end - pool->Free.Start;
-   }
-   if (needed > pool->Free.Size) {
-     return 0;
-   }
-
-   result->Start = ROUND_TO(pool->Free.Start, alignment);
-   pool->Free.Start += needed;
-   result->End = pool->Free.Start;
-
-   pool->Free.Size = pool->Free.End - pool->Free.Start;
-   result->Size = result->End - result->Start;
-   result->Pool = pool;
-   result->Alignment = alignment;
-   return needed;
-}
-
-static unsigned long AllocFromAGP(const DRIDriverContext *ctx, I830Rec *pI830, long size, unsigned long alignment, I830MemRange  *result)
-{
-   unsigned long start, end;
-   unsigned long newApStart, newApEnd;
-   int ret;
-   if (!result || !size)
-      return 0;
-   
-   if (!alignment)
-     alignment = 4;
-
-   start = ROUND_TO(pI830->MemoryAperture.Start, alignment);
-   end = ROUND_TO(start + size, alignment);
-   newApStart = end;
-   newApEnd = pI830->MemoryAperture.End;
-
-   ret=drmAgpAlloc(ctx->drmFD, size, 0, &(result->Physical), (drm_handle_t *)&(result->Key));
-   
-   if (ret)
-   {
-     fprintf(stderr,"drmAgpAlloc failed %d\n", ret);
-     return 0;
-   }
-   pI830->allocatedMemory += size;
-   pI830->MemoryAperture.Start = newApStart;
-   pI830->MemoryAperture.End = newApEnd;
-   pI830->MemoryAperture.Size = newApEnd - newApStart;
-   //   pI830->FreeMemory -= size;
-   result->Start = start;
-   result->End = start + size;
-   result->Size = size;
-   result->Offset = start;
-   result->Alignment = alignment;
-   result->Pool = NULL;
-  
-   return size;
-}
-
-unsigned long
-I830AllocVidMem(const DRIDriverContext *ctx, I830Rec *pI830, I830MemRange *result, I830MemPool *pool, long size, unsigned long alignment, int flags)
-{
-  int ret;
-
-  if (!result)
-    return 0;
-
-   /* Make sure these are initialised. */
-   result->Size = 0;
-   result->Key = -1;
-
-   if (!size) {
-      return 0;
-   }
-
-   if (pool->Free.Size < size)
-     return AllocFromAGP(ctx, pI830, size, alignment, result);
-   else
-   {
-     ret = AllocFromPool(ctx, pI830, result, pool, size, alignment, flags);
-
-     if (ret==0)
-       return AllocFromAGP(ctx, pI830, size, alignment, result);
-     return ret;
-   }
-}
-
-static Bool BindAgpRange(const DRIDriverContext *ctx, I830MemRange *mem)
-{
-  if (!mem)
-    return FALSE;
-  
-  if (mem->Key == -1)
-    return TRUE;
-
-  return !drmAgpBind(ctx->drmFD, mem->Key, mem->Offset);
-}
-
-/* simple memory allocation routines needed */
-/* put ring buffer in low memory */
-/* need to allocate front, back, depth buffers aligned correctly,
-   allocate ring buffer, 
-*/
-
-/* */
-static Bool
-I830AllocateMemory(const DRIDriverContext *ctx, I830Rec *pI830)
-{
-  unsigned long size, ret;
-  unsigned long lines, lineSize, align;
-
-  /* allocate ring buffer */
-  memset(pI830->LpRing, 0, sizeof(I830RingBuffer));
-  pI830->LpRing->mem.Key = -1;
-
-  size = PRIMARY_RINGBUFFER_SIZE;
-  
-  ret = I830AllocVidMem(ctx, pI830, &pI830->LpRing->mem, &pI830->StolenPool, size, 0x1000, 0);
-  
-  if (ret != size)
-  {
-    fprintf(stderr,"unable to allocate ring buffer %ld\n", ret);
-    return FALSE;
-  }
-
-  pI830->LpRing->tail_mask = pI830->LpRing->mem.Size - 1;
-
-  
-  /* allocate front buffer */
-  memset(&(pI830->FrontBuffer), 0, sizeof(pI830->FrontBuffer));
-  pI830->FrontBuffer.Key = -1;
-  pI830->FrontBuffer.Pitch = ctx->shared.virtualWidth;
-
-  align = KB(512);  
-
-  lineSize = ctx->shared.virtualWidth * ctx->cpp;
-  lines = (ctx->shared.virtualHeight + 15) / 16 * 16;
-  size = lineSize * lines;
-  size = ROUND_TO_PAGE(size);
-
-  align = GetBestTileAlignment(size);
-
-  ret = I830AllocVidMem(ctx, pI830, &pI830->FrontBuffer, &pI830->StolenPool, size, align, 0);
-  if (ret < size)
-  {
-    fprintf(stderr,"unable to allocate front buffer %ld\n", ret);
-    return FALSE;
-  }
-
-  memset(&(pI830->BackBuffer), 0, sizeof(pI830->BackBuffer));
-  pI830->BackBuffer.Key = -1;
-  pI830->BackBuffer.Pitch = ctx->shared.virtualWidth;
-
-  ret = I830AllocVidMem(ctx, pI830, &pI830->BackBuffer, &pI830->StolenPool, size, align, 0);
-  if (ret < size)
-  {
-    fprintf(stderr,"unable to allocate back buffer %ld\n", ret);
-    return FALSE;
-  }
-  
-  memset(&(pI830->DepthBuffer), 0, sizeof(pI830->DepthBuffer));
-  pI830->DepthBuffer.Key = -1;
-  pI830->DepthBuffer.Pitch = ctx->shared.virtualWidth;
-
-  ret = I830AllocVidMem(ctx, pI830, &pI830->DepthBuffer, &pI830->StolenPool, size, align, 0);
-  if (ret < size)
-  {
-    fprintf(stderr,"unable to allocate depth buffer %ld\n", ret);
-    return FALSE;
-  }
-
-  memset(&(pI830->ContextMem), 0, sizeof(pI830->ContextMem));
-  pI830->ContextMem.Key = -1;
-  size = KB(32);
-
-  ret = I830AllocVidMem(ctx, pI830, &pI830->ContextMem, &pI830->StolenPool, size, align, 0);
-  if (ret < size)
-  {
-    fprintf(stderr,"unable to allocate context buffer %ld\n", ret);
-    return FALSE;
-  }
-  
-  memset(&(pI830->TexMem), 0, sizeof(pI830->TexMem));
-  pI830->TexMem.Key = -1;
-
-  size = 32768 * 1024;
-  ret = AllocFromAGP(ctx, pI830, size, align, &pI830->TexMem);
-  if (ret < size)
-  {
-    fprintf(stderr,"unable to allocate texture memory %ld\n", ret);
-    return FALSE;
-  }
-
-  return TRUE;
-}
-
-static Bool
-I830BindMemory(const DRIDriverContext *ctx, I830Rec *pI830)
-{
-  if (!BindAgpRange(ctx, &pI830->LpRing->mem))
-    return FALSE;
-  if (!BindAgpRange(ctx, &pI830->FrontBuffer))
-    return FALSE;
-  if (!BindAgpRange(ctx, &pI830->BackBuffer))
-    return FALSE;
-  if (!BindAgpRange(ctx, &pI830->DepthBuffer))
-    return FALSE;
-  if (!BindAgpRange(ctx, &pI830->ContextMem))
-    return FALSE;
-  if (!BindAgpRange(ctx, &pI830->TexMem))
-    return FALSE;
-
-  return TRUE;
-}
-
-static Bool
-I830CleanupDma(const DRIDriverContext *ctx)
-{
-   drmI830Init info;
-
-   memset(&info, 0, sizeof(drmI830Init));
-   info.func = I830_CLEANUP_DMA;
-
-   if (drmCommandWrite(ctx->drmFD, DRM_I830_INIT,
-		       &info, sizeof(drmI830Init))) {
-     fprintf(stderr, "I830 Dma Cleanup Failed\n");
-      return FALSE;
-   }
-
-   return TRUE;
-}
-
-static Bool
-I830InitDma(const DRIDriverContext *ctx, I830Rec *pI830)
-{
-   I830RingBuffer *ring = pI830->LpRing;
-   drmI830Init info;
-
-   memset(&info, 0, sizeof(drmI830Init));
-   info.func = I830_INIT_DMA;
-
-   info.ring_start = ring->mem.Start + pI830->LinearAddr;
-   info.ring_end = ring->mem.End + pI830->LinearAddr;
-   info.ring_size = ring->mem.Size;
-
-   info.mmio_offset = (unsigned int)ctx->MMIOStart;
-
-   info.sarea_priv_offset = sizeof(drm_sarea_t);
-
-   info.front_offset = pI830->FrontBuffer.Start;
-   info.back_offset = pI830->BackBuffer.Start;
-   info.depth_offset = pI830->DepthBuffer.Start;
-   info.w = ctx->shared.virtualWidth;
-   info.h = ctx->shared.virtualHeight;
-   info.pitch = ctx->shared.virtualWidth;
-   info.back_pitch = pI830->BackBuffer.Pitch;
-   info.depth_pitch = pI830->DepthBuffer.Pitch;
-   info.cpp = ctx->cpp;
-
-   if (drmCommandWrite(ctx->drmFD, DRM_I830_INIT,
-		       &info, sizeof(drmI830Init))) {
-      fprintf(stderr,
-		 "I830 Dma Initialization Failed\n");
-      return FALSE;
-   }
-
-   return TRUE;
-}
-
-static int I830CheckDRMVersion( const DRIDriverContext *ctx,
-				  I830Rec *pI830 )
-{
-   drmVersionPtr  version;
-
-   version = drmGetVersion(ctx->drmFD);
-
-   if (version) {
-     int req_minor, req_patch;
-
-     req_minor = 4;
-     req_patch = 0;	
-
-     if (version->version_major != 1 ||
-	 version->version_minor < req_minor ||
-	 (version->version_minor == req_minor && 
-	  version->version_patchlevel < req_patch)) {
-       /* Incompatible drm version */
-       fprintf(stderr,
-	       "[dri] I830DRIScreenInit failed because of a version "
-	       "mismatch.\n"
-	       "[dri] i915.o kernel module version is %d.%d.%d "
-	       "but version 1.%d.%d or newer is needed.\n"
-	       "[dri] Disabling DRI.\n",
-	       version->version_major,
-	       version->version_minor,
-	       version->version_patchlevel,
-	       req_minor,
-	       req_patch);
-       drmFreeVersion(version);
-       return 0;
-     }
-     
-     pI830->drmMinor = version->version_minor;
-     drmFreeVersion(version);
-   }
-   return 1;
-}
-
-static void
-I830SetRingRegs(const DRIDriverContext *ctx, I830Rec *pI830)
-{
-  unsigned int itemp;
-  unsigned char *MMIO = ctx->MMIOAddress;
-
-   OUTREG(LP_RING + RING_LEN, 0);
-   OUTREG(LP_RING + RING_TAIL, 0);
-   OUTREG(LP_RING + RING_HEAD, 0);
-
-   if ((long)(pI830->LpRing->mem.Start & I830_RING_START_MASK) !=
-       pI830->LpRing->mem.Start) {
-      fprintf(stderr,
-		 "I830SetRingRegs: Ring buffer start (%lx) violates its "
-		 "mask (%x)\n", pI830->LpRing->mem.Start, I830_RING_START_MASK);
-   }
-   /* Don't care about the old value.  Reserved bits must be zero anyway. */
-   itemp = pI830->LpRing->mem.Start & I830_RING_START_MASK;
-   OUTREG(LP_RING + RING_START, itemp);
-
-   if (((pI830->LpRing->mem.Size - 4096) & I830_RING_NR_PAGES) !=
-       pI830->LpRing->mem.Size - 4096) {
-      fprintf(stderr,
-		 "I830SetRingRegs: Ring buffer size - 4096 (%lx) violates its "
-		 "mask (%x)\n", pI830->LpRing->mem.Size - 4096,
-		 I830_RING_NR_PAGES);
-   }
-   /* Don't care about the old value.  Reserved bits must be zero anyway. */
-   itemp = (pI830->LpRing->mem.Size - 4096) & I830_RING_NR_PAGES;
-   itemp |= (RING_NO_REPORT | RING_VALID);
-   OUTREG(LP_RING + RING_LEN, itemp);
-
-   pI830->LpRing->head = INREG(LP_RING + RING_HEAD) & I830_HEAD_MASK;
-   pI830->LpRing->tail = INREG(LP_RING + RING_TAIL);
-   pI830->LpRing->space = pI830->LpRing->head - (pI830->LpRing->tail + 8);
-   if (pI830->LpRing->space < 0)
-      pI830->LpRing->space += pI830->LpRing->mem.Size;
-
-   SetFenceRegs(ctx, pI830);
-   
-   /* RESET THE DISPLAY PIPE TO POINT TO THE FRONTBUFFER - hacky
-      hacky hacky */
-   OUTREG(DSPABASE, pI830->FrontBuffer.Start + pI830->LinearAddr);
-
-}
-
-static Bool
-I830SetParam(const DRIDriverContext *ctx, int param, int value)
-{
-   drmI830SetParam sp;
-
-   memset(&sp, 0, sizeof(sp));
-   sp.param = param;
-   sp.value = value;
-
-   if (drmCommandWrite(ctx->drmFD, DRM_I830_SETPARAM, &sp, sizeof(sp))) {
-      fprintf(stderr, "I830 SetParam Failed\n");
-      return FALSE;
-   }
-
-   return TRUE;
-}
-
-static Bool
-I830DRIMapScreenRegions(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea)
-{
-   fprintf(stderr,
-              "[drm] Mapping front buffer\n");
-
-   if (drmAddMap(ctx->drmFD,
-                 (drm_handle_t)(sarea->front_offset + pI830->LinearAddr),
-                 sarea->front_size,
-                 DRM_FRAME_BUFFER,  /*DRM_AGP,*/
-                 0,
-                 &sarea->front_handle) < 0) {
-     fprintf(stderr,
-	     "[drm] drmAddMap(front_handle) failed. Disabling DRI\n");
-      return FALSE;
-   }
-   ctx->shared.hFrameBuffer = sarea->front_handle;
-   ctx->shared.fbSize = sarea->front_size;
-   fprintf(stderr, "[drm] Front Buffer = 0x%08x\n",
-	   sarea->front_handle);
-
-   if (drmAddMap(ctx->drmFD,
-                 (drm_handle_t)(sarea->back_offset),
-                 sarea->back_size, DRM_AGP, 0,
-                 &sarea->back_handle) < 0) {
-      fprintf(stderr,
-                 "[drm] drmAddMap(back_handle) failed. Disabling DRI\n");
-      return FALSE;
-   }
-   fprintf(stderr, "[drm] Back Buffer = 0x%08x\n",
-              sarea->back_handle);
-
-   if (drmAddMap(ctx->drmFD,
-                 (drm_handle_t)sarea->depth_offset,
-                 sarea->depth_size, DRM_AGP, 0,
-                 &sarea->depth_handle) < 0) {
-      fprintf(stderr,
-                 "[drm] drmAddMap(depth_handle) failed. Disabling DRI\n");
-      return FALSE;
-   }
-   fprintf(stderr, "[drm] Depth Buffer = 0x%08x\n",
-              sarea->depth_handle);
-
-   if (drmAddMap(ctx->drmFD,
-		 (drm_handle_t)sarea->tex_offset,
-		 sarea->tex_size, DRM_AGP, 0,
-		 &sarea->tex_handle) < 0) {
-      fprintf(stderr,
-		 "[drm] drmAddMap(tex_handle) failed. Disabling DRI\n");
-      return FALSE;
-   }
-   fprintf(stderr, "[drm] textures = 0x%08x\n",
-	      sarea->tex_handle);
-
-   return TRUE;
-}
-
-
-static void
-I830DRIUnmapScreenRegions(const DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea)
-{
-#if 1
-   if (sarea->front_handle) {
-      drmRmMap(ctx->drmFD, sarea->front_handle);
-      sarea->front_handle = 0;
-   }
-#endif
-   if (sarea->back_handle) {
-      drmRmMap(ctx->drmFD, sarea->back_handle);
-      sarea->back_handle = 0;
-   }
-   if (sarea->depth_handle) {
-      drmRmMap(ctx->drmFD, sarea->depth_handle);
-      sarea->depth_handle = 0;
-   }
-   if (sarea->tex_handle) {
-      drmRmMap(ctx->drmFD, sarea->tex_handle);
-      sarea->tex_handle = 0;
-   }
-}
-
-static void
-I830InitTextureHeap(const DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea)
-{
-   /* Start up the simple memory manager for agp space */
-   drmI830MemInitHeap drmHeap;
-   drmHeap.region = I830_MEM_REGION_AGP;
-   drmHeap.start  = 0;
-   drmHeap.size   = sarea->tex_size;
-      
-   if (drmCommandWrite(ctx->drmFD, DRM_I830_INIT_HEAP,
-			  &drmHeap, sizeof(drmHeap))) {
-      fprintf(stderr,
-		    "[drm] Failed to initialized agp heap manager\n");
-   } else {
-      fprintf(stderr,
-		    "[drm] Initialized kernel agp heap manager, %d\n",
-		    sarea->tex_size);
-
-      I830SetParam(ctx, I830_SETPARAM_TEX_LRU_LOG_GRANULARITY, 
-		      sarea->log_tex_granularity);
-   }
-}
-
-static Bool
-I830DRIDoMappings(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea)
-{
-  if (drmAddMap(ctx->drmFD,
-		(drm_handle_t)pI830->LpRing->mem.Start,
-		pI830->LpRing->mem.Size, DRM_AGP, 0,
-		&pI830->ring_map) < 0) {
-    fprintf(stderr,
-	    "[drm] drmAddMap(ring_map) failed. Disabling DRI\n");
-    return FALSE;
-  }
-  fprintf(stderr, "[drm] ring buffer = 0x%08x\n",
-	  pI830->ring_map);
-
-  if (I830InitDma(ctx, pI830) == FALSE) {
-    return FALSE;
-  }
-  
-   /* init to zero to be safe */
-
-  I830DRIMapScreenRegions(ctx, pI830, sarea);
-  I830InitTextureHeap(ctx, pI830, sarea);
-
-   if (ctx->pciDevice != PCI_CHIP_845_G &&
-       ctx->pciDevice != PCI_CHIP_I830_M) {
-      I830SetParam(ctx, I830_SETPARAM_USE_MI_BATCHBUFFER_START, 1 );
-   }
-
-   /* Okay now initialize the dma engine */
-   {
-      pI830->irq = drmGetInterruptFromBusID(ctx->drmFD,
-					    ctx->pciBus,
-					    ctx->pciDevice,
-					    ctx->pciFunc);
-
-      if (drmCtlInstHandler(ctx->drmFD, pI830->irq)) {
-	 fprintf(stderr,
-		    "[drm] failure adding irq handler\n");
-	 pI830->irq = 0;
-	 return FALSE;
-      }
-      else
-	 fprintf(stderr,
-		    "[drm] dma control initialized, using IRQ %d\n",
-		    pI830->irq);
-   }
-
-   fprintf(stderr, "[dri] visual configs initialized\n");
-
-   return TRUE;
-}
-
-static Bool
-I830ClearScreen(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea)
-{
-  /* need to drmMap front and back buffers and zero them */
-  drmAddress map_addr;
-  int ret;
-
-  ret = drmMap(ctx->drmFD,
-	       sarea->front_handle,
-	       sarea->front_size,
-	       &map_addr);
-
-  if (ret)
-  {
-    fprintf(stderr, "Unable to map front buffer\n");
-    return FALSE;
-  }
-
-  drimemsetio((char *)map_addr,
-	      0,
-	      sarea->front_size);
-  drmUnmap(map_addr, sarea->front_size);
-
-
-  ret = drmMap(ctx->drmFD,
-	       sarea->back_handle,
-	       sarea->back_size,
-	       &map_addr);
-
-  if (ret)
-  {
-    fprintf(stderr, "Unable to map back buffer\n");
-    return FALSE;
-  }
-
-  drimemsetio((char *)map_addr,
-	      0,
-	      sarea->back_size);
-  drmUnmap(map_addr, sarea->back_size);
-
-  return TRUE;
-}
-
-static Bool
-I830ScreenInit(DRIDriverContext *ctx, I830Rec *pI830)
-		  
-{
-   I830DRIPtr pI830DRI;
-   drmI830Sarea *pSAREAPriv;
-   int err;
-      
-   drm_page_size = getpagesize();   
-
-   pI830->registerSize = ctx->MMIOSize;
-   /* This is a hack for now.  We have to have more than a 4k page here
-    * because of the size of the state.  However, the state should be
-    * in a per-context mapping.  This will be added in the Mesa 3.5 port
-    * of the I830 driver.
-    */
-   ctx->shared.SAREASize = SAREA_MAX;
-
-   /* Note that drmOpen will try to load the kernel module, if needed. */
-   ctx->drmFD = drmOpen("i915", NULL );
-   if (ctx->drmFD < 0) {
-      fprintf(stderr, "[drm] drmOpen failed\n");
-      return 0;
-   }
-
-   if ((err = drmSetBusid(ctx->drmFD, ctx->pciBusID)) < 0) {
-      fprintf(stderr, "[drm] drmSetBusid failed (%d, %s), %s\n",
-	      ctx->drmFD, ctx->pciBusID, strerror(-err));
-      return 0;
-   }
-
-   if (drmAddMap( ctx->drmFD,
-		  0,
-		  ctx->shared.SAREASize,
-		  DRM_SHM,
-		  DRM_CONTAINS_LOCK,
-		  &ctx->shared.hSAREA) < 0)
-   {
-     fprintf(stderr, "[drm] drmAddMap failed\n");
-     return 0;
-   }
-
-   fprintf(stderr, "[drm] added %d byte SAREA at 0x%08x\n",
-	   ctx->shared.SAREASize, ctx->shared.hSAREA);
-   
-   if (drmMap( ctx->drmFD,
-	       ctx->shared.hSAREA,
-	       ctx->shared.SAREASize,
-	       (drmAddressPtr)(&ctx->pSAREA)) < 0)
-   {
-      fprintf(stderr, "[drm] drmMap failed\n");
-      return 0;
-   
-   }
-   
-   memset(ctx->pSAREA, 0, ctx->shared.SAREASize);
-   fprintf(stderr, "[drm] mapped SAREA 0x%08x to %p, size %d\n",
-	   ctx->shared.hSAREA, ctx->pSAREA, ctx->shared.SAREASize);
-   
-
-   if (drmAddMap(ctx->drmFD, 
-		 ctx->MMIOStart,
-		 ctx->MMIOSize,
-		 DRM_REGISTERS, 
-		 DRM_READ_ONLY, 
-		 &pI830->registerHandle) < 0) {
-      fprintf(stderr, "[drm] drmAddMap mmio failed\n");	
-      return 0;
-   }
-   fprintf(stderr,
-	   "[drm] register handle = 0x%08x\n", pI830->registerHandle);
-
-
-   if (!I830CheckDRMVersion(ctx, pI830)) {
-     return FALSE;
-   }
-
-   /* Create a 'server' context so we can grab the lock for
-    * initialization ioctls.
-    */
-   if ((err = drmCreateContext(ctx->drmFD, &ctx->serverContext)) != 0) {
-      fprintf(stderr, "%s: drmCreateContext failed %d\n", __FUNCTION__, err);
-      return 0;
-   }
-
-   DRM_LOCK(ctx->drmFD, ctx->pSAREA, ctx->serverContext, 0); 
-
-   /* Initialize the SAREA private data structure */
-   pSAREAPriv = (drmI830Sarea *)(((char*)ctx->pSAREA) + 
-				 sizeof(drm_sarea_t));
-   memset(pSAREAPriv, 0, sizeof(*pSAREAPriv));
-
-   pI830->StolenMemory.Size = I830DetectMemory(ctx, pI830);
-   pI830->StolenMemory.Start = 0;
-   pI830->StolenMemory.End = pI830->StolenMemory.Size;
-
-   pI830->MemoryAperture.Start = pI830->StolenMemory.End;
-   pI830->MemoryAperture.End = KB(40000);
-   pI830->MemoryAperture.Size = pI830->MemoryAperture.End - pI830->MemoryAperture.Start;
-
-   pI830->StolenPool.Fixed = pI830->StolenMemory;
-   pI830->StolenPool.Total = pI830->StolenMemory;
-   pI830->StolenPool.Free = pI830->StolenPool.Total;
-   pI830->FreeMemory = pI830->StolenPool.Total.Size;
-
-   if (!AgpInit(ctx, pI830))
-     return FALSE;
-
-   if (I830AllocateMemory(ctx, pI830) == FALSE)
-   {
-     return FALSE;
-   }
-
-   if (I830BindMemory(ctx, pI830) == FALSE)
-   {
-     return FALSE;
-   }
-
-   pSAREAPriv->front_offset = pI830->FrontBuffer.Start;
-   pSAREAPriv->front_size = pI830->FrontBuffer.Size;
-   pSAREAPriv->width = ctx->shared.virtualWidth;
-   pSAREAPriv->height = ctx->shared.virtualHeight;
-   pSAREAPriv->pitch = ctx->shared.virtualWidth;
-   pSAREAPriv->virtualX = ctx->shared.virtualWidth;
-   pSAREAPriv->virtualY = ctx->shared.virtualHeight;
-   pSAREAPriv->back_offset = pI830->BackBuffer.Start;
-   pSAREAPriv->back_size = pI830->BackBuffer.Size;
-   pSAREAPriv->depth_offset = pI830->DepthBuffer.Start;
-   pSAREAPriv->depth_size = pI830->DepthBuffer.Size;
-   pSAREAPriv->tex_offset = pI830->TexMem.Start;
-   pSAREAPriv->tex_size = pI830->TexMem.Size;
-   pSAREAPriv->log_tex_granularity = pI830->TexGranularity;
-
-   ctx->driverClientMsg = malloc(sizeof(I830DRIRec));
-   ctx->driverClientMsgSize = sizeof(I830DRIRec);
-   pI830DRI = (I830DRIPtr)ctx->driverClientMsg;
-   pI830DRI->deviceID = pI830->Chipset;
-   pI830DRI->regsSize = I830_REG_SIZE;
-   pI830DRI->width = ctx->shared.virtualWidth;
-   pI830DRI->height = ctx->shared.virtualHeight;
-   pI830DRI->mem = ctx->shared.fbSize;
-   pI830DRI->cpp = ctx->cpp;
-   pI830DRI->backOffset = pI830->BackBuffer.Start;
-   pI830DRI->backPitch = pI830->BackBuffer.Pitch; 
-
-   pI830DRI->depthOffset = pI830->DepthBuffer.Start;
-   pI830DRI->depthPitch = pI830->DepthBuffer.Pitch; 
-
-   pI830DRI->fbOffset = pI830->FrontBuffer.Start;
-   pI830DRI->fbStride = pI830->FrontBuffer.Pitch;
-
-   pI830DRI->bitsPerPixel = ctx->bpp;
-   pI830DRI->sarea_priv_offset = sizeof(drm_sarea_t);
-   
-   err = I830DRIDoMappings(ctx, pI830, pSAREAPriv);
-   if (err == FALSE)
-       return FALSE;
-
-   I830SetupMemoryTiling(ctx, pI830);
-
-   /* Quick hack to clear the front & back buffers.  Could also use
-    * the clear ioctl to do this, but would need to setup hw state
-    * first.
-    */
-   I830ClearScreen(ctx, pI830, pSAREAPriv);
-
-   I830SetRingRegs(ctx, pI830);
-
-   return TRUE;
-}
-
-
-/**
- * \brief Validate the fbdev mode.
- * 
- * \param ctx display handle.
- *
- * \return one on success, or zero on failure.
- *
- * Saves some registers and returns 1.
- *
- * \sa radeonValidateMode().
- */
-static int i830ValidateMode( const DRIDriverContext *ctx )
-{
-  return 1;
-}
-
-/**
- * \brief Examine mode returned by fbdev.
- * 
- * \param ctx display handle.
- *
- * \return one on success, or zero on failure.
- *
- * Restores registers that fbdev has clobbered and returns 1.
- *
- * \sa i810ValidateMode().
- */
-static int i830PostValidateMode( const DRIDriverContext *ctx )
-{
-  I830Rec *pI830 = ctx->driverPrivate;
-
-  I830SetRingRegs(ctx, pI830);
-  return 1;
-}
-
-
-/**
- * \brief Initialize the framebuffer device mode
- *
- * \param ctx display handle.
- *
- * \return one on success, or zero on failure.
- *
- * Fills in \p info with some default values and some information from \p ctx
- * and then calls I810ScreenInit() for the screen initialization.
- * 
- * Before exiting clears the framebuffer memory accessing it directly.
- */
-static int i830InitFBDev( DRIDriverContext *ctx )
-{
-  I830Rec *pI830 = calloc(1, sizeof(I830Rec));
-  int i;
-
-   {
-      int  dummy = ctx->shared.virtualWidth;
-
-      switch (ctx->bpp / 8) {
-      case 1: dummy = (ctx->shared.virtualWidth + 127) & ~127; break;
-      case 2: dummy = (ctx->shared.virtualWidth +  31) &  ~31; break;
-      case 3:
-      case 4: dummy = (ctx->shared.virtualWidth +  15) &  ~15; break;
-      }
-
-      ctx->shared.virtualWidth = dummy;
-      ctx->shared.Width = ctx->shared.virtualWidth;
-   }
-
-
-   for (i = 0; pitches[i] != 0; i++) {
-     if (pitches[i] >= ctx->shared.virtualWidth) {
-       ctx->shared.virtualWidth = pitches[i];
-       break;
-     }
-   }
-
-   ctx->driverPrivate = (void *)pI830;
-   
-   pI830->LpRing = calloc(1, sizeof(I830RingBuffer));
-   pI830->Chipset = ctx->chipset;
-   pI830->LinearAddr = ctx->FBStart;
-
-   if (!I830ScreenInit( ctx, pI830 ))
-      return 0;
-
-   
-   return 1;
-}
-
-
-/**
- * \brief The screen is being closed, so clean up any state and free any
- * resources used by the DRI.
- *
- * \param ctx display handle.
- *
- * Unmaps the SAREA, closes the DRM device file descriptor and frees the driver
- * private data.
- */
-static void i830HaltFBDev( DRIDriverContext *ctx )
-{
-  drmI830Sarea *pSAREAPriv;
-  I830Rec *pI830 = ctx->driverPrivate;
-
-   if (pI830->irq) {
-       drmCtlUninstHandler(ctx->drmFD);
-       pI830->irq = 0;   }
-
-   I830CleanupDma(ctx);
-
-  pSAREAPriv = (drmI830Sarea *)(((char*)ctx->pSAREA) + 
-				sizeof(drm_sarea_t));
-
-  I830DRIUnmapScreenRegions(ctx, pI830, pSAREAPriv);
-  drmUnmap( ctx->pSAREA, ctx->shared.SAREASize );
-  drmClose(ctx->drmFD);
-  
-  if (ctx->driverPrivate) {
-    free(ctx->driverPrivate);
-    ctx->driverPrivate = 0;
-  }
-}
-
-
-extern void i810NotifyFocus( int );
-
-/**
- * \brief Exported driver interface for Mini GLX.
- *
- * \sa DRIDriverRec.
- */
-const struct DRIDriverRec __driDriver = {
-   i830ValidateMode,
-   i830PostValidateMode,
-   i830InitFBDev,
-   i830HaltFBDev,
-   NULL,//I830EngineShutdown,
-   NULL, //I830EngineRestore,  
-#ifndef _EMBEDDED
-   0,
-#else
-   i810NotifyFocus, 
-#endif
-};
diff --git a/shared/intel_batchbuffer.c b/shared/intel_batchbuffer.c
new file mode 100644
index 0000000..a594fb6
--- /dev/null
+++ b/shared/intel_batchbuffer.c
@@ -0,0 +1,294 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "intel_batchbuffer.h"
+#include "intel_ioctl.h"
+#include "intel_decode.h"
+#include "intel_reg.h"
+
+/* Relocations in kernel space:
+ *    - pass dma buffer separately
+ *    - memory manager knows how to patch
+ *    - pass list of dependent buffers
+ *    - pass relocation list
+ *
+ * Either:
+ *    - get back an offset for buffer to fire
+ *    - memory manager knows how to fire buffer
+ *
+ * Really want the buffer to be AGP and pinned.
+ *
+ */
+
+/* Cliprect fence: The highest fence protecting a dma buffer
+ * containing explicit cliprect information.  Like the old drawable
+ * lock but irq-driven.  X server must wait for this fence to expire
+ * before changing cliprects [and then doing sw rendering?].  For
+ * other dma buffers, the scheduler will grab current cliprect info
+ * and mix into buffer.  X server must hold the lock while changing
+ * cliprects???  Make per-drawable.  Need cliprects in shared memory
+ * -- beats storing them with every cmd buffer in the queue.
+ *
+ * ==> X server must wait for this fence to expire before touching the
+ * framebuffer with new cliprects.
+ *
+ * ==> Cliprect-dependent buffers associated with a
+ * cliprect-timestamp.  All of the buffers associated with a timestamp
+ * must go to hardware before any buffer with a newer timestamp.
+ *
+ * ==> Dma should be queued per-drawable for correct X/GL
+ * synchronization.  Or can fences be used for this?
+ *
+ * Applies to: Blit operations, metaops, X server operations -- X
+ * server automatically waits on its own dma to complete before
+ * modifying cliprects ???
+ */
+
+void
+intel_batchbuffer_reset(struct intel_batchbuffer *batch)
+{
+   struct intel_context *intel = batch->intel;
+
+   if (batch->buf != NULL) {
+      dri_bo_unreference(batch->buf);
+      batch->buf = NULL;
+   }
+
+   batch->buf = dri_bo_alloc(intel->bufmgr, "batchbuffer",
+			     intel->maxBatchSize, 4096,
+			     DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED);
+   dri_bo_map(batch->buf, GL_TRUE);
+   batch->map = batch->buf->virtual;
+   batch->size = intel->maxBatchSize;
+   batch->ptr = batch->map;
+   batch->dirty_state = ~0;
+   batch->cliprect_mode = IGNORE_CLIPRECTS;
+
+   /* account batchbuffer in aperture */
+   dri_bufmgr_check_aperture_space(batch->buf);
+
+}
+
+struct intel_batchbuffer *
+intel_batchbuffer_alloc(struct intel_context *intel)
+{
+   /* Returns NULL if out of memory; calloc zeroes buf and last_fence. */
+   struct intel_batchbuffer *batch = calloc(1, sizeof(*batch));
+   if (batch == NULL)
+      return NULL;
+   batch->intel = intel;
+   intel_batchbuffer_reset(batch);
+   return batch;
+}
+
+void
+intel_batchbuffer_free(struct intel_batchbuffer *batch)
+{
+   if (batch->last_fence) {
+      dri_fence_wait(batch->last_fence);
+      dri_fence_unreference(batch->last_fence);
+      batch->last_fence = NULL;
+   }
+   if (batch->map) {
+      dri_bo_unmap(batch->buf);
+      batch->map = NULL;
+   }
+   dri_bo_unreference(batch->buf);
+   batch->buf = NULL;
+   free(batch);
+}
+
+
+
+/* TODO: Push this whole function into bufmgr.
+ */
+static void
+do_flush_locked(struct intel_batchbuffer *batch,
+		GLuint used, GLboolean allow_unlock)
+{
+   struct intel_context *intel = batch->intel;
+   void *start;
+   GLuint count;
+
+   dri_bo_unmap(batch->buf);
+   start = dri_process_relocs(batch->buf, &count);
+
+   batch->map = NULL;
+   batch->ptr = NULL;
+
+   /* Throw away non-effective packets.  Won't work once we have
+    * hardware contexts which would preserve statechanges beyond a
+    * single buffer.
+    */
+
+   if (!(intel->numClipRects == 0 &&
+	 batch->cliprect_mode == LOOP_CLIPRECTS)) {
+      if (intel->ttm == GL_TRUE) {
+	 intel_exec_ioctl(batch->intel,
+			  used,
+			  batch->cliprect_mode != LOOP_CLIPRECTS,
+			  allow_unlock,
+			  start, count, &batch->last_fence);
+      } else {
+	 intel_batch_ioctl(batch->intel,
+			   batch->buf->offset,
+			   used,
+			   batch->cliprect_mode != LOOP_CLIPRECTS,
+			   allow_unlock);
+      }
+   }
+      
+   dri_post_submit(batch->buf, &batch->last_fence);
+
+   if (intel->numClipRects == 0 &&
+       batch->cliprect_mode == LOOP_CLIPRECTS) {
+      if (allow_unlock) {
+	 /* If we are not doing any actual user-visible rendering,
+	  * do a sched_yield to keep the app from pegging the cpu while
+	  * achieving nothing.
+	  */
+         UNLOCK_HARDWARE(intel);
+         sched_yield();
+         LOCK_HARDWARE(intel);
+      }
+   }
+
+   if (INTEL_DEBUG & DEBUG_BATCH) {
+      dri_bo_map(batch->buf, GL_FALSE);
+      intel_decode(batch->buf->virtual, used / 4, batch->buf->offset,
+		   intel->intelScreen->deviceID);
+      dri_bo_unmap(batch->buf);
+
+      if (intel->vtbl.debug_batch != NULL)
+	 intel->vtbl.debug_batch(intel);
+   }
+
+   intel->vtbl.new_batch(intel);
+}
+
+void
+_intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
+			 int line)
+{
+   struct intel_context *intel = batch->intel;
+   GLuint used = batch->ptr - batch->map;
+   GLboolean was_locked = intel->locked;
+
+   if (used == 0)
+      return;
+
+   if (INTEL_DEBUG & DEBUG_BATCH)
+      fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
+	      used);
+   /* Add the MI_BATCH_BUFFER_END.  Always add an MI_FLUSH - this is a
+    * performance drain that we would like to avoid.
+    */
+   if (used & 4) {
+      ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd();
+      ((int *) batch->ptr)[1] = 0;
+      ((int *) batch->ptr)[2] = MI_BATCH_BUFFER_END;
+      used += 12;
+   }
+   else {
+      ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd();
+      ((int *) batch->ptr)[1] = MI_BATCH_BUFFER_END;
+      used += 8;
+   }
+
+   /* Workaround for recursive batchbuffer flushing: If the window is
+    * moved, we can get into a case where we try to flush during a
+    * flush.  What happens is that when we try to grab the lock for
+    * the first flush, we detect that the window moved which then
+    * causes another flush (from the intel_draw_buffer() call in
+    * intelUpdatePageFlipping()).  To work around this we reset the
+    * batchbuffer tail pointer before trying to get the lock.  This
+    * prevents the nested buffer flush, but a better fix would be to
+    * avoid that in the first place. */
+   batch->ptr = batch->map;
+
+   /* TODO: Just pass the relocation list and dma buffer up to the
+    * kernel.
+    */
+   if (!was_locked)
+      LOCK_HARDWARE(intel);
+
+   do_flush_locked(batch, used, GL_FALSE);
+
+   if (!was_locked)
+      UNLOCK_HARDWARE(intel);
+
+   if (INTEL_DEBUG & DEBUG_SYNC) {
+      fprintf(stderr, "waiting for idle\n");
+      if (batch->last_fence != NULL)
+	 dri_fence_wait(batch->last_fence);
+   }
+
+   /* Reset the buffer:
+    */
+   intel_batchbuffer_reset(batch);
+}
+
+void
+intel_batchbuffer_finish(struct intel_batchbuffer *batch)
+{
+   intel_batchbuffer_flush(batch);
+   if (batch->last_fence != NULL)
+      dri_fence_wait(batch->last_fence);
+}
+
+
+/*  This is the only way buffers get added to the validate list.
+ */
+GLboolean
+intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
+                             dri_bo *buffer,
+                             GLuint flags, GLuint delta)
+{
+   int ret;
+
+   ret = dri_emit_reloc(batch->buf, flags, delta, batch->ptr - batch->map, buffer);
+
+   /*
+    * Using the old buffer offset, write in what the right data would be, in case
+    * the buffer doesn't move and we can short-circuit the relocation processing
+    * in the kernel
+    */
+   intel_batchbuffer_emit_dword (batch, buffer->offset + delta);
+
+   return GL_TRUE;
+}
+
+void
+intel_batchbuffer_data(struct intel_batchbuffer *batch,
+                       const void *data, GLuint bytes,
+		       enum cliprect_mode cliprect_mode)
+{
+   assert((bytes & 3) == 0);
+   intel_batchbuffer_require_space(batch, bytes, cliprect_mode);
+   __memcpy(batch->ptr, data, bytes);
+   batch->ptr += bytes;
+}
diff --git a/shared/intel_batchbuffer.h b/shared/intel_batchbuffer.h
new file mode 100644
index 0000000..0da6020
--- /dev/null
+++ b/shared/intel_batchbuffer.h
@@ -0,0 +1,147 @@
+#ifndef INTEL_BATCHBUFFER_H
+#define INTEL_BATCHBUFFER_H
+
+#include "mtypes.h"
+
+#include "dri_bufmgr.h"
+
+struct intel_context;
+
+#define BATCH_SZ 16384
+#define BATCH_RESERVED 16
+
+enum cliprect_mode {
+   /**
+    * Batchbuffer contents may be looped over per cliprect, but do not
+    * require it.
+    */
+   IGNORE_CLIPRECTS,
+   /**
+    * Batchbuffer contents require looping over per cliprect at batch submit
+    * time.
+    */
+   LOOP_CLIPRECTS,
+   /**
+    * Batchbuffer contents contain drawing that should not be executed multiple
+    * times.
+    */
+   NO_LOOP_CLIPRECTS,
+   /**
+    * Batchbuffer contents contain drawing that already handles cliprects, such
+    * as 2D drawing to front/back/depth that doesn't respect DRAWING_RECTANGLE.
+    * Equivalent behavior to NO_LOOP_CLIPRECTS, but may not persist in batch
+    * outside of LOCK/UNLOCK.
+    */
+   REFERENCES_CLIPRECTS
+};
+
+struct intel_batchbuffer
+{
+   struct intel_context *intel;
+
+   dri_bo *buf;
+   dri_fence *last_fence;
+
+   GLubyte *map;
+   GLubyte *ptr;
+
+   enum cliprect_mode cliprect_mode;
+
+   GLuint size;
+
+   GLuint dirty_state;
+};
+
+struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context
+                                                  *intel);
+
+void intel_batchbuffer_free(struct intel_batchbuffer *batch);
+
+
+void intel_batchbuffer_finish(struct intel_batchbuffer *batch);
+
+void _intel_batchbuffer_flush(struct intel_batchbuffer *batch,
+			      const char *file, int line);
+
+#define intel_batchbuffer_flush(batch) \
+	_intel_batchbuffer_flush(batch, __FILE__, __LINE__)
+
+void intel_batchbuffer_reset(struct intel_batchbuffer *batch);
+
+
+/* Unlike bmBufferData, this currently requires the buffer be mapped.
+ * Consider it a convenience function wrapping multiple
+ * intel_batchbuffer_emit_dword() calls.
+ */
+void intel_batchbuffer_data(struct intel_batchbuffer *batch,
+                            const void *data, GLuint bytes,
+			    enum cliprect_mode cliprect_mode);
+
+void intel_batchbuffer_release_space(struct intel_batchbuffer *batch,
+                                     GLuint bytes);
+
+GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
+                                       dri_bo *buffer,
+                                       GLuint flags, GLuint offset);
+
+/* Inline functions - might actually be better off with these
+ * non-inlined.  Certainly better off switching all command packets to
+ * be passed as structs rather than dwords, but that's a little bit of
+ * work...
+ */
+static INLINE GLuint
+intel_batchbuffer_space(struct intel_batchbuffer *batch)
+{
+   return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map);
+}
+
+
+static INLINE void
+intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword)
+{
+   assert(batch->map);
+   assert(intel_batchbuffer_space(batch) >= 4);
+   *(GLuint *) (batch->ptr) = dword;
+   batch->ptr += 4;
+}
+
+static INLINE void
+intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
+                                GLuint sz,
+				enum cliprect_mode cliprect_mode)
+{
+   assert(sz < batch->size - 8);
+   if (intel_batchbuffer_space(batch) < sz)
+      intel_batchbuffer_flush(batch);
+
+   if (cliprect_mode != IGNORE_CLIPRECTS) {
+      if (batch->cliprect_mode == IGNORE_CLIPRECTS) {
+	 batch->cliprect_mode = cliprect_mode;
+      } else {
+	 if (batch->cliprect_mode != cliprect_mode) {
+	    intel_batchbuffer_flush(batch);
+	    batch->cliprect_mode = cliprect_mode;
+	 }
+      }
+   }
+}
+
+/* Here are the crusty old macros, to be removed:
+ */
+#define BATCH_LOCALS
+
+#define BEGIN_BATCH(n, cliprect_mode) do {				\
+   intel_batchbuffer_require_space(intel->batch, (n)*4, cliprect_mode); \
+} while (0)
+
+#define OUT_BATCH(d)  intel_batchbuffer_emit_dword(intel->batch, d)
+
+#define OUT_RELOC(buf, flags, delta) do { /* flags -> emit_reloc() flags */ \
+   assert((delta) >= 0);						\
+   intel_batchbuffer_emit_reloc(intel->batch, buf, flags, delta);	\
+} while (0)
+
+#define ADVANCE_BATCH() do { } while(0)
+
+
+#endif
diff --git a/shared/intel_blit.c b/shared/intel_blit.c
new file mode 100644
index 0000000..25ac609
--- /dev/null
+++ b/shared/intel_blit.c
@@ -0,0 +1,631 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include <stdio.h>
+#include <errno.h>
+
+#include "mtypes.h"
+#include "context.h"
+#include "enums.h"
+
+#include "intel_batchbuffer.h"
+#include "intel_blit.h"
+#include "intel_buffers.h"
+#include "intel_context.h"
+#include "intel_fbo.h"
+#include "intel_reg.h"
+#include "intel_regions.h"
+
+#define FILE_DEBUG_FLAG DEBUG_BLIT
+
+/**
+ * Copy the back color buffer to the front color buffer. 
+ * Used for SwapBuffers().
+ */
+void
+intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
+                const drm_clip_rect_t * rect)
+{
+
+   struct intel_context *intel;
+   const intelScreenPrivate *intelScreen;
+   int ret;
+
+   DBG("%s\n", __FUNCTION__);
+
+   assert(dPriv);
+
+   intel = intelScreenContext(dPriv->driScreenPriv->private);
+   if (!intel)
+      return;
+
+   intelScreen = intel->intelScreen;
+
+   if (intel->last_swap_fence) {
+      dri_fence_wait(intel->last_swap_fence);
+      dri_fence_unreference(intel->last_swap_fence);
+      intel->last_swap_fence = NULL;
+   }
+   intel->last_swap_fence = intel->first_swap_fence;
+   intel->first_swap_fence = NULL;
+
+   /* The LOCK_HARDWARE is required for the cliprects.  Buffer offsets
+    * should work regardless.
+    */
+   LOCK_HARDWARE(intel);
+
+   if (dPriv && dPriv->numClipRects) {
+      struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
+      struct intel_region *src, *dst;
+      int nbox = dPriv->numClipRects;
+      drm_clip_rect_t *pbox = dPriv->pClipRects;
+      int cpp;
+      int src_pitch, dst_pitch;
+      unsigned short src_x, src_y;
+      int BR13, CMD;
+      int i;
+
+      /* Check the framebuffer and regions before dereferencing them. */
+      ASSERT(intel_fb);
+      ASSERT(intel_fb->Base.Name == 0);    /* Not a user-created FBO */
+      src = intel_get_rb_region(&intel_fb->Base, BUFFER_BACK_LEFT);
+      dst = intel_get_rb_region(&intel_fb->Base, BUFFER_FRONT_LEFT);
+      ASSERT(src);
+      ASSERT(dst);
+      ASSERT(src->cpp == dst->cpp);
+
+      src_pitch = src->pitch * src->cpp;    /* pitches in bytes */
+      dst_pitch = dst->pitch * dst->cpp;
+
+      cpp = src->cpp;
+
+      if (cpp == 2) {
+	 BR13 = (0xCC << 16) | (1 << 24);
+	 CMD = XY_SRC_COPY_BLT_CMD;
+      }
+      else {
+	 BR13 = (0xCC << 16) | (1 << 24) | (1 << 25);
+	 CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
+      }
+
+#ifndef I915
+      if (src->tiled) {
+	 CMD |= XY_SRC_TILED;
+	 src_pitch /= 4;
+      }
+      if (dst->tiled) {
+	 CMD |= XY_DST_TILED;
+	 dst_pitch /= 4;
+      }
+#endif
+      /* do space/cliprects check before going any further */
+      intel_batchbuffer_require_space(intel->batch, 8 * 4, REFERENCES_CLIPRECTS);
+   again:
+      ret = dri_bufmgr_check_aperture_space(dst->buffer);
+      ret |= dri_bufmgr_check_aperture_space(src->buffer);
+      
+      if (ret) {
+	intel_batchbuffer_flush(intel->batch);
+	goto again;
+      }
+      
+      for (i = 0; i < nbox; i++, pbox++) {
+	 drm_clip_rect_t box = *pbox;
+
+	 if (rect) {
+	    if (!intel_intersect_cliprects(&box, &box, rect))
+	       continue;
+	 }
+
+	 if (box.x1 >= box.x2 ||
+	     box.y1 >= box.y2)
+	    continue;
+
+	 assert(box.x1 < box.x2);
+	 assert(box.y1 < box.y2);
+	 src_x = box.x1 - dPriv->x + dPriv->backX;
+	 src_y = box.y1 - dPriv->y + dPriv->backY;
+
+	 BEGIN_BATCH(8, REFERENCES_CLIPRECTS);
+	 OUT_BATCH(CMD);
+	 OUT_BATCH(BR13 | dst_pitch);
+	 OUT_BATCH((box.y1 << 16) | box.x1);
+	 OUT_BATCH((box.y2 << 16) | box.x2);
+
+	 OUT_RELOC(dst->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, 0);
+	 OUT_BATCH((src_y << 16) | src_x);
+	 OUT_BATCH(src_pitch);
+	 OUT_RELOC(src->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
+	 ADVANCE_BATCH();
+      }
+
+      if (intel->first_swap_fence)
+	 dri_fence_unreference(intel->first_swap_fence);
+      intel_batchbuffer_flush(intel->batch);
+      intel->first_swap_fence = intel->batch->last_fence;
+      if (intel->first_swap_fence)
+	 dri_fence_reference(intel->first_swap_fence);
+   }
+
+   UNLOCK_HARDWARE(intel);
+}
+
+
+
+
+void
+intelEmitFillBlit(struct intel_context *intel,
+		  GLuint cpp,
+		  GLshort dst_pitch,
+		  dri_bo *dst_buffer,
+		  GLuint dst_offset,
+		  GLboolean dst_tiled,
+		  GLshort x, GLshort y,
+		  GLshort w, GLshort h,
+		  GLuint color)
+{
+   GLuint BR13, CMD;
+   BATCH_LOCALS;
+
+   dst_pitch *= cpp;
+
+   switch (cpp) {
+   case 1:
+   case 2:
+   case 3:
+      BR13 = (0xF0 << 16) | (1 << 24);
+      CMD = XY_COLOR_BLT_CMD;
+      break;
+   case 4:
+      BR13 = (0xF0 << 16) | (1 << 24) | (1 << 25);
+      CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
+      break;
+   default:
+      return;
+   }
+#ifndef I915
+   if (dst_tiled) {
+      CMD |= XY_DST_TILED;
+      dst_pitch /= 4;
+   }
+#endif
+
+   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
+       __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h);
+
+   assert(w > 0);
+   assert(h > 0);
+
+   BEGIN_BATCH(6, NO_LOOP_CLIPRECTS);
+   OUT_BATCH(CMD);
+   OUT_BATCH(BR13 | dst_pitch);
+   OUT_BATCH((y << 16) | x);
+   OUT_BATCH(((y + h) << 16) | (x + w));
+   OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset);
+   OUT_BATCH(color);
+   ADVANCE_BATCH();
+}
+
+static GLuint translate_raster_op(GLenum logicop)
+{
+   switch(logicop) {
+   case GL_CLEAR: return 0x00;
+   case GL_AND: return 0x88;
+   case GL_AND_REVERSE: return 0x44;
+   case GL_COPY: return 0xCC;
+   case GL_AND_INVERTED: return 0x22;
+   case GL_NOOP: return 0xAA;
+   case GL_XOR: return 0x66;
+   case GL_OR: return 0xEE;
+   case GL_NOR: return 0x11;
+   case GL_EQUIV: return 0x99;
+   case GL_INVERT: return 0x55;
+   case GL_OR_REVERSE: return 0xDD;
+   case GL_COPY_INVERTED: return 0x33;
+   case GL_OR_INVERTED: return 0xBB;
+   case GL_NAND: return 0x77;
+   case GL_SET: return 0xFF;
+   default: return 0;
+   }
+}
+
+
+/* Copy BitBlt
+ */
+void
+intelEmitCopyBlit(struct intel_context *intel,
+		  GLuint cpp,
+		  GLshort src_pitch,
+		  dri_bo *src_buffer,
+		  GLuint src_offset,
+		  GLboolean src_tiled,
+		  GLshort dst_pitch,
+		  dri_bo *dst_buffer,
+		  GLuint dst_offset,
+		  GLboolean dst_tiled,
+		  GLshort src_x, GLshort src_y,
+		  GLshort dst_x, GLshort dst_y,
+		  GLshort w, GLshort h,
+		  GLenum logic_op)
+{
+   GLuint CMD, BR13;
+   int dst_y2 = dst_y + h;
+   int dst_x2 = dst_x + w;
+   int ret;
+   BATCH_LOCALS;
+
+   /* do space/cliprects check before going any further */
+   intel_batchbuffer_require_space(intel->batch, 8 * 4, NO_LOOP_CLIPRECTS);
+ again:
+   ret = dri_bufmgr_check_aperture_space(dst_buffer);
+   ret |= dri_bufmgr_check_aperture_space(src_buffer);
+   if (ret) {
+     intel_batchbuffer_flush(intel->batch);
+     goto again;
+   }
+
+   DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
+       __FUNCTION__,
+       src_buffer, src_pitch, src_offset, src_x, src_y,
+       dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
+
+   src_pitch *= cpp;
+   dst_pitch *= cpp;
+
+   BR13 = translate_raster_op(logic_op) << 16;
+
+   switch (cpp) {
+   case 1:
+   case 2:
+   case 3:
+      BR13 |= (1 << 24);
+      CMD = XY_SRC_COPY_BLT_CMD;
+      break;
+   case 4:
+      BR13 |= (1 << 24) | (1 << 25);
+      CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
+      break;
+   default:
+      return;
+   }
+
+#ifndef I915
+   if (dst_tiled) {
+      CMD |= XY_DST_TILED;
+      dst_pitch /= 4;
+   }
+   if (src_tiled) {
+      CMD |= XY_SRC_TILED;
+      src_pitch /= 4;
+   }
+#endif
+
+   if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
+      return;
+   }
+
+   /* Initial y values don't seem to work with negative pitches.  If
+    * we adjust the offsets manually (below), it seems to work fine.
+    *
+    * On the other hand, if we always adjust, the hardware doesn't
+    * know which blit directions to use, so overlapping copypixels get
+    * the wrong result.
+    */
+   if (dst_pitch > 0 && src_pitch > 0) {
+      assert(dst_x < dst_x2);
+      assert(dst_y < dst_y2);
+
+      BEGIN_BATCH(8, NO_LOOP_CLIPRECTS);
+      OUT_BATCH(CMD);
+      OUT_BATCH(BR13 | dst_pitch);
+      OUT_BATCH((dst_y << 16) | dst_x);
+      OUT_BATCH((dst_y2 << 16) | dst_x2);
+      OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+		dst_offset);
+      OUT_BATCH((src_y << 16) | src_x);
+      OUT_BATCH(src_pitch);
+      OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		src_offset);
+      ADVANCE_BATCH();
+   }
+   else {
+      assert(dst_x < dst_x2);
+      assert(h > 0);
+
+      BEGIN_BATCH(8, NO_LOOP_CLIPRECTS);
+      OUT_BATCH(CMD);
+      OUT_BATCH(BR13 | ((uint16_t)dst_pitch));
+      OUT_BATCH((0 << 16) | dst_x);
+      OUT_BATCH((h << 16) | dst_x2);
+      OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+		dst_offset + dst_y * dst_pitch);
+      OUT_BATCH((0 << 16) | src_x);
+      OUT_BATCH(src_pitch);
+      OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		src_offset + src_y * src_pitch);
+      ADVANCE_BATCH();
+   }
+}
+
+
+/**
+ * Use blitting to clear the renderbuffers named by 'mask'.
+ * Note: we can't use the ctx->DrawBuffer->_ColorDrawBufferIndexes field
+ * since that might include software renderbuffers or renderbuffers
+ * which we're clearing with triangles.
+ * \param mask  bitmask of BUFFER_BIT_* values indicating buffers to clear
+ */
+void
+intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   GLuint clear_depth;
+   GLbitfield skipBuffers = 0;
+   BATCH_LOCALS;
+
+   /*
+    * Compute values for clearing the buffers.
+    */
+   clear_depth = 0;
+   if (mask & BUFFER_BIT_DEPTH) {
+      clear_depth = (GLuint) (fb->_DepthMax * ctx->Depth.Clear);
+   }
+   if (mask & BUFFER_BIT_STENCIL) {
+      clear_depth |= (ctx->Stencil.Clear & 0xff) << 24;
+   }
+
+   /* If clearing both depth and stencil, skip BUFFER_BIT_STENCIL in
+    * the loop below.
+    */
+   if ((mask & BUFFER_BIT_DEPTH) && (mask & BUFFER_BIT_STENCIL)) {
+      skipBuffers = BUFFER_BIT_STENCIL;
+   }
+
+   /* XXX Move this flush/lock into the following conditional? */
+   intelFlush(&intel->ctx);
+   LOCK_HARDWARE(intel);
+
+   if (intel->numClipRects) {
+      GLint cx, cy, cw, ch;
+      drm_clip_rect_t clear;
+      int i;
+
+      /* Get clear bounds after locking */
+      cx = fb->_Xmin;
+      cy = fb->_Ymin;
+      cw = fb->_Xmax - cx;
+      ch = fb->_Ymax - cy;
+
+      if (fb->Name == 0) {
+         /* clearing a window */
+
+         /* flip top to bottom */
+         clear.x1 = cx + intel->drawX;
+         clear.y1 = intel->driDrawable->y + intel->driDrawable->h - cy - ch;
+         clear.x2 = clear.x1 + cw;
+         clear.y2 = clear.y1 + ch;
+      }
+      else {
+         /* clearing FBO */
+         assert(intel->numClipRects == 1);
+         assert(intel->pClipRects == &intel->fboRect);
+         clear.x1 = cx;
+         clear.y1 = cy;
+         clear.x2 = clear.x1 + cw;
+         clear.y2 = clear.y1 + ch;
+         /* no change to mask */
+      }
+
+      for (i = 0; i < intel->numClipRects; i++) {
+         const drm_clip_rect_t *box = &intel->pClipRects[i];
+         drm_clip_rect_t b;
+         GLuint buf;
+         GLuint clearMask = mask;      /* use copy, since we modify it below */
+         GLboolean all = (cw == fb->Width && ch == fb->Height);
+
+         if (!all) {
+            intel_intersect_cliprects(&b, &clear, box);
+         }
+         else {
+            b = *box;
+         }
+
+         if (b.x1 >= b.x2 || b.y1 >= b.y2)
+            continue;
+
+         if (0)
+            _mesa_printf("clear %d,%d..%d,%d, mask %x\n",
+                         b.x1, b.y1, b.x2, b.y2, mask);
+
+         /* Loop over all renderbuffers */
+         for (buf = 0; buf < BUFFER_COUNT && clearMask; buf++) {
+            const GLbitfield bufBit = 1 << buf;
+            if ((clearMask & bufBit) && !(bufBit & skipBuffers)) {
+               /* OK, clear this renderbuffer */
+               struct intel_region *irb_region =
+		  intel_get_rb_region(fb, buf);
+               dri_bo *write_buffer =
+                  intel_region_buffer(intel, irb_region,
+                                      all ? INTEL_WRITE_FULL :
+                                      INTEL_WRITE_PART);
+
+               GLuint clearVal;
+               GLint pitch, cpp;
+               GLuint BR13, CMD;
+
+               ASSERT(irb_region);
+
+               pitch = irb_region->pitch;
+               cpp = irb_region->cpp;
+
+               DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
+                   __FUNCTION__,
+                   irb_region->buffer, (pitch * cpp),
+                   irb_region->draw_offset,
+                   b.x1, b.y1, b.x2 - b.x1, b.y2 - b.y1);
+
+	       BR13 = 0xf0 << 16;
+	       CMD = XY_COLOR_BLT_CMD;
+
+               /* Setup the blit command */
+               if (cpp == 4) {
+                  BR13 |= (1 << 24) | (1 << 25);
+                  if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) {
+                     if (clearMask & BUFFER_BIT_DEPTH)
+                        CMD |= XY_BLT_WRITE_RGB;
+                     if (clearMask & BUFFER_BIT_STENCIL)
+                        CMD |= XY_BLT_WRITE_ALPHA;
+                  }
+                  else {
+                     /* clearing RGBA */
+                     CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
+                  }
+               }
+               else {
+                  ASSERT(cpp == 2 || cpp == 0);
+                  BR13 |= (1 << 24);
+               }
+
+#ifndef I915
+	       if (irb_region->tiled) {
+		  CMD |= XY_DST_TILED;
+		  pitch /= 4;
+	       }
+#endif
+	       BR13 |= (pitch * cpp);
+
+               if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) {
+                  clearVal = clear_depth;
+               }
+               else {
+                  clearVal = (cpp == 4)
+                     ? intel->ClearColor8888 : intel->ClearColor565;
+               }
+               /*
+                  _mesa_debug(ctx, "hardware blit clear buf %d rb id %d\n",
+                  buf, irb->Base.Name);
+                */
+	       intel_wait_flips(intel);
+
+               assert(b.x1 < b.x2);
+               assert(b.y1 < b.y2);
+
+               BEGIN_BATCH(6, REFERENCES_CLIPRECTS);
+               OUT_BATCH(CMD);
+               OUT_BATCH(BR13);
+               OUT_BATCH((b.y1 << 16) | b.x1);
+               OUT_BATCH((b.y2 << 16) | b.x2);
+               OUT_RELOC(write_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+                         irb_region->draw_offset);
+               OUT_BATCH(clearVal);
+               ADVANCE_BATCH();
+               clearMask &= ~bufBit;    /* turn off bit, for faster loop exit */
+            }
+         }
+      }
+      intel_batchbuffer_flush(intel->batch);
+   }
+
+   UNLOCK_HARDWARE(intel);
+}
+
+void
+intelEmitImmediateColorExpandBlit(struct intel_context *intel,
+				  GLuint cpp,
+				  GLubyte *src_bits, GLuint src_size,
+				  GLuint fg_color,
+				  GLshort dst_pitch,
+				  dri_bo *dst_buffer,
+				  GLuint dst_offset,
+				  GLboolean dst_tiled,
+				  GLshort x, GLshort y,
+				  GLshort w, GLshort h,
+				  GLenum logic_op)
+{
+   int dwords = ALIGN(src_size, 8) / 4;	/* immediate payload, in dwords */
+   uint32_t opcode, br13, blit_cmd;
+
+   assert( logic_op - GL_CLEAR >= 0 );
+   assert( logic_op - GL_CLEAR < 0x10 );
+
+   if (w < 0 || h < 0)
+      return;
+
+   dst_pitch *= cpp;
+
+   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
+       __FUNCTION__,
+       dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);
+
+   intel_batchbuffer_require_space( intel->batch,
+				    (8 * 4) +	/* first 8-dword packet */
+				    (3 * 4) +	/* 3-dword blit header */
+				    dwords * 4,	/* payload; sizes are bytes */
+				    NO_LOOP_CLIPRECTS );
+
+   opcode = XY_SETUP_BLT_CMD;
+   if (cpp == 4)
+      opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
+#ifndef I915
+   if (dst_tiled) {
+      opcode |= XY_DST_TILED;
+      dst_pitch /= 4;
+   }
+#endif
+
+   br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29);
+   if (cpp == 2)
+      br13 |= BR13_565;
+   else
+      br13 |= BR13_8888;
+
+   blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */
+   if (dst_tiled)
+      blit_cmd |= XY_DST_TILED;
+
+   BEGIN_BATCH(8 + 3, NO_LOOP_CLIPRECTS);
+   OUT_BATCH(opcode);
+   OUT_BATCH(br13);
+   OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
+   OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
+   OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset);
+   OUT_BATCH(0); /* bg */
+   OUT_BATCH(fg_color); /* fg */
+   OUT_BATCH(0); /* pattern base addr */
+
+   OUT_BATCH(blit_cmd | ((3 - 2) + dwords));
+   OUT_BATCH((y << 16) | x);
+   OUT_BATCH(((y + h) << 16) | (x + w));
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_data( intel->batch,
+			   src_bits,
+			   dwords * 4,
+			   NO_LOOP_CLIPRECTS );
+}
diff --git a/i965/intel_blit.h b/shared/intel_blit.h
index e361545..fc0620c 100644
--- a/i965/intel_blit.h
+++ b/shared/intel_blit.h
@@ -30,37 +30,36 @@
 
 #include "intel_context.h"
 #include "intel_ioctl.h"
+#include "dri_bufmgr.h"
 
-struct buffer;
+extern void intelCopyBuffer(const __DRIdrawablePrivate * dpriv,
+                            const drm_clip_rect_t * rect);
 
-extern void intelCopyBuffer( const __DRIdrawablePrivate *dpriv,
-			     const drm_clip_rect_t *rect );
-extern void intelClearWithBlit(GLcontext *ctx, GLbitfield mask);
+extern void intelClearWithBlit(GLcontext * ctx, GLbitfield mask);
 
-extern void intelEmitCopyBlit( struct intel_context *intel,
-			       GLuint cpp,
-			       GLshort src_pitch,
-			       struct buffer *src_buffer,
-			       GLuint  src_offset,
-			       GLboolean src_tiled,
-			       GLshort dst_pitch,
-			       struct buffer *dst_buffer,
-			       GLuint  dst_offset,
-			       GLboolean dst_tiled,
-			       GLshort srcx, GLshort srcy,
-			       GLshort dstx, GLshort dsty,
-			       GLshort w, GLshort h,
-			       GLenum logic_op );
+extern void intelEmitCopyBlit(struct intel_context *intel,
+                              GLuint cpp,
+                              GLshort src_pitch,
+                              dri_bo *src_buffer,
+                              GLuint src_offset,
+			      GLboolean src_tiled,
+                              GLshort dst_pitch,
+                              dri_bo *dst_buffer,
+                              GLuint dst_offset,
+			      GLboolean dst_tiled,
+                              GLshort srcx, GLshort srcy,
+                              GLshort dstx, GLshort dsty,
+                              GLshort w, GLshort h,
+			      GLenum logicop );
 
-extern void intelEmitFillBlit( struct intel_context *intel,
-			       GLuint cpp,
-			       GLshort dst_pitch,
-			       struct buffer *dst_buffer,
-			       GLuint dst_offset,
-			       GLboolean dst_tiled,
-			       GLshort x, GLshort y, 
-			       GLshort w, GLshort h,
-			       GLuint color );
+extern void intelEmitFillBlit(struct intel_context *intel,
+                              GLuint cpp,
+                              GLshort dst_pitch,
+                              dri_bo *dst_buffer,
+                              GLuint dst_offset,
+			      GLboolean dst_tiled,
+                              GLshort x, GLshort y,
+                              GLshort w, GLshort h, GLuint color);
 
 void
 intelEmitImmediateColorExpandBlit(struct intel_context *intel,
@@ -68,11 +67,11 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
 				  GLubyte *src_bits, GLuint src_size,
 				  GLuint fg_color,
 				  GLshort dst_pitch,
-				  struct buffer *dst_buffer,
+				  dri_bo *dst_buffer,
 				  GLuint dst_offset,
 				  GLboolean dst_tiled,
-				  GLshort dst_x, GLshort dst_y, 
+				  GLshort x, GLshort y,
 				  GLshort w, GLshort h,
-				  GLenum logic_op );
+				  GLenum logic_op);
 
 #endif
diff --git a/shared/intel_buffer_objects.c b/shared/intel_buffer_objects.c
new file mode 100644
index 0000000..951b8cb
--- /dev/null
+++ b/shared/intel_buffer_objects.c
@@ -0,0 +1,285 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "imports.h"
+#include "mtypes.h"
+#include "bufferobj.h"
+
+#include "intel_context.h"
+#include "intel_buffer_objects.h"
+#include "intel_regions.h"
+#include "dri_bufmgr.h"
+
+static GLboolean intel_bufferobj_unmap(GLcontext * ctx,
+				       GLenum target,
+				       struct gl_buffer_object *obj);
+
+/** Allocates a new dri_bo to store the data for the buffer object. */
+static void
+intel_bufferobj_alloc_buffer(struct intel_context *intel,
+			     struct intel_buffer_object *intel_obj)
+{
+   intel_obj->buffer = dri_bo_alloc(intel->bufmgr, "bufferobj",
+				    intel_obj->Base.Size, 64,
+				    DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED);
+}
+
+/**
+ * There is some duplication between mesa's bufferobjects and our
+ * bufmgr buffers.  Both have an integer handle and a hashtable to
+ * lookup an opaque structure.  It would be nice if the handles and
+ * internal structure were somehow shared.
+ */
+static struct gl_buffer_object *
+intel_bufferobj_alloc(GLcontext * ctx, GLuint name, GLenum target)
+{
+   struct intel_buffer_object *obj = CALLOC_STRUCT(intel_buffer_object);
+
+   if (obj == NULL)
+      return NULL;		/* allocation failed: nothing to initialize */
+   _mesa_initialize_buffer_object(&obj->Base, name, target);
+   obj->buffer = NULL;		/* no dri_bo is attached at creation time */
+   return &obj->Base;
+}
+
+/* Break the COW tie to the region.  The region gets to keep the data.
+ */
+void
+intel_bufferobj_release_region(struct intel_context *intel,
+                               struct intel_buffer_object *intel_obj)
+{
+   assert(intel_obj->region->buffer == intel_obj->buffer);
+   intel_obj->region->pbo = NULL;
+   intel_obj->region = NULL;
+
+   dri_bo_unreference(intel_obj->buffer);
+   intel_obj->buffer = NULL;
+}
+
+/* Break the COW tie to the region.  Both the pbo and the region end
+ * up with a copy of the data.
+ */
+void
+intel_bufferobj_cow(struct intel_context *intel,
+                    struct intel_buffer_object *intel_obj)
+{
+   assert(intel_obj->region);
+   intel_region_cow(intel, intel_obj->region);
+}
+
+
+/**
+ * Deallocate/free a vertex/pixel buffer object.
+ * Called via glDeleteBuffersARB().
+ */
+static void
+intel_bufferobj_free(GLcontext * ctx, struct gl_buffer_object *obj)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+
+   assert(intel_obj);
+
+   /* Buffer objects are automatically unmapped when deleting according
+    * to the spec.
+    */
+   if (obj->Pointer)
+      intel_bufferobj_unmap(ctx, 0, obj);
+
+   if (intel_obj->region) {
+      intel_bufferobj_release_region(intel, intel_obj);
+   }
+   else if (intel_obj->buffer) {
+      dri_bo_unreference(intel_obj->buffer);
+   }
+
+   _mesa_free(intel_obj);
+}
+
+
+
+/**
+ * Allocate space for and store data in a buffer object.  Any data that was
+ * previously stored in the buffer object is lost.  If data is NULL,
+ * memory will be allocated, but no copy will occur.
+ * Called via glBufferDataARB().
+ */
+static void
+intel_bufferobj_data(GLcontext * ctx,
+                     GLenum target,
+                     GLsizeiptrARB size,
+                     const GLvoid * data,
+                     GLenum usage, struct gl_buffer_object *obj)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+
+   intel_obj->Base.Size = size;
+   intel_obj->Base.Usage = usage;
+
+   /* Buffer objects are automatically unmapped when creating new data buffers
+    * according to the spec.
+    */
+   if (obj->Pointer)
+      intel_bufferobj_unmap(ctx, 0, obj);
+
+   if (intel_obj->region)
+      intel_bufferobj_release_region(intel, intel_obj);
+
+   if (intel_obj->buffer != NULL) {
+      dri_bo_unreference(intel_obj->buffer);
+      intel_obj->buffer = NULL;
+   }
+   if (size != 0) {
+      intel_bufferobj_alloc_buffer(intel, intel_obj);
+
+      if (data != NULL)
+	 dri_bo_subdata(intel_obj->buffer, 0, size, data);
+   }
+}
+
+
+/**
+ * Replace data in a subrange of buffer object.  If the data range
+ * specified by size + offset extends beyond the end of the buffer or
+ * if data is NULL, no copy is performed.
+ * Called via glBufferSubDataARB().
+ */
+static void
+intel_bufferobj_subdata(GLcontext * ctx,
+                        GLenum target,
+                        GLintptrARB offset,
+                        GLsizeiptrARB size,
+                        const GLvoid * data, struct gl_buffer_object *obj)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+
+   assert(intel_obj);
+
+   if (intel_obj->region)
+      intel_bufferobj_cow(intel, intel_obj);
+
+   dri_bo_subdata(intel_obj->buffer, offset, size, data);
+}
+
+
+/**
+ * Called via glGetBufferSubDataARB().
+ */
+static void
+intel_bufferobj_get_subdata(GLcontext * ctx,
+                            GLenum target,
+                            GLintptrARB offset,
+                            GLsizeiptrARB size,
+                            GLvoid * data, struct gl_buffer_object *obj)
+{
+   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+
+   assert(intel_obj);
+   dri_bo_get_subdata(intel_obj->buffer, offset, size, data);
+}
+
+
+
+/**
+ * Called via glMapBufferARB().
+ */
+static void *
+intel_bufferobj_map(GLcontext * ctx,
+                    GLenum target,
+                    GLenum access, struct gl_buffer_object *obj)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+
+   /* XXX: Translate access to flags arg below:
+    */
+   assert(intel_obj);
+
+   if (intel_obj->region)
+      intel_bufferobj_cow(intel, intel_obj);
+
+   if (intel_obj->buffer == NULL) {
+      obj->Pointer = NULL;
+      return NULL;
+   }
+
+   dri_bo_map(intel_obj->buffer, GL_TRUE);
+   obj->Pointer = intel_obj->buffer->virtual;
+   return obj->Pointer;
+}
+
+
+/**
+ * Called via glUnmapBufferARB().
+ */
+static GLboolean
+intel_bufferobj_unmap(GLcontext * ctx,
+                      GLenum target, struct gl_buffer_object *obj)
+{
+   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+
+   assert(intel_obj);
+   if (intel_obj->buffer != NULL) {
+      assert(obj->Pointer);
+      dri_bo_unmap(intel_obj->buffer);
+      obj->Pointer = NULL;
+   }
+   return GL_TRUE;
+}
+
+dri_bo *
+intel_bufferobj_buffer(struct intel_context *intel,
+                       struct intel_buffer_object *intel_obj, GLuint flag)
+{
+   if (intel_obj->region) {
+      if (flag == INTEL_WRITE_PART)
+         intel_bufferobj_cow(intel, intel_obj);
+      else if (flag == INTEL_WRITE_FULL) {
+         intel_bufferobj_release_region(intel, intel_obj);
+	 intel_bufferobj_alloc_buffer(intel, intel_obj);
+      }
+   }
+
+   return intel_obj->buffer;
+}
+
+void
+intel_bufferobj_init(struct intel_context *intel)
+{
+   GLcontext *ctx = &intel->ctx;
+
+   ctx->Driver.NewBufferObject = intel_bufferobj_alloc;
+   ctx->Driver.DeleteBuffer = intel_bufferobj_free;
+   ctx->Driver.BufferData = intel_bufferobj_data;
+   ctx->Driver.BufferSubData = intel_bufferobj_subdata;
+   ctx->Driver.GetBufferSubData = intel_bufferobj_get_subdata;
+   ctx->Driver.MapBuffer = intel_bufferobj_map;
+   ctx->Driver.UnmapBuffer = intel_bufferobj_unmap;
+}
diff --git a/i965/intel_buffer_objects.h b/shared/intel_buffer_objects.h
index 4b38803..7cecc32 100644
--- a/i965/intel_buffer_objects.h
+++ b/shared/intel_buffer_objects.h
@@ -1,6 +1,6 @@
- /**************************************************************************
+/**************************************************************************
  * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
  * 
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -31,25 +31,33 @@
 #include "mtypes.h"
 
 struct intel_context;
+struct intel_region;
 struct gl_buffer_object;
 
 
 /**
  * Intel vertex/pixel buffer object, derived from Mesa's gl_buffer_object.
  */
-struct intel_buffer_object {
+struct intel_buffer_object
+{
    struct gl_buffer_object Base;
-   struct buffer *buffer;   /* the low-level buffer manager's buffer handle */
+   dri_bo *buffer;     /* the low-level buffer manager's buffer handle */
+
+   struct intel_region *region; /* Is there a zero-copy texture
+                                   associated with this (pixel)
+                                   buffer object? */
 };
 
 
 /* Get the bm buffer associated with a GL bufferobject:
  */
-struct buffer *intel_bufferobj_buffer( const struct intel_buffer_object *obj );
+dri_bo *intel_bufferobj_buffer(struct intel_context *intel,
+			       struct intel_buffer_object
+			       *obj, GLuint flag);
 
 /* Hook the bufferobject implementation into mesa: 
  */
-void intel_bufferobj_init( struct intel_context *intel );
+void intel_bufferobj_init(struct intel_context *intel);
 
 
 
@@ -58,13 +66,21 @@ void intel_bufferobj_init( struct intel_context *intel );
  * the Name == 0 test is the only way to identify them and avoid
  * casting them erroneously to our structs.
  */
-static inline struct intel_buffer_object *
-intel_buffer_object( struct gl_buffer_object *obj )
+static INLINE struct intel_buffer_object *
+intel_buffer_object(struct gl_buffer_object *obj)
 {
    if (obj->Name)
-      return (struct intel_buffer_object *)obj;
+      return (struct intel_buffer_object *) obj;
    else
       return NULL;
 }
 
+/* Helpers for zerocopy image uploads.  See also intel_regions.h:
+ */
+void intel_bufferobj_cow(struct intel_context *intel,
+                         struct intel_buffer_object *intel_obj);
+void intel_bufferobj_release_region(struct intel_context *intel,
+                                    struct intel_buffer_object *intel_obj);
+
+
 #endif
diff --git a/shared/intel_buffers.c b/shared/intel_buffers.c
new file mode 100644
index 0000000..75542a9
--- /dev/null
+++ b/shared/intel_buffers.c
@@ -0,0 +1,1094 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_blit.h"
+#include "intel_buffers.h"
+#include "intel_chipset.h"
+#include "intel_depthstencil.h"
+#include "intel_fbo.h"
+#include "intel_regions.h"
+#include "intel_batchbuffer.h"
+#include "intel_reg.h"
+#include "context.h"
+#include "utils.h"
+#include "drirenderbuffer.h"
+#include "framebuffer.h"
+#include "swrast/swrast.h"
+#include "vblank.h"
+#include "i915_drm.h"
+
+/* Fallback flip definitions for libdrm headers lacking DRM_IOCTL_I915_FLIP; this block can be removed when libdrm >= 2.3.1 is required */
+
+#ifndef DRM_IOCTL_I915_FLIP
+
+#define DRM_VBLANK_FLIP 0x8000000
+
+typedef struct drm_i915_flip {
+   int pipes;   /* bitmask; this file passes 0x1, 0x2 or a pf_planes mask */
+} drm_i915_flip_t;
+
+#undef DRM_IOCTL_I915_FLIP
+#define DRM_IOCTL_I915_FLIP DRM_IOW(DRM_COMMAND_BASE + DRM_I915_FLIP, \
+				    drm_i915_flip_t)
+
+#endif /* DRM_IOCTL_I915_FLIP */
+
+#define FILE_DEBUG_FLAG DEBUG_BLIT
+
+/**
+ * Compute the intersection of cliprects \p a and \p b.
+ *
+ * On overlap the result is written to \p dst and GL_TRUE is returned;
+ * otherwise GL_FALSE is returned and \p dst is left untouched.
+ *
+ * XXX move this into a new dri/common/cliprects.c file.
+ */
+GLboolean
+intel_intersect_cliprects(drm_clip_rect_t * dst,
+                          const drm_clip_rect_t * a,
+                          const drm_clip_rect_t * b)
+{
+   /* Intersection edges: greatest left/top, least right/bottom. */
+   const GLint left = (b->x1 < a->x1) ? a->x1 : b->x1;
+   const GLint top = (b->y1 < a->y1) ? a->y1 : b->y1;
+   const GLint right = (b->x2 > a->x2) ? a->x2 : b->x2;
+   const GLint bottom = (b->y2 > a->y2) ? a->y2 : b->y2;
+
+   /* An empty or inverted extent on either axis means no overlap. */
+   if (right - left <= 0)
+      return GL_FALSE;
+   if (bottom - top <= 0)
+      return GL_FALSE;
+
+   /* Non-empty overlap: publish it. */
+   dst->x1 = left;
+   dst->y1 = top;
+   dst->x2 = right;
+   dst->y2 = bottom;
+
+   return GL_TRUE;
+}
+
+/**
+ * Look up the region behind the first color draw buffer of the context's
+ * current draw framebuffer, or NULL when intel_renderbuffer() yields none.
+ */
+struct intel_region *
+intel_drawbuf_region(struct intel_context *intel)
+{
+   struct gl_framebuffer *draw_fb = intel->ctx.DrawBuffer;
+   struct intel_renderbuffer *color_irb =
+      intel_renderbuffer(draw_fb->_ColorDrawBuffers[0]);
+
+   return color_irb ? color_irb->region : NULL;
+}
+
+/**
+ * Look up the region behind the color read buffer of the context's current
+ * read framebuffer, or NULL when intel_renderbuffer() yields none.
+ */
+struct intel_region *
+intel_readbuf_region(struct intel_context *intel)
+{
+   struct gl_framebuffer *read_fb = intel->ctx.ReadBuffer;
+   struct intel_renderbuffer *read_irb =
+      intel_renderbuffer(read_fb->_ColorReadBuffer);
+
+   return read_irb ? read_irb->region : NULL;
+}
+
+
+
+/**
+ * Set up clipping for rendering to a user-created FBO: a single cliprect
+ * (intel->fboRect) covering the whole draw buffer, with a zero draw origin.
+ * Writes intel->fboRect, numClipRects, pClipRects, drawX and drawY.
+ */
+static void
+intelSetRenderbufferClipRects(struct intel_context *intel)
+{
+   const struct gl_framebuffer *fbo = intel->ctx.DrawBuffer;
+
+   assert(fbo->Width > 0);
+   assert(fbo->Height > 0);
+
+   intel->fboRect.x1 = intel->fboRect.y1 = 0;
+   intel->fboRect.x2 = fbo->Width;
+   intel->fboRect.y2 = fbo->Height;
+
+   intel->pClipRects = &intel->fboRect;
+   intel->numClipRects = 1;
+   intel->drawX = intel->drawY = 0;
+}
+
+
+/**
+ * Point the context at the drawable's front cliprects and window origin.
+ * Does nothing when the context has no drawable bound.
+ * \sa intelSetRenderbufferClipRects
+ */
+static void
+intelSetFrontClipRects(struct intel_context *intel)
+{
+   __DRIdrawablePrivate *drawable = intel->driDrawable;
+
+   if (drawable != NULL) {
+      intel->drawX = drawable->x;
+      intel->drawY = drawable->y;
+      intel->pClipRects = drawable->pClipRects;
+      intel->numClipRects = drawable->numClipRects;
+   }
+}
+
+
+/**
+ * Select cliprects and draw origin for rendering to a window's back buffer.
+ * The front values are used instead while page flipping is active or when
+ * the drawable has no back cliprects.  No-op without a bound drawable.
+ */
+static void
+intelSetBackClipRects(struct intel_context *intel)
+{
+   __DRIdrawablePrivate *drawable = intel->driDrawable;
+   struct intel_framebuffer *fb_priv;
+
+   if (drawable == NULL)
+      return;
+
+   fb_priv = drawable->driverPrivate;
+   if (!fb_priv->pf_active && drawable->numBackClipRects != 0) {
+      /* dedicated back-buffer cliprects are available */
+      intel->numClipRects = drawable->numBackClipRects;
+      intel->pClipRects = drawable->pBackClipRects;
+      intel->drawX = drawable->backX;
+      intel->drawY = drawable->backY;
+   } else {
+      /* flipping, or no back cliprects: share the front ones */
+      intel->numClipRects = drawable->numClipRects;
+      intel->pClipRects = drawable->pClipRects;
+      intel->drawX = drawable->x;
+      intel->drawY = drawable->y;
+   }
+}
+
+static void
+intelUpdatePageFlipping(struct intel_context *intel,
+			GLint areaA, GLint areaB)
+{
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;
+   struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
+   GLboolean pf_active;
+   GLint pf_planes;
+   /* Update page flipping info.  pf_planes: bit 0 set when areaA > 0,
+    * bit 1 set when areaB > 0 (the caller passes plane A/B overlap areas). */
+   pf_planes = 0;
+
+   if (areaA > 0)
+      pf_planes |= 1;
+
+   if (areaB > 0)
+      pf_planes |= 2;
+
+   intel_fb->pf_current_page = (intel->sarea->pf_current_page >>
+				(intel_fb->pf_planes & 0x2)) & 0x3;   /* 2-bit page field; shift is 0 or 2 */
+
+   intel_fb->pf_num_pages = intel->intelScreen->third.handle ? 3 : 2;
+
+   pf_active = pf_planes && (pf_planes & intel->sarea->pf_active) == pf_planes;   /* sarea must flag every plane we cover */
+
+   if (INTEL_DEBUG & DEBUG_LOCK)
+      if (pf_active != intel_fb->pf_active)
+	 _mesa_printf("%s - Page flipping %sactive\n", __progname,
+		      pf_active ? "" : "in");
+
+   if (pf_active) {
+      /* Sync pages between planes if flipping on both at the same time */
+      if (pf_planes == 0x3 && pf_planes != intel_fb->pf_planes &&
+	  (intel->sarea->pf_current_page & 0x3) !=
+	  (((intel->sarea->pf_current_page) >> 2) & 0x3)) {
+	 drm_i915_flip_t flip;
+
+	 if (intel_fb->pf_current_page ==
+	     (intel->sarea->pf_current_page & 0x3)) {
+	    /* XXX: This is ugly, but emitting two flips 'in a row' can cause
+	     * lockups for unknown reasons.
+	     */
+	    intel->sarea->pf_current_page =
+	       intel->sarea->pf_current_page & 0x3;
+	    intel->sarea->pf_current_page |=
+	       ((intel_fb->pf_current_page + intel_fb->pf_num_pages - 1) %
+		intel_fb->pf_num_pages) << 2;
+
+	    flip.pipes = 0x2;
+	 } else {
+	    intel->sarea->pf_current_page =
+	       intel->sarea->pf_current_page & (0x3 << 2);
+	    intel->sarea->pf_current_page |=
+	       (intel_fb->pf_current_page + intel_fb->pf_num_pages - 1) %
+	       intel_fb->pf_num_pages;
+
+	    flip.pipes = 0x1;
+	 }
+
+	 drmCommandWrite(intel->driFd, DRM_I915_FLIP, &flip, sizeof(flip));   /* NOTE(review): return value is ignored */
+      }
+
+      intel_fb->pf_planes = pf_planes;
+   }
+
+   intel_fb->pf_active = pf_active;
+   intel_flip_renderbuffers(intel_fb);
+   intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer);
+}
+
+/**
+ * Called when the currently bound window is moved/resized (XXX: apparently NOT
+ * when it is only moved -BP).  Refreshes cliprects, flip/vblank and size state.
+ */
+void
+intelWindowMoved(struct intel_context *intel)
+{
+   GLcontext *ctx = &intel->ctx;
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;
+   struct intel_framebuffer *intel_fb = dPriv->driverPrivate;   /* NOTE(review): dPriv is dereferenced unchecked */
+
+   if (!intel->ctx.DrawBuffer) {
+      /* when would this happen? -BP */
+      intelSetFrontClipRects(intel);
+   }
+   else if (intel->ctx.DrawBuffer->Name != 0) {
+      /* drawing to user-created FBO - do nothing */
+      /* Cliprects would be set from intelDrawBuffer() */
+   }
+   else {
+      /* drawing to a window */
+      switch (intel_fb->Base._ColorDrawBufferIndexes[0]) {
+      case BUFFER_FRONT_LEFT:
+         intelSetFrontClipRects(intel);
+         break;
+      case BUFFER_BACK_LEFT:
+         intelSetBackClipRects(intel);
+         break;
+      default:
+         intelSetFrontClipRects(intel);
+      }
+	
+   }
+
+   if (!intel->intelScreen->driScrnPriv->dri2.enabled &&
+       intel->intelScreen->driScrnPriv->ddx_version.minor >= 7) {
+      volatile struct drm_i915_sarea *sarea = intel->sarea;
+      drm_clip_rect_t drw_rect = { .x1 = dPriv->x, .x2 = dPriv->x + dPriv->w,
+				   .y1 = dPriv->y, .y2 = dPriv->y + dPriv->h };
+      drm_clip_rect_t planeA_rect = { .x1 = sarea->planeA_x, .y1 = sarea->planeA_y,
+				     .x2 = sarea->planeA_x + sarea->planeA_w,
+				     .y2 = sarea->planeA_y + sarea->planeA_h };
+      drm_clip_rect_t planeB_rect = { .x1 = sarea->planeB_x, .y1 = sarea->planeB_y,
+				     .x2 = sarea->planeB_x + sarea->planeB_w,
+				     .y2 = sarea->planeB_y + sarea->planeB_h };
+      GLint areaA = driIntersectArea( drw_rect, planeA_rect );
+      GLint areaB = driIntersectArea( drw_rect, planeB_rect );
+      GLuint flags = dPriv->vblFlags;
+
+      intelUpdatePageFlipping(intel, areaA, areaB);
+
+      /* Update vblank info: pick the secondary pipe when plane B covers at
+       * least as much of the drawable as plane A (and covers some of it) */
+      if (areaB > areaA || (areaA == areaB && areaB > 0)) {
+	 flags = dPriv->vblFlags | VBLANK_FLAG_SECONDARY;
+      } else {
+	 flags = dPriv->vblFlags & ~VBLANK_FLAG_SECONDARY;
+      }
+
+      /* Check to see if we changed pipes */
+      if (flags != dPriv->vblFlags && dPriv->vblFlags &&
+	  !(dPriv->vblFlags & VBLANK_FLAG_NO_IRQ)) {
+	 int64_t count;
+	 drmVBlank vbl;
+	 int i;
+
+	 /*
+	  * Wait for each color buffer's vbl_pending, using the old vblFlags
+	  */
+	 vbl.request.type = DRM_VBLANK_ABSOLUTE;
+
+	 if ( dPriv->vblFlags & VBLANK_FLAG_SECONDARY ) {
+	    vbl.request.type |= DRM_VBLANK_SECONDARY;
+	 }
+
+	 for (i = 0; i < intel_fb->pf_num_pages; i++) {
+	    if (!intel_fb->color_rb[i] ||
+		(intel_fb->vbl_waited - intel_fb->color_rb[i]->vbl_pending) <=
+		(1<<23))
+	       continue;
+
+	    vbl.request.sequence = intel_fb->color_rb[i]->vbl_pending;
+	    drmWaitVBlank(intel->driFd, &vbl);
+	 }
+
+	 /*
+	  * Update msc_base from old pipe
+	  */
+	 driDrawableGetMSC32(dPriv->driScreenPriv, dPriv, &count);
+	 dPriv->msc_base = count;
+	 /*
+	  * Then get new vblank_base and vblSeq values
+	  */
+	 dPriv->vblFlags = flags;
+	 driGetCurrentVBlank(dPriv);
+	 dPriv->vblank_base = dPriv->vblSeq;
+
+	 intel_fb->vbl_waited = dPriv->vblSeq;
+
+	 for (i = 0; i < intel_fb->pf_num_pages; i++) {
+	    if (intel_fb->color_rb[i])
+	       intel_fb->color_rb[i]->vbl_pending = intel_fb->vbl_waited;
+	 }
+      }
+   } else {   /* DRI2 enabled, or DDX minor < 7: sarea plane data is not consulted */
+      dPriv->vblFlags &= ~VBLANK_FLAG_SECONDARY;
+   }
+
+   /* Update Mesa's notion of window size */
+   driUpdateFramebufferSize(ctx, dPriv);
+   intel_fb->Base.Initialized = GL_TRUE; /* XXX remove someday */
+
+   /* Update hardware scissor */
+   if (ctx->Driver.Scissor != NULL) {
+      ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y,
+			  ctx->Scissor.Width, ctx->Scissor.Height);
+   }
+
+   /* Re-calculate viewport related state */
+   if (ctx->Driver.DepthRange != NULL)
+      ctx->Driver.DepthRange( ctx, ctx->Viewport.Near, ctx->Viewport.Far );
+}
+
+
+
+/* Clear the buffers in mask by drawing quads through the vtbl meta ops.
+ * A true meta version of this would be very simple and additionally
+ * machine independent.  Maybe we'll get there one day. */
+static void
+intelClearWithTris(struct intel_context *intel, GLbitfield mask)
+{
+   GLcontext *ctx = &intel->ctx;
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   GLuint buf;
+
+   intel->vtbl.install_meta_state(intel);
+
+   /* Back and stencil cliprects are the same.  Try and do both
+    * buffers at once:
+    */
+   if (mask & (BUFFER_BIT_BACK_LEFT | BUFFER_BIT_STENCIL | BUFFER_BIT_DEPTH)) {
+      struct intel_region *backRegion =
+	 intel_get_rb_region(fb, BUFFER_BACK_LEFT);
+      struct intel_region *depthRegion =
+	 intel_get_rb_region(fb, BUFFER_DEPTH);
+
+      intel->vtbl.meta_draw_region(intel, backRegion, depthRegion);
+
+      if (mask & BUFFER_BIT_BACK_LEFT)
+	 intel->vtbl.meta_color_mask(intel, GL_TRUE);
+      else
+	 intel->vtbl.meta_color_mask(intel, GL_FALSE);
+
+      if (mask & BUFFER_BIT_STENCIL)
+	 intel->vtbl.meta_stencil_replace(intel,
+					  intel->ctx.Stencil.WriteMask[0],
+					  intel->ctx.Stencil.Clear);
+      else
+	 intel->vtbl.meta_no_stencil_write(intel);
+
+      if (mask & BUFFER_BIT_DEPTH)
+	 intel->vtbl.meta_depth_replace(intel);
+      else
+	 intel->vtbl.meta_no_depth_write(intel);
+
+      intel->vtbl.meta_draw_quad(intel,
+				 fb->_Xmin,
+				 fb->_Xmax,
+				 fb->_Ymin,
+				 fb->_Ymax,
+				 intel->ctx.Depth.Clear,
+				 intel->ClearColor8888,
+				 0, 0, 0, 0);   /* texcoords */
+
+      mask &= ~(BUFFER_BIT_BACK_LEFT | BUFFER_BIT_STENCIL | BUFFER_BIT_DEPTH);
+   }
+
+   /* clear the remaining (color) renderbuffers, one quad per buffer bit */
+   for (buf = 0; buf < BUFFER_COUNT && mask; buf++) {
+      const GLuint bufBit = 1 << buf;
+      if (mask & bufBit) {
+	 struct intel_renderbuffer *irbColor =
+	    intel_renderbuffer(fb->Attachment[buf].Renderbuffer);
+
+	 ASSERT(irbColor);
+
+	 intel->vtbl.meta_no_depth_write(intel);
+	 intel->vtbl.meta_no_stencil_write(intel);
+	 intel->vtbl.meta_color_mask(intel, GL_TRUE);
+	 intel->vtbl.meta_draw_region(intel, irbColor->region, NULL);
+
+	 intel->vtbl.meta_draw_quad(intel,
+				    fb->_Xmin,
+				    fb->_Xmax,
+				    fb->_Ymin,
+				    fb->_Ymax,
+				    0, intel->ClearColor8888,
+				    0, 0, 0, 0);   /* texcoords */
+
+	 mask &= ~bufBit;
+      }
+   }
+
+   intel->vtbl.leave_meta_state(intel);
+}
+
+static const char *const buffer_names[] = {   /* debug names, indexed by BUFFER_* */
+   [BUFFER_FRONT_LEFT] = "front",
+   [BUFFER_BACK_LEFT] = "back",
+   [BUFFER_FRONT_RIGHT] = "front right",
+   [BUFFER_BACK_RIGHT] = "back right",
+   [BUFFER_AUX0] = "aux0",
+   [BUFFER_AUX1] = "aux1",
+   [BUFFER_AUX2] = "aux2",
+   [BUFFER_AUX3] = "aux3",
+   [BUFFER_DEPTH] = "depth",
+   [BUFFER_STENCIL] = "stencil",
+   [BUFFER_ACCUM] = "accum",
+   [BUFFER_COLOR0] = "color0",
+   [BUFFER_COLOR1] = "color1",
+   [BUFFER_COLOR2] = "color2",
+   [BUFFER_COLOR3] = "color3",
+   [BUFFER_COLOR4] = "color4",
+   [BUFFER_COLOR5] = "color5",
+   [BUFFER_COLOR6] = "color6",
+   [BUFFER_COLOR7] = "color7",
+};
+
+/**
+ * Driver.Clear hook: split \p mask between blit, 3D-quad and swrast clears.
+ */
+static void
+intelClear(GLcontext *ctx, GLbitfield mask)
+{
+   struct intel_context *intel = intel_context(ctx);
+   const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask);   /* ColorMask read as one GLuint */
+   GLbitfield tri_mask = 0;
+   GLbitfield blit_mask = 0;
+   GLbitfield swrast_mask = 0;
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   GLuint i;
+
+   if (0)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   /* HW color buffers (front, back, aux, generic FBO, etc) */
+   if (colorMask == ~0) {
+      /* clear all R,G,B,A */
+      /* XXX FBO: need to check if colorbuffers are software RBOs! */
+      blit_mask |= (mask & BUFFER_BITS_COLOR);
+   }
+   else {
+      /* glColorMask in effect */
+      tri_mask |= (mask & BUFFER_BITS_COLOR);
+   }
+
+   /* HW stencil */
+   if (mask & BUFFER_BIT_STENCIL) {
+      const struct intel_region *stencilRegion
+         = intel_get_rb_region(fb, BUFFER_STENCIL);
+      if (stencilRegion) {
+         /* have hw stencil */
+         if (IS_965(intel->intelScreen->deviceID) ||
+	     (ctx->Stencil.WriteMask[0] & 0xff) != 0xff) {
+	    /* We have to use the 3D engine if we're clearing a partial mask
+	     * of the stencil buffer, or if we're on a 965 which has a tiled
+	     * depth/stencil buffer in a layout we can't blit to.
+	     */
+            tri_mask |= BUFFER_BIT_STENCIL;
+         }
+         else {
+            /* clearing all stencil bits, use blitting */
+            blit_mask |= BUFFER_BIT_STENCIL;
+         }
+      }
+   }
+
+   /* HW depth */
+   if (mask & BUFFER_BIT_DEPTH) {
+      /* clear depth with whatever method is used for stencil (see above) */
+      if (IS_965(intel->intelScreen->deviceID) ||
+	  tri_mask & BUFFER_BIT_STENCIL)
+         tri_mask |= BUFFER_BIT_DEPTH;
+      else
+         blit_mask |= BUFFER_BIT_DEPTH;
+   }
+
+   /* SW fallback clearing: whatever neither HW path claimed */
+   swrast_mask = mask & ~tri_mask & ~blit_mask;
+
+   for (i = 0; i < BUFFER_COUNT; i++) {
+      GLuint bufBit = 1 << i;
+      if ((blit_mask | tri_mask) & bufBit) {
+         if (!fb->Attachment[i].Renderbuffer->ClassID) {   /* ClassID is 0: route to swrast */
+            blit_mask &= ~bufBit;
+            tri_mask &= ~bufBit;
+            swrast_mask |= bufBit;
+         }
+      }
+   }
+
+   if (blit_mask) {
+      if (INTEL_DEBUG & DEBUG_BLIT) {
+	 DBG("blit clear:");
+	 for (i = 0; i < BUFFER_COUNT; i++) {
+	    if (blit_mask & (1 << i))
+	       DBG(" %s", buffer_names[i]);
+	 }
+	 DBG("\n");
+      }
+      intelClearWithBlit(ctx, blit_mask);
+   }
+
+   if (tri_mask) {
+      if (INTEL_DEBUG & DEBUG_BLIT) {
+	 DBG("tri clear:");
+	 for (i = 0; i < BUFFER_COUNT; i++) {
+	    if (tri_mask & (1 << i))
+	       DBG(" %s", buffer_names[i]);
+	 }
+	 DBG("\n");
+      }
+      intelClearWithTris(intel, tri_mask);
+   }
+
+   if (swrast_mask) {
+      if (INTEL_DEBUG & DEBUG_BLIT) {
+	 DBG("swrast clear:");
+	 for (i = 0; i < BUFFER_COUNT; i++) {
+	    if (swrast_mask & (1 << i))
+	       DBG(" %s", buffer_names[i]);
+	 }
+	 DBG("\n");
+      }
+      _swrast_Clear(ctx, swrast_mask);
+   }
+}
+
+
+/* Emit batch commands that wait for a flip still pending on the window's current color buffer */
+void
+intel_wait_flips(struct intel_context *intel)
+{
+   struct intel_framebuffer *intel_fb =
+      (struct intel_framebuffer *) intel->ctx.DrawBuffer;
+   struct intel_renderbuffer *intel_rb =
+      intel_get_renderbuffer(&intel_fb->Base,
+			     intel_fb->Base._ColorDrawBufferIndexes[0] ==
+			     BUFFER_FRONT_LEFT ? BUFFER_FRONT_LEFT :
+			     BUFFER_BACK_LEFT);
+
+   if (intel_fb->Base.Name == 0 && intel_rb &&
+       intel_rb->pf_pending == intel_fb->pf_seq) {
+      GLint pf_planes = intel_fb->pf_planes;
+      BATCH_LOCALS;
+
+      /* Wait for pending flips to take effect; one dword per plane, 0 if unused */
+      BEGIN_BATCH(2, NO_LOOP_CLIPRECTS);
+      OUT_BATCH(pf_planes & 0x1 ? (MI_WAIT_FOR_EVENT | MI_WAIT_FOR_PLANE_A_FLIP)
+		: 0);
+      OUT_BATCH(pf_planes & 0x2 ? (MI_WAIT_FOR_EVENT | MI_WAIT_FOR_PLANE_B_FLIP)
+		: 0);
+      ADVANCE_BATCH();
+
+      intel_rb->pf_pending--;   /* no longer equals pf_seq, so the wait is not re-emitted */
+   }
+}
+
+
+/* Flip the front & back buffers via the DRM flip ioctl.  Returns GL_FALSE
+ * if no flip happened (DRM minor < 9, flipping inactive, or ioctl error). */
+static GLboolean
+intelPageFlip(const __DRIdrawablePrivate * dPriv)
+{
+   struct intel_context *intel;
+   int ret = 0;
+   struct intel_framebuffer *intel_fb;
+
+   if (INTEL_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   assert(dPriv);
+   assert(dPriv->driContextPriv);
+   assert(dPriv->driContextPriv->driverPrivate);
+
+   /* Only dereference dPriv once the asserts above have vetted it. */
+   intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate;
+   intel_fb = dPriv->driverPrivate;
+
+   if (intel->intelScreen->drmMinor < 9)
+      return GL_FALSE;
+
+   intelFlush(&intel->ctx);
+
+   LOCK_HARDWARE(intel);
+
+   if (dPriv->numClipRects && intel_fb->pf_active) {
+      drm_i915_flip_t flip;
+
+      flip.pipes = intel_fb->pf_planes;
+
+      ret = drmCommandWrite(intel->driFd, DRM_I915_FLIP, &flip, sizeof(flip));
+   }
+
+   UNLOCK_HARDWARE(intel);
+
+   if (ret || !intel_fb->pf_active)
+      return GL_FALSE;
+
+   if (!dPriv->numClipRects) {
+      usleep(10000);	/* throttle invisible client 10ms */
+   }
+
+   intel_fb->pf_current_page = (intel->sarea->pf_current_page >>
+				(intel_fb->pf_planes & 0x2)) & 0x3;
+
+   if (dPriv->numClipRects != 0) {
+      intel_get_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT)->pf_pending =
+      intel_get_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT)->pf_pending =
+	 ++intel_fb->pf_seq;
+   }
+
+   intel_flip_renderbuffers(intel_fb);
+   intel_draw_buffer(&intel->ctx, &intel_fb->Base);
+
+   return GL_TRUE;
+}
+
+static GLboolean
+intelScheduleSwap(__DRIdrawablePrivate * dPriv, GLboolean *missed_target)
+{
+   struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
+   unsigned int interval;
+   struct intel_context *intel =
+      intelScreenContext(dPriv->driScreenPriv->private);
+   const intelScreenPrivate *intelScreen = intel->intelScreen;
+   unsigned int target;
+   drm_i915_vblank_swap_t swap;
+   GLboolean ret;
+   /* Needs vblank flags with IRQs and DRM minor >= 6 (>= 9 when flipping) */
+   if (!dPriv->vblFlags ||
+       (dPriv->vblFlags & VBLANK_FLAG_NO_IRQ) ||
+       intelScreen->drmMinor < (intel_fb->pf_active ? 9 : 6))
+      return GL_FALSE;
+
+   interval = driGetVBlankInterval(dPriv);
+
+   swap.seqtype = DRM_VBLANK_ABSOLUTE;
+
+   if (dPriv->vblFlags & VBLANK_FLAG_SYNC) {
+      swap.seqtype |= DRM_VBLANK_NEXTONMISS;
+   } else if (interval == 0)
+      return GL_FALSE;
+
+   swap.drawable = dPriv->hHWDrawable;
+   target = swap.sequence = dPriv->vblSeq + interval;
+
+   if ( dPriv->vblFlags & VBLANK_FLAG_SECONDARY ) {
+      swap.seqtype |= DRM_VBLANK_SECONDARY;
+   }
+
+   LOCK_HARDWARE(intel);
+
+   intel_batchbuffer_flush(intel->batch);
+
+   if ( intel_fb->pf_active ) {
+      swap.seqtype |= DRM_VBLANK_FLIP;
+
+      intel_fb->pf_current_page = (((intel->sarea->pf_current_page >>
+				     (intel_fb->pf_planes & 0x2)) & 0x3) + 1) %
+				  intel_fb->pf_num_pages;
+   }
+
+   if (!drmCommandWriteRead(intel->driFd, DRM_I915_VBLANK_SWAP, &swap,
+			    sizeof(swap))) {
+      dPriv->vblSeq = swap.sequence;
+      swap.sequence -= target;   /* difference between returned sequence and target */
+      *missed_target = swap.sequence > 0 && swap.sequence <= (1 << 23);
+
+      intel_get_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT)->vbl_pending =
+	 intel_get_renderbuffer(&intel_fb->Base,
+				BUFFER_FRONT_LEFT)->vbl_pending =
+	 dPriv->vblSeq;
+
+      if (swap.seqtype & DRM_VBLANK_FLIP) {
+	 intel_flip_renderbuffers(intel_fb);
+	 intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer);
+      }
+
+      ret = GL_TRUE;
+   } else {
+      if (swap.seqtype & DRM_VBLANK_FLIP) {   /* ioctl failed: recompute page without the +1 applied above */
+	 intel_fb->pf_current_page = ((intel->sarea->pf_current_page >>
+					(intel_fb->pf_planes & 0x2)) & 0x3) %
+				     intel_fb->pf_num_pages;
+      }
+
+      ret = GL_FALSE;
+   }
+
+   UNLOCK_HARDWARE(intel);
+
+   return ret;
+}
+  
+void
+intelSwapBuffers(__DRIdrawablePrivate * dPriv)
+{
+   __DRIscreenPrivate *psp = dPriv->driScreenPriv;
+
+   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+      GET_CURRENT_CONTEXT(ctx);
+      struct intel_context *intel;
+
+      if (ctx == NULL)
+	 return;
+
+      intel = intel_context(ctx);   /* NOTE(review): intel is not used below */
+
+      if (ctx->Visual.doubleBufferMode) {
+	 GLboolean missed_target;
+	 struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
+	 int64_t ust;
+         
+	 _mesa_notifySwapBuffers(ctx);  /* flush pending rendering commands */
+
+         if (!intelScheduleSwap(dPriv, &missed_target)) {
+	    driWaitForVBlank(dPriv, &missed_target);
+
+	    /*
+	     * Update each buffer's vbl_pending so we don't get too out of
+	     * sync
+	     */
+	    intel_get_renderbuffer(&intel_fb->Base,
+				   BUFFER_BACK_LEFT)->vbl_pending = 
+		    intel_get_renderbuffer(&intel_fb->Base,
+					   BUFFER_FRONT_LEFT)->vbl_pending =
+		    dPriv->vblSeq;
+	    if (!intelPageFlip(dPriv)) {   /* no flip done: copy back to front */
+	       intelCopyBuffer(dPriv, NULL);
+	    }
+	 }
+
+	 intel_fb->swap_count++;
+	 (*psp->systemTime->getUST) (&ust);
+	 if (missed_target) {
+	    intel_fb->swap_missed_count++;
+	    intel_fb->swap_missed_ust = ust - intel_fb->swap_ust;
+	 }
+
+	 intel_fb->swap_ust = ust;
+      }
+   }
+   else {
+      /* XXX this shouldn't be an error but we can't handle it for now */
+      fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__);
+   }
+}
+
+void
+intelCopySubBuffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h)
+{
+   struct intel_context *intel;
+   drm_clip_rect_t sub;
+
+   if (!dPriv->driContextPriv || !dPriv->driContextPriv->driverPrivate) {
+      /* XXX this shouldn't be an error but we can't handle it for now */
+      fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__);
+      return;
+   }
+
+   intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate;
+   if (!intel->ctx.Visual.doubleBufferMode)
+      return;                   /* single-buffered visual: nothing is copied */
+   /* y is flipped against the drawable height (presumably GL bottom-up) */
+   sub.x1 = x + dPriv->x;
+   sub.y1 = (dPriv->h - y - h) + dPriv->y;
+   sub.x2 = sub.x1 + w;
+   sub.y2 = sub.y1 + h;
+   _mesa_notifySwapBuffers(&intel->ctx);  /* flush pending rendering commands */
+   intelCopyBuffer(dPriv, &sub);
+}
+
+
+/**
+ * Update the hardware state for drawing into a window or framebuffer object.
+ *
+ * Called by glDrawBuffer, glBindFramebufferEXT, MakeCurrent, and other
+ * places within the driver.
+ *
+ * Basically, this needs to be called any time the current framebuffer
+ * changes, the renderbuffers change, or we need to draw into different
+ * color buffers.
+ */
+void
+intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_region *colorRegions[MAX_DRAW_BUFFERS], *depthRegion = NULL;
+   struct intel_renderbuffer *irbDepth = NULL, *irbStencil = NULL;
+   int front = 0;               /* drawing to front color buffer? */
+
+   if (!fb) {
+      /* this can happen during the initial context initialization */
+      return;
+   }
+
+   /* Do this here, not core Mesa, since this function is called from
+    * many places within the driver.
+    */
+   if (ctx->NewState & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
+      /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */
+      _mesa_update_framebuffer(ctx);
+      /* this updates the DrawBuffer's Width/Height if it's a FBO */
+      _mesa_update_draw_buffer_bounds(ctx);
+   }
+
+   if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) {
+      /* this may occur when we're called by glBindFrameBuffer() during
+       * the process of someone setting up renderbuffers, etc.
+       */
+      /*_mesa_debug(ctx, "DrawBuffer: incomplete user FBO\n");*/
+      return;
+   }
+
+   if (fb->Name)
+      intel_validate_paired_depth_stencil(ctx, fb);
+
+   /* If the batch contents require looping over cliprects, flush them before
+    * we go changing which cliprects get referenced when that happens.
+    */
+   if (intel->batch->cliprect_mode == LOOP_CLIPRECTS)
+      intel_batchbuffer_flush(intel->batch);
+
+   /*
+    * How many color buffers are we drawing into?
+    */
+   if (fb->_NumColorDrawBuffers == 0) {
+      /* no color draw buffers: request the draw-buffer software fallback */
+      FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_TRUE);
+      colorRegions[0] = NULL;
+
+      if (fb->Name != 0)
+	 intelSetRenderbufferClipRects(intel);
+   } else if (fb->_NumColorDrawBuffers > 1) {
+       int i;
+       struct intel_renderbuffer *irb;
+       FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE);
+
+       if (fb->Name != 0)
+           intelSetRenderbufferClipRects(intel);
+       for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
+           irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]);
+           colorRegions[i] = (irb && irb->region) ? irb->region : NULL;
+       }
+   }
+   else {
+      /* draw to exactly one color buffer */
+      /*_mesa_debug(ctx, "Hardware rendering\n");*/
+      FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE);
+      if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
+         front = 1;
+      }
+
+      /*
+       * Get the intel_renderbuffer for the colorbuffer we're drawing into.
+       * And set up cliprects.
+       */
+      if (fb->Name == 0) {
+	 /* drawing to window system buffer */
+	 if (front) {
+	    intelSetFrontClipRects(intel);
+	    colorRegions[0] = intel_get_rb_region(fb, BUFFER_FRONT_LEFT);
+	 }
+	 else {
+	    intelSetBackClipRects(intel);
+	    colorRegions[0]= intel_get_rb_region(fb, BUFFER_BACK_LEFT);
+	 }
+      }
+      else {
+	 /* drawing to user-created FBO */
+	 struct intel_renderbuffer *irb;
+	 intelSetRenderbufferClipRects(intel);
+	 irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]);
+	 colorRegions[0] = (irb && irb->region) ? irb->region : NULL;
+      }
+   }
+
+   /* Update culling direction which changes depending on the
+    * orientation of the buffer:
+    */
+   if (ctx->Driver.FrontFace)
+      ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace);
+   else
+      ctx->NewState |= _NEW_POLYGON;
+
+   if (!colorRegions[0]) {
+      FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_TRUE);
+   }
+   else {
+      FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE);
+   }
+
+   /***
+    *** Get depth buffer region and check if we need a software fallback.
+    *** Note that the depth buffer is usually a DEPTH_STENCIL buffer.
+    ***/
+   if (fb->_DepthBuffer && fb->_DepthBuffer->Wrapped) {
+      irbDepth = intel_renderbuffer(fb->_DepthBuffer->Wrapped);
+      if (irbDepth && irbDepth->region) {
+         FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_FALSE);
+         depthRegion = irbDepth->region;
+      }
+      else {
+         FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_TRUE);
+         depthRegion = NULL;
+      }
+   }
+   else {
+      /* not using depth buffer */
+      FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_FALSE);
+      depthRegion = NULL;
+   }
+
+   /***
+    *** Stencil buffer
+    *** This can only be hardware accelerated if we're using a
+    *** combined DEPTH_STENCIL buffer (for now anyway).
+    ***/
+   if (fb->_StencilBuffer && fb->_StencilBuffer->Wrapped) {
+      irbStencil = intel_renderbuffer(fb->_StencilBuffer->Wrapped);
+      if (irbStencil && irbStencil->region) {
+         ASSERT(irbStencil->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT);
+         FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE);
+         /* need to re-compute stencil hw state */
+	 if (ctx->Driver.Enable != NULL)
+	    ctx->Driver.Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled);
+	 else
+	    ctx->NewState |= _NEW_STENCIL;
+         if (!depthRegion)
+            depthRegion = irbStencil->region;
+      }
+      else {
+         FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_TRUE);
+      }
+   }
+   else {
+      /* XXX FBO: instead of FALSE, pass ctx->Stencil.Enabled ??? */
+      FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE);
+      /* need to re-compute stencil hw state */
+      if (ctx->Driver.Enable != NULL)
+	 ctx->Driver.Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled);
+      else
+	 ctx->NewState |= _NEW_STENCIL;
+   }
+
+   /*
+    * Update depth test state
+    */
+   if (ctx->Driver.Enable) {
+      if (ctx->Depth.Test && fb->Visual.depthBits > 0) {
+	 ctx->Driver.Enable(ctx, GL_DEPTH_TEST, GL_TRUE);
+      } else {
+	 ctx->Driver.Enable(ctx, GL_DEPTH_TEST, GL_FALSE);
+      }
+   } else {
+      ctx->NewState |= _NEW_DEPTH;
+   }
+
+   intel->vtbl.set_draw_region(intel, colorRegions, depthRegion, 
+	fb->_NumColorDrawBuffers);
+
+   /* update viewport since it depends on window size */
+   if (ctx->Driver.Viewport) {
+      ctx->Driver.Viewport(ctx, ctx->Viewport.X, ctx->Viewport.Y,
+			   ctx->Viewport.Width, ctx->Viewport.Height);
+   } else {
+      ctx->NewState |= _NEW_VIEWPORT;
+   }
+
+   /* Set state we know depends on drawable parameters:
+    */
+   if (ctx->Driver.Scissor)
+      ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y,
+			  ctx->Scissor.Width, ctx->Scissor.Height);
+   intel->NewGLState |= _NEW_SCISSOR;
+
+   if (ctx->Driver.DepthRange)
+      ctx->Driver.DepthRange(ctx,
+			     ctx->Viewport.Near,
+			     ctx->Viewport.Far);
+}
+
+
+static void
+intelDrawBuffer(GLcontext * ctx, GLenum mode)  /* installed as functions->DrawBuffer */
+{
+   intel_draw_buffer(ctx, ctx->DrawBuffer);    /* mode itself is not consulted */
+}
+
+
+static void
+intelReadBuffer(GLcontext * ctx, GLenum mode)  /* installed as functions->ReadBuffer */
+{
+   if (ctx->ReadBuffer == ctx->DrawBuffer) {
+      /* This will update FBO completeness status.
+       * A framebuffer will be incomplete if the GL_READ_BUFFER setting
+       * refers to a missing renderbuffer.  Calling glReadBuffer can set
+       * that straight and can make the drawing buffer complete.
+       */
+      intel_draw_buffer(ctx, ctx->DrawBuffer);
+   }
+   /* Generally, functions which read pixels (glReadPixels, glCopyPixels, etc)
+    * reference ctx->ReadBuffer and do appropriate state checks.
+    */
+}
+
+
+void
+intelInitBufferFuncs(struct dd_function_table *functions)
+{
+   functions->ReadBuffer = intelReadBuffer;   /* plug this file's hooks */
+   functions->DrawBuffer = intelDrawBuffer;   /* into the driver table */
+   functions->Clear = intelClear;
+}
diff --git a/i915/intel_span.h b/shared/intel_buffers.h
index 2d4f858..a669a85 100644
--- a/i915/intel_span.h
+++ b/shared/intel_buffers.h
@@ -1,6 +1,7 @@
+
 /**************************************************************************
  * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
  * 
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,17 +26,31 @@
  * 
  **************************************************************************/
 
-#ifndef _INTEL_SPAN_H
-#define _INTEL_SPAN_H
+#ifndef INTEL_BUFFERS_H
+#define INTEL_BUFFERS_H
+
+
+struct intel_context;
+struct intel_framebuffer;
+
+
+extern GLboolean
+intel_intersect_cliprects(drm_clip_rect_t * dest,
+                          const drm_clip_rect_t * a,
+                          const drm_clip_rect_t * b);
+
+extern struct intel_region *intel_readbuf_region(struct intel_context *intel);
+
+extern struct intel_region *intel_drawbuf_region(struct intel_context *intel);
+
+extern void intel_wait_flips(struct intel_context *intel);
 
-#include "drirenderbuffer.h"
+extern void intelSwapBuffers(__DRIdrawablePrivate * dPriv);
 
-extern void intelInitSpanFuncs( GLcontext *ctx );
+extern void intelWindowMoved(struct intel_context *intel);
 
-extern void intelSpanRenderFinish( GLcontext *ctx );
-extern void intelSpanRenderStart( GLcontext *ctx );
+extern void intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb);
 
-extern void
-intelSetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis);
+extern void intelInitBufferFuncs(struct dd_function_table *functions);
 
-#endif
+#endif /* INTEL_BUFFERS_H */
diff --git a/shared/intel_bufmgr_ttm.c b/shared/intel_bufmgr_ttm.c
new file mode 100644
index 0000000..194814e
--- /dev/null
+++ b/shared/intel_bufmgr_ttm.c
@@ -0,0 +1,1122 @@
+/**************************************************************************
+ *
+ * Copyright © 2007 Red Hat Inc.
+ * Copyright © 2007 Intel Corporation
+ * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ *
+ **************************************************************************/
+/*
+ * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
+ *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
+ *	    Eric Anholt <eric@anholt.net>
+ *	    Dave Airlie <airlied@linux.ie>
+ */
+
+#include <xf86drm.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+
+#include "errno.h"
+#include "mtypes.h"
+#include "dri_bufmgr.h"
+#include "string.h"
+#include "imports.h"
+
+#include "i915_drm.h"
+
+#include "intel_bufmgr_ttm.h"
+#ifdef TTM_API
+
+#define DBG(...) do {					\
+   if (bufmgr_ttm->bufmgr.debug)			\
+      fprintf(stderr, __VA_ARGS__);			\
+} while (0)
+
+/*
+ * These bits are always specified in each validation
+ * request. Other bits are not supported at this point
+ * as it would require a bit of investigation to figure
+ * out what mask value should be used.
+ */
+#define INTEL_BO_MASK  (DRM_BO_MASK_MEM | \
+			DRM_BO_FLAG_READ | \
+			DRM_BO_FLAG_WRITE | \
+			DRM_BO_FLAG_EXE)
+
+struct intel_validate_entry {
+    dri_bo *bo;
+    struct drm_i915_op_arg bo_arg;
+};
+
+struct dri_ttm_bo_bucket_entry {
+   drmBO drm_bo;
+   struct dri_ttm_bo_bucket_entry *next;
+};
+
+struct dri_ttm_bo_bucket {
+   struct dri_ttm_bo_bucket_entry *head;
+   struct dri_ttm_bo_bucket_entry **tail;
+   /**
+    * Limit on the number of entries in this bucket.
+    *
+    * 0 means that this caching at this bucket size is disabled.
+    * -1 means that there is no limit to caching at this size.
+    */
+   int max_entries;
+   int num_entries;
+};
+
+/* Arbitrarily chosen, 16 buckets (indices 0-15) mean that the maximum size
+ * we'll cache for reuse is 1 << 15 pages, or 128MB with 4096-byte pages.
+ */
+#define INTEL_TTM_BO_BUCKETS	16
+typedef struct _dri_bufmgr_ttm {
+    dri_bufmgr bufmgr;
+
+    int fd;
+    unsigned int fence_type;
+    unsigned int fence_type_flush;
+
+    uint32_t max_relocs;
+
+    struct intel_validate_entry *validate_array;
+    int validate_array_size;
+    int validate_count;
+
+    /** Array of lists of cached drmBOs of power-of-two sizes */
+    struct dri_ttm_bo_bucket cache_bucket[INTEL_TTM_BO_BUCKETS];
+} dri_bufmgr_ttm;
+
+/**
+ * Private information associated with a relocation that isn't already stored
+ * in the relocation buffer to be passed to the kernel.
+ */
+struct dri_ttm_reloc {
+    dri_bo *target_buf;
+    uint64_t validate_flags;
+    /** Offset of target_buf after last execution of this relocation entry. */
+    unsigned int last_target_offset;
+};
+
+typedef struct _dri_bo_ttm {
+    dri_bo bo;
+
+    int refcount;
+    unsigned int map_count;
+    drmBO drm_bo;
+    const char *name;
+
+    uint64_t last_flags;
+
+    /**
+     * Index of the buffer within the validation list while preparing a
+     * batchbuffer execution, or -1 when it is not on the list.
+     */
+    int validate_index;
+
+    /** Relocation list (header + entries) in calloc'ed memory, not a DRM BO */
+    uint32_t *reloc_buf_data;
+    struct dri_ttm_reloc *relocs;
+
+    /**
+     * Indicates that the buffer may be shared with other processes, so we
+     * can't hold maps beyond when the user does.
+     */
+    GLboolean shared;
+
+    GLboolean delayed_unmap;
+    /* Virtual address from the dri_bo_map whose unmap was delayed. */
+    void *saved_virtual;
+} dri_bo_ttm;
+
+typedef struct _dri_fence_ttm
+{
+    dri_fence fence;
+
+    int refcount;
+    const char *name;
+    drmFence drm_fence;
+} dri_fence_ttm;
+
+static int
+logbase2(int n)
+{
+   /* Ceiling of log2(n), 0 for n <= 1.  Count the bits of n - 1 rather than
+    * doubling a signed probe, which overflowed (UB) once n exceeded 1 << 30.
+    */
+   unsigned int rest = n > 1 ? (unsigned int) n - 1 : 0;
+   int bits = 0;
+
+   for (; rest != 0; rest >>= 1)
+      bits++;
+   return bits;
+}
+
+static struct dri_ttm_bo_bucket *
+dri_ttm_bo_bucket_for_size(dri_bufmgr_ttm *bufmgr_ttm, unsigned long size)
+{
+    int i;
+
+    /* We only do buckets in power of two increments */
+    if ((size & (size - 1)) != 0)
+	return NULL;
+
+    /* We should only see sizes rounded to pages. */
+    assert((size % 4096) == 0);
+
+    /* Bucket index is log2 of the page count.  ffs() returns 0 when the int it
+     * receives is 0 (size 0, or a count lost in the int conversion): no bucket. */
+    i = ffs(size / 4096) - 1;
+    if (i < 0 || i >= INTEL_TTM_BO_BUCKETS)
+	return NULL;
+    return &bufmgr_ttm->cache_bucket[i];
+}
+
+
+static void dri_ttm_dump_validation_list(dri_bufmgr_ttm *bufmgr_ttm)
+{
+    int i, j;
+
+    for (i = 0; i < bufmgr_ttm->validate_count; i++) {
+	dri_bo *bo = bufmgr_ttm->validate_array[i].bo;
+	dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
+
+	if (bo_ttm->reloc_buf_data != NULL) {
+	    for (j = 0; j < (bo_ttm->reloc_buf_data[0] & 0xffff); j++) {
+		uint32_t *reloc_entry = bo_ttm->reloc_buf_data +
+		    I915_RELOC_HEADER +
+		    j * I915_RELOC0_STRIDE;
+		dri_bo *target_bo = bo_ttm->relocs[j].target_buf;
+		dri_bo_ttm *target_ttm = (dri_bo_ttm *)target_bo;
+
+		DBG("%2d: %s@0x%08x -> %s@0x%08lx + 0x%08x\n",
+		    i,
+		    bo_ttm->name, reloc_entry[0],
+		    target_ttm->name, target_bo->offset,
+		    reloc_entry[1]);
+	    }
+	} else {
+	    DBG("%2d: %s\n", i, bo_ttm->name);
+	}
+    }
+}
+
+/**
+ * Adds the given buffer to the list of buffers to be validated (moved into the
+ * appropriate memory type) with the next batch submission.
+ *
+ * If a buffer is validated multiple times in a batch submission, it ends up
+ * with the intersection of the memory type flags and the union of the
+ * access flags.
+ */
+static void
+intel_add_validate_buffer(dri_bo *buf,
+			  uint64_t flags)
+{
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr;
+    dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
+
+    /* If we delayed doing an unmap to mitigate map/unmap syscall thrashing,
+     * do that now.
+     */
+    if (ttm_buf->delayed_unmap) {
+	drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo);
+	ttm_buf->delayed_unmap = GL_FALSE;
+    }
+
+    if (ttm_buf->validate_index == -1) {
+	struct intel_validate_entry *entry;
+	struct drm_i915_op_arg *arg;
+	struct drm_bo_op_req *req;
+	int index;
+
+	/* Extend the array of validation entries as necessary. */
+	if (bufmgr_ttm->validate_count == bufmgr_ttm->validate_array_size) {
+	    int i, new_size = bufmgr_ttm->validate_array_size * 2;
+	    struct intel_validate_entry *grown;
+
+	    if (new_size == 0)
+		new_size = 5;
+	    grown = realloc(bufmgr_ttm->validate_array, sizeof(*grown) * new_size);
+	    if (grown == NULL) {
+		fprintf(stderr, "%s: out of memory\n", __FUNCTION__);
+		abort();
+	    }
+	    bufmgr_ttm->validate_array = grown;
+	    bufmgr_ttm->validate_array_size = new_size;
+	    /* Update pointers for realloced mem. */
+	    for (i = 0; i < bufmgr_ttm->validate_count - 1; i++)
+	       grown[i].bo_arg.next = (unsigned long)&grown[i + 1].bo_arg;
+	}
+
+	/* Pick out the new array entry for ourselves */
+	index = bufmgr_ttm->validate_count;
+	ttm_buf->validate_index = index;
+	entry = &bufmgr_ttm->validate_array[index];
+	bufmgr_ttm->validate_count++;
+
+	/* Fill in array entry */
+	entry->bo = buf;
+	dri_bo_reference(buf);
+
+	/* Fill in kernel arg */
+	arg = &entry->bo_arg;
+	req = &arg->d.req;
+
+	memset(arg, 0, sizeof(*arg));
+	req->bo_req.handle = ttm_buf->drm_bo.handle;
+	req->op = drm_bo_validate;
+	req->bo_req.flags = flags;
+	req->bo_req.hint = 0;
+#ifdef DRM_BO_HINT_PRESUMED_OFFSET
+	/* PRESUMED_OFFSET indicates that all relocations pointing at this
+	 * buffer have the correct offset.  If any of our relocations don't,
+	 * this flag will be cleared off the buffer later in the relocation
+	 * processing.
+	 */
+	req->bo_req.hint |= DRM_BO_HINT_PRESUMED_OFFSET;
+	req->bo_req.presumed_offset = buf->offset;
+#endif
+	req->bo_req.mask = INTEL_BO_MASK;
+	req->bo_req.fence_class = 0; /* Backwards compat. */
+
+	if (ttm_buf->reloc_buf_data != NULL)
+	    arg->reloc_ptr = (unsigned long)(void *)ttm_buf->reloc_buf_data;
+	else
+	    arg->reloc_ptr = 0;
+
+	/* Hook up the linked list of args for the kernel */
+	arg->next = 0;
+	if (index != 0) {
+	    bufmgr_ttm->validate_array[index - 1].bo_arg.next =
+		(unsigned long)arg;
+	}
+    } else {
+	struct intel_validate_entry *entry =
+	    &bufmgr_ttm->validate_array[ttm_buf->validate_index];
+	struct drm_i915_op_arg *arg = &entry->bo_arg;
+	struct drm_bo_op_req *req = &arg->d.req;
+	uint64_t memFlags = req->bo_req.flags & flags & DRM_BO_MASK_MEM;
+	uint64_t modeFlags = (req->bo_req.flags | flags) & ~DRM_BO_MASK_MEM;
+
+	/* Buffer was already in the validate list.  Extend its flags as
+	 * necessary.  The casts make the arguments match the %llx conversions.
+	 */
+	if (memFlags == 0) {
+	    fprintf(stderr,
+		    "%s: No shared memory types between "
+		    "0x%16llx and 0x%16llx\n",
+		    __FUNCTION__, (unsigned long long)req->bo_req.flags,
+		    (unsigned long long)flags);
+	    abort();
+	}
+	if (flags & ~INTEL_BO_MASK) {
+	    fprintf(stderr,
+		    "%s: Flags bits 0x%16llx are not supposed to be used in a relocation\n",
+		    __FUNCTION__, (unsigned long long)(flags & ~INTEL_BO_MASK));
+	    abort();
+	}
+	req->bo_req.flags = memFlags | modeFlags;
+    }
+}
+
+
+#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
+	sizeof(uint32_t))
+
+static int
+intel_setup_reloc_list(dri_bo *bo)
+{
+    dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr;
+
+    bo_ttm->relocs = calloc(bufmgr_ttm->max_relocs,
+			    sizeof(struct dri_ttm_reloc));
+    bo_ttm->reloc_buf_data = calloc(1, RELOC_BUF_SIZE(bufmgr_ttm->max_relocs));
+    if (bo_ttm->relocs == NULL || bo_ttm->reloc_buf_data == NULL) {
+	free(bo_ttm->relocs);
+	free(bo_ttm->reloc_buf_data);
+	bo_ttm->relocs = NULL;
+	bo_ttm->reloc_buf_data = NULL;
+	return -ENOMEM;
+    }
+    /* Header: DWORD 0 relocation count, DWORD 1 relocation type, DWORD 2+3
+     * 64-bit handle of next relocation list (none).  calloc zeroed 0, 2, 3.
+     */
+    bo_ttm->reloc_buf_data[1] = I915_RELOC_TYPE_0;
+    return 0;
+}
+
+#if 0
+int
+driFenceSignaled(DriFenceObject * fence, unsigned type)
+{
+    int signaled;
+    int ret;
+
+    if (fence == NULL)
+	return GL_TRUE;
+
+    ret = drmFenceSignaled(bufmgr_ttm->fd, &fence->fence, type, &signaled);
+    BM_CKFATAL(ret);
+    return signaled;
+}
+#endif
+
+/* Allocates a BO of at least size bytes, reusing an idle cached drmBO from the
+ * matching power-of-two bucket if possible; returns NULL on failure. */
+static dri_bo *
+dri_ttm_alloc(dri_bufmgr *bufmgr, const char *name,
+	      unsigned long size, unsigned int alignment,
+	      uint64_t location_mask)
+{
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
+    dri_bo_ttm *ttm_buf;
+    unsigned int pageSize = getpagesize();
+    int ret;
+    uint64_t flags;
+    unsigned int hint;
+    unsigned long alloc_size;
+    struct dri_ttm_bo_bucket *bucket;
+    GLboolean alloc_from_cache = GL_FALSE;
+
+    ttm_buf = calloc(1, sizeof(*ttm_buf));
+    if (!ttm_buf)
+	return NULL;
+
+    /* The mask argument doesn't do anything for us that we want other than
+     * determine which pool (TTM or local) the buffer is allocated into, so
+     * just pass all of the allocation class flags.
+     */
+    flags = location_mask | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE |
+	DRM_BO_FLAG_EXE;
+    hint = 0; /* No hints we want to use. */
+
+    /* Round up to a power of two, except over 1GB where no bucket exists. */
+    alloc_size = size > (1UL << 30) ? size : 1UL << logbase2(size);
+    if (alloc_size < pageSize)
+	alloc_size = pageSize;
+    bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, alloc_size);
+
+    /* If we don't have caching at this size, don't actually round the
+     * allocation up.
+     */
+    if (bucket == NULL || bucket->max_entries == 0)
+	alloc_size = size;
+
+    /* Get a buffer out of the cache if available */
+    if (bucket != NULL && bucket->num_entries > 0) {
+	struct dri_ttm_bo_bucket_entry *entry = bucket->head;
+	int busy;
+
+	/* Check if the buffer is still in flight.  If not, reuse it. */
+	ret = drmBOBusy(bufmgr_ttm->fd, &entry->drm_bo, &busy);
+	alloc_from_cache = (ret == 0 && busy == 0);
+
+	if (alloc_from_cache) {
+	    bucket->head = entry->next;
+	    if (entry->next == NULL)
+		bucket->tail = &bucket->head;
+	    bucket->num_entries--;
+
+	    ttm_buf->drm_bo = entry->drm_bo;
+	    free(entry);
+	}
+    }
+
+    if (!alloc_from_cache) {
+	ret = drmBOCreate(bufmgr_ttm->fd, alloc_size, alignment / pageSize,
+			  NULL, flags, hint, &ttm_buf->drm_bo);
+	if (ret != 0) {
+	    free(ttm_buf);
+	    return NULL;
+	}
+    }
+
+    ttm_buf->bo.size = size;
+    ttm_buf->bo.offset = ttm_buf->drm_bo.offset;
+    ttm_buf->bo.virtual = NULL;
+    ttm_buf->bo.bufmgr = bufmgr;
+    ttm_buf->name = name;
+    ttm_buf->refcount = 1;
+    ttm_buf->reloc_buf_data = NULL;
+    ttm_buf->relocs = NULL;
+    ttm_buf->last_flags = ttm_buf->drm_bo.flags;
+    ttm_buf->shared = GL_FALSE;
+    ttm_buf->delayed_unmap = GL_FALSE;
+    ttm_buf->validate_index = -1;
+
+    DBG("bo_create: %p (%s) %ldb\n", &ttm_buf->bo, ttm_buf->name, size);
+    return &ttm_buf->bo;
+}
+
+/* Our TTM backend doesn't allow creation of static buffers, as that requires
+ * privilege for the non-fake case, and the lock in the fake case where we were
+ * working around the X Server not creating buffers and passing handles to us.
+ */
+static dri_bo *
+dri_ttm_alloc_static(dri_bufmgr *bufmgr, const char *name,
+		     unsigned long offset, unsigned long size, void *virtual,
+		     uint64_t location_mask)
+{
+    return NULL;
+}
+
+/**
+ * Returns a dri_bo wrapping the given buffer object handle.
+ *
+ * This can be used when one application needs to pass a buffer object
+ * to another.
+ */
+dri_bo *
+intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name,
+			      unsigned int handle)
+{
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
+    dri_bo_ttm *ttm_buf;
+    int ret;
+
+    ttm_buf = calloc(1, sizeof(*ttm_buf));
+    if (!ttm_buf)
+	return NULL;
+
+    ret = drmBOReference(bufmgr_ttm->fd, handle, &ttm_buf->drm_bo);
+    if (ret != 0) {
+       fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n",
+	       name, handle, strerror(-ret));
+	free(ttm_buf);
+	return NULL;
+    }
+    ttm_buf->bo.size = ttm_buf->drm_bo.size;
+    ttm_buf->bo.offset = ttm_buf->drm_bo.offset;
+    ttm_buf->bo.virtual = NULL;
+    ttm_buf->bo.bufmgr = bufmgr;
+    ttm_buf->name = name;
+    ttm_buf->refcount = 1;
+    ttm_buf->reloc_buf_data = NULL;
+    ttm_buf->relocs = NULL;
+    ttm_buf->last_flags = ttm_buf->drm_bo.flags;
+    ttm_buf->shared = GL_TRUE;
+    ttm_buf->delayed_unmap = GL_FALSE;
+    ttm_buf->validate_index = -1;
+
+    DBG("bo_create_from_handle: %p %08x (%s)\n",
+	&ttm_buf->bo, handle, ttm_buf->name);
+
+    return &ttm_buf->bo;
+}
+
+static void
+dri_ttm_bo_reference(dri_bo *buf)
+{
+    dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
+
+    ttm_buf->refcount++;
+}
+
+static void
+dri_ttm_bo_unreference(dri_bo *buf)
+{
+    dri_bufmgr_ttm *bufmgr_ttm;
+    dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
+
+    if (!buf)
+	return;
+    bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr; /* only after the NULL check */
+    if (--ttm_buf->refcount == 0) {
+	struct dri_ttm_bo_bucket *bucket;
+	struct dri_ttm_bo_bucket_entry *entry = NULL;
+	int ret;
+
+	assert(ttm_buf->map_count == 0);
+
+	if (ttm_buf->reloc_buf_data) {
+	    int i;
+
+	    /* Unreference all the target buffers */
+	    for (i = 0; i < (ttm_buf->reloc_buf_data[0] & 0xffff); i++)
+		 dri_bo_unreference(ttm_buf->relocs[i].target_buf);
+	    free(ttm_buf->relocs);
+
+	    /* Free the relocation list itself */
+	    free(ttm_buf->reloc_buf_data);
+	    ttm_buf->reloc_buf_data = NULL;
+	}
+
+	if (ttm_buf->delayed_unmap) {
+	    ret = drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo);
+	    if (ret != 0) {
+		fprintf(stderr, "%s:%d: Error unmapping buffer %s: %s.\n",
+			__FILE__, __LINE__, ttm_buf->name, strerror(-ret));
+	   }
+	}
+
+	bucket = dri_ttm_bo_bucket_for_size(bufmgr_ttm, ttm_buf->drm_bo.size);
+	/* Put the buffer into our internal cache for reuse if we can. */
+	if (!ttm_buf->shared &&
+	    bucket != NULL &&
+	    (bucket->max_entries == -1 ||
+	     (bucket->max_entries > 0 &&
+	      bucket->num_entries < bucket->max_entries)))
+	{
+	    entry = calloc(1, sizeof(*entry));
+	}
+
+	if (entry != NULL) {
+	    /* calloc already left entry->next NULL; append at the tail. */
+	    entry->drm_bo = ttm_buf->drm_bo;
+	    *bucket->tail = entry;
+	    bucket->tail = &entry->next;
+	    bucket->num_entries++;
+	} else {
+	    /* Not cacheable (or no memory for the cache entry): decrement the
+	     * kernel refcount for the buffer. */
+	    ret = drmBOUnreference(bufmgr_ttm->fd, &ttm_buf->drm_bo);
+	    if (ret != 0) {
+	       fprintf(stderr, "drmBOUnreference failed (%s): %s\n",
+		       ttm_buf->name, strerror(-ret));
+	    }
+	}
+
+	DBG("bo_unreference final: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
+
+	free(buf);
+    }
+}
+
+static int
+dri_ttm_bo_map(dri_bo *buf, GLboolean write_enable)
+{
+    dri_bufmgr_ttm *bufmgr_ttm;
+    dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
+    uint64_t flags;
+    int ret;
+
+    bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr;
+    flags = DRM_BO_FLAG_READ;
+    if (write_enable)
+	flags |= DRM_BO_FLAG_WRITE;
+
+    /* Allow recursive mapping. Mesa may recursively map buffers with
+     * nested display loops.
+     */
+    if (ttm_buf->map_count++ != 0)
+	return 0;
+
+    assert(buf->virtual == NULL);
+
+    DBG("bo_map: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
+
+    /* XXX: What about if we're upgrading from READ to WRITE? */
+    if (ttm_buf->delayed_unmap) {
+	buf->virtual = ttm_buf->saved_virtual;
+	return 0;
+    }
+
+    ret = drmBOMap(bufmgr_ttm->fd, &ttm_buf->drm_bo, flags, 0, &buf->virtual);
+    if (ret != 0) {
+        fprintf(stderr, "%s:%d: Error mapping buffer %s: %s .\n",
+		__FILE__, __LINE__, ttm_buf->name, strerror(-ret));
+	/* Nothing got mapped: give back the map count taken above. */
+	ttm_buf->map_count--;
+    }
+    return ret;
+}
+
+static int
+dri_ttm_bo_unmap(dri_bo *buf)
+{
+    dri_bufmgr_ttm *bufmgr_ttm;
+    dri_bo_ttm *ttm_buf = (dri_bo_ttm *)buf;
+    int ret;
+
+    if (buf == NULL)
+	return 0;
+
+    assert(ttm_buf->map_count != 0);
+    if (--ttm_buf->map_count != 0)
+	return 0;
+
+    bufmgr_ttm = (dri_bufmgr_ttm *)buf->bufmgr;
+
+    assert(buf->virtual != NULL);
+
+    DBG("bo_unmap: %p (%s)\n", &ttm_buf->bo, ttm_buf->name);
+
+    if (!ttm_buf->shared) {
+	ttm_buf->saved_virtual = buf->virtual;
+	ttm_buf->delayed_unmap = GL_TRUE;
+	buf->virtual = NULL;
+
+	return 0;
+    }
+
+    buf->virtual = NULL;
+
+    ret = drmBOUnmap(bufmgr_ttm->fd, &ttm_buf->drm_bo);
+    if (ret != 0) {
+        fprintf(stderr, "%s:%d: Error unmapping buffer %s: %s.\n",
+		__FILE__, __LINE__, ttm_buf->name, strerror(-ret));
+    }
+
+    return ret;
+}
+
+/**
+ * Returns a dri_fence wrapping the fence described by the given fence arg.
+ *
+ * The handle, class, type, flags and sequence are copied from arg; the new
+ * fence starts unsignaled with a refcount of one.  NULL if malloc fails.
+ */
+dri_fence *
+intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name,
+				drm_fence_arg_t *arg)
+{
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
+    dri_fence_ttm *ttm_fence;
+
+    ttm_fence = malloc(sizeof(*ttm_fence));
+    if (!ttm_fence)
+	return NULL;
+
+    ttm_fence->drm_fence.handle = arg->handle;
+    ttm_fence->drm_fence.fence_class = arg->fence_class;
+    ttm_fence->drm_fence.type = arg->type;
+    ttm_fence->drm_fence.flags = arg->flags;
+    ttm_fence->drm_fence.signaled = 0;
+    ttm_fence->drm_fence.sequence = arg->sequence;
+
+    ttm_fence->fence.bufmgr = bufmgr;
+    ttm_fence->name = name;
+    ttm_fence->refcount = 1;
+
+    DBG("fence_create_from_handle: %p (%s)\n",
+	&ttm_fence->fence, ttm_fence->name);
+
+    return &ttm_fence->fence;
+}
+
+
+static void
+dri_ttm_fence_reference(dri_fence *fence)
+{
+    dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence;
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr;
+
+    ++fence_ttm->refcount;
+    DBG("fence_reference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name);
+}
+
+static void
+dri_ttm_fence_unreference(dri_fence *fence)
+{
+    dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence;
+    dri_bufmgr_ttm *bufmgr_ttm;
+
+    if (!fence)
+	return;
+    /* Only touch fence->bufmgr once fence is known to be non-NULL. */
+    bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr;
+
+    DBG("fence_unreference: %p (%s)\n", &fence_ttm->fence, fence_ttm->name);
+
+    if (--fence_ttm->refcount == 0) {
+	int ret;
+
+	ret = drmFenceUnreference(bufmgr_ttm->fd, &fence_ttm->drm_fence);
+	if (ret != 0) {
+	    fprintf(stderr, "drmFenceUnreference failed (%s): %s\n",
+		    fence_ttm->name, strerror(-ret));
+	}
+	free(fence);
+    }
+}
+
+static void
+dri_ttm_fence_wait(dri_fence *fence)
+{
+    dri_fence_ttm *fence_ttm = (dri_fence_ttm *)fence;
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)fence->bufmgr;
+    int ret;
+
+    ret = drmFenceWait(bufmgr_ttm->fd, DRM_FENCE_FLAG_WAIT_LAZY, &fence_ttm->drm_fence, 0);
+    if (ret != 0) {
+        fprintf(stderr, "%s:%d: Error waiting for fence %s: %s.\n",
+		__FILE__, __LINE__, fence_ttm->name, strerror(-ret));
+	abort();
+    }
+
+    DBG("fence_wait: %p (%s)\n", &fence_ttm->fence, fence_ttm->name);
+}
+
+static void
+dri_bufmgr_ttm_destroy(dri_bufmgr *bufmgr)
+{
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
+    int i;
+
+    free(bufmgr_ttm->validate_array);
+
+    /* Free any cached buffer objects we were going to reuse */
+    for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) {
+	struct dri_ttm_bo_bucket *bucket = &bufmgr_ttm->cache_bucket[i];
+	struct dri_ttm_bo_bucket_entry *entry;
+
+	while ((entry = bucket->head) != NULL) {
+	    int ret;
+
+	    bucket->head = entry->next;
+	    if (entry->next == NULL)
+		bucket->tail = &bucket->head;
+	    bucket->num_entries--;
+
+	    /* Decrement the kernel refcount for the buffer. */
+	    ret = drmBOUnreference(bufmgr_ttm->fd, &entry->drm_bo);
+	    if (ret != 0) {
+	       fprintf(stderr, "drmBOUnreference failed: %s\n",
+		       strerror(-ret));
+	    }
+
+	    free(entry);
+	}
+    }
+
+    free(bufmgr);
+}
+
+/**
+ * Appends a relocation to reloc_buf's relocation list and takes a reference
+ * on target_buf.  Returns 0, or -ENOMEM if the list cannot be allocated.
+ *
+ * The relocation entry at the given offset must already contain the
+ * precomputed relocation value, because the kernel will optimize out
+ * the relocation entry write when the buffer hasn't moved from the
+ * last known offset in target_buf.
+ */
+static int
+dri_ttm_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta,
+		   GLuint offset, dri_bo *target_buf)
+{
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)reloc_buf->bufmgr;
+    dri_bo_ttm *reloc_buf_ttm = (dri_bo_ttm *)reloc_buf;
+    dri_bo_ttm *target_buf_ttm = (dri_bo_ttm *)target_buf;
+    int num_relocs;
+    uint32_t *this_reloc;
+
+    /* Create a new relocation list if needed; give up if there still is none */
+    if (reloc_buf_ttm->reloc_buf_data == NULL)
+	intel_setup_reloc_list(reloc_buf);
+    if (reloc_buf_ttm->reloc_buf_data == NULL || reloc_buf_ttm->relocs == NULL)
+	return -ENOMEM;
+    num_relocs = reloc_buf_ttm->reloc_buf_data[0];
+    /* Check overflow */
+    assert(num_relocs < bufmgr_ttm->max_relocs);
+
+    this_reloc = reloc_buf_ttm->reloc_buf_data + I915_RELOC_HEADER +
+	num_relocs * I915_RELOC0_STRIDE;
+
+    this_reloc[0] = offset;
+    this_reloc[1] = delta;
+    this_reloc[2] = target_buf_ttm->drm_bo.handle; /* To be filled in at exec time */
+    this_reloc[3] = 0;
+
+    reloc_buf_ttm->relocs[num_relocs].validate_flags = flags;
+    reloc_buf_ttm->relocs[num_relocs].target_buf = target_buf;
+    dri_bo_reference(target_buf);
+
+    reloc_buf_ttm->reloc_buf_data[0]++; /* Increment relocation count */
+    /* Check wraparound */
+    assert(reloc_buf_ttm->reloc_buf_data[0] != 0);
+    return 0;
+}
+
+/**
+ * Walk the tree of relocations rooted at BO and accumulate the list of
+ * validations to be performed and update the relocation buffers with
+ * index values into the validation list.
+ */
+static void
+dri_ttm_bo_process_reloc(dri_bo *bo)
+{
+    dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
+    unsigned int nr_relocs;
+    unsigned int i;
+
+    if (bo_ttm->reloc_buf_data == NULL)
+	return;
+
+    nr_relocs = bo_ttm->reloc_buf_data[0] & 0xffff;
+
+    for (i = 0; i < nr_relocs; i++) {
+	struct dri_ttm_reloc *r = &bo_ttm->relocs[i];
+
+	/* Continue walking the tree depth-first. */
+	dri_ttm_bo_process_reloc(r->target_buf);
+
+	/* Add the target to the validate list */
+	intel_add_validate_buffer(r->target_buf, r->validate_flags);
+
+#ifdef DRM_BO_HINT_PRESUMED_OFFSET
+	/* Clear the PRESUMED_OFFSET flag from the validate list entry of the
+	 * target if this buffer has a stale relocated pointer at it. */
+	if (r->last_target_offset != r->target_buf->offset) {
+	   dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bo->bufmgr;
+	   dri_bo_ttm *target_buf_ttm = (dri_bo_ttm *)r->target_buf;
+	   struct intel_validate_entry *entry =
+	      &bufmgr_ttm->validate_array[target_buf_ttm->validate_index];
+	   entry->bo_arg.d.req.bo_req.hint &= ~DRM_BO_HINT_PRESUMED_OFFSET;
+	}
+#endif
+    }
+}
+
+static void *
+dri_ttm_process_reloc(dri_bo *batch_buf, GLuint *count)
+{
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr;
+
+    /* Update indices and set up the validate list. */
+    dri_ttm_bo_process_reloc(batch_buf);
+
+    /* Add the batch buffer to the validation list.  There are no relocations
+     * pointing to it.
+     */
+    intel_add_validate_buffer(batch_buf,
+			      DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE);
+
+    *count = bufmgr_ttm->validate_count;
+    return &bufmgr_ttm->validate_array[0].bo_arg;
+}
+
+static const char *
+intel_get_flags_mem_type_string(uint64_t flags)
+{
+    switch (flags & DRM_BO_MASK_MEM) {
+    case DRM_BO_FLAG_MEM_LOCAL: return "local";
+    case DRM_BO_FLAG_MEM_TT: return "ttm";
+    case DRM_BO_FLAG_MEM_VRAM: return "vram";
+    case DRM_BO_FLAG_MEM_PRIV0: return "priv0";
+    case DRM_BO_FLAG_MEM_PRIV1: return "priv1";
+    case DRM_BO_FLAG_MEM_PRIV2: return "priv2";
+    case DRM_BO_FLAG_MEM_PRIV3: return "priv3";
+    case DRM_BO_FLAG_MEM_PRIV4: return "priv4";
+    default: return NULL;
+    }
+}
+
+static const char *
+intel_get_flags_caching_string(uint64_t flags)
+{
+    switch (flags & (DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED)) {
+    case 0: return "UU";
+    case DRM_BO_FLAG_CACHED: return "CU";
+    case DRM_BO_FLAG_CACHED_MAPPED: return "UC";
+    case DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED: return "CC";
+    default: return NULL;
+    }
+}
+
+static void
+intel_update_buffer_offsets (dri_bufmgr_ttm *bufmgr_ttm)
+{
+    int i;
+
+    for (i = 0; i < bufmgr_ttm->validate_count; i++) {
+	dri_bo *bo = bufmgr_ttm->validate_array[i].bo;
+	dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
+	struct drm_i915_op_arg *arg = &bufmgr_ttm->validate_array[i].bo_arg;
+	struct drm_bo_arg_rep *rep = &arg->d.rep;
+
+	/* Update the flags */
+	if (rep->bo_info.flags != bo_ttm->last_flags) {
+	    DBG("BO %s migrated: %s/%s -> %s/%s\n",
+		bo_ttm->name,
+		intel_get_flags_mem_type_string(bo_ttm->last_flags),
+		intel_get_flags_caching_string(bo_ttm->last_flags),
+		intel_get_flags_mem_type_string(rep->bo_info.flags),
+		intel_get_flags_caching_string(rep->bo_info.flags));
+
+	    bo_ttm->last_flags = rep->bo_info.flags;
+	}
+	/* Update the buffer offset */
+	if (rep->bo_info.offset != bo->offset) {
+	    DBG("BO %s migrated: 0x%08lx -> 0x%08lx\n",
+		bo_ttm->name, bo->offset, (unsigned long)rep->bo_info.offset);
+	    bo->offset = rep->bo_info.offset;
+	}
+    }
+}
+
+/**
+ * Update the last target offset field of relocation entries for PRESUMED_OFFSET
+ * computation.
+ */
+static void
+dri_ttm_bo_post_submit(dri_bo *bo)
+{
+    dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
+    unsigned int nr_relocs;
+    int i;
+
+    if (bo_ttm->reloc_buf_data == NULL)
+	return;
+
+    nr_relocs = bo_ttm->reloc_buf_data[0] & 0xffff;
+
+    for (i = 0; i < nr_relocs; i++) {
+	struct dri_ttm_reloc *r = &bo_ttm->relocs[i];
+
+	/* Continue walking the tree depth-first. */
+	dri_ttm_bo_post_submit(r->target_buf);
+
+	r->last_target_offset = r->target_buf->offset;
+    }
+}
+
+static void
+dri_ttm_post_submit(dri_bo *batch_buf, dri_fence **last_fence)
+{
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr;
+    int i;
+
+    intel_update_buffer_offsets (bufmgr_ttm);
+
+    dri_ttm_bo_post_submit(batch_buf);
+
+    if (bufmgr_ttm->bufmgr.debug)
+	dri_ttm_dump_validation_list(bufmgr_ttm);
+
+    for (i = 0; i < bufmgr_ttm->validate_count; i++) {
+	dri_bo *bo = bufmgr_ttm->validate_array[i].bo;
+	dri_bo_ttm *bo_ttm = (dri_bo_ttm *)bo;
+
+	/* Disconnect the buffer from the validate list */
+	bo_ttm->validate_index = -1;
+	dri_bo_unreference(bo);
+	bufmgr_ttm->validate_array[i].bo = NULL;
+    }
+    bufmgr_ttm->validate_count = 0;
+}
+
+/**
+ * Enables unlimited caching of buffer objects for reuse.
+ *
+ * This is potentially very memory expensive, as the cache at each bucket
+ * size is only bounded by how many buffers of that size we've managed to have
+ * in flight at once.
+ */
+void
+intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr)
+{
+    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)bufmgr;
+    int i;
+
+    for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++) {
+	bufmgr_ttm->cache_bucket[i].max_entries = -1;
+    }
+}
+
+/*
+ * No aperture-space check is done here: bo is ignored, 0 is always returned.
+ */
+static int
+dri_ttm_check_aperture_space(dri_bo *bo)
+{
+    return 0;
+}
+
+/**
+ * Initializes the TTM buffer manager, which uses the kernel to allocate, map,
+ * and manage buffer objects.  Returns NULL if the manager cannot be allocated.
+ *
+ * \param fd File descriptor of the opened DRM device.
+ * \param fence_type Driver-specific fence type used for fences with no flush.
+ * \param fence_type_flush Driver-specific fence type for fences with a flush.
+ * \param batch_size Batch size in bytes; max_relocs is derived from it.
+ */
+dri_bufmgr *
+intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
+		      unsigned int fence_type_flush, int batch_size)
+{
+    dri_bufmgr_ttm *bufmgr_ttm;
+    int i;
+
+    bufmgr_ttm = calloc(1, sizeof(*bufmgr_ttm));
+    if (bufmgr_ttm == NULL)
+	return NULL;
+    bufmgr_ttm->fd = fd;
+    bufmgr_ttm->fence_type = fence_type;
+    bufmgr_ttm->fence_type_flush = fence_type_flush;
+
+    /* Let's go with one relocation per every 2 dwords (but round down a bit
+     * since a power of two will mean an extra page allocation for the reloc
+     * buffer).  Every 4 was too few for the blender benchmark.
+     */
+    bufmgr_ttm->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
+
+    bufmgr_ttm->bufmgr.bo_alloc = dri_ttm_alloc;
+    bufmgr_ttm->bufmgr.bo_alloc_static = dri_ttm_alloc_static;
+    bufmgr_ttm->bufmgr.bo_reference = dri_ttm_bo_reference;
+    bufmgr_ttm->bufmgr.bo_unreference = dri_ttm_bo_unreference;
+    bufmgr_ttm->bufmgr.bo_map = dri_ttm_bo_map;
+    bufmgr_ttm->bufmgr.bo_unmap = dri_ttm_bo_unmap;
+    bufmgr_ttm->bufmgr.fence_reference = dri_ttm_fence_reference;
+    bufmgr_ttm->bufmgr.fence_unreference = dri_ttm_fence_unreference;
+    bufmgr_ttm->bufmgr.fence_wait = dri_ttm_fence_wait;
+    bufmgr_ttm->bufmgr.destroy = dri_bufmgr_ttm_destroy;
+    bufmgr_ttm->bufmgr.emit_reloc = dri_ttm_emit_reloc;
+    bufmgr_ttm->bufmgr.process_relocs = dri_ttm_process_reloc;
+    bufmgr_ttm->bufmgr.post_submit = dri_ttm_post_submit;
+    bufmgr_ttm->bufmgr.debug = GL_FALSE;
+    bufmgr_ttm->bufmgr.check_aperture_space = dri_ttm_check_aperture_space;
+    /* Initialize the linked lists for BO reuse cache. */
+    for (i = 0; i < INTEL_TTM_BO_BUCKETS; i++)
+	bufmgr_ttm->cache_bucket[i].tail = &bufmgr_ttm->cache_bucket[i].head;
+
+    return &bufmgr_ttm->bufmgr;
+}
+#else
+dri_bufmgr *
+intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
+		      unsigned int fence_type_flush, int batch_size)
+{
+    return NULL;
+}
+
+dri_bo *
+intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name,
+			      unsigned int handle)
+{
+    return NULL;
+}
+
+void
+intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr)
+{
+}
+#endif
diff --git a/shared/intel_bufmgr_ttm.h b/shared/intel_bufmgr_ttm.h
new file mode 100644
index 0000000..f5bd64c
--- /dev/null
+++ b/shared/intel_bufmgr_ttm.h
@@ -0,0 +1,28 @@
+/* Public interface of the Intel TTM buffer manager. */
+#ifndef INTEL_BUFMGR_TTM_H
+#define INTEL_BUFMGR_TTM_H
+
+#include "dri_bufmgr.h"
+
+extern dri_bo *intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name,
+					       unsigned int handle);
+
+#ifdef TTM_API
+dri_fence *intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name,
+					   drm_fence_arg_t *arg);
+#endif
+
+/* Returns the new buffer manager, or NULL when none could be created. */
+dri_bufmgr *intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
+				  unsigned int fence_type_flush, int batch_size);
+
+void
+intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr);
+/* Fallbacks when TTM_API is undefined; presumably mirror DRM values - TODO confirm. */
+#ifndef TTM_API
+#define DRM_I915_FENCE_CLASS_ACCEL 0
+#define DRM_I915_FENCE_TYPE_RW 2
+#define DRM_I915_FENCE_FLAG_FLUSHED 0x01000000
+#endif
+
+#endif /* INTEL_BUFMGR_TTM_H */
diff --git a/shared/intel_chipset.h b/shared/intel_chipset.h
new file mode 100644
index 0000000..15b9ef4
--- /dev/null
+++ b/shared/intel_chipset.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#define PCI_CHIP_I810			0x7121
+#define PCI_CHIP_I810_DC100		0x7123
+#define PCI_CHIP_I810_E			0x7125
+#define PCI_CHIP_I815			0x1132
+
+#define PCI_CHIP_I830_M			0x3577
+#define PCI_CHIP_845_G			0x2562
+#define PCI_CHIP_I855_GM		0x3582
+#define PCI_CHIP_I865_G			0x2572
+/* IDs matched by IS_915() and IS_945(). */
+#define PCI_CHIP_I915_G			0x2582
+#define PCI_CHIP_E7221_G		0x258A
+#define PCI_CHIP_I915_GM		0x2592
+#define PCI_CHIP_I945_G			0x2772
+#define PCI_CHIP_I945_GM		0x27A2
+#define PCI_CHIP_I945_GME		0x27AE
+/* Also matched by IS_945(). */
+#define PCI_CHIP_Q35_G			0x29B2
+#define PCI_CHIP_G33_G			0x29C2
+#define PCI_CHIP_Q33_G			0x29D2
+/* IDs matched by IS_965(). */
+#define PCI_CHIP_I965_G			0x29A2
+#define PCI_CHIP_I965_Q			0x2992
+#define PCI_CHIP_I965_G_1		0x2982
+#define PCI_CHIP_I946_GZ		0x2972
+#define PCI_CHIP_I965_GM                0x2A02
+#define PCI_CHIP_I965_GME               0x2A12
+/* Matched by IS_GM45_GM(), and through it by IS_965(). */
+#define PCI_CHIP_GM45_GM                0x2A42
+/* Matched by IS_G4X(), and through it by IS_965(). */
+#define PCI_CHIP_IGD_E_G                0x2E02
+#define PCI_CHIP_Q45_G                  0x2E12
+#define PCI_CHIP_G45_G                  0x2E22
+/* Chip-family predicates.  devid is parenthesized but evaluated repeatedly. */
+#define IS_MOBILE(devid)	((devid) == PCI_CHIP_I855_GM || \
+				 (devid) == PCI_CHIP_I915_GM || \
+				 (devid) == PCI_CHIP_I945_GM || \
+				 (devid) == PCI_CHIP_I945_GME || \
+				 (devid) == PCI_CHIP_I965_GM || \
+				 (devid) == PCI_CHIP_I965_GME || \
+				 (devid) == PCI_CHIP_GM45_GM)
+
+#define IS_GM45_GM(devid)       ((devid) == PCI_CHIP_GM45_GM)
+#define IS_G4X(devid)           ((devid) == PCI_CHIP_IGD_E_G || \
+                                 (devid) == PCI_CHIP_Q45_G || \
+                                 (devid) == PCI_CHIP_G45_G)
+
+#define IS_915(devid)		((devid) == PCI_CHIP_I915_G || \
+				 (devid) == PCI_CHIP_E7221_G || \
+				 (devid) == PCI_CHIP_I915_GM)
+/* The G33/Q33/Q35 IDs are classed together with the 945 IDs. */
+#define IS_945(devid)		((devid) == PCI_CHIP_I945_G || \
+				 (devid) == PCI_CHIP_I945_GM || \
+				 (devid) == PCI_CHIP_I945_GME || \
+				 (devid) == PCI_CHIP_G33_G || \
+				 (devid) == PCI_CHIP_Q33_G || \
+				 (devid) == PCI_CHIP_Q35_G)
+/* The 965 class also covers GM45 and the G4X IDs. */
+#define IS_965(devid)		((devid) == PCI_CHIP_I965_G || \
+				 (devid) == PCI_CHIP_I965_Q || \
+				 (devid) == PCI_CHIP_I965_G_1 || \
+				 (devid) == PCI_CHIP_I965_GM || \
+				 (devid) == PCI_CHIP_I965_GME || \
+				 (devid) == PCI_CHIP_I946_GZ || \
+				 IS_GM45_GM(devid) || \
+				 IS_G4X(devid))
+
+#define IS_9XX(devid)		(IS_915(devid) || \
+				 IS_945(devid) || \
+				 IS_965(devid))
diff --git a/shared/intel_context.c b/shared/intel_context.c
new file mode 100644
index 0000000..5fa9d95
--- /dev/null
+++ b/shared/intel_context.c
@@ -0,0 +1,1022 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "glheader.h"
+#include "context.h"
+#include "matrix.h"
+#include "simple_list.h"
+#include "extensions.h"
+#include "framebuffer.h"
+#include "imports.h"
+#include "points.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+
+#include "tnl/t_pipeline.h"
+#include "tnl/t_vertex.h"
+
+#include "drivers/common/driverfuncs.h"
+
+#include "intel_screen.h"
+
+#include "i830_dri.h"
+
+#include "intel_chipset.h"
+#include "intel_buffers.h"
+#include "intel_tex.h"
+#include "intel_ioctl.h"
+#include "intel_batchbuffer.h"
+#include "intel_blit.h"
+#include "intel_pixel.h"
+#include "intel_regions.h"
+#include "intel_buffer_objects.h"
+#include "intel_fbo.h"
+#include "intel_decode.h"
+#include "intel_bufmgr_ttm.h"
+
+#include "drirenderbuffer.h"
+#include "vblank.h"
+#include "utils.h"
+#include "xmlpool.h"            /* for symbolic values of enum-type options */
+#ifndef INTEL_DEBUG
+int INTEL_DEBUG = (0);
+#endif
+
+#define need_GL_NV_point_sprite
+#define need_GL_ARB_multisample
+#define need_GL_ARB_point_parameters
+#define need_GL_ARB_texture_compression
+#define need_GL_ARB_vertex_buffer_object
+#define need_GL_ARB_vertex_program
+#define need_GL_ARB_window_pos
+#define need_GL_ARB_occlusion_query
+#define need_GL_EXT_blend_color
+#define need_GL_EXT_blend_equation_separate
+#define need_GL_EXT_blend_func_separate
+#define need_GL_EXT_blend_minmax
+#define need_GL_EXT_cull_vertex
+#define need_GL_EXT_fog_coord
+#define need_GL_EXT_framebuffer_object
+#define need_GL_EXT_multi_draw_arrays
+#define need_GL_EXT_secondary_color
+#define need_GL_NV_vertex_program
+#define need_GL_ATI_separate_stencil
+#define need_GL_EXT_point_parameters
+#define need_GL_VERSION_2_0
+#define need_GL_VERSION_2_1
+#define need_GL_ARB_shader_objects
+#define need_GL_ARB_vertex_shader
+
+#include "extension_helper.h"
+
+#define DRIVER_DATE                     "20061102"
+/* GetString hook: answers GL_VENDOR and GL_RENDERER; any other name yields NULL. */
+static const GLubyte *
+intelGetString(GLcontext * ctx, GLenum name)
+{
+   const char *chipset;
+   static char buffer[128];   /* static so the returned GL_RENDERER pointer stays valid */
+
+   switch (name) {
+   case GL_VENDOR:
+      return (GLubyte *) "Tungsten Graphics, Inc";
+      break;
+
+   case GL_RENDERER:
+      switch (intel_context(ctx)->intelScreen->deviceID) {
+      case PCI_CHIP_845_G:
+         chipset = "Intel(R) 845G";
+         break;
+      case PCI_CHIP_I830_M:
+         chipset = "Intel(R) 830M";
+         break;
+      case PCI_CHIP_I855_GM:
+         chipset = "Intel(R) 852GM/855GM";
+         break;
+      case PCI_CHIP_I865_G:
+         chipset = "Intel(R) 865G";
+         break;
+      case PCI_CHIP_I915_G:
+         chipset = "Intel(R) 915G";
+         break;
+      case PCI_CHIP_E7221_G:
+	 chipset = "Intel (R) E7221G (i915)";
+	 break;
+      case PCI_CHIP_I915_GM:
+         chipset = "Intel(R) 915GM";
+         break;
+      case PCI_CHIP_I945_G:
+         chipset = "Intel(R) 945G";
+         break;
+      case PCI_CHIP_I945_GM:
+         chipset = "Intel(R) 945GM";
+         break;
+      case PCI_CHIP_I945_GME:
+         chipset = "Intel(R) 945GME";
+         break;
+      case PCI_CHIP_G33_G:
+	 chipset = "Intel(R) G33";
+	 break;
+      case PCI_CHIP_Q35_G:
+	 chipset = "Intel(R) Q35";
+	 break;
+      case PCI_CHIP_Q33_G:
+	 chipset = "Intel(R) Q33";
+	 break;
+      case PCI_CHIP_I965_Q:
+	 chipset = "Intel(R) 965Q";
+	 break;
+      case PCI_CHIP_I965_G:
+      case PCI_CHIP_I965_G_1:
+	 chipset = "Intel(R) 965G";
+	 break;
+      case PCI_CHIP_I946_GZ:
+	 chipset = "Intel(R) 946GZ";
+	 break;
+      case PCI_CHIP_I965_GM:
+	 chipset = "Intel(R) 965GM";
+	 break;
+      case PCI_CHIP_I965_GME:
+	 chipset = "Intel(R) 965GME/GLE";
+	 break;
+      case PCI_CHIP_GM45_GM:
+	 chipset = "Mobile Intel® GM45 Express Chipset";   /* NOTE(review): only non-ASCII name here */
+	 break; 
+      case PCI_CHIP_IGD_E_G:
+	 chipset = "Intel(R) Integrated Graphics Device";
+	 break;
+      case PCI_CHIP_G45_G:
+         chipset = "Intel(R) G45/G43";
+         break;
+      case PCI_CHIP_Q45_G:
+         chipset = "Intel(R) Q45/Q43";
+         break;
+      default:
+         chipset = "Unknown Intel Chipset";
+         break;
+      }
+      /* Presumably composes the renderer string from chipset and DRIVER_DATE - confirm. */
+      (void) driGetRendererString(buffer, chipset, DRIVER_DATE, 0);
+      return (GLubyte *) buffer;
+
+   default:
+      return NULL;
+   }
+}
+
+/**
+ * Extension strings exported by the intel driver.
+ *
+ * Extensions supported by all chips supported by i830_dri, i915_dri, or
+ * i965_dri.  Each name is listed once; the table ends with a NULL entry.
+ */
+static const struct dri_extension card_extensions[] = {
+   {"GL_ARB_multisample", GL_ARB_multisample_functions},
+   {"GL_ARB_multitexture", NULL},
+   {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions},
+   {"GL_NV_point_sprite", GL_NV_point_sprite_functions},
+   {"GL_ARB_texture_border_clamp", NULL},
+   {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions},
+   {"GL_ARB_texture_cube_map", NULL},
+   {"GL_ARB_texture_env_add", NULL},
+   {"GL_ARB_texture_env_combine", NULL},
+   {"GL_ARB_texture_env_crossbar", NULL},
+   {"GL_ARB_texture_env_dot3", NULL},
+   {"GL_ARB_texture_mirrored_repeat", NULL},
+   {"GL_ARB_texture_non_power_of_two",   NULL },
+   {"GL_ARB_texture_rectangle", NULL},
+   {"GL_NV_texture_rectangle", NULL},
+   {"GL_EXT_texture_rectangle", NULL},
+   /* GL_ARB_point_parameters is registered once, above, with its functions. */
+   {"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions},
+   {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions},
+   {"GL_ARB_window_pos", GL_ARB_window_pos_functions},
+   {"GL_EXT_blend_color", GL_EXT_blend_color_functions},
+   {"GL_EXT_blend_equation_separate",
+    GL_EXT_blend_equation_separate_functions},
+   {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions},
+   {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions},
+   {"GL_EXT_blend_logic_op", NULL},
+   {"GL_EXT_blend_subtract", NULL},
+   {"GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions},
+   {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions},
+   {"GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions},
+   {"GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions},
+#if 1                           /* XXX FBO temporary? */
+   {"GL_EXT_packed_depth_stencil", NULL},
+#endif
+   {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions},
+   {"GL_EXT_stencil_wrap", NULL},
+   {"GL_EXT_texture_edge_clamp", NULL},
+   {"GL_EXT_texture_env_combine", NULL},
+   {"GL_EXT_texture_env_dot3", NULL},
+   {"GL_EXT_texture_filter_anisotropic", NULL},
+   {"GL_EXT_texture_lod_bias", NULL},
+   {"GL_3DFX_texture_compression_FXT1", NULL},
+   {"GL_APPLE_client_storage", NULL},
+   {"GL_MESA_pack_invert", NULL},
+   {"GL_MESA_ycbcr_texture", NULL},
+   {"GL_NV_blend_square", NULL},
+   {"GL_NV_vertex_program", GL_NV_vertex_program_functions},
+   {"GL_NV_vertex_program1_1", NULL},
+   { "GL_SGIS_generate_mipmap", NULL },
+   {NULL, NULL}
+};
+/* Registered by intelInitExtensions() when ctx is NULL or the device is IS_965(). */
+static const struct dri_extension brw_extensions[] = {
+   { "GL_ARB_shading_language_100",       GL_VERSION_2_0_functions},
+   { "GL_ARB_shading_language_120",       GL_VERSION_2_1_functions},
+   { "GL_ARB_shader_objects",             GL_ARB_shader_objects_functions},
+   { "GL_ARB_vertex_shader",              GL_ARB_vertex_shader_functions},
+   { "GL_ARB_point_sprite", 		  NULL},
+   { "GL_ARB_fragment_shader",            NULL },
+   { "GL_ARB_draw_buffers",               NULL },
+   { "GL_ARB_depth_texture",              NULL },
+   { "GL_ARB_fragment_program",           NULL },
+   { "GL_ARB_shadow",                     NULL },
+   { "GL_EXT_shadow_funcs",               NULL },
+   { "GL_ARB_fragment_program_shadow",    NULL },
+   /* The ARB extension won't work unless this one is enabled too. */
+   { "GL_SGIX_depth_texture",             NULL },
+   { "GL_EXT_texture_sRGB",		  NULL},
+   { NULL,                                NULL }
+};
+/* Registered when ctx is NULL, or on IS_965() devices with drmMinor >= 8. */
+static const struct dri_extension arb_oc_extensions[] = {
+   {"GL_ARB_occlusion_query",            GL_ARB_occlusion_query_functions},
+   {NULL, NULL}
+};
+/* Registered when ctx is NULL, or when the context has intel->ttm set. */
+static const struct dri_extension ttm_extensions[] = {
+   {"GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions},
+   {"GL_ARB_pixel_buffer_object", NULL},
+   {NULL, NULL}
+};
+
+/**
+ * Registers the driver's extension tables with Mesa.  With ctx == NULL every
+ * table is registered (the potential list); for a real context only the
+ * tables that context qualifies for are enabled.
+ */
+void intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging)
+{
+   struct intel_context *intel = NULL;
+   GLboolean all, is_965;
+
+   if (ctx)
+      intel = intel_context(ctx);
+   all = (intel == NULL);
+   is_965 = !all && IS_965(intel->intelScreen->deviceID);
+
+   /* Imaging stays off until convolution works in the teximage paths. */
+   enable_imaging = GL_FALSE;
+   driInitExtensions(ctx, card_extensions, enable_imaging);
+   if (all || intel->ttm)
+      driInitExtensions(ctx, ttm_extensions, GL_FALSE);
+   if (all || (is_965 && intel->intelScreen->drmMinor >= 8))
+      driInitExtensions(ctx, arb_oc_extensions, GL_FALSE);
+   if (all || is_965)
+      driInitExtensions(ctx, brw_extensions, GL_FALSE);
+}
+/* INTEL_DEBUG keywords; intelInitContext() passes this to driParseDebugString(). */
+static const struct dri_debug_control debug_control[] = {
+   { "tex",   DEBUG_TEXTURE},
+   { "state", DEBUG_STATE},
+   { "ioctl", DEBUG_IOCTL},
+   { "blit",  DEBUG_BLIT},
+   { "mip",   DEBUG_MIPTREE},
+   { "fall",  DEBUG_FALLBACKS},
+   { "verb",  DEBUG_VERBOSE},
+   { "bat",   DEBUG_BATCH},
+   { "pix",   DEBUG_PIXEL},
+   { "buf",   DEBUG_BUFMGR},
+   { "reg",   DEBUG_REGION},
+   { "fbo",   DEBUG_FBO},
+   { "lock",  DEBUG_LOCK},
+   { "sync",  DEBUG_SYNC},
+   { "prim",  DEBUG_PRIMS },
+   { "vert",  DEBUG_VERTS },
+   { "dri",   DEBUG_DRI },
+   { "dma",   DEBUG_DMA },
+   { "san",   DEBUG_SANITY },
+   { "sleep", DEBUG_SLEEP },
+   { "stats", DEBUG_STATS },
+   { "tile",  DEBUG_TILE },
+   { "sing",  DEBUG_SINGLE_THREAD },   /* "sing" and "thre" select the same flag */
+   { "thre",  DEBUG_SINGLE_THREAD },
+   { "wm",    DEBUG_WM },
+   { "urb",   DEBUG_URB },
+   { "vs",    DEBUG_VS },
+   { NULL,    0 }
+};
+
+/* UpdateState hook: pass new_state to each helper module, then to the chip hook. */
+static void
+intelInvalidateState(GLcontext * ctx, GLuint new_state)
+{
+   struct intel_context *const intel = intel_context(ctx);
+
+   /* Helper modules are notified in a fixed order. */
+   _swrast_InvalidateState(ctx, new_state);
+   _swsetup_InvalidateState(ctx, new_state);
+   _vbo_InvalidateState(ctx, new_state);
+   _tnl_InvalidateState(ctx, new_state);
+   _tnl_invalidate_vertex_state(ctx, new_state);
+   intel->NewGLState |= new_state;
+
+   if (intel->vtbl.invalidate_state != NULL)
+      intel->vtbl.invalidate_state(intel, new_state);
+}
+
+/** Flush hook: swrast (when falling back), pending vertices, then the batch. */
+void
+intelFlush(GLcontext * ctx)
+{
+   struct intel_context *intel = intel_context(ctx);
+
+   if (intel->Fallback) {
+      _swrast_flush(ctx);
+   }
+   if (!IS_965(intel->intelScreen->deviceID)) {
+      INTEL_FIREVERTICES(intel);
+   }
+   if (intel->batch->ptr != intel->batch->map) {
+      intel_batchbuffer_flush(intel->batch);
+   }
+
+   /* XXX: Need to do an MI_FLUSH here. */
+}
+/** Finish hook: flush, then wait on and drop the batch's last fence, if any. */
+void
+intelFinish(GLcontext * ctx)
+{
+   struct intel_context *intel = intel_context(ctx);
+   intelFlush(ctx);
+   if (intel->batch->last_fence == NULL)
+      return;
+   dri_fence_wait(intel->batch->last_fence);
+   dri_fence_unreference(intel->batch->last_fence);
+   intel->batch->last_fence = NULL;
+}
+/* BeginQuery hook: reads MMIO_REGS_PS_DEPTH_COUNT into q->Result as the baseline. */
+static void
+intelBeginQuery(GLcontext *ctx, GLenum target, struct gl_query_object *q)
+{
+	struct intel_context *intel = intel_context( ctx );
+	struct drm_i915_mmio depth_read = {
+		.read_write = I915_MMIO_READ,
+		.reg = MMIO_REGS_PS_DEPTH_COUNT,
+		.data = &q->Result
+	};
+	intel->stats_wm += 1;
+	intelFinish(&intel->ctx);
+	drmCommandWrite(intel->driFd, DRM_I915_MMIO, &depth_read, sizeof(depth_read));
+}
+/* EndQuery hook: q->Result becomes the depth-count delta since BeginQuery, or
+ * 0 when the counter read fails (never an indeterminate stack value). */
+static void
+intelEndQuery(GLcontext *ctx, GLenum target, struct gl_query_object *q)
+{
+	struct intel_context *intel = intel_context( ctx );
+	GLuint64EXT tmp = 0;
+	struct drm_i915_mmio io = {
+		.read_write = I915_MMIO_READ, .reg = MMIO_REGS_PS_DEPTH_COUNT,
+		.data = &tmp
+	};
+	intelFinish(&intel->ctx);
+	q->Result = drmCommandWrite(intel->driFd, DRM_I915_MMIO, &io, sizeof(io))
+		? 0 : tmp - q->Result;	/* non-zero return means the ioctl failed */
+	q->Ready = GL_TRUE;
+	intel->stats_wm--;
+}
+
+/**
+ * Fence-emit callback given to the fake (classic) buffer manager.
+ * private is the struct intel_context supplied at registration; the value
+ * returned is whatever intelEmitIrqLocked() produced for it.
+ */
+static unsigned int
+intel_fence_emit(void *private)
+{
+   struct intel_context *owner = private;
+
+   /* XXX: Need to emit a flush, if we haven't already (at least with the
+    * current batchbuffer implementation, we have).
+    */
+   return intelEmitIrqLocked(owner);
+}
+
+/**
+ * Fence-wait callback given to the fake (classic) buffer manager.  Hands
+ * cookie to intelWaitIrq() and always reports success (0).
+ */
+static int
+intel_fence_wait(void *private, unsigned int cookie)
+{
+   intelWaitIrq((struct intel_context *) private, cookie);
+   return 0;
+}
+/**
+ * Picks the buffer manager for a context: TTM when the DDX/DRM support it and
+ * INTEL_NO_TTM is unset, otherwise the classic (fake) manager over the
+ * static texture pool.  Returns GL_FALSE when that pool is empty.
+ */
+static GLboolean
+intel_init_bufmgr(struct intel_context *intel)
+{
+   intelScreenPrivate *intelScreen = intel->intelScreen;
+   GLboolean ttm_disable = getenv("INTEL_NO_TTM") != NULL;
+   GLboolean ttm_supported;
+
+   /* If we've got a new enough DDX that's initializing TTM and giving us
+    * object handles for the shared buffers, use that.
+    */
+   intel->ttm = GL_FALSE;
+   if (intel->intelScreen->driScrnPriv->dri2.enabled)
+       ttm_supported = GL_TRUE;
+   else if (intel->intelScreen->driScrnPriv->ddx_version.minor >= 9 &&
+	    intel->intelScreen->drmMinor >= 11 &&
+	    intel->intelScreen->front.bo_handle != -1)
+       ttm_supported = GL_TRUE;
+   else
+       ttm_supported = GL_FALSE;
+
+   if (!ttm_disable && ttm_supported) {
+      intel->bufmgr = intel_bufmgr_ttm_init(intel->driFd,
+					    DRM_FENCE_TYPE_EXE,
+					    DRM_FENCE_TYPE_EXE |
+					    DRM_I915_FENCE_TYPE_RW,
+					    BATCH_SZ);
+      /* Init can fail and return NULL: only a live TTM manager may be marked
+       * active or passed to intel_ttm_enable_bo_reuse(). */
+      if (intel->bufmgr != NULL) {
+	 intel->ttm = GL_TRUE;
+	 if (driQueryOptioni(&intel->optionCache, "bo_reuse") ==
+	     DRI_CONF_BO_REUSE_ALL)
+	    intel_ttm_enable_bo_reuse(intel->bufmgr);
+      }
+   }
+   /* Otherwise, use the classic buffer manager. */
+   if (intel->bufmgr == NULL) {
+      if (ttm_disable) {
+	 fprintf(stderr, "TTM buffer manager disabled.  Using classic.\n");
+      } else {
+	 fprintf(stderr, "Failed to initialize TTM buffer manager.  "
+		 "Falling back to classic.\n");
+      }
+
+      if (intelScreen->tex.size == 0) {
+	 fprintf(stderr, "[%s:%u] Error initializing buffer manager.\n",
+		 __func__, __LINE__);
+	 return GL_FALSE;
+      }
+
+      intel->bufmgr = dri_bufmgr_fake_init(intelScreen->tex.offset,
+					   intelScreen->tex.map,
+					   intelScreen->tex.size,
+					   intel_fence_emit,
+					   intel_fence_wait,
+					   intel);
+   }
+
+   return GL_TRUE;
+}
+/** Fills a dd_function_table: Mesa defaults first, then the intel overrides. */
+void
+intelInitDriverFunctions(struct dd_function_table *functions)
+{
+   _mesa_init_driver_functions(functions);
+   /* Hooks implemented in this file. */
+   functions->Flush = intelFlush;
+   functions->Finish = intelFinish;
+   functions->GetString = intelGetString;
+   functions->UpdateState = intelInvalidateState;
+   /* Color-table and convolution copies are routed to the swrast versions. */
+   functions->CopyColorTable = _swrast_CopyColorTable;
+   functions->CopyColorSubTable = _swrast_CopyColorSubTable;
+   functions->CopyConvolutionFilter1D = _swrast_CopyConvolutionFilter1D;
+   functions->CopyConvolutionFilter2D = _swrast_CopyConvolutionFilter2D;
+   /* Query hooks; both read MMIO_REGS_PS_DEPTH_COUNT. */
+   functions->BeginQuery = intelBeginQuery;
+   functions->EndQuery = intelEndQuery;
+   /* Presumably each of these installs its own module's hooks - confirm. */
+   intelInitTextureFuncs(functions);
+   intelInitStateFuncs(functions);
+   intelInitBufferFuncs(functions);
+   intelInitPixelFuncs(functions);
+}
+/** Common context setup: Mesa core, buffer manager, limits, helper modules,
+ * options, extensions and batchbuffer.  Returns GL_FALSE on failure. */
+GLboolean
+intelInitContext(struct intel_context *intel,
+                 const __GLcontextModes * mesaVis,
+                 __DRIcontextPrivate * driContextPriv,
+                 void *sharedContextPrivate,
+                 struct dd_function_table *functions)
+{
+   GLcontext *ctx = &intel->ctx;
+   GLcontext *shareCtx = (GLcontext *) sharedContextPrivate;
+   __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
+   intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private;
+   volatile struct drm_i915_sarea *saPriv = (struct drm_i915_sarea *)
+      (((GLubyte *) sPriv->pSAREA) + intelScreen->sarea_priv_offset);
+   int fthrottle_mode;
+
+   if (!_mesa_initialize_context(&intel->ctx, mesaVis, shareCtx,
+                                 functions, (void *) intel)) {
+      _mesa_printf("%s: failed to init mesa context\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   driContextPriv->driverPrivate = intel;
+   intel->intelScreen = intelScreen;
+   intel->driScreen = sPriv;
+   intel->sarea = saPriv;
+
+   /* Dri stuff */
+   intel->hHWContext = driContextPriv->hHWContext;
+   intel->driFd = sPriv->fd;
+   intel->driHwLock = sPriv->lock;
+
+   intel->width = intelScreen->width;
+   intel->height = intelScreen->height;
+
+   driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache,
+                       intel->driScreen->myNum,
+		       IS_965(intelScreen->deviceID) ? "i965" : "i915");
+   if (intelScreen->deviceID == PCI_CHIP_I865_G)
+      intel->maxBatchSize = 4096;
+   else
+      intel->maxBatchSize = BATCH_SZ;
+   /* NOTE(review): this failure return does not undo _mesa_initialize_context(). */
+   if (!intel_init_bufmgr(intel))
+      return GL_FALSE;
+
+   ctx->Const.MaxTextureMaxAnisotropy = 2.0;
+
+   /* This doesn't yet catch all non-conformant rendering, but it's a
+    * start.
+    */
+   if (getenv("INTEL_STRICT_CONFORMANCE")) {
+      intel->strict_conformance = 1;
+   }
+
+   if (intel->strict_conformance) {
+      ctx->Const.MinLineWidth = 1.0;
+      ctx->Const.MinLineWidthAA = 1.0;
+      ctx->Const.MaxLineWidth = 1.0;
+      ctx->Const.MaxLineWidthAA = 1.0;
+      ctx->Const.LineWidthGranularity = 1.0;
+   }
+   else {
+      ctx->Const.MinLineWidth = 1.0;
+      ctx->Const.MinLineWidthAA = 1.0;
+      ctx->Const.MaxLineWidth = 5.0;
+      ctx->Const.MaxLineWidthAA = 5.0;
+      ctx->Const.LineWidthGranularity = 0.5;
+   }
+
+   ctx->Const.MinPointSize = 1.0;
+   ctx->Const.MinPointSizeAA = 1.0;
+   ctx->Const.MaxPointSize = 255.0;
+   ctx->Const.MaxPointSizeAA = 3.0;
+   ctx->Const.PointSizeGranularity = 1.0;
+
+   /* Reinitialize the context point state.
+    * It depends on constants in __GLcontextRec::Const.
+    */
+   _mesa_init_point(ctx);
+
+   ctx->Const.MaxColorAttachments = 4;  /* XXX FBO: review this */
+
+   /* Initialize the software rasterizer and helper modules. */
+   _swrast_CreateContext(ctx);
+   _vbo_CreateContext(ctx);
+   _tnl_CreateContext(ctx);
+   _swsetup_CreateContext(ctx);
+ 
+   /* Configure swrast to match hardware characteristics: */
+   _swrast_allow_pixel_fog(ctx, GL_FALSE);
+   _swrast_allow_vertex_fog(ctx, GL_TRUE);
+
+   intel->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24;
+   intel->hw_stipple = 1;
+
+   /* XXX FBO: this doesn't seem to be used anywhere */
+   switch (mesaVis->depthBits) {
+   case 0:                     /* what to do in this case? */
+   case 16:
+      intel->polygon_offset_scale = 1.0;
+      break;
+   case 24:
+      intel->polygon_offset_scale = 2.0;     /* req'd to pass glean */
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   if (IS_965(intelScreen->deviceID))
+      intel->polygon_offset_scale /= 0xffff;
+
+   intel->RenderIndex = ~0;
+
+   fthrottle_mode = driQueryOptioni(&intel->optionCache, "fthrottle_mode");
+   intel->irqsEmitted = 0;
+
+   intel->do_irqs = (intel->intelScreen->irq_active &&
+                     fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS);
+
+   intel->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
+
+   _math_matrix_ctr(&intel->ViewportMatrix);
+   /* NOTE(review): terminates the whole process when a 965 has no active IRQs. */
+   if (IS_965(intelScreen->deviceID) && !intel->intelScreen->irq_active) {
+      _mesa_printf("IRQs not active.  Exiting\n");
+      exit(1);
+   }
+
+   intelInitExtensions(ctx, GL_FALSE);
+
+   INTEL_DEBUG = driParseDebugString(getenv("INTEL_DEBUG"), debug_control);
+   if (INTEL_DEBUG & DEBUG_BUFMGR)
+      dri_bufmgr_set_debug(intel->bufmgr, GL_TRUE);
+
+   if (!sPriv->dri2.enabled)
+      intel_recreate_static_regions(intel);
+
+   intel->batch = intel_batchbuffer_alloc(intel);
+   intel->last_swap_fence = NULL;
+   intel->first_swap_fence = NULL;
+
+   intel_bufferobj_init(intel);
+   intel_fbo_init(intel);
+
+   if (intel->ctx.Mesa_DXTn) {
+      _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
+      _mesa_enable_extension(ctx, "GL_S3_s3tc");
+   }
+   else if (driQueryOptionb(&intel->optionCache, "force_s3tc_enable")) {
+      _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
+   }
+
+   intel->prim.primitive = ~0;
+
+   /* Force all software fallbacks */
+   if (driQueryOptionb(&intel->optionCache, "no_rast")) {
+      fprintf(stderr, "disabling 3D rasterization\n");
+      FALLBACK(intel, INTEL_FALLBACK_USER, 1);
+      intel->no_rast = 1;
+   }
+
+   /* Disable all hardware rendering (skip emitting batches and fences/waits
+    * to the kernel)
+    */
+   intel->no_hw = getenv("INTEL_NO_HW") != NULL;
+
+   return GL_TRUE;
+}
+/** Tears a context down: chip hook, helper modules, batch, fences, Mesa data, bufmgr. */
+void
+intelDestroyContext(__DRIcontextPrivate * driContextPriv)
+{
+   struct intel_context *intel =
+      (struct intel_context *) driContextPriv->driverPrivate;
+
+   assert(intel);               /* should never be null */
+   if (intel) {
+      GLboolean release_texture_heaps;
+
+      INTEL_FIREVERTICES(intel);
+      /* The chip-specific destroy hook runs before the shared modules go away. */
+      intel->vtbl.destroy(intel);
+
+      release_texture_heaps = (intel->ctx.Shared->RefCount == 1);
+      _swsetup_DestroyContext(&intel->ctx);
+      _tnl_DestroyContext(&intel->ctx);
+      _vbo_DestroyContext(&intel->ctx);
+
+      _swrast_DestroyContext(&intel->ctx);
+      intel->Fallback = 0;      /* don't call _swrast_Flush later */
+
+      intel_batchbuffer_free(intel->batch);
+      /* Wait on any outstanding swap fences before dropping the references. */
+      if (intel->last_swap_fence) {
+	 dri_fence_wait(intel->last_swap_fence);
+	 dri_fence_unreference(intel->last_swap_fence);
+	 intel->last_swap_fence = NULL;
+      }
+      if (intel->first_swap_fence) {
+	 dri_fence_wait(intel->first_swap_fence);
+	 dri_fence_unreference(intel->first_swap_fence);
+	 intel->first_swap_fence = NULL;
+      }
+
+      if (release_texture_heaps) {
+         /* This share group is about to go away, free our private
+          * texture object data.
+          */
+         if (INTEL_DEBUG & DEBUG_TEXTURE)
+            fprintf(stderr, "do something to free texture heaps\n");
+      }
+
+      /* free the Mesa context */
+      _mesa_free_context_data(&intel->ctx);
+      /* The buffer manager is destroyed last, after the Mesa context data. */
+      dri_bufmgr_destroy(intel->bufmgr);
+   }
+}
+/** Unbind hook: performs no work and always reports success. */
+GLboolean
+intelUnbindContext(__DRIcontextPrivate * driContextPriv)
+{
+   return GL_TRUE;
+}
+/** MakeCurrent hook: binds the context to its draw/read drawables, or unbinds
+ * everything when driContextPriv is NULL.  The drawables are dereferenced only
+ * on the bind path, so the unbind path never touches driDrawPriv. */
+GLboolean
+intelMakeCurrent(__DRIcontextPrivate * driContextPriv,
+                 __DRIdrawablePrivate * driDrawPriv,
+                 __DRIdrawablePrivate * driReadPriv)
+{
+   if (driContextPriv) {
+      struct intel_context *intel =
+         (struct intel_context *) driContextPriv->driverPrivate;
+      struct intel_framebuffer *intel_fb =
+	 (struct intel_framebuffer *) driDrawPriv->driverPrivate;
+      GLframebuffer *readFb = (GLframebuffer *) driReadPriv->driverPrivate;
+      __DRIscreenPrivate *psp = driDrawPriv->driScreenPriv;
+
+      /* XXX FBO temporary fix-ups! */
+      /* if the renderbuffers don't have regions, init them from the context */
+      if (!driContextPriv->driScreenPriv->dri2.enabled) {
+         struct intel_renderbuffer *irbDepth
+            = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH);
+         struct intel_renderbuffer *irbStencil
+            = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL);
+
+         if (intel_fb->color_rb[0]) {
+	    intel_renderbuffer_set_region(intel_fb->color_rb[0],
+					  intel->front_region);
+         }
+         if (intel_fb->color_rb[1]) {
+	    intel_renderbuffer_set_region(intel_fb->color_rb[1],
+					  intel->back_region);
+         }
+#if 0
+         if (intel_fb->color_rb[2]) {
+	    intel_renderbuffer_set_region(intel_fb->color_rb[2],
+					  intel->third_region);
+         }
+#endif
+         if (irbDepth) {
+	    intel_renderbuffer_set_region(irbDepth, intel->depth_region);
+         }
+         if (irbStencil) {
+	    intel_renderbuffer_set_region(irbStencil, intel->depth_region);
+         }
+      }
+
+      /* set GLframebuffer size to match window, if needed */
+      driUpdateFramebufferSize(&intel->ctx, driDrawPriv);
+
+      if (driReadPriv != driDrawPriv) {
+	 driUpdateFramebufferSize(&intel->ctx, driReadPriv);
+      }
+
+      _mesa_make_current(&intel->ctx, &intel_fb->Base, readFb);
+
+      /* The drawbuffer won't always be updated by _mesa_make_current: 
+       */
+      if (intel->ctx.DrawBuffer == &intel_fb->Base) {
+
+	 if (intel->driReadDrawable != driReadPriv)
+	    intel->driReadDrawable = driReadPriv;
+
+	 if (intel->driDrawable != driDrawPriv) {
+	    if (driDrawPriv->swap_interval == (unsigned)-1) {
+	       int i;
+
+	       driDrawPriv->vblFlags = (intel->intelScreen->irq_active != 0)
+		  ? driGetDefaultVBlankFlags(&intel->optionCache)
+		 : VBLANK_FLAG_NO_IRQ;
+
+	       (*psp->systemTime->getUST) (&intel_fb->swap_ust);
+	       driDrawableInitVBlank(driDrawPriv);
+	       intel_fb->vbl_waited = driDrawPriv->vblSeq;
+
+	       for (i = 0; i < (intel->intelScreen->third.handle ? 3 : 2); i++) {
+		  if (intel_fb->color_rb[i])
+		     intel_fb->color_rb[i]->vbl_pending = driDrawPriv->vblSeq;
+	       }
+	    }
+	    intel->driDrawable = driDrawPriv;
+	    intelWindowMoved(intel);
+	 }
+
+	 intel_draw_buffer(&intel->ctx, &intel_fb->Base);
+      }
+   }
+   else {
+      _mesa_make_current(NULL, NULL, NULL);
+   }
+
+   return GL_TRUE;
+}
+/* Takes the contended hardware lock, then resyncs owner, texture and size state. */
+static void
+intelContendedLock(struct intel_context *intel, GLuint flags)
+{
+   __DRIdrawablePrivate *dPriv = intel->driDrawable;
+   __DRIscreenPrivate *sPriv = intel->driScreen;
+   volatile struct drm_i915_sarea *sarea = intel->sarea;
+   int me = intel->hHWContext;
+
+   drmGetLock(intel->driFd, intel->hHWContext, flags);
+   intel->locked = 1;
+
+   if (INTEL_DEBUG & DEBUG_LOCK)
+      _mesa_printf("%s - got contended lock\n", __progname);
+
+   /* If the window moved, may need to set a new cliprect now.
+    *
+    * NOTE: This releases and regains the hw lock, so all state
+    * checking must be done *after* this call:
+    */
+   if (dPriv)
+       DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv);
+
+   if (sarea && sarea->ctxOwner != me) {
+      if (INTEL_DEBUG & DEBUG_BUFMGR) {
+	 fprintf(stderr, "Lost Context: sarea->ctxOwner %x me %x\n",
+		 sarea->ctxOwner, me);
+      }
+      sarea->ctxOwner = me;
+   }
+
+   /* If the last consumer of the texture memory wasn't us, notify the fake
+    * bufmgr and record the new owner.  We should have the memory shared
+    * between contexts of a single fake bufmgr, but this will at least make
+    * things correct for now.
+    */
+   if (!intel->ttm && sarea->texAge != intel->hHWContext) {
+      /* Report the stale texAge before it is overwritten just below. */
+      if (INTEL_DEBUG & DEBUG_BUFMGR)
+	 fprintf(stderr, "Lost Textures: sarea->texAge %x hw context %x\n",
+		 sarea->texAge, intel->hHWContext);
+      sarea->texAge = intel->hHWContext;
+      dri_bufmgr_fake_contended_lock_take(intel->bufmgr);
+      if (INTEL_DEBUG & DEBUG_BATCH)
+	 intel_decode_context_reset();
+   }
+   if (sarea->width != intel->width || sarea->height != intel->height) {
+       int numClipRects = intel->numClipRects;
+
+       /*
+	* FIXME: Really only need to do this when drawing to a
+	* common back- or front buffer.
+	*/
+
+       /*
+	* This will essentially drop the outstanding batchbuffer on
+	* the floor.
+	*/
+       intel->numClipRects = 0;
+
+       if (intel->Fallback)
+	   _swrast_flush(&intel->ctx);
+
+       if (!IS_965(intel->intelScreen->deviceID))
+	   INTEL_FIREVERTICES(intel);
+
+       if (intel->batch->map != intel->batch->ptr)
+	   intel_batchbuffer_flush(intel->batch);
+
+       intel->numClipRects = numClipRects;
+
+       /* force window update */
+       intel->lastStamp = 0;
+
+       intel->width = sarea->width;
+       intel->height = sarea->height;
+   }
+
+   /* Drawable changed?
+    */
+   if (dPriv && intel->lastStamp != dPriv->lastStamp) {
+       intelWindowMoved(intel);
+       intel->lastStamp = dPriv->lastStamp;
+   }
+}
+
+
+_glthread_DECLARE_STATIC_MUTEX(lockMutex);	/* locked in LOCK_HARDWARE(), unlocked in UNLOCK_HARDWARE() */
+
+/* Take lockMutex, then the DRM hardware lock.  With DRI2, drawable events are
+ * parsed on every call; otherwise contention defers to intelContendedLock(). */
+void LOCK_HARDWARE( struct intel_context *intel )
+{
+    __DRIdrawable *dPriv = intel->driDrawable;
+    __DRIscreen *sPriv = intel->driScreen;
+    char __ret = 0;	/* NOTE(review): presumably set non-zero by DRM_CAS when the lock is contended — confirm */
+    struct intel_framebuffer *intel_fb = NULL;
+    struct intel_renderbuffer *intel_rb = NULL;
+
+    _glthread_LOCK_MUTEX(lockMutex);
+    assert(!intel->locked);
+    intel->locked = 1;
+
+    if (intel->driDrawable) {
+       intel_fb = intel->driDrawable->driverPrivate;
+
+       if (intel_fb)
+	  intel_rb =
+	     intel_get_renderbuffer(&intel_fb->Base,
+				    intel_fb->Base._ColorDrawBufferIndexes[0]);
+    }
+    /* NOTE(review): the (1<<23) test looks like a wrap-safe "vbl_pending is ahead of vbl_waited" check — confirm. */
+    if (intel_rb && dPriv->vblFlags &&
+	!(dPriv->vblFlags & VBLANK_FLAG_NO_IRQ) &&
+	(intel_fb->vbl_waited - intel_rb->vbl_pending) > (1<<23)) {
+	drmVBlank vbl;
+
+	vbl.request.type = DRM_VBLANK_ABSOLUTE;
+
+	if ( dPriv->vblFlags & VBLANK_FLAG_SECONDARY ) {
+	    vbl.request.type |= DRM_VBLANK_SECONDARY;
+	}
+
+	vbl.request.sequence = intel_rb->vbl_pending;
+	drmWaitVBlank(intel->driFd, &vbl);	/* return value is not checked */
+	intel_fb->vbl_waited = vbl.reply.sequence;
+    }
+
+    DRM_CAS(intel->driHwLock, intel->hHWContext,
+        (DRM_LOCK_HELD|intel->hHWContext), __ret);
+
+    if (sPriv->dri2.enabled) {
+	if (__ret)
+	    drmGetLock(intel->driFd, intel->hHWContext, 0);
+	if (__driParseEvents(dPriv->driContextPriv, dPriv)) {	/* NOTE(review): dPriv is not NULL-checked on this path */
+	    intelWindowMoved(intel);
+	    intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer);
+	}
+    } else if (__ret) {
+        intelContendedLock( intel, 0 );
+    }
+
+
+    if (INTEL_DEBUG & DEBUG_LOCK)
+      _mesa_printf("%s - locked\n", __progname);
+}
+
+
+/* Release the DRM hardware lock and lockMutex taken by LOCK_HARDWARE() for
+ * the given context; the vtbl note_unlock hook runs first, while still locked. */
+void UNLOCK_HARDWARE( struct intel_context *intel )
+{
+   intel->vtbl.note_unlock( intel );
+   intel->locked = 0;
+
+   DRM_UNLOCK(intel->driFd, intel->driHwLock, intel->hHWContext);
+
+   _glthread_UNLOCK_MUTEX(lockMutex);
+
+   if (INTEL_DEBUG & DEBUG_LOCK)
+      _mesa_printf("%s - unlocked\n", __progname);
+
+   /**
+    * Nothing should be left in batch outside of LOCK/UNLOCK which references
+    * cliprects.  Note this check runs after both locks have been dropped.
+    */
+   assert(intel->batch->cliprect_mode != REFERENCES_CLIPRECTS);
+}
+
diff --git a/shared/intel_context.h b/shared/intel_context.h
new file mode 100644
index 0000000..df79ab8
--- /dev/null
+++ b/shared/intel_context.h
@@ -0,0 +1,502 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTELCONTEXT_INC
+#define INTELCONTEXT_INC
+
+
+
+#include "mtypes.h"
+#include "drm.h"
+#include "mm.h"
+#include "texmem.h"
+#include "dri_bufmgr.h"
+
+#include "intel_screen.h"
+#include "intel_tex_obj.h"
+#include "i915_drm.h"
+#include "tnl/t_vertex.h"
+
+#define TAG(x) intel##x
+#include "tnl_dd/t_dd_vertex.h"
+#undef TAG
+
+#define DV_PF_555  (1<<8)	/* DV_PF_*: format selectors tested by INTEL_PACKCOLOR() */
+#define DV_PF_565  (2<<8)
+#define DV_PF_8888 (3<<8)
+
+struct intel_region;
+struct intel_context;
+/* Per-primitive draw hooks; struct intel_context stores them as draw_tri/draw_line/draw_point. */
+typedef void (*intel_tri_func) (struct intel_context *, intelVertex *,
+                                intelVertex *, intelVertex *);
+typedef void (*intel_line_func) (struct intel_context *, intelVertex *,
+                                 intelVertex *);
+typedef void (*intel_point_func) (struct intel_context *, intelVertex *);
+/* Fallback reason bits.  NOTE(review): presumably the 'bit' argument of intelFallback() — confirm. */
+#define INTEL_FALLBACK_DRAW_BUFFER	 0x1
+#define INTEL_FALLBACK_READ_BUFFER	 0x2
+#define INTEL_FALLBACK_DEPTH_BUFFER      0x4
+#define INTEL_FALLBACK_STENCIL_BUFFER    0x8
+#define INTEL_FALLBACK_USER		 0x10
+#define INTEL_FALLBACK_RENDERMODE	 0x20
+#define INTEL_FALLBACK_TEXTURE   	 0x40
+
+extern void intelFallback(struct intel_context *intel, GLuint bit,
+                          GLboolean mode);
+#define FALLBACK( intel, bit, mode ) intelFallback( intel, bit, mode )	/* plain alias for intelFallback() */
+
+
+#define INTEL_WRITE_PART  0x1
+#define INTEL_WRITE_FULL  0x2
+#define INTEL_READ        0x4
+
+#define INTEL_MAX_FIXUP 64
+
+struct intel_context
+{
+   GLcontext ctx;               /* the parent class; must stay first, intel_context() casts GLcontext* to this struct */
+
+   struct
+   {
+      void (*destroy) (struct intel_context * intel);
+      void (*emit_state) (struct intel_context * intel);
+      void (*new_batch) (struct intel_context * intel);
+      void (*emit_invarient_state) (struct intel_context * intel);
+      void (*note_fence) (struct intel_context *intel, GLuint fence);
+      void (*note_unlock) (struct intel_context *intel);	/* called by UNLOCK_HARDWARE() before the lock is dropped */
+      void (*update_texture_state) (struct intel_context * intel);
+
+      void (*render_start) (struct intel_context * intel);
+      void (*render_prevalidate) (struct intel_context * intel);
+      void (*set_draw_region) (struct intel_context * intel,
+                               struct intel_region * draw_regions[],
+                               struct intel_region * depth_region,
+			       GLuint num_regions);
+
+      GLuint (*flush_cmd) (void);
+      void (*emit_flush) (struct intel_context *intel, GLuint unused);
+
+      void (*reduced_primitive_state) (struct intel_context * intel,
+                                       GLenum rprim);
+
+      GLboolean (*check_vertex_size) (struct intel_context * intel,
+				      GLuint expected);
+      void (*invalidate_state) (struct intel_context *intel,
+				GLuint new_state);
+
+
+      /* Metaops: 
+       */
+      void (*install_meta_state) (struct intel_context * intel);
+      void (*leave_meta_state) (struct intel_context * intel);
+
+      void (*meta_draw_region) (struct intel_context * intel,
+                                struct intel_region * draw_region,
+                                struct intel_region * depth_region);
+
+      void (*meta_draw_quad)(struct intel_context *intel,
+			     GLfloat x0, GLfloat x1,
+			     GLfloat y0, GLfloat y1,
+			     GLfloat z,
+			     GLuint color, /* ARGB32 */
+			     GLfloat s0, GLfloat s1,
+			     GLfloat t0, GLfloat t1);
+
+      void (*meta_color_mask) (struct intel_context * intel, GLboolean);
+
+      void (*meta_stencil_replace) (struct intel_context * intel,
+                                    GLuint mask, GLuint clear);
+
+      void (*meta_depth_replace) (struct intel_context * intel);
+
+      void (*meta_texture_blend_replace) (struct intel_context * intel);
+
+      void (*meta_no_stencil_write) (struct intel_context * intel);
+      void (*meta_no_depth_write) (struct intel_context * intel);
+      void (*meta_no_texture) (struct intel_context * intel);
+
+      void (*meta_import_pixel_state) (struct intel_context * intel);
+      void (*meta_frame_buffer_texture) (struct intel_context *intel,
+					 GLint xoff, GLint yoff);
+
+      GLboolean(*meta_tex_rect_source) (struct intel_context * intel,
+					dri_bo * buffer,
+					GLuint offset,
+					GLuint pitch,
+					GLuint height,
+					GLenum format, GLenum type);
+
+      void (*assert_not_dirty) (struct intel_context *intel);
+
+      void (*debug_batch)(struct intel_context *intel);
+   } vtbl;	/* table of function-pointer hooks */
+
+   GLint refcount;
+   GLuint Fallback;
+   GLuint NewGLState;
+
+   dri_bufmgr *bufmgr;
+   unsigned int maxBatchSize;
+
+   struct intel_region *front_region;
+   struct intel_region *back_region;
+   struct intel_region *third_region;
+   struct intel_region *depth_region;
+
+   /**
+    * This value indicates that the kernel memory manager is being used
+    * instead of the fake client-side memory manager.
+    */
+   GLboolean ttm;
+
+   dri_fence *last_swap_fence;
+   dri_fence *first_swap_fence;
+
+   struct intel_batchbuffer *batch;
+   GLboolean no_batch_wrap;
+   unsigned batch_id;
+
+   struct
+   {
+      GLuint id;
+      GLuint primitive;
+      GLubyte *start_ptr;
+      void (*flush) (struct intel_context *);	/* called by INTEL_FIREVERTICES() when non-NULL */
+   } prim;
+
+   GLuint stats_wm;
+   GLboolean locked;	/* set by LOCK_HARDWARE()/intelContendedLock(), cleared by UNLOCK_HARDWARE() */
+   char *prevLockFile;
+   int prevLockLine;
+
+   GLubyte clear_chan[4];
+   GLuint ClearColor565;
+   GLuint ClearColor8888;
+
+   /* Offsets of fields within the current vertex:
+    */
+   GLuint coloroffset;
+   GLuint specoffset;
+   GLuint wpos_offset;
+   GLuint wpos_size;
+
+   struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
+   GLuint vertex_attr_count;
+
+   GLfloat polygon_offset_scale;        /* dependent on depth_scale, bpp */
+
+   GLboolean hw_stencil;
+   GLboolean hw_stipple;
+   GLboolean depth_buffer_is_float;
+   GLboolean no_rast;
+   GLboolean strict_conformance;
+
+   /* State for intelvb.c and inteltris.c.
+    */
+   GLuint RenderIndex;
+   GLmatrix ViewportMatrix;
+   GLenum render_primitive;
+   GLenum reduced_primitive;
+   GLuint vertex_size;
+   GLubyte *verts;              /* points to tnl->clipspace.vertex_buf */
+   struct intel_region *draw_region;
+
+   /* Fallback rasterization functions 
+    */
+   intel_point_func draw_point;
+   intel_line_func draw_line;
+   intel_tri_func draw_tri;
+
+   /* These refer to the current drawing buffer:
+    */
+   int drawX, drawY;            /**< origin of drawing area within region */
+   GLuint numClipRects;         /**< cliprects for drawing */
+   drm_clip_rect_t *pClipRects;
+   struct gl_texture_object *frame_buffer_texobj;
+   drm_clip_rect_t fboRect;     /**< cliprect for FBO rendering */
+
+   int perf_boxes;
+
+   GLuint do_usleeps;
+   int do_irqs;
+   GLuint irqsEmitted;
+
+   GLboolean scissor;
+   drm_clip_rect_t draw_rect;
+   drm_clip_rect_t scissor_rect;
+
+   drm_context_t hHWContext;
+   drmLock *driHwLock;
+   int driFd;
+
+   __DRIdrawablePrivate *driDrawable;
+   __DRIdrawablePrivate *driReadDrawable;
+   __DRIscreenPrivate *driScreen;
+   intelScreenPrivate *intelScreen;
+   volatile struct drm_i915_sarea *sarea;
+
+   GLuint lastStamp;	/* last drawable stamp seen; intelContendedLock() zeroes it to force a window update */
+
+   GLboolean no_hw;
+
+   /**
+    * Configuration cache
+    */
+   driOptionCache optionCache;
+
+   /* Last seen width/height of the screen (compared with the sarea values in intelContendedLock()) */
+   int width;
+   int height;
+
+   int64_t swap_ust;
+   int64_t swap_missed_ust;
+
+   GLuint swap_count;
+   GLuint swap_missed_count;
+};
+
+/* These are functions now:
+ */
+void LOCK_HARDWARE( struct intel_context *intel );
+void UNLOCK_HARDWARE( struct intel_context *intel );
+
+extern char *__progname;
+
+
+#define SUBPIXEL_X 0.125
+#define SUBPIXEL_Y 0.125
+/* Round value up to a multiple of alignment (alignment must be a power of two). */
+#define ALIGN(value, alignment)  (((value) + (alignment) - 1) & ~((alignment) - 1))
+/* Flush pending primitives through the prim.flush hook, if one is installed. */
+#define INTEL_FIREVERTICES(intel)		\
+do {						\
+   if ((intel)->prim.flush)			\
+      (intel)->prim.flush(intel);		\
+} while (0)
+
+/* ================================================================
+ * Color packing: channel arguments are treated as 8-bit values.  Every
+ * argument is parenthesized so that expressions may be passed safely.
+ */
+#define INTEL_PACKCOLOR4444(r,g,b,a) \
+  ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4))
+
+#define INTEL_PACKCOLOR1555(r,g,b,a) \
+  ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \
+    ((a) ? 0x8000 : 0))
+
+#define INTEL_PACKCOLOR565(r,g,b) \
+  ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3))
+
+#define INTEL_PACKCOLOR8888(r,g,b,a) \
+  (((GLuint)(a) << 24) | ((r) << 16) | ((g) << 8) | (b))	/* cast: alpha >= 0x80 must not overflow a signed int */
+
+#define INTEL_PACKCOLOR(format, r,  g,  b, a)		\
+((format) == DV_PF_555 ? INTEL_PACKCOLOR1555(r,g,b,a) :	\
+ ((format) == DV_PF_565 ? INTEL_PACKCOLOR565(r,g,b) :	\
+  ((format) == DV_PF_8888 ? INTEL_PACKCOLOR8888(r,g,b,a) :	\
+   0)))
+
+/* ================================================================
+ * From linux kernel i386 header files, copes with odd sizes better
+ * than COPY_DWORDS would: n/4 dwords are copied first, then the tail.
+ * XXX Put this in src/mesa/main/imports.h ???
+ */
+#if defined(i386) || defined(__i386__)
+static INLINE void * __memcpy(void * to, const void * from, size_t n)
+{
+   int d0, d1, d2;
+   __asm__ __volatile__(
+      "rep ; movsl\n\t"	/* copy n/4 dwords (count preloaded via the "0" input) */
+      "testb $2,%b4\n\t"	/* bit 1 of n set: two more bytes remain */
+      "je 1f\n\t"
+      "movsw\n"
+      "1:\ttestb $1,%b4\n\t"	/* bit 0 of n set: one more byte remains */
+      "je 2f\n\t"
+      "movsb\n"
+      "2:"
+      : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+      :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
+      : "memory");
+   return (to);
+}
+#else
+#define __memcpy(a,b,c) memcpy(a,b,c)	/* other targets: plain memcpy() */
+#endif
+
+
+/* ================================================================
+ * Debugging: INTEL_DEBUG is a bitmask of the DEBUG_* flags below.
+ */
+extern int INTEL_DEBUG;
+
+#define DEBUG_TEXTURE	0x1
+#define DEBUG_STATE	0x2
+#define DEBUG_IOCTL	0x4
+#define DEBUG_BLIT	0x8
+#define DEBUG_MIPTREE   0x10
+#define DEBUG_FALLBACKS	0x20
+#define DEBUG_VERBOSE	0x40
+#define DEBUG_BATCH     0x80
+#define DEBUG_PIXEL     0x100
+#define DEBUG_BUFMGR    0x200
+#define DEBUG_REGION    0x400
+#define DEBUG_FBO       0x800
+#define DEBUG_LOCK      0x1000
+#define DEBUG_SYNC	0x2000
+#define DEBUG_PRIMS	0x4000
+#define DEBUG_VERTS	0x8000
+#define DEBUG_DRI       0x10000
+#define DEBUG_DMA       0x20000
+#define DEBUG_SANITY    0x40000
+#define DEBUG_SLEEP     0x80000
+#define DEBUG_STATS     0x100000
+#define DEBUG_TILE      0x200000
+#define DEBUG_SINGLE_THREAD   0x400000
+#define DEBUG_WM        0x800000
+#define DEBUG_URB       0x1000000
+#define DEBUG_VS        0x2000000
+/* DBG(): printf-style trace, active when the including file's FILE_DEBUG_FLAG bit is set in INTEL_DEBUG. */
+#define DBG(...) do {						\
+	if (INTEL_DEBUG & FILE_DEBUG_FLAG)			\
+		_mesa_printf(__VA_ARGS__);			\
+} while(0)
+
+#define PCI_CHIP_845_G			0x2562
+#define PCI_CHIP_I830_M			0x3577
+#define PCI_CHIP_I855_GM		0x3582
+#define PCI_CHIP_I865_G			0x2572
+#define PCI_CHIP_I915_G			0x2582
+#define PCI_CHIP_I915_GM		0x2592
+#define PCI_CHIP_I945_G			0x2772
+#define PCI_CHIP_I945_GM		0x27A2
+#define PCI_CHIP_I945_GME		0x27AE
+#define PCI_CHIP_G33_G			0x29C2
+#define PCI_CHIP_Q35_G			0x29B2
+#define PCI_CHIP_Q33_G			0x29D2
+
+
+/* ================================================================
+ * intel_context.c:
+ */
+
+extern GLboolean intelInitContext(struct intel_context *intel,
+                                  const __GLcontextModes * mesaVis,
+                                  __DRIcontextPrivate * driContextPriv,
+                                  void *sharedContextPrivate,
+                                  struct dd_function_table *functions);
+
+extern void intelGetLock(struct intel_context *intel, GLuint flags);
+
+extern void intelFinish(GLcontext * ctx);
+extern void intelFlush(GLcontext * ctx);
+
+extern void intelInitDriverFunctions(struct dd_function_table *functions);
+extern void intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging);
+
+
+/* ================================================================
+ * intel_state.c:
+ */
+extern void intelInitStateFuncs(struct dd_function_table *functions);
+
+#define COMPAREFUNC_ALWAYS		0
+#define COMPAREFUNC_NEVER		0x1
+#define COMPAREFUNC_LESS		0x2
+#define COMPAREFUNC_EQUAL		0x3
+#define COMPAREFUNC_LEQUAL		0x4
+#define COMPAREFUNC_GREATER		0x5
+#define COMPAREFUNC_NOTEQUAL		0x6
+#define COMPAREFUNC_GEQUAL		0x7
+
+#define STENCILOP_KEEP			0
+#define STENCILOP_ZERO			0x1
+#define STENCILOP_REPLACE		0x2
+#define STENCILOP_INCRSAT		0x3
+#define STENCILOP_DECRSAT		0x4
+#define STENCILOP_INCR			0x5
+#define STENCILOP_DECR			0x6
+#define STENCILOP_INVERT		0x7
+
+#define LOGICOP_CLEAR			0
+#define LOGICOP_NOR			0x1
+#define LOGICOP_AND_INV 		0x2
+#define LOGICOP_COPY_INV		0x3
+#define LOGICOP_AND_RVRSE		0x4
+#define LOGICOP_INV			0x5
+#define LOGICOP_XOR			0x6
+#define LOGICOP_NAND			0x7
+#define LOGICOP_AND			0x8
+#define LOGICOP_EQUIV			0x9
+#define LOGICOP_NOOP			0xa
+#define LOGICOP_OR_INV			0xb
+#define LOGICOP_COPY			0xc
+#define LOGICOP_OR_RVRSE		0xd
+#define LOGICOP_OR			0xe
+#define LOGICOP_SET			0xf
+
+#define BLENDFACT_ZERO			0x01
+#define BLENDFACT_ONE			0x02
+#define BLENDFACT_SRC_COLR		0x03
+#define BLENDFACT_INV_SRC_COLR 		0x04
+#define BLENDFACT_SRC_ALPHA		0x05
+#define BLENDFACT_INV_SRC_ALPHA 	0x06
+#define BLENDFACT_DST_ALPHA		0x07
+#define BLENDFACT_INV_DST_ALPHA 	0x08
+#define BLENDFACT_DST_COLR		0x09
+#define BLENDFACT_INV_DST_COLR		0x0a
+#define BLENDFACT_SRC_ALPHA_SATURATE	0x0b
+#define BLENDFACT_CONST_COLOR		0x0c
+#define BLENDFACT_INV_CONST_COLOR	0x0d
+#define BLENDFACT_CONST_ALPHA		0x0e
+#define BLENDFACT_INV_CONST_ALPHA	0x0f
+#define BLENDFACT_MASK          	0x0f
+
+enum {
+   DRI_CONF_BO_REUSE_DISABLED,
+   DRI_CONF_BO_REUSE_ALL
+};
+
+extern int intel_translate_shadow_compare_func(GLenum func);
+extern int intel_translate_compare_func(GLenum func);
+extern int intel_translate_stencil_op(GLenum op);
+extern int intel_translate_blend_factor(GLenum factor);
+extern int intel_translate_logic_op(GLenum opcode);
+
+
+/*======================================================================
+ * Inline conversion functions.  Better-typed than the macros used previously.
+ * intel_context() downcasts a GLcontext pointer to its containing
+ * struct intel_context; valid because ctx is that struct's first member. */
+static INLINE struct intel_context *
+intel_context(GLcontext * ctx)
+{
+   return (struct intel_context *) ctx;
+}
+
+#endif
diff --git a/shared/intel_decode.c b/shared/intel_decode.c
new file mode 100644
index 0000000..a124063
--- /dev/null
+++ b/shared/intel_decode.c
@@ -0,0 +1,1047 @@
+/* -*- c-basic-offset: 4 -*- */
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file intel_decode.c
+ * This file contains code to print out batchbuffer contents in a
+ * human-readable format.
+ *
+ * The current version only supports i915 packets, and only pretty-prints a
+ * subset of them.  The intention is for it to make just a best attempt to
+ * decode, but never crash in the process.
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <inttypes.h>
+
+#include "intel_decode.h"
+#include "intel_chipset.h"
+
+#define BUFFER_FAIL(_count, _len, _name) do {			\
+    fprintf(out, "Buffer size too small in %s (%d < %d)\n",	\
+	    (_name), (_count), (_len));				\
+    (*failures)++;						\
+    return count;						\
+} while (0)
+/* NOTE: BUFFER_FAIL returns from the enclosing function and needs `out`, `count` and `failures` in scope. */
+static FILE *out;	/* destination of every fprintf/vfprintf in this file */
+static uint32_t saved_s2 = 0, saved_s4 = 0;	/* S2/S4 words latched from 3DSTATE_LOAD_STATE_IMMEDIATE_1 */
+static char saved_s2_set = 0, saved_s4_set = 0;	/* non-zero once the matching saved_s* value has been captured */
+
+static float
+int_as_float(uint32_t intval)
+{
+    /* Type-pun through a union: the raw bits are reinterpreted, not converted. */
+    union {
+	uint32_t bits;
+	float value;
+    } pun = { .bits = intval };
+
+    return pun.value;
+}
+
+static void
+instr_out(uint32_t *data, uint32_t hw_offset, unsigned int index,
+	  char *fmt, ...)
+{
+    const char *indent = (index != 0) ? "  " : "";	/* dwords after the first are indented */
+    va_list args;
+
+    fprintf(out, "0x%08x: 0x%08x:%s ", hw_offset + index * 4, data[index], indent);
+    va_start(args, fmt);
+    vfprintf(out, fmt, args);
+    va_end(args);
+}
+
+
+static int
+decode_mi(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+    /* Known MI packets: opcode (bits 28:23 of dword 0) and legal dword lengths. */
+    struct {
+	uint32_t opcode;
+	int min_len;
+	int max_len;
+	char *name;
+    } opcodes_mi[] = {
+	{ 0x08, 1, 1, "MI_ARB_ON_OFF" },
+	{ 0x0a, 1, 1, "MI_BATCH_BUFFER_END" },
+	{ 0x31, 2, 2, "MI_BATCH_BUFFER_START" },
+	{ 0x14, 3, 3, "MI_DISPLAY_BUFFER_INFO" },
+	{ 0x04, 1, 1, "MI_FLUSH" },
+	{ 0x22, 3, 3, "MI_LOAD_REGISTER_IMM" },
+	{ 0x13, 2, 2, "MI_LOAD_SCAN_LINES_EXCL" },
+	{ 0x12, 2, 2, "MI_LOAD_SCAN_LINES_INCL" },
+	{ 0x00, 1, 1, "MI_NOOP" },
+	{ 0x11, 2, 2, "MI_OVERLAY_FLIP" },
+	{ 0x07, 1, 1, "MI_REPORT_HEAD" },
+	{ 0x18, 2, 2, "MI_SET_CONTEXT" },
+	{ 0x20, 3, 4, "MI_STORE_DATA_IMM" },
+	{ 0x21, 3, 4, "MI_STORE_DATA_INDEX" },
+	{ 0x24, 3, 3, "MI_STORE_REGISTER_MEM" },
+	{ 0x02, 1, 1, "MI_USER_INTERRUPT" },
+	{ 0x03, 1, 1, "MI_WAIT_FOR_EVENT" },
+    };
+    const unsigned int table_size = sizeof(opcodes_mi) / sizeof(opcodes_mi[0]);
+    const uint32_t mi_opcode = (data[0] & 0x1f800000) >> 23;
+    unsigned int idx;
+
+    for (idx = 0; idx < table_size; idx++) {
+	unsigned int len = 1, dword;
+
+	if (mi_opcode != opcodes_mi[idx].opcode)
+	    continue;
+
+	instr_out(data, hw_offset, 0, "%s\n", opcodes_mi[idx].name);
+	if (opcodes_mi[idx].max_len > 1) {
+	    /* Multi-dword packet: low byte of the header holds (length - 2). */
+	    len = (data[0] & 0x000000ff) + 2;
+	    if (len < opcodes_mi[idx].min_len || len > opcodes_mi[idx].max_len)
+		fprintf(out, "Bad length in %s\n", opcodes_mi[idx].name);
+	}
+
+	/* Dump the remaining dwords, bailing out if the buffer ends early. */
+	for (dword = 1; dword < len; dword++) {
+	    if (dword >= count)
+		BUFFER_FAIL(count, len, opcodes_mi[idx].name);
+	    instr_out(data, hw_offset, dword, "dword %d\n", dword);
+	}
+
+	return len;
+    }
+
+    /* No table entry matched: report it and skip a single dword. */
+    instr_out(data, hw_offset, 0, "MI UNKNOWN\n");
+    (*failures)++;
+    return 1;
+}
+
+static int
+decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+    unsigned int opcode, len;
+    char *format = NULL;
+    /* Decode one 2D packet at data[0]; returns its length in dwords (1 if unknown, count if the buffer is short). */
+    struct {
+	uint32_t opcode;
+	int min_len;
+	int max_len;
+	char *name;
+    } opcodes_2d[] = {
+	{ 0x40, 5, 5, "COLOR_BLT" },
+	{ 0x43, 6, 6, "SRC_COPY_BLT" },
+	{ 0x01, 8, 8, "XY_SETUP_BLT" },
+	{ 0x11, 9, 9, "XY_SETUP_MONO_PATTERN_SL_BLT" },
+	{ 0x03, 3, 3, "XY_SETUP_CLIP_BLT" },
+	{ 0x24, 2, 2, "XY_PIXEL_BLT" },
+	{ 0x25, 3, 3, "XY_SCANLINES_BLT" },
+	{ 0x26, 4, 4, "Y_TEXT_BLT" },
+	{ 0x31, 5, 134, "XY_TEXT_IMMEDIATE_BLT" },
+	{ 0x50, 6, 6, "XY_COLOR_BLT" },
+	{ 0x51, 6, 6, "XY_PAT_BLT" },
+	{ 0x76, 8, 8, "XY_PAT_CHROMA_BLT" },
+	{ 0x72, 7, 135, "XY_PAT_BLT_IMMEDIATE" },
+	{ 0x77, 9, 137, "XY_PAT_CHROMA_BLT_IMMEDIATE" },
+	{ 0x52, 9, 9, "XY_MONO_PAT_BLT" },
+	{ 0x59, 7, 7, "XY_MONO_PAT_FIXED_BLT" },
+	{ 0x53, 8, 8, "XY_SRC_COPY_BLT" },
+	{ 0x54, 8, 8, "XY_MONO_SRC_COPY_BLT" },
+	{ 0x71, 9, 137, "XY_MONO_SRC_COPY_IMMEDIATE_BLT" },
+	{ 0x55, 9, 9, "XY_FULL_BLT" },
+	{ 0x55, 9, 137, "XY_FULL_IMMEDIATE_PATTERN_BLT" },
+	{ 0x56, 9, 9, "XY_FULL_MONO_SRC_BLT" },
+	{ 0x75, 10, 138, "XY_FULL_MONO_SRC_IMMEDIATE_PATTERN_BLT" },
+	{ 0x57, 12, 12, "XY_FULL_MONO_PATTERN_BLT" },
+	{ 0x58, 12, 12, "XY_FULL_MONO_PATTERN_MONO_SRC_BLT" },
+    };
+    /* Opcode is bits 28:22; XY_COLOR_BLT and XY_SRC_COPY_BLT get a field-by-field decode. */
+    switch ((data[0] & 0x1fc00000) >> 22) {
+    case 0x50:
+	instr_out(data, hw_offset, 0,
+		  "XY_COLOR_BLT (rgb %sabled, alpha %sabled)\n",
+		  (data[0] & (1 << 20)) ? "en" : "dis",
+		  (data[0] & (1 << 21)) ? "en" : "dis");
+
+	len = (data[0] & 0x000000ff) + 2;
+	if (len != 6)
+	    fprintf(out, "Bad count in XY_COLOR_BLT\n");
+	if (count < 6)
+	    BUFFER_FAIL(count, len, "XY_COLOR_BLT");
+
+	switch ((data[1] >> 24) & 0x3) {
+	case 0:
+	    format="8";
+	    break;
+	case 1:
+	    format="565";
+	    break;
+	case 2:
+	    format="1555";
+	    break;
+	case 3:
+	    format="8888";
+	    break;
+	}
+
+	instr_out(data, hw_offset, 1, "format %s, pitch %d, "
+		  "clipping %sabled\n", format,
+		  data[1] & 0xffff, data[1] & (1 << 30) ? "en" : "dis");
+	instr_out(data, hw_offset, 2, "(%d,%d)\n",
+		  data[2] & 0xffff, data[2] >> 16);
+	instr_out(data, hw_offset, 3, "(%d,%d)\n",
+		  data[3] & 0xffff, data[3] >> 16);
+	instr_out(data, hw_offset, 4, "offset 0x%08x\n", data[4]);
+	instr_out(data, hw_offset, 5, "color\n");
+	return len;
+    case 0x53:
+	instr_out(data, hw_offset, 0,
+		  "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled)\n",
+		  (data[0] & (1 << 20)) ? "en" : "dis",
+		  (data[0] & (1 << 21)) ? "en" : "dis");
+
+	len = (data[0] & 0x000000ff) + 2;
+	if (len != 8)
+	    fprintf(out, "Bad count in XY_SRC_COPY_BLT\n");
+	if (count < 8)
+	    BUFFER_FAIL(count, len, "XY_SRC_COPY_BLT");
+
+	switch ((data[1] >> 24) & 0x3) {
+	case 0:
+	    format="8";
+	    break;
+	case 1:
+	    format="565";
+	    break;
+	case 2:
+	    format="1555";
+	    break;
+	case 3:
+	    format="8888";
+	    break;
+	}
+
+	instr_out(data, hw_offset, 1, "format %s, dst pitch %d, "
+		  "clipping %sabled\n", format,
+		  data[1] & 0xffff, data[1] & (1 << 30) ? "en" : "dis");
+	instr_out(data, hw_offset, 2, "dst (%d,%d)\n",
+		  data[2] & 0xffff, data[2] >> 16);
+	instr_out(data, hw_offset, 3, "dst (%d,%d)\n",
+		  data[3] & 0xffff, data[3] >> 16);	/* decode dword 3 itself, not dword 2 again */
+	instr_out(data, hw_offset, 4, "dst offset 0x%08x\n", data[4]);
+	instr_out(data, hw_offset, 5, "src (%d,%d)\n",
+		  data[5] & 0xffff, data[5] >> 16);
+	instr_out(data, hw_offset, 6, "src pitch %d\n",
+		  data[6] & 0xffff);
+	instr_out(data, hw_offset, 7, "src offset 0x%08x\n", data[7]);
+	return len;
+    }
+    /* Everything else: print the table name and dump the dwords generically. */
+    for (opcode = 0; opcode < sizeof(opcodes_2d) / sizeof(opcodes_2d[0]);
+	 opcode++) {
+	if ((data[0] & 0x1fc00000) >> 22 == opcodes_2d[opcode].opcode) {
+	    unsigned int i;
+
+	    len = 1;
+	    instr_out(data, hw_offset, 0, "%s\n", opcodes_2d[opcode].name);
+	    if (opcodes_2d[opcode].max_len > 1) {
+		len = (data[0] & 0x000000ff) + 2;
+		if (len < opcodes_2d[opcode].min_len ||
+		    len > opcodes_2d[opcode].max_len)
+		{
+		    fprintf(out, "Bad count in %s\n", opcodes_2d[opcode].name);
+		}
+	    }
+
+	    for (i = 1; i < len; i++) {
+		if (i >= count)
+		    BUFFER_FAIL(count, len, opcodes_2d[opcode].name);
+		instr_out(data, hw_offset, i, "dword %d\n", i);
+	    }
+
+	    return len;
+	}
+    }
+
+    instr_out(data, hw_offset, 0, "2D UNKNOWN\n");
+    (*failures)++;
+    return 1;
+}
+
+static int
+decode_3d_1c(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+    switch ((data[0] & 0x00f80000) >> 19) {	/* sub-opcode: bits 23:19 of dword 0 */
+    case 0x11:
+	instr_out(data, hw_offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE\n");
+	break;
+    case 0x10:
+	instr_out(data, hw_offset, 0, "3DSTATE_SCISSOR_ENABLE\n");
+	break;
+    default:	/* unrecognized sub-opcode: report it and count a failure */
+	instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+	(*failures)++;
+    }
+    return 1;	/* every packet handled here occupies exactly one dword; count is unused */
+}
+
+static int
+decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+    unsigned int len, i, c, opcode, word, map, sampler, instr;
+
+    struct {
+	uint32_t opcode;
+	int min_len;
+	int max_len;
+	char *name;
+    } opcodes_3d_1d[] = {
+	{ 0x8e, 3, 3, "3DSTATE_BUFFER_INFO" },
+	{ 0x86, 4, 4, "3DSTATE_CHROMA_KEY" },
+	{ 0x9c, 1, 1, "3DSTATE_CLEAR_PARAMETERS" },
+	{ 0x88, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" },
+	{ 0x99, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" },
+	{ 0x9a, 2, 2, "3DSTATE_DEFAULT_SPECULAR" },
+	{ 0x98, 2, 2, "3DSTATE_DEFAULT_Z" },
+	{ 0x97, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" },
+	{ 0x85, 2, 2, "3DSTATE_DEST_BUFFER_VARIABLES" },
+	{ 0x80, 5, 5, "3DSTATE_DRAWING_RECTANGLE" },
+	{ 0x8e, 3, 3, "3DSTATE_BUFFER_INFO" },
+	{ 0x9d, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" },
+	{ 0x9e, 4, 4, "3DSTATE_MONO_FILTER" },
+	{ 0x89, 4, 4, "3DSTATE_FOG_MODE" },
+	{ 0x8f, 2, 16, "3DSTATE_MAP_PALLETE_LOAD_32" },
+	{ 0x81, 3, 3, "3DSTATE_SCISSOR_RECTANGLE" },
+	{ 0x83, 2, 2, "3DSTATE_SPAN_STIPPLE" },
+    };
+
+    switch ((data[0] & 0x00ff0000) >> 16) {
+    case 0x07:
+	/* This instruction is unusual.  A 0 length means just 1 DWORD instead of
+	 * 2.  The 0 length is specified in one place to be unsupported, but
+	 * stated to be required in another, and 0 length LOAD_INDIRECTs appear
+	 * to cause no harm at least.
+	 */
+	instr_out(data, hw_offset, 0, "3DSTATE_LOAD_INDIRECT\n");
+	len = (data[0] & 0x000000ff) + 1;
+	i = 1;
+	if (data[0] & (0x01 << 8)) {
+	    if (i + 2 >= count)
+		BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+	    instr_out(data, hw_offset, i++, "SIS.0\n");
+	    instr_out(data, hw_offset, i++, "SIS.1\n");
+	}
+	if (data[0] & (0x02 << 8)) {
+	    if (i + 1 >= count)
+		BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+	    instr_out(data, hw_offset, i++, "DIS.0\n");
+	}
+	if (data[0] & (0x04 << 8)) {
+	    if (i + 2 >= count)
+		BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+	    instr_out(data, hw_offset, i++, "SSB.0\n");
+	    instr_out(data, hw_offset, i++, "SSB.1\n");
+	}
+	if (data[0] & (0x08 << 8)) {
+	    if (i + 2 >= count)
+		BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+	    instr_out(data, hw_offset, i++, "MSB.0\n");
+	    instr_out(data, hw_offset, i++, "MSB.1\n");
+	}
+	if (data[0] & (0x10 << 8)) {
+	    if (i + 2 >= count)
+		BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+	    instr_out(data, hw_offset, i++, "PSP.0\n");
+	    instr_out(data, hw_offset, i++, "PSP.1\n");
+	}
+	if (data[0] & (0x20 << 8)) {
+	    if (i + 2 >= count)
+		BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+	    instr_out(data, hw_offset, i++, "PSC.0\n");
+	    instr_out(data, hw_offset, i++, "PSC.1\n");
+	}
+	if (len != i) {
+	    fprintf(out, "Bad count in 3DSTATE_LOAD_INDIRECT\n");
+	    (*failures)++;
+	    return len;
+	}
+	return len;
+    case 0x04:
+	instr_out(data, hw_offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n");
+	len = (data[0] & 0x0000000f) + 2;
+	i = 1;
+	for (word = 0; word <= 7; word++) {
+	    if (data[0] & (1 << (4 + word))) {
+		if (i >= count)
+		    BUFFER_FAIL(count, len, "3DSTATE_LOAD_STATE_IMMEDIATE_1");
+
+		/* save vertex state for decode */
+		if (word == 2) {
+		    saved_s2_set = 1;
+		    saved_s2 = data[i];
+		}
+		if (word == 4) {
+		    saved_s4_set = 1;
+		    saved_s4 = data[i];
+		}
+
+		instr_out(data, hw_offset, i++, "S%d\n", word);
+	    }
+	}
+	if (len != i) {
+	    fprintf(out, "Bad count in 3DSTATE_LOAD_INDIRECT\n");
+	    (*failures)++;
+	}
+	return len;
+    case 0x00:
+	instr_out(data, hw_offset, 0, "3DSTATE_MAP_STATE\n");
+	len = (data[0] & 0x0000003f) + 2;
+
+	i = 1;
+	for (map = 0; map <= 15; map++) {
+	    if (data[1] & (1 << map)) {
+		if (i + 3 >= count)
+		    BUFFER_FAIL(count, len, "3DSTATE_MAP_STATE");
+		instr_out(data, hw_offset, i++, "map %d MS2\n", map);
+		instr_out(data, hw_offset, i++, "map %d MS3\n", map);
+		instr_out(data, hw_offset, i++, "map %d MS4\n", map);
+	    }
+	}
+	if (len != i) {
+	    fprintf(out, "Bad count in 3DSTATE_MAP_STATE\n");
+	    (*failures)++;
+	    return len;
+	}
+	return len;
+    case 0x06:
+	instr_out(data, hw_offset, 0, "3DSTATE_PIXEL_SHADER_CONSTANTS\n");
+	len = (data[0] & 0x000000ff) + 2;
+
+	i = 1;
+	for (c = 0; c <= 31; c++) {
+	    if (data[1] & (1 << c)) {
+		if (i + 4 >= count)
+		    BUFFER_FAIL(count, len, "3DSTATE_PIXEL_SHADER_CONSTANTS");
+		instr_out(data, hw_offset, i, "C%d.X = %f\n",
+			  c, int_as_float(data[i]));
+		i++;
+		instr_out(data, hw_offset, i, "C%d.Y = %f\n",
+			  c, int_as_float(data[i]));
+		i++;
+		instr_out(data, hw_offset, i, "C%d.Z = %f\n",
+			  c, int_as_float(data[i]));
+		i++;
+		instr_out(data, hw_offset, i, "C%d.W = %f\n",
+			  c, int_as_float(data[i]));
+		i++;
+	    }
+	}
+	if (len != i) {
+	    fprintf(out, "Bad count in 3DSTATE_MAP_STATE\n");
+	    (*failures)++;
+	}
+	return len;
+    case 0x05:
+	instr_out(data, hw_offset, 0, "3DSTATE_PIXEL_SHADER_PROGRAM\n");
+	len = (data[0] & 0x000000ff) + 2;
+	if ((len - 1) % 3 != 0 || len > 370) {
+	    fprintf(out, "Bad count in 3DSTATE_PIXEL_SHADER_PROGRAM\n");
+	    (*failures)++;
+	}
+	i = 1;
+	for (instr = 0; instr < (len - 1) / 3; instr++) {
+	    if (i + 3 >= count)
+		BUFFER_FAIL(count, len, "3DSTATE_MAP_STATE");
+	    instr_out(data, hw_offset, i++, "PS%03x\n", instr);
+	    instr_out(data, hw_offset, i++, "PS%03x\n", instr);
+	    instr_out(data, hw_offset, i++, "PS%03x\n", instr);
+	}
+	return len;
+    case 0x01:
+	instr_out(data, hw_offset, 0, "3DSTATE_SAMPLER_STATE\n");
+	len = (data[0] & 0x0000003f) + 2;
+	i = 1;
+	for (sampler = 0; sampler <= 15; sampler++) {
+	    if (data[1] & (1 << sampler)) {
+		if (i + 3 >= count)
+		    BUFFER_FAIL(count, len, "3DSTATE_SAMPLER_STATE");
+		instr_out(data, hw_offset, i++, "sampler %d SS2\n",
+			  sampler);
+		instr_out(data, hw_offset, i++, "sampler %d SS3\n",
+			  sampler);
+		instr_out(data, hw_offset, i++, "sampler %d SS4\n",
+			  sampler);
+	    }
+	}
+	if (len != i) {
+	    fprintf(out, "Bad count in 3DSTATE_SAMPLER_STATE\n");
+	    (*failures)++;
+	}
+	return len;
+    }
+
+    for (opcode = 0; opcode < sizeof(opcodes_3d_1d) / sizeof(opcodes_3d_1d[0]);
+	 opcode++)
+    {
+	if (((data[0] & 0x00ff0000) >> 16) == opcodes_3d_1d[opcode].opcode) {
+	    len = 1;
+
+	    instr_out(data, hw_offset, 0, "%s\n", opcodes_3d_1d[opcode].name);
+	    if (opcodes_3d_1d[opcode].max_len > 1) {
+		len = (data[0] & 0x0000ffff) + 2;
+		if (len < opcodes_3d_1d[opcode].min_len ||
+		    len > opcodes_3d_1d[opcode].max_len)
+		{
+		    fprintf(out, "Bad count in %s\n",
+			    opcodes_3d_1d[opcode].name);
+		    (*failures)++;
+		}
+	    }
+
+	    for (i = 1; i < len; i++) {
+		if (i >= count)
+		    BUFFER_FAIL(count, len,  opcodes_3d_1d[opcode].name);
+		instr_out(data, hw_offset, i, "dword %d\n", i);
+	    }
+
+	    return len;
+	}
+    }
+
+    instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+    (*failures)++;
+    return 1;
+}
+
+/* Decodes a 3DPRIMITIVE command, either inline vertex data or indirect
+ * vertex indices, and returns the number of dwords consumed.
+ */
+static int
+decode_3d_primitive(uint32_t *data, int count, uint32_t hw_offset,
+		    int *failures)
+{
+    char immediate = (data[0] & (1 << 23)) == 0; /* bit 23 clear: inline */
+    unsigned int len, i;
+    char *primtype;
+
+    switch ((data[0] >> 18) & 0xf) {
+    case 0x0: primtype = "TRILIST"; break;
+    case 0x1: primtype = "TRISTRIP"; break;
+    case 0x2: primtype = "TRISTRIP_REVERSE"; break;
+    case 0x3: primtype = "TRIFAN"; break;
+    case 0x4: primtype = "POLYGON"; break;
+    case 0x5: primtype = "LINELIST"; break;
+    case 0x6: primtype = "LINESTRIP"; break;
+    case 0x7: primtype = "RECTLIST"; break;
+    case 0x8: primtype = "POINTLIST"; break;
+    case 0x9: primtype = "DIB"; break;
+    case 0xa: primtype = "CLEAR_RECT"; break;
+    default: primtype = "unknown"; break;
+    }
+
+    /* XXX: 3DPRIM_DIB not supported */
+    if (immediate) {
+	len = (data[0] & 0x0003ffff) + 2;
+	instr_out(data, hw_offset, 0, "3DPRIMITIVE inline %s\n", primtype);
+	if (count < len)
+	    BUFFER_FAIL(count, len,  "3DPRIMITIVE inline");
+	if (!saved_s2_set || !saved_s4_set) {
+	    /* without S2 and S4 the vertex layout cannot be interpreted */
+	    fprintf(out, "unknown vertex format\n");
+	    for (i = 1; i < len; i++) {
+		instr_out(data, hw_offset, i,
+			  "           vertex data (%f float)\n",
+			  int_as_float(data[i]));
+	    }
+	} else {
+	    unsigned int vertex = 0;
+	    for (i = 1; i < len;) {
+		unsigned int tc;
+
+#define VERTEX_OUT(fmt, ...) do {					\
+    if (i < len)							\
+	instr_out(data, hw_offset, i, " V%d."fmt"\n", vertex, __VA_ARGS__); \
+    else								\
+	fprintf(out, " missing data in V%d\n", vertex);			\
+    i++;								\
+} while (0)
+
+		VERTEX_OUT("X = %f", int_as_float(data[i]));
+		VERTEX_OUT("Y = %f", int_as_float(data[i]));
+	        switch (saved_s4 >> 6 & 0x7) {
+		case 0x1:
+		    VERTEX_OUT("Z = %f", int_as_float(data[i]));
+		    break;
+		case 0x2:
+		    VERTEX_OUT("Z = %f", int_as_float(data[i]));
+		    VERTEX_OUT("W = %f", int_as_float(data[i]));
+		    break;
+		case 0x3:
+		    break;
+		case 0x4:
+		    VERTEX_OUT("W = %f", int_as_float(data[i]));
+		    break;
+		default:
+		    fprintf(out, "bad S4 position mask\n");
+		}
+
+		if (saved_s4 & (1 << 10)) {
+		    VERTEX_OUT("color = (A=0x%02x, R=0x%02x, G=0x%02x, "
+			       "B=0x%02x)",
+			       data[i] >> 24,
+			       (data[i] >> 16) & 0xff,
+			       (data[i] >> 8) & 0xff,
+			       data[i] & 0xff);
+		}
+		if (saved_s4 & (1 << 11)) {
+		    VERTEX_OUT("spec = (A=0x%02x, R=0x%02x, G=0x%02x, "
+			       "B=0x%02x)",
+			       data[i] >> 24,
+			       (data[i] >> 16) & 0xff,
+			       (data[i] >> 8) & 0xff,
+			       data[i] & 0xff);
+		}
+		if (saved_s4 & (1 << 12))
+		    VERTEX_OUT("width = 0x%08x)", data[i]);
+
+		for (tc = 0; tc <= 7; tc++) {
+		    switch ((saved_s2 >> (tc * 4)) & 0xf) {
+		    case 0x0:
+			VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i]));
+			break;
+		    case 0x1:
+			VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.Z = %f", tc, int_as_float(data[i]));
+			break;
+		    case 0x2:
+			VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.Z = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.W = %f", tc, int_as_float(data[i]));
+			break;
+		    case 0x3:
+			VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+			break;
+		    case 0x4:
+			VERTEX_OUT("T%d.XY = 0x%08x half-float", tc, data[i]);
+			break;
+		    case 0x5:
+			VERTEX_OUT("T%d.XY = 0x%08x half-float", tc, data[i]);
+			VERTEX_OUT("T%d.ZW = 0x%08x half-float", tc, data[i]);
+			break;
+		    case 0xf:
+			break;
+		    default:
+			fprintf(out, "bad S2.T%d format\n", tc);
+		    }
+		}
+		vertex++;
+	    }
+	}
+    } else {
+	/* indirect vertices */
+	len = data[0] & 0x0000ffff; /* index count */
+	if (data[0] & (1 << 17)) {
+	    /* random vertex access */
+	    if (count < (len + 1) / 2 + 1)
+		BUFFER_FAIL(count, (len + 1) / 2 + 1,
+			    "3DPRIMITIVE random indirect");
+	    instr_out(data, hw_offset, 0,
+		      "3DPRIMITIVE random indirect %s (%d)\n", primtype, len);
+	    if (len == 0) {
+		/* vertex indices continue until 0xffff is found */
+		for (i = 1; i < count; i++) {
+		    if ((data[i] & 0xffff) == 0xffff) {
+			instr_out(data, hw_offset, i,
+				  "            indices: (terminator)\n");
+			return i + 1; /* dwords 0..i were consumed */
+		    } else if ((data[i] >> 16) == 0xffff) {
+			instr_out(data, hw_offset, i,
+				  "            indices: 0x%04x, "
+				  "(terminator)\n",
+				  data[i] & 0xffff);
+			return i + 1; /* dwords 0..i were consumed */
+		    } else {
+			instr_out(data, hw_offset, i,
+				  "            indices: 0x%04x, 0x%04x\n",
+				  data[i] & 0xffff, data[i] >> 16);
+		    }
+		}
+		fprintf(out,
+			"3DPRIMITIVE: no terminator found in index buffer\n");
+		(*failures)++;
+		return count;
+	    } else {
+		/* fixed size index buffer: two indices per dword from dword 1 */
+		for (i = 1; i < (len + 1) / 2 + 1; i++) {
+		    if (i * 2 > len) /* odd count: last dword is half used */
+			instr_out(data, hw_offset, i,
+				  "            indices: 0x%04x\n",
+				  data[i] & 0xffff);
+		    else
+			instr_out(data, hw_offset, i,
+				  "            indices: 0x%04x, 0x%04x\n",
+				  data[i] & 0xffff, data[i] >> 16);
+		}
+	    }
+	    return (len + 1) / 2 + 1;
+	} else {
+	    /* sequential vertex access */
+	    if (count < 2)
+		BUFFER_FAIL(count, 2, "3DPRIMITIVE seq indirect");
+	    instr_out(data, hw_offset, 0,
+		      "3DPRIMITIVE sequential indirect %s, %d starting from "
+		      "%d\n", primtype, len, data[1] & 0xffff);
+	    instr_out(data, hw_offset, 1, "           start\n");
+	    return 2;
+	}
+    }
+    return len;
+}
+
+/* Decodes one pre-965 3D command, dispatching on bits 28:24 of the header,
+ * and returns the number of dwords consumed.
+ */
+static int
+decode_3d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+    uint32_t type = (data[0] & 0x1f000000) >> 24;
+    unsigned int n;
+    /* Single-dword state commands that are only identified by name. */
+    struct {
+	uint32_t opcode;
+	int min_len;
+	int max_len;
+	char *name;
+    } opcodes_3d[] = {
+	{ 0x06, 1, 1, "3DSTATE_ANTI_ALIASING" },
+	{ 0x08, 1, 1, "3DSTATE_BACKFACE_STENCIL_OPS" },
+	{ 0x09, 1, 1, "3DSTATE_BACKFACE_STENCIL_MASKS" },
+	{ 0x16, 1, 1, "3DSTATE_COORD_SET_BINDINGS" },
+	{ 0x15, 1, 1, "3DSTATE_FOG_COLOR" },
+	{ 0x0b, 1, 1, "3DSTATE_INDEPENDENT_ALPHA_BLEND" },
+	{ 0x0d, 1, 1, "3DSTATE_MODES_4" },
+	{ 0x0c, 1, 1, "3DSTATE_MODES_5" },
+	{ 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" },
+    };
+
+    /* Commands with dedicated decoders. */
+    if (type == 0x1f)
+	return decode_3d_primitive(data, count, hw_offset, failures);
+    if (type == 0x1d)
+	return decode_3d_1d(data, count, hw_offset, failures);
+    if (type == 0x1c)
+	return decode_3d_1c(data, count, hw_offset, failures);
+
+    for (n = 0; n < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]); n++) {
+	unsigned int len = 1, i;
+	if (type != opcodes_3d[n].opcode)
+	    continue;
+
+	instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[n].name);
+	if (opcodes_3d[n].max_len > 1) {
+	    len = (data[0] & 0xff) + 2;
+	    if (len < opcodes_3d[n].min_len || len > opcodes_3d[n].max_len)
+		fprintf(out, "Bad count in %s\n", opcodes_3d[n].name);
+	}
+
+	/* Dump any remaining dwords without interpreting them. */
+	for (i = 1; i < len; i++) {
+	    if (i >= count)
+		BUFFER_FAIL(count, len, opcodes_3d[n].name);
+	    instr_out(data, hw_offset, i, "dword %d\n", i);
+	}
+	return len;
+    }
+
+    instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+    (*failures)++;
+    return 1;
+}
+
+/* Returns a printable name for a 965 surface type field value. */
+static const char *
+get_965_surfacetype(unsigned int surfacetype)
+{
+    static const char *const names[] = {
+	[0] = "1D", [1] = "2D", [2] = "3D", [3] = "CUBE", [4] = "BUFFER",
+	[7] = "NULL",
+    };
+    if (surfacetype >= sizeof(names) / sizeof(names[0]))
+	return "unknown";
+    /* Codes 5 and 6 have no entry and are reported as unknown too. */
+    return names[surfacetype] ? names[surfacetype] : "unknown";
+}
+
+/* Returns a printable name for a 965 depth buffer format field value. */
+static const char *
+get_965_depthformat(unsigned int depthformat)
+{
+    static const char *const names[] = {
+	[0] = "s8_z24float", [1] = "z32float", [2] = "z24s8", [5] = "z16",
+    };
+    if (depthformat < sizeof(names) / sizeof(names[0]) && names[depthformat])
+	return names[depthformat];
+    return "unknown";
+}
+
+static int
+decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+    unsigned int opcode, len;
+    /* Decodes the 965 3D command at data[0]; returns dwords consumed. */
+    struct {
+	uint32_t opcode;
+	int min_len;
+	int max_len;
+	char *name;
+    } opcodes_3d[] = { /* opcode is header bits 31:16, lengths in dwords */
+	{ 0x6000, 3, 3, "URB_FENCE" },
+	{ 0x6001, 2, 2, "CS_URB_STATE" },
+	{ 0x6002, 2, 2, "CONSTANT_BUFFER" },
+	{ 0x6101, 6, 6, "STATE_BASE_ADDRESS" },
+	{ 0x6102, 2, 2 , "STATE_SIP" },
+	{ 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" },
+	{ 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" },
+	{ 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" },
+	{ 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" },
+	{ 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" },
+	{ 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" },
+	{ 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" },
+	{ 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" },
+	/* 0x7808 and 0x7809 have no field-level decode in the switch below; */
+	/* the generic loop at the end of the function dumps their dwords. */
+	{ 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" },
+	{ 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" },
+	{ 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" },
+	{ 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" },
+	{ 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" },
+	{ 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" },
+	{ 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" },
+	{ 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" },
+	{ 0x7b00, 6, 6, "3DPRIMITIVE" },
+    };
+    /* Length used by the switch: low 16 bits of the header, plus 2. */
+    len = (data[0] & 0x0000ffff) + 2;
+
+    switch ((data[0] & 0xffff0000) >> 16) { /* field-level decoders */
+    case 0x6101:
+	if (len != 6)
+	    fprintf(out, "Bad count in STATE_BASE_ADDRESS\n");
+	if (count < 6)
+	    BUFFER_FAIL(count, len, "STATE_BASE_ADDRESS");
+
+	instr_out(data, hw_offset, 0,
+		  "STATE_BASE_ADDRESS\n");
+
+	if (data[1] & 1) { /* bit 0 of each dword gates the update */
+	    instr_out(data, hw_offset, 1, "General state at 0x%08x\n",
+		      data[1] & ~1);
+	} else
+	    instr_out(data, hw_offset, 1, "General state not updated\n");
+
+	if (data[2] & 1) {
+	    instr_out(data, hw_offset, 2, "Surface state at 0x%08x\n",
+		      data[2] & ~1);
+	} else
+	    instr_out(data, hw_offset, 2, "Surface state not updated\n");
+
+	if (data[3] & 1) {
+	    instr_out(data, hw_offset, 3, "Indirect state at 0x%08x\n",
+		      data[3] & ~1);
+	} else
+	    instr_out(data, hw_offset, 3, "Indirect state not updated\n");
+
+	if (data[4] & 1) {
+	    instr_out(data, hw_offset, 4, "General state upper bound 0x%08x\n",
+		      data[4] & ~1);
+	} else
+	    instr_out(data, hw_offset, 4, "General state not updated\n");
+
+	if (data[5] & 1) {
+	    instr_out(data, hw_offset, 5, "Indirect state upper bound 0x%08x\n",
+		      data[5] & ~1);
+	} else
+	    instr_out(data, hw_offset, 5, "Indirect state not updated\n");
+
+	return len;
+    case 0x7800:
+	if (len != 7)
+	    fprintf(out, "Bad count in 3DSTATE_PIPELINED_POINTERS\n");
+	if (count < 7)
+	    BUFFER_FAIL(count, len, "3DSTATE_PIPELINED_POINTERS");
+
+	instr_out(data, hw_offset, 0,
+		  "3DSTATE_PIPELINED_POINTERS\n");
+	instr_out(data, hw_offset, 1, "VS state\n");
+	instr_out(data, hw_offset, 2, "GS state\n");
+	instr_out(data, hw_offset, 3, "Clip state\n");
+	instr_out(data, hw_offset, 4, "SF state\n");
+	instr_out(data, hw_offset, 5, "WM state\n");
+	instr_out(data, hw_offset, 6, "CC state\n");
+	return len;
+    case 0x7801:
+	if (len != 6)
+	    fprintf(out, "Bad count in 3DSTATE_BINDING_TABLE_POINTERS\n");
+	if (count < 6)
+	    BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS");
+
+	instr_out(data, hw_offset, 0,
+		  "3DSTATE_BINDING_TABLE_POINTERS\n");
+	instr_out(data, hw_offset, 1, "VS binding table\n");
+	instr_out(data, hw_offset, 2, "GS binding table\n");
+	instr_out(data, hw_offset, 3, "Clip binding table\n");
+	instr_out(data, hw_offset, 4, "SF binding table\n");
+	instr_out(data, hw_offset, 5, "WM binding table\n");
+
+	return len;
+
+    case 0x7900:
+	if (len != 4)
+	    fprintf(out, "Bad count in 3DSTATE_DRAWING_RECTANGLE\n");
+	if (count < 4)
+	    BUFFER_FAIL(count, len, "3DSTATE_DRAWING_RECTANGLE");
+
+	instr_out(data, hw_offset, 0,
+		  "3DSTATE_DRAWING_RECTANGLE\n");
+	instr_out(data, hw_offset, 1, "top left: %d,%d\n",
+		  data[1] & 0xffff,
+		  (data[1] >> 16) & 0xffff);
+	instr_out(data, hw_offset, 2, "bottom right: %d,%d\n",
+		  data[2] & 0xffff,
+		  (data[2] >> 16) & 0xffff);
+	instr_out(data, hw_offset, 3, "origin: %d,%d\n",
+		  (int)data[3] & 0xffff,
+		  ((int)data[3] >> 16) & 0xffff);
+
+	return len;
+
+    case 0x7905:
+	if (len != 5) /* note: the table above allows 5 to 7 dwords */
+	    fprintf(out, "Bad count in 3DSTATE_DEPTH_BUFFER\n");
+	if (count < 5)
+	    BUFFER_FAIL(count, len, "3DSTATE_DEPTH_BUFFER");
+
+	instr_out(data, hw_offset, 0,
+		  "3DSTATE_DEPTH_BUFFER\n");
+	instr_out(data, hw_offset, 1, "%s, %s, pitch = %d bytes, %stiled\n",
+		  get_965_surfacetype(data[1] >> 29),
+		  get_965_depthformat((data[1] >> 18) & 0x7),
+		  (data[1] & 0x0001ffff) + 1,
+		  data[1] & (1 << 27) ? "" : "not ");
+	instr_out(data, hw_offset, 2, "depth offset\n");
+	instr_out(data, hw_offset, 3, "%dx%d\n",
+		  ((data[3] & 0x0007ffc0) >> 6) + 1,
+		  ((data[3] & 0xfff80000) >> 19) + 1);
+	instr_out(data, hw_offset, 4, "volume depth\n");
+
+	return len;
+    }
+    /* Remaining commands: print the table name and dump each dword. */
+    for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]);
+	 opcode++) {
+	if ((data[0] & 0xffff0000) >> 16 == opcodes_3d[opcode].opcode) {
+	    unsigned int i;
+	    len = 1;
+
+	    instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name);
+	    if (opcodes_3d[opcode].max_len > 1) {
+		len = (data[0] & 0xff) + 2; /* low 8 bits only here */
+		if (len < opcodes_3d[opcode].min_len ||
+		    len > opcodes_3d[opcode].max_len)
+		{
+		    fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name);
+		}
+	    }
+
+	    for (i = 1; i < len; i++) {
+		if (i >= count)
+		    BUFFER_FAIL(count, len, opcodes_3d[opcode].name);
+		instr_out(data, hw_offset, i, "dword %d\n", i);
+	    }
+	    return len;
+	}
+    }
+
+    instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); /* opcode not in table */
+    (*failures)++;
+    return 1;
+}
+
+/**
+ * Decodes an Intel batch buffer, writing the decoded text to stderr.
+ *
+ * Each command is handed to the MI, 2D or 3D decoder according to the top
+ * three bits of its first DWORD; anything else is reported as UNKNOWN.
+ *
+ * \param data batch buffer contents
+ * \param count number of DWORDs to decode in the batch buffer
+ * \param hw_offset hardware address for the buffer
+ * \param devid device id; IS_965(devid) selects the 965 3D decoder
+ * \return number of decode failures that were counted
+ */
+int
+intel_decode(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid)
+{
+    int failures = 0;
+    int pos = 0;
+
+    out = stderr;
+
+    while (pos < count) {
+	uint32_t *cmd = data + pos;
+	uint32_t cmd_offset = hw_offset + pos * 4;
+	int remaining = count - pos;
+	unsigned int client = (cmd[0] & 0xe0000000) >> 29;
+
+	if (client == 0x0) {
+	    pos += decode_mi(cmd, remaining, cmd_offset, &failures);
+	} else if (client == 0x2) {
+	    pos += decode_2d(cmd, remaining, cmd_offset, &failures);
+	} else if (client == 0x3) {
+	    if (IS_965(devid))
+		pos += decode_3d_965(cmd, remaining, cmd_offset, &failures);
+	    else
+		pos += decode_3d(cmd, remaining, cmd_offset, &failures);
+	} else {
+	    instr_out(data, hw_offset, pos, "UNKNOWN\n");
+	    failures++;
+	    pos++;
+	}
+	fflush(out);
+    }
+
+    return failures;
+}
+
+void intel_decode_context_reset(void)
+{
+    saved_s2_set = 0; /* forget both cached S2 and S4 so that inline */
+    saved_s4_set = 0; /* vertices are never decoded with a stale format */
+}
+
diff --git a/shared/intel_decode.h b/shared/intel_decode.h
new file mode 100644
index 0000000..c50644a
--- /dev/null
+++ b/shared/intel_decode.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+int intel_decode(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid);
+void intel_decode_context_reset(void);
diff --git a/shared/intel_depthstencil.c b/shared/intel_depthstencil.c
new file mode 100644
index 0000000..90baecd
--- /dev/null
+++ b/shared/intel_depthstencil.c
@@ -0,0 +1,282 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "imports.h"
+#include "context.h"
+#include "depthstencil.h"
+#include "fbobject.h"
+#include "framebuffer.h"
+#include "hash.h"
+#include "mtypes.h"
+#include "renderbuffer.h"
+
+#include "intel_context.h"
+#include "intel_fbo.h"
+#include "intel_depthstencil.h"
+#include "intel_regions.h"
+
+
+/**
+ * The GL_EXT_framebuffer_object allows the user to create their own
+ * framebuffer objects consisting of color renderbuffers (0 or more),
+ * depth renderbuffers (0 or 1) and stencil renderbuffers (0 or 1).
+ *
+ * The spec considers depth and stencil renderbuffers to be totally independent
+ * buffers.  In reality, most graphics hardware today uses a combined
+ * depth+stencil buffer (one 32-bit pixel = 24 bits of Z + 8 bits of stencil).
+ *
+ * This causes difficulty because the user may create some number of depth
+ * renderbuffers and some number of stencil renderbuffers and bind them
+ * together in framebuffers in any combination.
+ *
+ * This code manages all that.
+ *
+ * 1. Depth renderbuffers are always allocated in hardware as 32bpp
+ *    GL_DEPTH24_STENCIL8 buffers.
+ *
+ * 2. Stencil renderbuffers are initially allocated in software as 8bpp
+ *    GL_STENCIL_INDEX8 buffers.
+ *
+ * 3. Depth and Stencil renderbuffers use the PairedStencil and PairedDepth
+ *    fields (respectively) to indicate if the buffer's currently paired
+ *    with another stencil or depth buffer (respectively).
+ *
+ * 4. When a depth and stencil buffer are initially both attached to the
+ *    current framebuffer, we merge the stencil buffer values into the
+ *    depth buffer (really a depth+stencil buffer).  The hardware then uses
+ *    the combined buffer.
+ *
+ * 5. Whenever a depth or stencil buffer is reallocated (with
+ *    glRenderbufferStorage) we undo the pairing and copy the stencil values
+ *    from the combined depth/stencil buffer back to the stencil-only buffer.
+ *
+ * 6. We also undo the pairing when we find a change in buffer bindings.
+ *
+ * 7. If a framebuffer is only using a depth renderbuffer (no stencil), we
+ *    just use the combined depth/stencil buffer and ignore the stencil values.
+ *
+ * 8. If a framebuffer is only using a stencil renderbuffer (no depth) we have
+ *    to promote the 8bpp software stencil buffer to a 32bpp hardware
+ *    depth+stencil buffer.
+ *
+ */
+
+
+
+/* Map the regions of both renderbuffers, caching map pointer and pitch. */
+static void
+map_regions(GLcontext * ctx, struct intel_renderbuffer *depthRb,
+            struct intel_renderbuffer *stencilRb)
+{
+   struct intel_renderbuffer *rbs[2] = { depthRb, stencilRb };
+   struct intel_context *intel = intel_context(ctx);
+   int n;
+
+   for (n = 0; n < 2; n++) {
+      if (rbs[n] && rbs[n]->region) {
+         intel_region_map(intel, rbs[n]->region);
+         rbs[n]->pfMap = rbs[n]->region->map;
+         rbs[n]->pfPitch = rbs[n]->region->pitch;
+      }
+   }
+}
+
+/* Unmap the regions of both renderbuffers and clear the cached mapping. */
+static void
+unmap_regions(GLcontext * ctx, struct intel_renderbuffer *depthRb,
+              struct intel_renderbuffer *stencilRb)
+{
+   struct intel_renderbuffer *rbs[2] = { depthRb, stencilRb };
+   struct intel_context *intel = intel_context(ctx);
+   int n;
+
+   for (n = 0; n < 2; n++) {
+      if (rbs[n] && rbs[n]->region) {
+         intel_region_unmap(intel, rbs[n]->region);
+         rbs[n]->pfMap = NULL;
+         rbs[n]->pfPitch = 0;
+      }
+   }
+}
+
+
+
+/**
+ * Undo the pairing/interleaving between depth and stencil buffers.
+ * irb should be a depth/stencil or stencil renderbuffer that is paired.
+ */
+void
+intel_unpair_depth_stencil(GLcontext * ctx, struct intel_renderbuffer *irb)
+{
+   if (irb->PairedStencil) {
+      /* irb is a depth/stencil buffer */
+      struct gl_renderbuffer *stencilRb;
+      struct intel_renderbuffer *stencilIrb;
+
+      ASSERT(irb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT);
+
+      stencilRb = _mesa_lookup_renderbuffer(ctx, irb->PairedStencil);
+      stencilIrb = intel_renderbuffer(stencilRb);
+      if (stencilIrb) {
+         /* need to extract stencil values from the depth buffer */
+         ASSERT(stencilIrb->PairedDepth == irb->Base.Name);
+         map_regions(ctx, irb, stencilIrb);
+         _mesa_extract_stencil(ctx, &irb->Base, &stencilIrb->Base);
+         unmap_regions(ctx, irb, stencilIrb);
+         stencilIrb->PairedDepth = 0;
+      }
+      irb->PairedStencil = 0; /* cleared even if the partner was not found */
+   }
+   else if (irb->PairedDepth) {
+      /* irb is a stencil buffer */
+      struct gl_renderbuffer *depthRb;
+      struct intel_renderbuffer *depthIrb;
+
+      ASSERT(irb->Base._ActualFormat == GL_STENCIL_INDEX8_EXT ||
+             irb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT);
+
+      depthRb = _mesa_lookup_renderbuffer(ctx, irb->PairedDepth);
+      depthIrb = intel_renderbuffer(depthRb);
+      if (depthIrb) {
+         /* need to extract stencil values from the depth buffer */
+         ASSERT(depthIrb->PairedStencil == irb->Base.Name);
+         map_regions(ctx, depthIrb, irb);
+         _mesa_extract_stencil(ctx, &depthIrb->Base, &irb->Base);
+         unmap_regions(ctx, depthIrb, irb);
+         depthIrb->PairedStencil = 0;
+      }
+      irb->PairedDepth = 0; /* cleared even if the partner was not found */
+   }
+   else { /* irb is not paired with anything: report using the real name */
+      _mesa_problem(ctx, "Problem in intel_unpair_depth_stencil");
+   }
+
+   ASSERT(irb->PairedStencil == 0);
+   ASSERT(irb->PairedDepth == 0);
+}
+
+
+/**
+ * Examine the depth and stencil renderbuffers which are attached to the
+ * framebuffer fb.  If both are attached as separate buffers, make sure
+ * they are 'paired' (stencil interleaved into the depth buffer).  If only
+ * depth or only stencil is attached, undo any previous pairing.
+ *
+ * Must be called if NewState & _NEW_BUFFER (when renderbuffer attachments
+ * change, for example).
+ */
+void
+intel_validate_paired_depth_stencil(GLcontext * ctx,
+                                    struct gl_framebuffer *fb)
+{
+   struct intel_renderbuffer *depthRb, *stencilRb;
+
+   depthRb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
+   stencilRb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
+
+   if (depthRb && stencilRb) {
+      if (depthRb == stencilRb) {
+         /* Using a user-created combined depth/stencil buffer.
+          * Nothing to do.
+          */
+         ASSERT(depthRb->Base._BaseFormat == GL_DEPTH_STENCIL_EXT);
+         ASSERT(depthRb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT);
+      }
+      else {
+         /* Separate depth/stencil buffers, need to interleave now */
+         ASSERT(depthRb->Base._BaseFormat == GL_DEPTH_COMPONENT);
+         ASSERT(stencilRb->Base._BaseFormat == GL_STENCIL_INDEX);
+         /* may need to interleave depth/stencil now */
+         if (depthRb->PairedStencil == stencilRb->Base.Name) {
+            /* OK, the depth and stencil buffers are already interleaved */
+            ASSERT(stencilRb->PairedDepth == depthRb->Base.Name);
+         }
+         else {
+            /* need to setup new pairing/interleaving */
+            if (depthRb->PairedStencil) { /* paired with some other stencil */
+               intel_unpair_depth_stencil(ctx, depthRb);
+            }
+            if (stencilRb->PairedDepth) { /* paired with some other depth */
+               intel_unpair_depth_stencil(ctx, stencilRb);
+            }
+
+            ASSERT(depthRb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT);
+            ASSERT(stencilRb->Base._ActualFormat == GL_STENCIL_INDEX8_EXT ||
+                   stencilRb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT);
+
+            /* establish new pairing: interleave stencil into depth buffer */
+            map_regions(ctx, depthRb, stencilRb);
+            _mesa_insert_stencil(ctx, &depthRb->Base, &stencilRb->Base);
+            unmap_regions(ctx, depthRb, stencilRb);
+            depthRb->PairedStencil = stencilRb->Base.Name;
+            stencilRb->PairedDepth = depthRb->Base.Name;
+         }
+
+      }
+   }
+   else if (depthRb) {
+      /* Depth buffer but no stencil buffer.
+       * We'll use a GL_DEPTH24_STENCIL8 buffer and ignore the stencil bits.
+       */
+      /* can't assert this until storage is allocated:
+         ASSERT(depthRb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT);
+       */
+      /* undo any previous pairing */
+      if (depthRb->PairedStencil) {
+         intel_unpair_depth_stencil(ctx, depthRb);
+      }
+   }
+   else if (stencilRb) {
+      /* Stencil buffer but no depth buffer.
+       * Since h/w doesn't typically support just 8bpp stencil w/out Z,
+       * we'll use a GL_DEPTH24_STENCIL8 buffer and ignore the depth bits.
+       */
+      /* undo any previous pairing */
+      if (stencilRb->PairedDepth) {
+         intel_unpair_depth_stencil(ctx, stencilRb);
+      }
+      if (stencilRb->Base._ActualFormat == GL_STENCIL_INDEX8_EXT) {
+         /* promote buffer to GL_DEPTH24_STENCIL8 for hw rendering */
+         _mesa_promote_stencil(ctx, &stencilRb->Base);
+         ASSERT(stencilRb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT);
+      }
+   }
+
+   /* Finally, update the fb->_DepthBuffer and fb->_StencilBuffer fields */
+   _mesa_update_depth_buffer(ctx, fb, BUFFER_DEPTH);
+   if (depthRb && depthRb->PairedStencil) /* stencil is in the depth buffer */
+      _mesa_update_stencil_buffer(ctx, fb, BUFFER_DEPTH);
+   else
+      _mesa_update_stencil_buffer(ctx, fb, BUFFER_STENCIL);
+
+
+   /* The hardware should use fb->Attachment[BUFFER_DEPTH].Renderbuffer
+    * first, if present, then fb->Attachment[BUFFER_STENCIL].Renderbuffer
+    * if present.
+    */
+}
diff --git a/shared/intel_depthstencil.h b/shared/intel_depthstencil.h
new file mode 100644
index 0000000..740eb0d
--- /dev/null
+++ b/shared/intel_depthstencil.h
@@ -0,0 +1,15 @@
+
+#ifndef INTEL_DEPTH_STENCIL_H
+#define INTEL_DEPTH_STENCIL_H
+
+#include "intel_fbo.h"
+
+extern void
+intel_unpair_depth_stencil(GLcontext * ctx, struct intel_renderbuffer *irb);
+
+extern void
+intel_validate_paired_depth_stencil(GLcontext * ctx,
+                                    struct gl_framebuffer *fb);
+
+
+#endif /* INTEL_DEPTH_STENCIL_H */
diff --git a/shared/intel_fbo.c b/shared/intel_fbo.c
new file mode 100644
index 0000000..b3f6610
--- /dev/null
+++ b/shared/intel_fbo.c
@@ -0,0 +1,705 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "imports.h"
+#include "mtypes.h"
+#include "fbobject.h"
+#include "framebuffer.h"
+#include "renderbuffer.h"
+#include "context.h"
+#include "texformat.h"
+#include "texrender.h"
+
+#include "intel_context.h"
+#include "intel_buffers.h"
+#include "intel_depthstencil.h"
+#include "intel_fbo.h"
+#include "intel_mipmap_tree.h"
+#include "intel_regions.h"
+#include "intel_span.h"
+
+
+#define FILE_DEBUG_FLAG DEBUG_FBO
+
+#define INTEL_RB_CLASS 0x12345678
+
+
+/* XXX FBO: move this to intel_context.h (inlined) */
+/**
+ * Downcast a gl_renderbuffer pointer to an intel_renderbuffer pointer.
+ * NULL is returned if rb is NULL or is not really an intel_renderbuffer.
+ * This is determined by checking the ClassID against INTEL_RB_CLASS.
+ */
+struct intel_renderbuffer *
+intel_renderbuffer(struct gl_renderbuffer *rb)
+{
+   /* reject NULL and renderbuffers that do not carry our class tag */
+   if (rb == NULL || rb->ClassID != INTEL_RB_CLASS) {
+      return NULL;
+   }
+
+   /* Base is the first member, so both pointers share one address */
+   return (struct intel_renderbuffer *) rb;
+}
+
+
+struct intel_renderbuffer *
+intel_get_renderbuffer(struct gl_framebuffer *fb, int attIndex)
+{
+   /* negative index: no such attachment; otherwise downcast (may be NULL) */
+   if (attIndex < 0)
+      return NULL;
+   return intel_renderbuffer(fb->Attachment[attIndex].Renderbuffer);
+}
+
+
+void
+intel_flip_renderbuffers(struct intel_framebuffer *intel_fb)
+{
+   int current_page = intel_fb->pf_current_page;
+   int next_page = (current_page + 1) % intel_fb->pf_num_pages; /* NOTE(review): assumes pf_num_pages != 0 -- confirm */
+   struct gl_renderbuffer *tmp_rb;
+
+   /* Point FRONT_LEFT at color_rb[current_page] and BACK_LEFT at
+    * color_rb[next_page] when they differ, keeping reference counts balanced.
+    */
+   if (intel_fb->color_rb[current_page] &&
+       intel_fb->Base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer !=
+       &intel_fb->color_rb[current_page]->Base) {
+      tmp_rb = NULL;
+      _mesa_reference_renderbuffer(&tmp_rb,
+	 intel_fb->Base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
+      tmp_rb = &intel_fb->color_rb[current_page]->Base; /* plain assignment, not a counted reference */
+      _mesa_reference_renderbuffer(
+	 &intel_fb->Base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer, tmp_rb);
+      _mesa_reference_renderbuffer(&tmp_rb, NULL);
+   }
+
+   if (intel_fb->color_rb[next_page] &&
+       intel_fb->Base.Attachment[BUFFER_BACK_LEFT].Renderbuffer !=
+       &intel_fb->color_rb[next_page]->Base) {
+      tmp_rb = NULL;
+      _mesa_reference_renderbuffer(&tmp_rb,
+	 intel_fb->Base.Attachment[BUFFER_BACK_LEFT].Renderbuffer);
+      tmp_rb = &intel_fb->color_rb[next_page]->Base; /* same sequence as for the front buffer */
+      _mesa_reference_renderbuffer(
+	 &intel_fb->Base.Attachment[BUFFER_BACK_LEFT].Renderbuffer, tmp_rb);
+      _mesa_reference_renderbuffer(&tmp_rb, NULL);
+   }
+}
+
+
+/* Return the region backing framebuffer attachment attIndex, or NULL when
+ * that attachment does not hold an intel renderbuffer. */
+struct intel_region *
+intel_get_rb_region(struct gl_framebuffer *fb, GLuint attIndex)
+{
+   struct intel_renderbuffer *rb = intel_get_renderbuffer(fb, attIndex);
+
+   /* rb->region itself may still be NULL if nothing was allocated yet */
+   return rb ? rb->region : NULL;
+}
+
+
+
+/**
+ * Driver.NewFramebuffer hook: create a new framebuffer object named 'name'.
+ */
+static struct gl_framebuffer *
+intel_new_framebuffer(GLcontext * ctx, GLuint name)
+{
+   /* intel_framebuffer only adds drawable state (page flipping, swap
+    * counters) at this time, so Mesa's plain gl_framebuffer class is used.
+    */
+   return _mesa_new_framebuffer(ctx, name);
+}
+
+
+static void
+intel_delete_renderbuffer(struct gl_renderbuffer *rb)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+   struct intel_context *intel = intel_context(ctx);
+
+   ASSERT(irb);
+
+   /* break any depth/stencil pairing before the buffer goes away */
+   if (irb->PairedDepth || irb->PairedStencil)
+      intel_unpair_depth_stencil(ctx, irb);
+
+   /* drop our region reference, but only when an intel context is at hand */
+   if (irb->region && intel)
+      intel_region_release(&irb->region);
+
+   _mesa_free(irb);
+}
+
+
+
+/**
+ * GetPointer hook for intel renderbuffers; never hands out a pixel address.
+ */
+static void *
+intel_get_pointer(GLcontext * ctx, struct gl_renderbuffer *rb,
+                  GLint x, GLint y)
+{
+   /* By returning NULL we force all software rendering to go through
+    * the span routines.  All four parameters are deliberately unused.
+    */
+   return NULL;
+}
+
+
+
+/**
+ * Called via glRenderbufferStorageEXT() to set the format and allocate
+ * storage for a user-created renderbuffer.  Returns GL_FALSE on failure.
+ */
+static GLboolean
+intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
+                                 GLenum internalFormat,
+                                 GLuint width, GLuint height)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+   GLboolean softwareBuffer = GL_FALSE;   /* never set to GL_TRUE in this function */
+   int cpp;                               /* 2 or 4, chosen by the switch below; used as bytes per pixel */
+
+   ASSERT(rb->Name != 0);
+
+   switch (internalFormat) {
+   case GL_R3_G3_B2:
+   case GL_RGB4:
+   case GL_RGB5:
+      rb->_ActualFormat = GL_RGB5;
+      rb->DataType = GL_UNSIGNED_BYTE;
+      rb->RedBits = 5;
+      rb->GreenBits = 6;                  /* NOTE(review): 5/6/5 bits although _ActualFormat says GL_RGB5 */
+      rb->BlueBits = 5;
+      cpp = 2;
+      break;
+   case GL_RGB:
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+   case GL_RGBA:
+   case GL_RGBA2:
+   case GL_RGBA4:
+   case GL_RGB5_A1:
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+      rb->_ActualFormat = GL_RGBA8;
+      rb->DataType = GL_UNSIGNED_BYTE;
+      rb->RedBits = 8;
+      rb->GreenBits = 8;
+      rb->BlueBits = 8;
+      rb->AlphaBits = 8;
+      cpp = 4;
+      break;
+   case GL_STENCIL_INDEX:
+   case GL_STENCIL_INDEX1_EXT:
+   case GL_STENCIL_INDEX4_EXT:
+   case GL_STENCIL_INDEX8_EXT:
+   case GL_STENCIL_INDEX16_EXT:
+      /* alloc a depth+stencil buffer */
+      rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT;
+      rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
+      rb->StencilBits = 8;
+      cpp = 4;
+      break;
+   case GL_DEPTH_COMPONENT16:
+      rb->_ActualFormat = GL_DEPTH_COMPONENT16;
+      rb->DataType = GL_UNSIGNED_SHORT;
+      rb->DepthBits = 16;
+      cpp = 2;
+      break;
+   case GL_DEPTH_COMPONENT:
+   case GL_DEPTH_COMPONENT24:
+   case GL_DEPTH_COMPONENT32:
+      rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT;
+      rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
+      rb->DepthBits = 24;
+      cpp = 4;
+      break;
+   case GL_DEPTH_STENCIL_EXT:
+   case GL_DEPTH24_STENCIL8_EXT:
+      rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT;
+      rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
+      rb->DepthBits = 24;
+      rb->StencilBits = 8;
+      cpp = 4;
+      break;
+   default:
+      _mesa_problem(ctx,
+                    "Unexpected format in intel_alloc_renderbuffer_storage");
+      return GL_FALSE;
+   }
+
+   intelFlush(ctx);
+
+   /* free old region (it is gone even if the allocation below fails) */
+   if (irb->region) {
+      intel_region_release(&irb->region);
+   }
+
+   /* allocate new memory region/renderbuffer */
+   if (softwareBuffer) {
+      return _mesa_soft_renderbuffer_storage(ctx, rb, internalFormat,
+                                             width, height);
+   }
+   else {
+      /* Choose a pitch to match hardware requirements: the row size in bytes
+       * is rounded up to a multiple of 64, then converted back to pixels. */
+      GLuint pitch = ((cpp * width + 63) & ~63) / cpp;
+
+      /* alloc hardware renderbuffer */
+      DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width,
+	  height, pitch);
+
+      irb->region = intel_region_alloc(intel, cpp, pitch, height);
+      if (!irb->region)
+         return GL_FALSE;       /* out of memory? */
+
+      ASSERT(irb->region->buffer);
+
+      rb->Width = width;        /* size fields are only updated on success */
+      rb->Height = height;
+
+      /* This sets the Get/PutRow/Value functions */
+      intel_set_span_functions(&irb->Base);
+
+      return GL_TRUE;
+   }
+}
+
+
+
+/**
+ * AllocStorage hook for window-system (Name == 0) renderbuffers, called
+ * when a _window_ is resized.  Nothing is allocated: only the size and
+ * format fields are recorded.  Not used for user-created renderbuffers!
+ */
+static GLboolean
+intel_alloc_window_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
+                           GLenum internalFormat, GLuint width, GLuint height)
+{
+   ASSERT(rb->Name == 0);
+
+   rb->_ActualFormat = internalFormat;
+   rb->Height = height;
+   rb->Width = width;
+   return GL_TRUE;
+}
+
+/* ResizeBuffers hook: resize fb through Mesa, then bring every present
+ * window-system color renderbuffer to the new size as well. */
+static void
+intel_resize_buffers(GLcontext *ctx, struct gl_framebuffer *fb,
+		     GLuint width, GLuint height)
+{
+   struct intel_framebuffer *intel_fb = (struct intel_framebuffer*)fb;
+   int i;
+
+   _mesa_resize_framebuffer(ctx, fb, width, height);
+   fb->Initialized = GL_TRUE; /* XXX remove someday */
+
+   /* user FBOs (Name != 0) are plain gl_framebuffers: no color_rb[] to walk */
+   if (fb->Name != 0)
+      return;
+
+   for (i = 0; i < 3; i++) {
+      struct intel_renderbuffer *irb = intel_fb->color_rb[i];
+      struct gl_renderbuffer *rb = irb ? &irb->Base : NULL;
+      /* test the slot itself (never form &NULL->Base); skip same-size rbs */
+      if (rb && (rb->Width != width || rb->Height != height)) {
+	 rb->AllocStorage(ctx, rb, rb->InternalFormat, width, height);
+      }
+   }
+}
+
+static GLboolean
+intel_nop_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
+                        GLenum internalFormat, GLuint width, GLuint height)
+{
+   _mesa_problem(ctx, "intel_nop_alloc_storage should never be called.");
+   return GL_FALSE;   /* AllocStorage stub installed on texture wrappers; always fails */
+}
+
+
+void
+intel_renderbuffer_set_region(struct intel_renderbuffer *rb,
+			      struct intel_region *region)
+{
+   struct intel_region *old;
+
+   old = rb->region;                              /* remember the current region, if any */
+   rb->region = NULL;
+   intel_region_reference(&rb->region, region);   /* take the new region first ... */
+   intel_region_release(&old);                    /* ... then let go of the previous one */
+
+   rb->pfMap = region->map;                       /* NOTE(review): region is dereferenced here, so it must not be NULL */
+   rb->pfPitch = region->pitch;
+}
+
+/**
+ * Create a new intel_renderbuffer which corresponds to an on-screen window,
+ * not a user-created renderbuffer.  Returns NULL on failure.
+ */
+struct intel_renderbuffer *
+intel_create_renderbuffer(GLenum intFormat)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct intel_renderbuffer *irb;
+   const GLuint name = 0;
+
+   irb = CALLOC_STRUCT(intel_renderbuffer);
+   if (!irb) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "creating renderbuffer");
+      return NULL;
+   }
+
+   _mesa_init_renderbuffer(&irb->Base, name);
+   irb->Base.ClassID = INTEL_RB_CLASS;
+
+   switch (intFormat) {
+   case GL_RGB5:
+      irb->Base._ActualFormat = GL_RGB5;
+      irb->Base._BaseFormat = GL_RGBA;
+      irb->Base.RedBits = 5;
+      irb->Base.GreenBits = 6;
+      irb->Base.BlueBits = 5;
+      irb->Base.DataType = GL_UNSIGNED_BYTE;
+      break;
+   case GL_RGBA8:
+      irb->Base._ActualFormat = GL_RGBA8;
+      irb->Base._BaseFormat = GL_RGBA;
+      irb->Base.RedBits = 8;
+      irb->Base.GreenBits = 8;
+      irb->Base.BlueBits = 8;
+      irb->Base.AlphaBits = 8;
+      irb->Base.DataType = GL_UNSIGNED_BYTE;
+      break;
+   case GL_STENCIL_INDEX8_EXT:
+      irb->Base._ActualFormat = GL_STENCIL_INDEX8_EXT;
+      irb->Base._BaseFormat = GL_STENCIL_INDEX;
+      irb->Base.StencilBits = 8;
+      irb->Base.DataType = GL_UNSIGNED_BYTE;
+      break;
+   case GL_DEPTH_COMPONENT16:
+      irb->Base._ActualFormat = GL_DEPTH_COMPONENT16;
+      irb->Base._BaseFormat = GL_DEPTH_COMPONENT;
+      irb->Base.DepthBits = 16;
+      irb->Base.DataType = GL_UNSIGNED_SHORT;
+      break;
+   case GL_DEPTH_COMPONENT24:
+      irb->Base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
+      irb->Base._BaseFormat = GL_DEPTH_COMPONENT;
+      irb->Base.DepthBits = 24;
+      irb->Base.DataType = GL_UNSIGNED_INT;
+      break;
+   case GL_DEPTH24_STENCIL8_EXT:
+      irb->Base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
+      irb->Base._BaseFormat = GL_DEPTH_STENCIL_EXT;
+      irb->Base.DepthBits = 24;
+      irb->Base.StencilBits = 8;
+      irb->Base.DataType = GL_UNSIGNED_INT_24_8_EXT;
+      break;
+   default:
+      _mesa_problem(NULL,
+                    "Unexpected intFormat in intel_create_renderbuffer");
+      _mesa_free(irb);          /* don't leak the half-built renderbuffer */
+      return NULL;
+   }
+
+   irb->Base.InternalFormat = intFormat;
+
+   /* intel-specific methods */
+   irb->Base.Delete = intel_delete_renderbuffer;
+   irb->Base.AllocStorage = intel_alloc_window_storage;
+   irb->Base.GetPointer = intel_get_pointer;
+   /* This sets the Get/PutRow/Value functions */
+   intel_set_span_functions(&irb->Base);
+
+   return irb;
+}
+
+
+/**
+ * Driver.NewRenderbuffer hook: create a new renderbuffer object.
+ * Typically called via glBindRenderbufferEXT().  No storage is attached yet.
+ */
+static struct gl_renderbuffer *
+intel_new_renderbuffer(GLcontext * ctx, GLuint name)
+{
+   struct intel_renderbuffer *irb = CALLOC_STRUCT(intel_renderbuffer);
+   struct gl_renderbuffer *rb;
+
+   if (irb == NULL) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "creating renderbuffer");
+      return NULL;
+   }
+
+   rb = &irb->Base;
+   _mesa_init_renderbuffer(rb, name);
+   rb->ClassID = INTEL_RB_CLASS;   /* lets intel_renderbuffer() recognise it */
+
+   /* Driver methods; the span functions are installed later, by
+    * intel_alloc_renderbuffer_storage. */
+   rb->Delete = intel_delete_renderbuffer;
+   rb->AllocStorage = intel_alloc_renderbuffer_storage;
+   rb->GetPointer = intel_get_pointer;
+
+   return rb;
+}
+
+
+/**
+ * Called via glBindFramebufferEXT().  Only draw-framebuffer binds matter.
+ */
+static void
+intel_bind_framebuffer(GLcontext * ctx, GLenum target,
+                       struct gl_framebuffer *fb, struct gl_framebuffer *fbread)
+{
+   /* don't need to do anything if target == GL_READ_FRAMEBUFFER_EXT */
+   if (target != GL_FRAMEBUFFER_EXT && target != GL_DRAW_FRAMEBUFFER_EXT)
+      return;
+
+   intel_draw_buffer(ctx, fb);
+
+   /* Integer depth range depends on depth buffer bits */
+   if (ctx->Driver.DepthRange)
+      ctx->Driver.DepthRange(ctx, ctx->Viewport.Near, ctx->Viewport.Far);
+}
+
+
+/**
+ * Called via glFramebufferRenderbufferEXT().  rb may be NULL (see the DBG).
+ */
+static void
+intel_framebuffer_renderbuffer(GLcontext * ctx,
+                               struct gl_framebuffer *fb,
+                               GLenum attachment, struct gl_renderbuffer *rb)
+{
+   DBG("Intel FramebufferRenderbuffer %u %u\n", fb->Name, rb ? rb->Name : 0);
+
+   intelFlush(ctx);             /* flush before the attachment is changed */
+
+   _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb); /* presumably core Mesa performs the actual attach */
+   intel_draw_buffer(ctx, fb);  /* then let the driver re-evaluate its draw buffer state for fb */
+}
+
+static GLboolean
+intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, 
+                          struct gl_texture_image *texImage)
+{  /* Refresh irb's format, size and methods from texImage; GL_FALSE if the texture format is not handled. */
+   if (texImage->TexFormat == &_mesa_texformat_argb8888) {
+      irb->Base._ActualFormat = GL_RGBA8;
+      irb->Base._BaseFormat = GL_RGBA;
+      DBG("Render to RGBA8 texture OK\n");
+   }
+   else if (texImage->TexFormat == &_mesa_texformat_rgb565) {
+      irb->Base._ActualFormat = GL_RGB5;
+      irb->Base._BaseFormat = GL_RGB;
+      DBG("Render to RGB5 texture OK\n");
+   }
+   else if (texImage->TexFormat == &_mesa_texformat_z16) {
+      irb->Base._ActualFormat = GL_DEPTH_COMPONENT16;
+      irb->Base._BaseFormat = GL_DEPTH_COMPONENT;
+      DBG("Render to DEPTH16 texture OK\n");
+   } else if (texImage->TexFormat == &_mesa_texformat_z24_s8) {
+      irb->Base._ActualFormat = GL_DEPTH24_STENCIL8_EXT;
+      irb->Base._BaseFormat = GL_DEPTH_STENCIL_EXT;
+      DBG("Render to DEPTH_STENCIL texture OK\n");
+   }
+   else {
+      DBG("Render to texture BAD FORMAT %d\n",
+	  texImage->TexFormat->MesaFormat);
+      return GL_FALSE;          /* irb is left untouched on this path */
+   }
+
+   irb->Base.InternalFormat = irb->Base._ActualFormat;
+   irb->Base.Width = texImage->Width;
+   irb->Base.Height = texImage->Height;
+   irb->Base.DataType = GL_UNSIGNED_BYTE;       /* FBO XXX fix */
+   irb->Base.RedBits = texImage->TexFormat->RedBits;
+   irb->Base.GreenBits = texImage->TexFormat->GreenBits;
+   irb->Base.BlueBits = texImage->TexFormat->BlueBits;
+   irb->Base.AlphaBits = texImage->TexFormat->AlphaBits;
+   irb->Base.DepthBits = texImage->TexFormat->DepthBits;   /* NOTE(review): StencilBits is not updated here */
+
+   irb->Base.Delete = intel_delete_renderbuffer;
+   irb->Base.AllocStorage = intel_nop_alloc_storage;       /* reports a problem if it is ever called */
+   intel_set_span_functions(&irb->Base);
+
+   irb->RenderToTexture = GL_TRUE;
+
+   return GL_TRUE;
+}
+
+/**
+ * Build the intel_renderbuffer that wraps a texture image when
+ * glFramebufferTexture[123]D is called.
+ * Returns NULL if memory runs out or if intel_update_wrapper rejects the
+ * image's format.
+ */
+static struct intel_renderbuffer *
+intel_wrap_texture(GLcontext * ctx, struct gl_texture_image *texImage)
+{
+   const GLuint name = ~0;      /* not significant, but distinct for debugging */
+   struct intel_renderbuffer *wrapper = CALLOC_STRUCT(intel_renderbuffer);
+
+   if (wrapper == NULL) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glFramebufferTexture");
+      return NULL;
+   }
+
+   _mesa_init_renderbuffer(&wrapper->Base, name);
+   wrapper->Base.ClassID = INTEL_RB_CLASS;
+
+   /* copy format and size from the image; hand the wrapper out on success */
+   if (intel_update_wrapper(ctx, wrapper, texImage))
+      return wrapper;
+
+   /* unsupported texture format: throw the half-built wrapper away */
+   _mesa_free(wrapper);
+   return NULL;
+}
+
+
+/**
+ * RenderTexture hook, called by glFramebufferTexture[123]DEXT() (and other
+ * places) to prepare for rendering into texture memory.  It may run many
+ * times, selecting different texture levels, cube faces, etc, before
+ * intel_finish_render_texture() is ever called.
+ */
+static void
+intel_render_texture(GLcontext * ctx,
+                     struct gl_framebuffer *fb,
+                     struct gl_renderbuffer_attachment *att)
+{
+   struct gl_texture_image *newImage
+      = att->Texture->Image[att->CubeMapFace][att->TextureLevel];
+   struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer);
+   struct intel_texture_image *intel_image;
+   GLuint imageOffset;
+
+   ASSERT(newImage);
+
+   if (irb == NULL) {
+      /* no intel wrapper on this attachment yet: try to create one */
+      irb = intel_wrap_texture(ctx, newImage);
+      if (irb == NULL) {
+         /* fallback to software rendering */
+         _mesa_render_texture(ctx, fb, att);
+         return;
+      }
+      /* bind the wrapper to the attachment point */
+      _mesa_reference_renderbuffer(&att->Renderbuffer, &irb->Base);
+   }
+
+   /* sync the wrapper with the selected image; runs for new wrappers too */
+   if (!intel_update_wrapper(ctx, irb, newImage)) {
+      _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
+      _mesa_render_texture(ctx, fb, att);
+      return;
+   }
+
+   DBG("Begin render texture tid %x tex=%u w=%d h=%d refcount=%d\n",
+       _glthread_GetID(),
+       att->Texture->Name, newImage->Width, newImage->Height,
+       irb->Base.RefCount);
+
+   /* point the renderbuffer's region to the texture image region */
+   intel_image = intel_texture_image(newImage);
+   if (irb->region != intel_image->mt->region) {
+      if (irb->region)
+	 intel_region_release(&irb->region);
+      intel_region_reference(&irb->region, intel_image->mt->region);
+   }
+
+   /* compute offset of the particular 2D image within the texture region */
+   imageOffset = intel_miptree_image_offset(intel_image->mt,
+                                            att->CubeMapFace,
+                                            att->TextureLevel);
+
+   if (att->Texture->Target == GL_TEXTURE_3D) {
+      const GLuint *offsets = intel_miptree_depth_offsets(intel_image->mt,
+                                                          att->TextureLevel);
+      imageOffset += offsets[att->Zoffset];
+   }
+
+   /* store that offset in the region */
+   intel_image->mt->region->draw_offset = imageOffset;
+
+   /* update drawing region, etc */
+   intel_draw_buffer(ctx, fb);
+}
+
+
+/**
+ * FinishRenderTexture hook: called by Mesa when rendering to a texture is done.
+ */
+static void
+intel_finish_render_texture(GLcontext * ctx,
+                            struct gl_renderbuffer_attachment *att)
+{
+   struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer);
+
+   DBG("End render texture (tid %x) tex %u\n", _glthread_GetID(), att->Texture->Name);
+
+   if (irb != NULL) {
+      /* just release the region */
+      intel_region_release(&irb->region);
+      return;
+   }
+
+   /* software fallback; XXX FBO: Need to unmap the buffer (or in intelSpanRenderStart???) */
+   if (att->Renderbuffer)
+      _mesa_finish_render_texture(ctx, att);
+}
+
+
+/**
+ * One-time GL_EXT_framebuffer_object setup: hook in the driver functions.
+ */
+void
+intel_fbo_init(struct intel_context *intel)
+{
+   GLcontext *ctx = &intel->ctx;
+   ctx->Driver.NewFramebuffer = intel_new_framebuffer;
+   ctx->Driver.NewRenderbuffer = intel_new_renderbuffer;
+   ctx->Driver.BindFramebuffer = intel_bind_framebuffer;
+   ctx->Driver.FramebufferRenderbuffer = intel_framebuffer_renderbuffer;
+   ctx->Driver.RenderTexture = intel_render_texture;
+   ctx->Driver.FinishRenderTexture = intel_finish_render_texture;
+   ctx->Driver.ResizeBuffers = intel_resize_buffers;
+}
diff --git a/shared/intel_fbo.h b/shared/intel_fbo.h
new file mode 100644
index 0000000..c90c84b
--- /dev/null
+++ b/shared/intel_fbo.h
@@ -0,0 +1,113 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_FBO_H
+#define INTEL_FBO_H
+
+
+struct intel_context;
+struct intel_region;
+
+/**
+ * Intel framebuffer, derived from gl_framebuffer.
+ */
+struct intel_framebuffer
+{
+   struct gl_framebuffer Base;  /* first member: gl_framebuffer pointers are cast to this struct */
+
+   struct intel_renderbuffer *color_rb[3];  /* indexed by page in intel_flip_renderbuffers; entries may be NULL */
+
+   /* Drawable page flipping state */
+   GLboolean pf_active;
+   GLuint pf_seq;
+   GLint pf_planes;
+   GLint pf_current_page;       /* page currently shown as FRONT_LEFT */
+   GLint pf_num_pages;          /* modulus used to pick the next (BACK_LEFT) page */
+
+   /* VBI
+    */
+   GLuint vbl_waited;
+
+   int64_t swap_ust;
+   int64_t swap_missed_ust;
+
+   GLuint swap_count;
+   GLuint swap_missed_count;
+};
+
+
+/**
+ * Intel renderbuffer, derived from gl_renderbuffer.
+ * Note: The PairedDepth and PairedStencil fields use renderbuffer IDs,
+ * not pointers because in some circumstances a deleted renderbuffer could
+ * result in a dangling pointer here.
+ */
+struct intel_renderbuffer
+{
+   struct gl_renderbuffer Base; /* first member: intel_renderbuffer() casts between the two types */
+   struct intel_region *region;
+   void *pfMap;                 /* possibly page flipped map pointer */
+   GLuint pfPitch;              /* possibly page flipped pitch */
+   GLboolean RenderToTexture;   /* set to GL_TRUE for texture wrappers (RTT) */
+
+   GLuint PairedDepth;   /**< paired depth buffer ID; used if this is a stencil renderbuffer */
+   GLuint PairedStencil; /**< paired stencil buffer ID; used if this is a depth renderbuffer */
+
+   GLuint pf_pending;  /**< sequence number of pending flip */
+
+   GLuint vbl_pending;   /**< vblank sequence number of pending flip */
+};
+
+extern struct intel_renderbuffer *intel_renderbuffer(struct gl_renderbuffer
+                                                     *rb);
+
+extern void
+intel_renderbuffer_set_region(struct intel_renderbuffer *irb,
+			      struct intel_region *region);
+
+extern struct intel_renderbuffer *
+intel_create_renderbuffer(GLenum intFormat);
+
+extern void intel_fbo_init(struct intel_context *intel);
+
+
+/* XXX make inline or macro */
+extern struct intel_renderbuffer *intel_get_renderbuffer(struct gl_framebuffer
+                                                         *fb,
+                                                         int attIndex);
+
+extern void intel_flip_renderbuffers(struct intel_framebuffer *intel_fb);
+
+
+/* XXX make inline or macro */
+extern struct intel_region *intel_get_rb_region(struct gl_framebuffer *fb,
+                                                GLuint attIndex);
+
+
+
+
+#endif /* INTEL_FBO_H */
diff --git a/shared/intel_ioctl.c b/shared/intel_ioctl.c
new file mode 100644
index 0000000..f4566ba
--- /dev/null
+++ b/shared/intel_ioctl.c
@@ -0,0 +1,223 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sched.h>
+
+#include "mtypes.h"
+#include "context.h"
+#include "swrast/swrast.h"
+
+#include "intel_context.h"
+#include "intel_ioctl.h"
+#include "intel_batchbuffer.h"
+#include "intel_blit.h"
+#include "intel_regions.h"
+#include "drm.h"
+#include "i915_drm.h"
+
+#include "intel_bufmgr_ttm.h"
+
+#define FILE_DEBUG_FLAG DEBUG_IOCTL
+
+int
+intelEmitIrqLocked(struct intel_context *intel)
+{  /* Issue DRM_I915_IRQ_EMIT and return the resulting sequence number; exits the process on failure. */
+   struct drm_i915_irq_emit ie;
+   int ret, seq = 1;
+
+   if (intel->no_hw)
+      return 1;                 /* no hardware: hand back a dummy sequence number */
+
+   /*
+     assert(((*(int *)intel->driHwLock) & ~DRM_LOCK_CONT) ==
+     (DRM_LOCK_HELD|intel->hHWContext));
+   */
+
+   ie.irq_seq = &seq;           /* presumably the kernel writes the new sequence number through this pointer */
+
+   ret = drmCommandWriteRead(intel->driFd, DRM_I915_IRQ_EMIT, &ie, sizeof(ie));
+   if (ret) {
+      fprintf(stderr, "%s: drm_i915_irq_emit: %d\n", __FUNCTION__, ret);
+      exit(1);
+   }
+
+   DBG("%s -->  %d\n", __FUNCTION__, seq);
+
+   return seq;
+}
+
+void
+intelWaitIrq(struct intel_context *intel, int seq)
+{  /* Block in DRM_I915_IRQ_WAIT until sequence number seq has passed; exits the process on error. */
+   struct drm_i915_irq_wait iw;
+   int ret, lastdispatch;
+   volatile struct drm_i915_sarea *sarea = intel->sarea;
+
+   if (intel->no_hw)
+      return;
+
+   DBG("%s %d\n", __FUNCTION__, seq);
+
+   iw.irq_seq = seq;
+
+   do {
+      lastdispatch = sarea->last_dispatch;      /* snapshot taken before the wait */
+      ret = drmCommandWrite(intel->driFd, DRM_I915_IRQ_WAIT, &iw, sizeof(iw));
+   } while (ret == -EAGAIN ||                   /* retry on transient errors */
+	    ret == -EINTR ||
+	    (ret == -EBUSY && lastdispatch != sarea->last_dispatch) ||   /* busy, but last_dispatch moved during the wait */
+	    (ret == 0 && seq > sarea->last_dispatch) ||                   /* returned OK but seq not reached yet */
+	    (ret == 0 && sarea->last_dispatch - seq >= (1 << 24)));       /* NOTE(review): looks like a wrap-around guard -- confirm */
+
+   if (ret) {
+      fprintf(stderr, "%s: drm_i915_irq_wait: %d\n", __FUNCTION__, ret);
+      exit(1);
+   }
+}
+
+
+void
+intel_batch_ioctl(struct intel_context *intel,
+                  GLuint start_offset,
+                  GLuint used,
+                  GLboolean ignore_cliprects, GLboolean allow_unlock)
+{  /* Submit one batchbuffer via DRM_I915_BATCHBUFFER; allow_unlock is not used in this function. */
+   struct drm_i915_batchbuffer batch;
+
+   if (intel->no_hw)
+      return;
+
+   assert(intel->locked);
+   assert(used);
+
+   DBG("%s used %d offset %x..%x ignore_cliprects %d\n",
+       __FUNCTION__,
+       used, start_offset, start_offset + used, ignore_cliprects);
+
+   /* Throw away non-effective packets.  Won't work once we have
+    * hardware contexts which would preserve statechanges beyond a
+    * single buffer.
+    */
+   batch.start = start_offset;
+   batch.used = used;
+   batch.cliprects = intel->pClipRects;
+   batch.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects;
+   batch.DR1 = 0;
+   batch.DR4 = ((((GLuint) intel->drawX) & 0xffff) |     /* drawX in the low 16 bits */
+                (((GLuint) intel->drawY) << 16));         /* drawY in the high 16 bits */
+
+   DBG("%s: 0x%x..0x%x DR4: %x cliprects: %d\n",
+       __FUNCTION__,
+       batch.start,
+       batch.start + batch.used * 4, batch.DR4, batch.num_cliprects);   /* NOTE(review): scales used by 4, unlike the DBG above */
+
+   if (drmCommandWrite(intel->driFd, DRM_I915_BATCHBUFFER, &batch,
+                       sizeof(batch))) {
+      fprintf(stderr, "DRM_I915_BATCHBUFFER: %d\n", -errno);
+      UNLOCK_HARDWARE(intel);   /* release the lock before the fatal exit */
+      exit(1);
+   }
+}
+
+#ifdef TTM_API
+void
+intel_exec_ioctl(struct intel_context *intel,
+		 GLuint used,
+		 GLboolean ignore_cliprects, GLboolean allow_unlock,
+		 void *start, GLuint count, dri_fence **fence)
+{  /* Submit via DRM_I915_EXECBUFFER and replace *fence with the new fence (or NULL); allow_unlock is not used here. */
+   struct drm_i915_execbuffer execbuf;
+   dri_fence *fo;
+   int ret;
+
+   assert(intel->locked);       /* NOTE(review): asserts run before the no_hw check, unlike intel_batch_ioctl */
+   assert(used);
+
+   if (intel->no_hw)
+      return;
+
+   if (*fence) {
+     dri_fence_unreference(*fence);     /* drop the caller's previous fence; *fence is reassigned below */
+   }
+
+   memset(&execbuf, 0, sizeof(execbuf));
+
+   execbuf.num_buffers = count;
+   execbuf.batch.used = used;
+   execbuf.batch.cliprects = intel->pClipRects;
+   execbuf.batch.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects;
+   execbuf.batch.DR1 = 0;
+   execbuf.batch.DR4 = ((((GLuint) intel->drawX) & 0xffff) |
+			(((GLuint) intel->drawY) << 16));
+
+   execbuf.ops_list = (unsigned long)start; // TODO
+   execbuf.fence_arg.flags = DRM_FENCE_FLAG_SHAREABLE | DRM_I915_FENCE_FLAG_FLUSHED;
+
+   do {
+      ret = drmCommandWriteRead(intel->driFd, DRM_I915_EXECBUFFER, &execbuf,
+				sizeof(execbuf));
+   } while (ret == -EAGAIN);    /* only -EAGAIN is retried */
+
+   if (ret != 0) {
+      fprintf(stderr, "DRM_I915_EXECBUFFER: %d\n", -errno);
+      UNLOCK_HARDWARE(intel);
+      exit(1);
+   }
+
+   if (execbuf.fence_arg.error != 0) {
+
+      /*
+       * Fence creation has failed, but the GPU has been
+       * idled by the kernel. Safe to continue.
+       */ 
+
+      *fence = NULL;
+      return;
+   }
+
+   fo = intel_ttm_fence_create_from_arg(intel->bufmgr, "fence buffers",
+					&execbuf.fence_arg);
+   if (!fo) {
+      fprintf(stderr, "failed to fence handle: %08x\n", execbuf.fence_arg.handle);
+      UNLOCK_HARDWARE(intel);
+      exit(1);
+   }
+   *fence = fo;
+}
+#else
+void
+intel_exec_ioctl(struct intel_context *intel,
+		 GLuint used,
+		 GLboolean ignore_cliprects, GLboolean allow_unlock,
+		 void *start, GLuint count, dri_fence **fence)
+{  /* Built when TTM_API is not defined: intentionally empty, *fence is left untouched. */
+}
+#endif
diff --git a/i965/intel_ioctl.h b/shared/intel_ioctl.h
index df27659..8674aef 100644
--- a/i965/intel_ioctl.h
+++ b/shared/intel_ioctl.h
@@ -35,10 +35,12 @@ int intelEmitIrqLocked( struct intel_context *intel );
 
 void intel_batch_ioctl( struct intel_context *intel, 
 			GLuint start_offset,
-			GLuint used);
-
-void intel_cmd_ioctl( struct intel_context *intel, 
-		      char *buf,
-		      GLuint used);
+			GLuint used,
+			GLboolean ignore_cliprects,
+			GLboolean allow_unlock );
+void intel_exec_ioctl(struct intel_context *intel,
+		      GLuint used,
+		      GLboolean ignore_cliprects, GLboolean allow_unlock,
+		      void *start, GLuint count, dri_fence **fence);
 
 #endif
diff --git a/shared/intel_mipmap_tree.c b/shared/intel_mipmap_tree.c
new file mode 100644
index 0000000..9be7e02
--- /dev/null
+++ b/shared/intel_mipmap_tree.c
@@ -0,0 +1,489 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "intel_context.h"
+#include "intel_mipmap_tree.h"
+#include "intel_regions.h"
+#include "intel_chipset.h"
+#include "enums.h"
+
+#define FILE_DEBUG_FLAG DEBUG_MIPTREE
+/* Fold the six cube-map face targets into GL_TEXTURE_CUBE_MAP_ARB. */
+static GLenum
+target_to_target(GLenum target)
+{
+   switch (target) {
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
+      return GL_TEXTURE_CUBE_MAP_ARB;
+   default:
+      return target;
+   }
+}
+/* Allocate a zeroed tree, fill in its key and run the chipset layout pass. */
+static struct intel_mipmap_tree *
+intel_miptree_create_internal(struct intel_context *intel,
+			      GLenum target,
+			      GLenum internal_format,
+			      GLuint first_level,
+			      GLuint last_level,
+			      GLuint width0,
+			      GLuint height0,
+			      GLuint depth0, GLuint cpp, GLuint compress_byte)
+{
+   GLboolean ok;
+   struct intel_mipmap_tree *mt = calloc(1, sizeof(*mt));
+
+   DBG("%s target %s format %s level %d..%d\n", __FUNCTION__,
+       _mesa_lookup_enum_by_nr(target),
+       _mesa_lookup_enum_by_nr(internal_format), first_level, last_level);
+   if (!mt)
+      return NULL;              /* out of memory: nothing to lay out */
+
+   mt->target = target_to_target(target);
+   mt->internal_format = internal_format;
+   mt->first_level = first_level;
+   mt->last_level = last_level;
+   mt->width0 = width0;
+   mt->height0 = height0;
+   mt->depth0 = depth0;
+   mt->cpp = compress_byte ? compress_byte : cpp;
+   mt->compressed = compress_byte ? 1 : 0;
+   mt->refcount = 1;
+   mt->pitch = 0;               /* the layout pass below computes it */
+
+#ifdef I915
+   if (IS_945(intel->intelScreen->deviceID))
+      ok = i945_miptree_layout(intel, mt);
+   else
+      ok = i915_miptree_layout(intel, mt);
+#else
+   ok = brw_miptree_layout(intel, mt);
+#endif
+   if (!ok) {
+      free(mt);
+      return NULL;
+   }
+   return mt;
+}
+/* Create a miptree plus its backing region; NULL on failure or null texture. */
+struct intel_mipmap_tree *
+intel_miptree_create(struct intel_context *intel,
+		     GLenum target,
+		     GLenum internal_format,
+		     GLuint first_level,
+		     GLuint last_level,
+		     GLuint width0,
+		     GLuint height0,
+		     GLuint depth0, GLuint cpp, GLuint compress_byte)
+{
+   struct intel_mipmap_tree *mt;
+   GLuint i;
+
+   mt = intel_miptree_create_internal(intel, target, internal_format,
+				      first_level, last_level, width0,
+				      height0, depth0, cpp, compress_byte);
+   if (!mt)
+      return NULL;
+   /* pitch == 0 indicates the null texture, which gets no region. */
+   if (mt->pitch) {
+      mt->region = intel_region_alloc(intel,
+				      mt->cpp, mt->pitch, mt->total_height);
+      if (mt->region)
+         return mt;
+   }
+   /* Null texture or failed allocation: free the tree and its offset tables. */
+   for (i = 0; i < MAX_TEXTURE_LEVELS; i++)
+      free(mt->level[i].image_offset);
+   free(mt);
+   return NULL;
+}
+/* Wrap an existing region in a new miptree (no extra region reference taken). */
+struct intel_mipmap_tree *
+intel_miptree_create_for_region(struct intel_context *intel,
+				GLenum target,
+				GLenum internal_format,
+				GLuint first_level,
+				GLuint last_level,
+				struct intel_region *region,
+				GLuint depth0,
+				GLuint compress_byte)
+{
+   struct intel_mipmap_tree *mt;
+
+   mt = intel_miptree_create_internal(intel, target, internal_format,
+				      first_level, last_level,
+				      region->pitch, region->height, depth0,
+				      region->cpp, compress_byte);
+   if (!mt)
+      return mt;
+#if 0
+   if (mt->pitch != region->pitch) {
+      fprintf(stderr,
+	      "region pitch (%d) doesn't match mipmap tree pitch (%d)\n",
+	      region->pitch, mt->pitch);
+      free(mt);
+      return NULL;
+   }
+#else
+   /* The mipmap tree pitch is aligned to 64 bytes to make sure render
+    * to texture works, but we don't need that for texturing from a
+    * pixmap.  Just override it here. */
+   mt->pitch = region->pitch;
+#endif
+
+   mt->region = region;
+
+   return mt;
+ }
+
+/**
+ * intel_miptree_pitch_align:
+ *
+ * @intel: intel context pointer
+ * @mt: the miptree to compute pitch alignment for
+ * @pitch: the natural pitch value, in units of @mt->cpp bytes
+ *
+ * Given @pitch, compute a larger value which accounts for
+ * any necessary alignment required by the device.  The byte pitch is
+ * rounded up to 64 bytes when intel->ttm is set, else to 4 bytes.
+ * Compressed trees get @pitch back unchanged.
+ */
+
+int intel_miptree_pitch_align (struct intel_context *intel,
+			       struct intel_mipmap_tree *mt,
+			       int pitch)
+{
+#ifdef I915
+   GLcontext *ctx = &intel->ctx;
+#endif
+
+   if (!mt->compressed) {
+      int pitch_align;
+
+      if (intel->ttm) {
+	 /* XXX: Align pitch to multiple of 64 bytes for now to allow
+	  * render-to-texture to work in all cases. This should probably be
+	  * replaced at some point by some scheme to only do this when really
+	  * necessary.
+	  */
+	 pitch_align = 64;
+      } else {
+	 pitch_align = 4;
+      }
+
+      pitch = ALIGN(pitch * mt->cpp, pitch_align);
+
+#ifdef I915
+      /* XXX: At least the i915 seems very upset when the pitch is a multiple
+       * of 1024 and sometimes 512 bytes - performance can drop by several
+       * times. Go to the next multiple of the required alignment for now.
+       */
+      if (!(pitch & 511) && 
+	 (pitch + pitch_align) < (1 << ctx->Const.MaxTextureLevels))
+	 pitch += pitch_align;
+#endif
+
+      pitch /= mt->cpp;
+   }
+   return pitch;
+}
+/* Store src in *dst and take one more reference on it. */
+void
+intel_miptree_reference(struct intel_mipmap_tree **dst,
+                        struct intel_mipmap_tree *src)
+{
+   *dst = src;
+   src->refcount += 1;
+   DBG("%s %p refcount now %d\n", __FUNCTION__, src, src->refcount);
+}
+/* Drop one reference held through *mt and clear *mt; frees the tree at zero. */
+void
+intel_miptree_release(struct intel_context *intel,
+                      struct intel_mipmap_tree **mt)
+{
+   struct intel_mipmap_tree *tree = *mt;
+   GLuint lvl;
+
+   if (tree == NULL)
+      return;
+
+   /* The caller's pointer is cleared whether or not the tree survives. */
+   *mt = NULL;
+   DBG("%s %p refcount will be %d\n", __FUNCTION__, tree, tree->refcount - 1);
+   if (--tree->refcount > 0)
+      return;
+
+   DBG("%s deleting %p\n", __FUNCTION__, tree);
+   intel_region_release(&tree->region);
+   for (lvl = 0; lvl < MAX_TEXTURE_LEVELS; lvl++)
+      if (tree->level[lvl].image_offset)
+         free(tree->level[lvl].image_offset);
+   free(tree);
+}
+
+/* Can the image be pulled into a unified mipmap tree.  This mirrors
+ * the completeness test in a lot of ways: GL_FALSE for bordered images,
+ * depth images clamped to the border in S or T, and any mismatch of
+ * internal format, compression, texel size or level dimensions.
+ * The face argument is not used.
+ *
+ * Not sure whether I want to pass gl_texture_image here.
+ */
+GLboolean
+intel_miptree_match_image(struct intel_mipmap_tree *mt,
+                          struct gl_texture_image *image,
+                          GLuint face, GLuint level)
+{
+   /* Images with borders are never pulled into mipmap trees; neither
+    * are depth textures that clamp S or T to the border. */
+   if (image->Border ||
+       ((image->_BaseFormat == GL_DEPTH_COMPONENT) &&
+        ((image->TexObject->WrapS == GL_CLAMP_TO_BORDER) ||
+         (image->TexObject->WrapT == GL_CLAMP_TO_BORDER)))) 
+      return GL_FALSE;
+
+   if (image->InternalFormat != mt->internal_format ||
+       image->IsCompressed != mt->compressed)
+      return GL_FALSE;
+
+   if (!image->IsCompressed &&
+       !mt->compressed &&
+       image->TexFormat->TexelBytes != mt->cpp)
+      return GL_FALSE;
+
+   /* Test image dimensions against the base level image adjusted for
+    * minification.  This will also catch images not present in the
+    * tree, changed targets, etc.
+    */
+   if (image->Width != mt->level[level].width ||
+       image->Height != mt->level[level].height ||
+       image->Depth != mt->level[level].depth)
+      return GL_FALSE;
+
+   return GL_TRUE;
+}
+
+/* Record one level's size and byte offset, and allocate its offset table. */
+void
+intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
+			     GLuint level,
+			     GLuint nr_images,
+			     GLuint x, GLuint y,
+			     GLuint w, GLuint h, GLuint d)
+{
+   mt->level[level].width = w;
+   mt->level[level].height = h;
+   mt->level[level].depth = d;
+   mt->level[level].level_offset = (x + y * mt->pitch) * mt->cpp;
+   mt->level[level].nr_images = nr_images;
+
+   DBG("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__,
+       level, w, h, d, x, y, mt->level[level].level_offset);
+
+   /* Drop any offset table left over from an earlier call for this
+    * level (not sure when this would happen, but anyway). */
+   if (mt->level[level].image_offset) {
+      free(mt->level[level].image_offset);
+      mt->level[level].image_offset = NULL;
+   }
+
+   assert(nr_images);
+   /* NOTE(review): the malloc() result is written through without a check. */
+   mt->level[level].image_offset = malloc(nr_images * sizeof(GLuint));
+   mt->level[level].image_offset[0] = 0;
+}
+
+
+/* Store the byte offset of image img within level, computed from (x, y). */
+void
+intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
+			       GLuint level, GLuint img,
+			       GLuint x, GLuint y)
+{
+   const GLuint offset = (x + y * mt->pitch) * mt->cpp;
+
+   if (img == 0 && level == 0)
+      assert(x == 0 && y == 0);
+   assert(img < mt->level[level].nr_images);
+
+   mt->level[level].image_offset[img] = offset;
+   DBG("%s level %d img %d pos %d,%d image_offset %x\n",
+       __FUNCTION__, level, img, x, y, offset);
+}
+
+
+/* Although we use the image_offset[] array to store relative offsets
+ * to cube faces, Mesa doesn't know anything about this and expects
+ * each cube face to be treated as a separate image.
+ *
+ * These functions present that view to mesa:
+ */
+const GLuint *
+intel_miptree_depth_offsets(struct intel_mipmap_tree *mt, GLuint level)
+{
+   static const GLuint zero = 0;
+
+   /* Only 3D levels holding more than one image expose their table. */
+   if (mt->target == GL_TEXTURE_3D && mt->level[level].nr_images != 1)
+      return mt->level[level].image_offset;
+   return &zero;
+}
+
+/* Byte offset of a level; cube map trees add the per-face offset on top. */
+GLuint
+intel_miptree_image_offset(struct intel_mipmap_tree *mt,
+			   GLuint face, GLuint level)
+{
+   GLuint offset = mt->level[level].level_offset;
+
+   if (mt->target == GL_TEXTURE_CUBE_MAP_ARB)
+      offset += mt->level[level].image_offset[face];
+   return offset;
+}
+
+
+/**
+ * Map a teximage in a mipmap tree.
+ * \param face  cube face index; only read for cube map trees
+ * \param level  mipmap level to map
+ * \param row_stride  returns row stride in bytes (skipped when NULL)
+ * \param image_offsets pointer to array of pixel offsets from the returned
+ *	  pointer to each depth image; always written, so must not be NULL
+ * \return address of mapping
+ */
+GLubyte *
+intel_miptree_image_map(struct intel_context * intel,
+                        struct intel_mipmap_tree * mt,
+                        GLuint face,
+                        GLuint level,
+                        GLuint * row_stride, GLuint * image_offsets)
+{
+   DBG("%s \n", __FUNCTION__);
+
+   if (row_stride)
+      *row_stride = mt->pitch * mt->cpp;
+
+   if (mt->target == GL_TEXTURE_3D) {
+      int i;
+
+      for (i = 0; i < mt->level[level].depth; i++)
+	 image_offsets[i] = mt->level[level].image_offset[i] / mt->cpp;
+   } else {
+      assert(mt->level[level].depth == 1);
+      assert(mt->target == GL_TEXTURE_CUBE_MAP ||
+	     mt->level[level].image_offset[0] == 0);
+      image_offsets[0] = 0;
+   }
+
+   return (intel_region_map(intel, mt->region) +
+           intel_miptree_image_offset(mt, face, level));
+}
+/* Unmap the tree's region, pairing with intel_miptree_image_map(). */
+void
+intel_miptree_image_unmap(struct intel_context *intel,
+                          struct intel_mipmap_tree *mt)
+{
+   DBG("%s\n", __FUNCTION__);
+   intel_region_unmap(intel, mt->region);
+}
+
+
+/* Upload data for a particular image, one intel_region_data() call per
+ * depth slice; src advances src_image_pitch * dst->cpp bytes per slice.
+ */
+void
+intel_miptree_image_data(struct intel_context *intel,
+			 struct intel_mipmap_tree *dst,
+			 GLuint face,
+			 GLuint level,
+			 void *src,
+			 GLuint src_row_pitch,
+			 GLuint src_image_pitch)
+{
+   const GLuint depth = dst->level[level].depth;
+   const GLuint dst_offset = intel_miptree_image_offset(dst, face, level);
+   const GLuint *dst_depth_offset = intel_miptree_depth_offsets(dst, level);
+   GLubyte *slice = src;  /* byte cursor: void * arithmetic is not ISO C */
+   GLuint height = dst->level[level].height;
+   GLuint i;
+
+   /* Loop-invariant: compressed trees upload height / 4 rows, rounded up. */
+   if (dst->compressed)
+      height = (height + 3) / 4;
+   DBG("%s: %d/%d\n", __FUNCTION__, face, level);
+   for (i = 0; i < depth; i++) {
+      intel_region_data(intel,
+			dst->region,
+			dst_offset + dst_depth_offset[i], /* dst_offset */
+			0, 0,                             /* dstx, dsty */
+			slice,
+			src_row_pitch,
+			0, 0,                             /* source x, y */
+			dst->level[level].width, height); /* width, height */
+      slice += src_image_pitch * dst->cpp;
+   }
+}
+
+extern GLuint intel_compressed_alignment(GLenum);
+/* Copy mipmap image between trees: one intel_region_copy() per depth slice,
+ * sized from src's level; a compressed dst aligns width and uses height / 4. */
+void
+intel_miptree_image_copy(struct intel_context *intel,
+                         struct intel_mipmap_tree *dst,
+                         GLuint face, GLuint level,
+                         struct intel_mipmap_tree *src)
+{
+   GLuint width = src->level[level].width;
+   GLuint height = src->level[level].height;
+   GLuint depth = src->level[level].depth;
+   GLuint dst_offset = intel_miptree_image_offset(dst, face, level);
+   GLuint src_offset = intel_miptree_image_offset(src, face, level);
+   const GLuint *dst_depth_offset = intel_miptree_depth_offsets(dst, level);
+   const GLuint *src_depth_offset = intel_miptree_depth_offsets(src, level);
+   GLuint i;
+
+   if (dst->compressed) {
+       GLuint alignment = intel_compressed_alignment(dst->internal_format);
+       height = (height + 3) / 4;
+       width = ((width + alignment - 1) & ~(alignment - 1));
+   }
+
+   for (i = 0; i < depth; i++) {
+      intel_region_copy(intel,
+                        dst->region, dst_offset + dst_depth_offset[i],
+                        0,
+                        0,
+                        src->region, src_offset + src_depth_offset[i],
+                        0, 0, width, height);
+   }
+
+}
diff --git a/shared/intel_mipmap_tree.h b/shared/intel_mipmap_tree.h
new file mode 100644
index 0000000..c9537db
--- /dev/null
+++ b/shared/intel_mipmap_tree.h
@@ -0,0 +1,226 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_MIPMAP_TREE_H
+#define INTEL_MIPMAP_TREE_H
+
+#include "intel_regions.h"
+
+/* A layer on top of the intel_regions code which adds:
+ *
+ * - Code to size and layout a region to hold a set of mipmaps.
+ * - Query to determine if a new image fits in an existing tree.
+ * - More refcounting 
+ *     - maybe able to remove refcounting from intel_region?
+ * - ?
+ *
+ * The fixed mipmap layout of intel hardware where one offset
+ * specifies the position of all images in a mipmap hierarchy
+ * complicates the implementation of GL texture image commands,
+ * compared to hardware where each image is specified with an
+ * independent offset.
+ *
+ * In an ideal world, each texture object would be associated with a
+ * single bufmgr buffer or 2d intel_region, and all the images within
+ * the texture object would slot into the tree as they arrive.  The
+ * reality can be a little messier, as images can arrive from the user
+ * with sizes that don't fit in the existing tree, or in an order
+ * where the tree layout cannot be guessed immediately.  
+ * 
+ * This structure encodes an idealized mipmap tree.  The GL image
+ * commands build these where possible, otherwise store the images in
+ * temporary system buffers.
+ */
+
+
+/**
+ * Describes the location of each texture image within a texture region.
+ */
+struct intel_mipmap_level
+{
+   /**
+    * Byte offset to the base of this level.
+    *
+    * This is used for mipmap levels of 1D/2D/3D textures.  However, CUBE
+    * layouts spread images around the whole tree, so the level offset is
+    * always zero in that case.
+    */
+   GLuint level_offset;
+   GLuint width;
+   GLuint height;
+   /** Depth of the mipmap at this level: 1 for 1D/2D/CUBE, n for 3D. */
+   GLuint depth;
+   /** Number of images at this level: 1 for 1D/2D, 6 for CUBE, depth for 3D */
+   GLuint nr_images;
+
+   /**
+    * Byte offset from level_offset to the image for each cube face or depth
+    * level.
+    *
+    * Pretty much have to accept that hardware formats
+    * are going to be so diverse that there is no unified way to
+    * compute the offsets of depth/cube images within a mipmap level,
+    * so have to store them as a lookup table.
+    */
+   GLuint *image_offset;
+};
+
+struct intel_mipmap_tree
+{
+   /* Effectively the key:
+    */
+   GLenum target;
+   GLenum internal_format;
+
+   GLuint first_level;
+   GLuint last_level;
+
+   GLuint width0, height0, depth0; /**< Level zero image dimensions */
+   GLuint cpp;                  /**< scale between pitch and bytes */
+   GLboolean compressed;        /**< nonzero for compressed formats */
+
+   /* Derived from the above:
+    */
+   GLuint pitch;                /**< row pitch in units of cpp bytes */
+   GLuint depth_pitch;          /* per-image on i945? */
+   GLuint total_height;
+
+   /* Includes image offset tables:
+    */
+   struct intel_mipmap_level level[MAX_TEXTURE_LEVELS];
+
+   /* The data is held here:
+    */
+   struct intel_region *region;
+
+   /* These are also refcounted:
+    */
+   GLuint refcount;
+};
+
+
+
+struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel,
+                                               GLenum target,
+                                               GLenum internal_format,
+                                               GLuint first_level,
+                                               GLuint last_level,
+                                               GLuint width0,
+                                               GLuint height0,
+                                               GLuint depth0,
+                                               GLuint cpp,
+                                               GLuint compress_byte);
+
+struct intel_mipmap_tree *
+intel_miptree_create_for_region(struct intel_context *intel,
+				GLenum target,
+				GLenum internal_format,
+				GLuint first_level,
+				GLuint last_level,
+				struct intel_region *region,
+				GLuint depth0,
+				GLuint compress_byte);
+
+int intel_miptree_pitch_align (struct intel_context *intel,
+			       struct intel_mipmap_tree *mt,
+			       int pitch);
+
+void intel_miptree_reference(struct intel_mipmap_tree **dst,
+                             struct intel_mipmap_tree *src);
+
+void intel_miptree_release(struct intel_context *intel,
+                           struct intel_mipmap_tree **mt);
+
+/* Check if an image fits an existing mipmap tree layout
+ */
+GLboolean intel_miptree_match_image(struct intel_mipmap_tree *mt,
+                                    struct gl_texture_image *image,
+                                    GLuint face, GLuint level);
+
+/* Return a pointer to an image within a tree.  Return row stride and
+ * the offsets of each depth image as well.
+ */
+GLubyte *intel_miptree_image_map(struct intel_context *intel,
+                                 struct intel_mipmap_tree *mt,
+                                 GLuint face,
+                                 GLuint level,
+                                 GLuint * row_stride, GLuint * image_offsets);
+
+void intel_miptree_image_unmap(struct intel_context *intel,
+                               struct intel_mipmap_tree *mt);
+
+
+/* Return the linear offset of an image relative to the start of the
+ * tree:
+ */
+GLuint intel_miptree_image_offset(struct intel_mipmap_tree *mt,
+                                  GLuint face, GLuint level);
+
+/* Return the offsets of each 2d slice within an image.  Indexed by depth
+ * value.
+ */
+const GLuint *intel_miptree_depth_offsets(struct intel_mipmap_tree *mt,
+                                          GLuint level);
+
+
+void intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
+                                  GLuint level,
+                                  GLuint nr_images,
+                                  GLuint x, GLuint y,
+                                  GLuint w, GLuint h, GLuint d);
+
+void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
+                                    GLuint level,
+                                    GLuint img, GLuint x, GLuint y);
+
+
+/* Upload an image into a tree
+ */
+void intel_miptree_image_data(struct intel_context *intel,
+                              struct intel_mipmap_tree *dst,
+                              GLuint face,
+                              GLuint level,
+                              void *src,
+                              GLuint src_row_pitch, GLuint src_image_pitch);
+
+/* Copy an image between two trees
+ */
+void intel_miptree_image_copy(struct intel_context *intel,
+                              struct intel_mipmap_tree *dst,
+                              GLuint face, GLuint level,
+                              struct intel_mipmap_tree *src);
+
+/* i915_mipmap_tree.c:
+ */
+GLboolean i915_miptree_layout(struct intel_context *intel,
+			      struct intel_mipmap_tree *mt);
+GLboolean i945_miptree_layout(struct intel_context *intel,
+			      struct intel_mipmap_tree *mt);
+GLboolean brw_miptree_layout(struct intel_context *intel,
+			     struct intel_mipmap_tree *mt);
+
+#endif
diff --git a/shared/intel_pixel.c b/shared/intel_pixel.c
new file mode 100644
index 0000000..6417866
--- /dev/null
+++ b/shared/intel_pixel.c
@@ -0,0 +1,189 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "enums.h"
+#include "state.h"
+#include "swrast/swrast.h"
+
+#include "intel_context.h"
+#include "intel_pixel.h"
+#include "intel_regions.h"
+
+#define FILE_DEBUG_FLAG DEBUG_PIXEL
+
+static GLenum
+effective_func(GLenum func, GLboolean src_alpha_is_one)
+{
+   if (!src_alpha_is_one)
+      return func;
+
+   switch (func) {
+   case GL_SRC_ALPHA:           return GL_ONE;
+   case GL_ONE_MINUS_SRC_ALPHA: return GL_ZERO;
+   default:                     return func;
+   }
+}
+/**
+ * Check if any fragment operations are in effect which might affect
+ * glDraw/CopyPixels; returns GL_FALSE if so.  With src_alpha_is_one, blend
+ * factors GL_SRC_ALPHA / GL_ONE_MINUS_SRC_ALPHA count as GL_ONE / GL_ZERO.
+ */
+GLboolean
+intel_check_blit_fragment_ops(GLcontext * ctx, GLboolean src_alpha_is_one)
+{
+   if (ctx->NewState)
+      _mesa_update_state(ctx);
+
+   if (ctx->FragmentProgram._Enabled) {
+      DBG("fallback due to fragment program\n");
+      return GL_FALSE;
+   }
+
+   if (ctx->Color.BlendEnabled &&
+       (effective_func(ctx->Color.BlendSrcRGB, src_alpha_is_one) != GL_ONE ||
+	effective_func(ctx->Color.BlendDstRGB, src_alpha_is_one) != GL_ZERO ||
+	ctx->Color.BlendEquationRGB != GL_FUNC_ADD ||
+	effective_func(ctx->Color.BlendSrcA, src_alpha_is_one) != GL_ONE ||
+	effective_func(ctx->Color.BlendDstA, src_alpha_is_one) != GL_ZERO ||
+	ctx->Color.BlendEquationA != GL_FUNC_ADD)) {
+      DBG("fallback due to blend\n");
+      return GL_FALSE;
+   }
+
+   if (ctx->Texture._EnabledUnits) {
+      DBG("fallback due to texturing\n");
+      return GL_FALSE;
+   }
+
+   if (!(ctx->Color.ColorMask[0] &&
+	 ctx->Color.ColorMask[1] &&
+	 ctx->Color.ColorMask[2] &&
+	 ctx->Color.ColorMask[3])) {
+      DBG("fallback due to color masking\n");
+      return GL_FALSE;
+   }
+
+   if (ctx->Color.AlphaEnabled) {
+      DBG("fallback due to alpha\n");
+      return GL_FALSE;
+   }
+
+   if (ctx->Depth.Test) {
+      DBG("fallback due to depth test\n");
+      return GL_FALSE;
+   }
+
+   if (ctx->Fog.Enabled) {
+      DBG("fallback due to fog\n");
+      return GL_FALSE;
+   }
+
+   if (ctx->_ImageTransferState) {
+      DBG("fallback due to image transfer\n");
+      return GL_FALSE;
+   }
+
+   if (ctx->Stencil.Enabled) {
+      DBG("fallback due to image stencil\n");
+      return GL_FALSE;
+   }
+
+   if (ctx->Scissor.Enabled) {
+      /* XXX Note: Scissor could be done with the blitter */
+      DBG("fallback due to image scissor\n");
+      return GL_FALSE;
+   }
+
+   if (ctx->RenderMode != GL_RENDER) {
+      DBG("fallback due to render mode\n");
+      return GL_FALSE;
+   }
+
+   return GL_TRUE;
+}
+
+
+GLboolean
+intel_check_meta_tex_fragment_ops(GLcontext * ctx)
+{
+   if (ctx->NewState)
+      _mesa_update_state(ctx);
+
+   /* Some of _ImageTransferState (scale, bias) could be done with
+    * fragment programs on i915; not done yet. */
+   if (ctx->_ImageTransferState || ctx->Fog.Enabled)
+      return GL_FALSE;
+   return !(ctx->Texture._EnabledUnits || ctx->FragmentProgram._Enabled);
+}
+
+/* The intel_region struct doesn't really do enough to capture the
+ * format of the pixels in the region.  For now this code assumes that
+ * the region is a display surface and hence is either ARGB8888 or
+ * RGB565.
+ * XXX FBO: If we'd pass in the intel_renderbuffer instead of region, we'd
+ * know the buffer's pixel format.
+ *
+ * \param format  as given to glDraw/ReadPixels
+ * \param type  as given to glDraw/ReadPixels
+ */
+GLboolean
+intel_check_blit_format(struct intel_region * region,
+                        GLenum format, GLenum type)
+{
+   switch (region->cpp) {
+   case 4:
+      if (format == GL_BGRA &&
+          (type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV))
+         return GL_TRUE;
+      break;
+   case 2:
+      if (format == GL_BGR && type == GL_UNSIGNED_SHORT_5_6_5_REV)
+         return GL_TRUE;
+      break;
+   }
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s: bad format for blit (cpp %d, type %s format %s)\n",
+              __FUNCTION__, region->cpp,
+              _mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format));
+   return GL_FALSE;
+}
+
+/* Fill in the pixel-path entries of the driver function table. */
+void
+intelInitPixelFuncs(struct dd_function_table *functions)
+{
+   functions->Accum = _swrast_Accum;
+   if (getenv("INTEL_NO_BLIT"))
+      return;                   /* leave the remaining hooks untouched */
+   functions->Bitmap = intelBitmap;
+   functions->CopyPixels = intelCopyPixels;
+#ifdef I915
+   functions->ReadPixels = intelReadPixels;
+   functions->DrawPixels = intelDrawPixels;
+#endif
+}
diff --git a/shared/intel_pixel.h b/shared/intel_pixel.h
new file mode 100644
index 0000000..9c899b9
--- /dev/null
+++ b/shared/intel_pixel.h
@@ -0,0 +1,70 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_PIXEL_H
+#define INTEL_PIXEL_H
+
+#include "mtypes.h"
+
+void intelInitPixelFuncs(struct dd_function_table *functions);
+
+GLboolean intel_check_blit_fragment_ops(GLcontext * ctx,
+					GLboolean src_alpha_is_one);
+
+GLboolean intel_check_meta_tex_fragment_ops(GLcontext * ctx);
+
+GLboolean intel_check_blit_format(struct intel_region *region,
+                                  GLenum format, GLenum type);
+
+
+void intelReadPixels(GLcontext * ctx,
+                     GLint x, GLint y,
+                     GLsizei width, GLsizei height,
+                     GLenum format, GLenum type,
+                     const struct gl_pixelstore_attrib *pack,
+                     GLvoid * pixels);
+
+void intelDrawPixels(GLcontext * ctx,
+                     GLint x, GLint y,
+                     GLsizei width, GLsizei height,
+                     GLenum format,
+                     GLenum type,
+                     const struct gl_pixelstore_attrib *unpack,
+                     const GLvoid * pixels);
+
+void intelCopyPixels(GLcontext * ctx,
+                     GLint srcx, GLint srcy,
+                     GLsizei width, GLsizei height,
+                     GLint destx, GLint desty, GLenum type);
+
+void intelBitmap(GLcontext * ctx,
+		 GLint x, GLint y,
+		 GLsizei width, GLsizei height,
+		 const struct gl_pixelstore_attrib *unpack,
+		 const GLubyte * pixels);
+
+#endif
diff --git a/i965/intel_pixel_bitmap.c b/shared/intel_pixel_bitmap.c
index df9d688..81238ac 100644
--- a/i965/intel_pixel_bitmap.c
+++ b/shared/intel_pixel_bitmap.c
@@ -41,6 +41,8 @@
 #include "intel_blit.h"
 #include "intel_regions.h"
 #include "intel_buffer_objects.h"
+#include "intel_buffers.h"
+#include "intel_pixel.h"
 
 
 
@@ -91,11 +93,6 @@ static void set_bit( GLubyte *dest,
    dest[bit/8] |= 1 << (bit % 8);
 }
 
-static int align(int x, int align)
-{
-   return (x + align - 1) & ~(align - 1);
-}
-
 /* Extract a rectangle's worth of data from the bitmap.  Called
  * per-cliprect.
  */
@@ -147,7 +144,7 @@ static GLuint get_bitmap_rect(GLsizei width, GLsizei height,
       }
 
       if (row_align)
-	 bit = (bit + row_align - 1) & ~(row_align - 1);
+	 bit = ALIGN(bit, row_align);
    }
 
    return count;
@@ -169,11 +166,8 @@ do_blit_bitmap( GLcontext *ctx,
    struct intel_context *intel = intel_context(ctx);
    struct intel_region *dst = intel_drawbuf_region(intel);
    GLfloat tmpColor[4];
-
-   union {
-      GLuint ui;
-      GLubyte ub[4];
-   } color;
+   GLubyte ubcolor[4];
+   GLuint color8888, color565;
 
    if (!dst)
        return GL_FALSE;
@@ -190,14 +184,17 @@ do_blit_bitmap( GLcontext *ctx,
        ADD_3V(tmpColor, tmpColor, ctx->Current.RasterSecondaryColor);
    }
 
-   UNCLAMPED_FLOAT_TO_CHAN(color.ub[0], tmpColor[2]);
-   UNCLAMPED_FLOAT_TO_CHAN(color.ub[1], tmpColor[1]);
-   UNCLAMPED_FLOAT_TO_CHAN(color.ub[2], tmpColor[0]);
-   UNCLAMPED_FLOAT_TO_CHAN(color.ub[3], tmpColor[3]);
+   UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[0], tmpColor[0]);
+   UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[1], tmpColor[1]);
+   UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[2], tmpColor[2]);
+   UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[3], tmpColor[3]);
+
+   color8888 = INTEL_PACKCOLOR8888(ubcolor[0], ubcolor[1], ubcolor[2], ubcolor[3]);
+   color565 = INTEL_PACKCOLOR565(ubcolor[0], ubcolor[1], ubcolor[2]);
 
    /* Does zoom apply to bitmaps?
     */
-   if (!intel_check_blit_fragment_ops(ctx) ||
+   if (!intel_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F) ||
        ctx->Pixel.ZoomX != 1.0F || 
        ctx->Pixel.ZoomY != 1.0F)
       return GL_FALSE;
@@ -235,10 +232,10 @@ do_blit_bitmap( GLcontext *ctx,
       dsty = dPriv->y + (dPriv->h - dsty - height);  
       dstx = dPriv->x + dstx;
 
-      dest_rect.x1 = dstx;
-      dest_rect.y1 = dsty;
-      dest_rect.x2 = dstx + width;
-      dest_rect.y2 = dsty + height;
+      dest_rect.x1 = dstx < 0 ? 0 : dstx;
+      dest_rect.y1 = dsty < 0 ? 0 : dsty;
+      dest_rect.x2 = dstx + width < 0 ? 0 : dstx + width;
+      dest_rect.y2 = dsty + height < 0 ? 0 : dsty + height;
 
       for (i = 0; i < nbox; i++) {
          drm_clip_rect_t rect;
@@ -268,7 +265,7 @@ do_blit_bitmap( GLcontext *ctx,
 	    for (px = 0; px < box_w; px += DX) { 
 	       int h = MIN2(DY, box_h - py);
 	       int w = MIN2(DX, box_w - px); 
-	       GLuint sz = align(align(w,8) * h, 64)/8;
+	       GLuint sz = ALIGN(ALIGN(w,8) * h, 64)/8;
 	       GLenum logic_op = ctx->Color.ColorLogicOpEnabled ?
 		  ctx->Color.LogicOp : GL_COPY;
 
@@ -292,7 +289,7 @@ do_blit_bitmap( GLcontext *ctx,
 						  dst->cpp,
 						  (GLubyte *)stipple, 
 						  sz,
-						  color.ui,
+						  (dst->cpp == 2) ? color565 : color8888,
 						  dst->pitch,
 						  dst->buffer,
 						  0,
@@ -304,7 +301,6 @@ do_blit_bitmap( GLcontext *ctx,
 	    } 
 	 } 
       }
-      intel->need_flush = GL_TRUE;
    out:
       intel_batchbuffer_flush(intel->batch);
    }
diff --git a/i965/intel_pixel_copy.c b/shared/intel_pixel_copy.c
index 3bdf2fb..45f72ba 100644
--- a/i965/intel_pixel_copy.c
+++ b/shared/intel_pixel_copy.c
@@ -28,18 +28,21 @@
 #include "glheader.h"
 #include "enums.h"
 #include "image.h"
+#include "state.h"
 #include "mtypes.h"
 #include "macros.h"
-#include "state.h"
 #include "swrast/swrast.h"
 
 #include "intel_screen.h"
 #include "intel_context.h"
 #include "intel_ioctl.h"
 #include "intel_batchbuffer.h"
+#include "intel_buffers.h"
 #include "intel_blit.h"
 #include "intel_regions.h"
+#include "intel_pixel.h"
 
+#define FILE_DEBUG_FLAG DEBUG_PIXEL
 
 static struct intel_region *
 copypix_src_region(struct intel_context *intel, GLenum type)
@@ -68,20 +71,20 @@ copypix_src_region(struct intel_context *intel, GLenum type)
 }
 
 
-
-
 /**
  * Check if any fragment operations are in effect which might effect
- * glDraw/CopyPixels.
+ * glCopyPixels.  Differs from intel_check_blit_fragment_ops in that
+ * we allow Scissor.
  */
-GLboolean
-intel_check_blit_fragment_ops(GLcontext * ctx)
+static GLboolean
+intel_check_copypixel_blit_fragment_ops(GLcontext * ctx)
 {
    if (ctx->NewState)
       _mesa_update_state(ctx);
 
+   /* Could do logicop with the blitter: 
+    */
    return !(ctx->_ImageTransferState ||
-	    ctx->RenderMode != GL_RENDER ||
             ctx->Color.AlphaEnabled ||
             ctx->Depth.Test ||
             ctx->Fog.Enabled ||
@@ -89,12 +92,13 @@ intel_check_blit_fragment_ops(GLcontext * ctx)
             !ctx->Color.ColorMask[0] ||
             !ctx->Color.ColorMask[1] ||
             !ctx->Color.ColorMask[2] ||
-            !ctx->Color.ColorMask[3] ||	/* can do this! */
+            !ctx->Color.ColorMask[3] ||
             ctx->Texture._EnabledUnits ||
 	    ctx->FragmentProgram._Enabled ||
 	    ctx->Color.BlendEnabled);
 }
 
+#ifdef I915
 /* Doesn't work for overlapping regions.  Could do a double copy or
  * just fallback.
  */
@@ -113,15 +117,9 @@ do_texture_copypixels(GLcontext * ctx,
    DBG("%s %d,%d %dx%d --> %d,%d\n", __FUNCTION__, 
        srcx, srcy, width, height, dstx, dsty);
 
-   if (!src || !dst || type != GL_COLOR ||
-       ctx->_ImageTransferState ||
-       ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F ||
-       ctx->RenderMode != GL_RENDER ||
-       ctx->Texture._EnabledUnits ||
-       ctx->FragmentProgram._Enabled ||
-       src != dst )
-       return GL_FALSE;
-   
+   if (!src || !dst || type != GL_COLOR)
+      return GL_FALSE;
+
    /* Can't handle overlapping regions.  Don't have sufficient control
     * over rasterization to pull it off in-place.  Punt on these for
     * now.
@@ -135,13 +133,13 @@ do_texture_copypixels(GLcontext * ctx,
 
       srcbox.x1 = srcx;
       srcbox.y1 = srcy;
-      srcbox.x2 = srcx + width - 1;
-      srcbox.y2 = srcy + height - 1;
+      srcbox.x2 = srcx + width;
+      srcbox.y2 = srcy + height;
 
       dstbox.x1 = dstx;
       dstbox.y1 = dsty;
-      dstbox.x2 = dstx + width - 1;
-      dstbox.y2 = dsty + height - 1;
+      dstbox.x2 = dstx + width * ctx->Pixel.ZoomX;
+      dstbox.y2 = dsty + height * ctx->Pixel.ZoomY;
 
       DBG("src %d,%d %d,%d\n", srcbox.x1, srcbox.y1, srcbox.x2, srcbox.y2);
       DBG("dst %d,%d %d,%d (%dx%d) (%f,%f)\n", dstbox.x1, dstbox.y1, dstbox.x2, dstbox.y2,
@@ -180,22 +178,66 @@ do_texture_copypixels(GLcontext * ctx,
 
    /* Set the frontbuffer up as a large rectangular texture.
     */
-   intel->vtbl.meta_frame_buffer_texture( intel, srcx - dstx, srcy - dsty );
+   if (!intel->vtbl.meta_tex_rect_source(intel, src->buffer, 0,
+                                         src->pitch,
+                                         src->height, src_format, src_type)) {
+      intel->vtbl.leave_meta_state(intel);
+      return GL_FALSE;
+   }
+
 
    intel->vtbl.meta_texture_blend_replace(intel);
-   
-   if (intel->driDrawable->numClipRects)
-      intel->vtbl.meta_draw_quad( intel,
-				  dstx, dstx + width,
-				  dsty, dsty + height,
-				  ctx->Current.RasterPos[ 2 ],
-				  0, 0, 0, 0, 0.0, 0.0, 0.0, 0.0 );
-   
-   intel->vtbl.leave_meta_state( intel );
-   
+
+   LOCK_HARDWARE(intel);
+
+   if (intel->driDrawable->numClipRects) {
+      __DRIdrawablePrivate *dPriv = intel->driDrawable;
+
+
+      srcy = dPriv->h - srcy - height;  /* convert from gl to hardware coords */
+
+      srcx += dPriv->x;
+      srcy += dPriv->y;
+
+      /* Clip against the source region.  This is the only source
+       * clipping we do.  XXX: Just set the texcord wrap mode to clamp
+       * or similar.
+       *
+       */
+      if (0) {
+         GLint orig_x = srcx;
+         GLint orig_y = srcy;
+
+         if (!_mesa_clip_to_region(0, 0, src->pitch, src->height,
+                                   &srcx, &srcy, &width, &height))
+            goto out;
+
+         dstx += srcx - orig_x;
+         dsty += (srcy - orig_y) * ctx->Pixel.ZoomY;
+      }
+
+      /* Just use the regular cliprect mechanism...  Does this need to
+       * even hold the lock???
+       */
+      intel->vtbl.meta_draw_quad(intel,
+				 dstx,
+				 dstx + width * ctx->Pixel.ZoomX,
+				 dPriv->h - (dsty + height * ctx->Pixel.ZoomY),
+				 dPriv->h - (dsty), 0, /* XXX: what z value? */
+				 0x00ff00ff,
+				 srcx, srcx + width, srcy, srcy + height);
+
+    out:
+      intel->vtbl.leave_meta_state(intel);
+      intel_batchbuffer_flush(intel->batch);
+   }
+   UNLOCK_HARDWARE(intel);
+
    DBG("%s: success\n", __FUNCTION__);
    return GL_TRUE;
 }
+#endif /* I915 */
+
 
 /**
  * CopyPixels with the blitter.  Don't support zooming, pixel transfer, etc.
@@ -213,7 +255,7 @@ do_blit_copypixels(GLcontext * ctx,
    /* Copypixels can be more than a straight copy.  Ensure all the
     * extra operations are disabled:
     */
-   if (!intel_check_blit_fragment_ops(ctx) ||
+   if (!intel_check_copypixel_blit_fragment_ops(ctx) ||
        ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F)
       return GL_FALSE;
 
@@ -224,16 +266,12 @@ do_blit_copypixels(GLcontext * ctx,
 
    intelFlush(&intel->ctx);
 
-/*    intel->vtbl.render_start(intel); */
-/*    intel->vtbl.emit_state(intel); */
-
    LOCK_HARDWARE(intel);
 
    if (intel->driDrawable->numClipRects) {
       __DRIdrawablePrivate *dPriv = intel->driDrawable;
       __DRIdrawablePrivate *dReadPriv = intel->driReadDrawable;
       drm_clip_rect_t *box = dPriv->pClipRects;
-      drm_clip_rect_t dest_rect;
       GLint nbox = dPriv->numClipRects;
       GLint delta_x = 0;
       GLint delta_y = 0;
@@ -281,13 +319,6 @@ do_blit_copypixels(GLcontext * ctx,
          dsty = srcy - delta_y;
       }
 
-      dest_rect.x1 = dstx;
-      dest_rect.y1 = dsty;
-      dest_rect.x2 = dstx + width;
-      dest_rect.y2 = dsty + height;
-
-/*       intel->vtbl.emit_flush(intel, 0); */
-
       /* Could do slightly more clipping: Eg, take the intersection of
        * the existing set of cliprects and those cliprects translated
        * by delta_x, delta_y:
@@ -296,32 +327,35 @@ do_blit_copypixels(GLcontext * ctx,
        * introduce garbage when copying from obscured window regions.
        */
       for (i = 0; i < nbox; i++) {
-         drm_clip_rect_t rect;
+	 GLint clip_x = dstx;
+	 GLint clip_y = dsty;
+	 GLint clip_w = width;
+	 GLint clip_h = height;
 
-         if (!intel_intersect_cliprects(&rect, &dest_rect, &box[i]))
+         if (!_mesa_clip_to_region(box[i].x1, box[i].y1, box[i].x2, box[i].y2,
+				   &clip_x, &clip_y, &clip_w, &clip_h))
             continue;
 
-
-         intelEmitCopyBlit(intel, 
-			   dst->cpp, 
+         intelEmitCopyBlit(intel, dst->cpp,
 			   src->pitch, src->buffer, 0, src->tiled,
 			   dst->pitch, dst->buffer, 0, dst->tiled,
-			   rect.x1 + delta_x, 
-			   rect.y1 + delta_y,       /* srcx, srcy */
-                           rect.x1, rect.y1,    /* dstx, dsty */
-                           rect.x2 - rect.x1, rect.y2 - rect.y1,
+			   clip_x + delta_x, clip_y + delta_y, /* srcx, srcy */
+			   clip_x, clip_y, /* dstx, dsty */
+			   clip_w, clip_h,
 			   ctx->Color.ColorLogicOpEnabled ?
 			   ctx->Color.LogicOp : GL_COPY);
       }
 
-      intel->need_flush = GL_TRUE;
-   out:
+    out:
       intel_batchbuffer_flush(intel->batch);
    }
    UNLOCK_HARDWARE(intel);
+
+   DBG("%s: success\n", __FUNCTION__);
    return GL_TRUE;
 }
 
+
 void
 intelCopyPixels(GLcontext * ctx,
                 GLint srcx, GLint srcy,
@@ -334,11 +368,12 @@ intelCopyPixels(GLcontext * ctx,
    if (do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type))
       return;
 
+#ifdef I915
    if (do_texture_copypixels(ctx, srcx, srcy, width, height, destx, desty, type))
       return;
-   
-   if (INTEL_DEBUG & DEBUG_PIXEL)
-      _mesa_printf("fallback to _swrast_CopyPixels\n");
+#endif
+
+   DBG("fallback to _swrast_CopyPixels\n");
 
    _swrast_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type);
 }
diff --git a/shared/intel_pixel_draw.c b/shared/intel_pixel_draw.c
new file mode 100644
index 0000000..34813d2
--- /dev/null
+++ b/shared/intel_pixel_draw.c
@@ -0,0 +1,388 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "enums.h"
+#include "image.h"
+#include "mtypes.h"
+#include "macros.h"
+#include "bufferobj.h"
+#include "swrast/swrast.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_ioctl.h"
+#include "intel_batchbuffer.h"
+#include "intel_blit.h"
+#include "intel_buffers.h"
+#include "intel_regions.h"
+#include "intel_pixel.h"
+#include "intel_buffer_objects.h"
+#include "intel_tris.h"
+
+
+
+/* glDrawPixels from a PBO, drawn as a textured quad through metaops.
+ * GL_FALSE means "not handled, try another path"; GL_TRUE means done. */
+static GLboolean
+do_texture_drawpixels(GLcontext * ctx,
+                      GLint x, GLint y,
+                      GLsizei width, GLsizei height,
+                      GLenum format, GLenum type,
+                      const struct gl_pixelstore_attrib *unpack,
+                      const GLvoid * pixels)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_region *dst = intel_drawbuf_region(intel);
+   struct intel_buffer_object *src = intel_buffer_object(unpack->BufferObj);
+   GLuint rowLength = unpack->RowLength ? unpack->RowLength : width;
+   GLuint src_offset;
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   intelFlush(&intel->ctx);
+
+   if (!dst)
+      return GL_FALSE;
+
+   intel->vtbl.render_start(intel);
+   intel->vtbl.emit_state(intel);
+
+   if (src) {
+      if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
+                                     format, type, pixels)) {
+         _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels");
+         return GL_TRUE;
+      }
+   }
+   else {
+      /* PBO only for now. */
+      return GL_FALSE;
+   }
+
+   /* There are a couple of things we can't do yet, one of which is
+    * set the correct state for pixel operations when GL texturing is
+    * enabled.  That's a pretty rare state and probably not worth the
+    * effort.  A completely device-independent version of this may do
+    * more.
+    *
+    * Similarly, we make no attempt to merge metaops processing with
+    * an enabled fragment program, though it would certainly be
+    * possible.
+    */
+   if (!intel_check_meta_tex_fragment_ops(ctx)) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - bad GL fragment state for metaops texture\n",
+                      __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   intel->vtbl.install_meta_state(intel);
+
+   /* Is this true?  Also will need to turn depth testing on according
+    * to state:
+    */
+   intel->vtbl.meta_no_stencil_write(intel);
+   intel->vtbl.meta_no_depth_write(intel);
+
+   /* Set the 3d engine to draw into the destination region:
+    */
+   intel->vtbl.meta_draw_region(intel, dst, intel->depth_region);
+
+   intel->vtbl.meta_import_pixel_state(intel);
+
+   src_offset = (GLuint) _mesa_image_address(2, unpack, pixels, width, height,
+                                             format, type, 0, 0, 0);
+
+   /* Setup the pbo up as a rectangular texture, if possible.
+    *
+    * TODO: This is almost always possible if the i915 fragment
+    * program is adjusted to correctly swizzle the sampled colors.
+    * The major exception is any 24bit texture, like RGB888, for which
+    * there is no hardware support.  
+    */
+   if (!intel->vtbl.meta_tex_rect_source(intel, src->buffer, src_offset,
+                                         rowLength, height, format, type)) {
+      intel->vtbl.leave_meta_state(intel);
+      return GL_FALSE;
+   }
+
+   intel->vtbl.meta_texture_blend_replace(intel);
+
+   LOCK_HARDWARE(intel);
+
+   if (intel->driDrawable->numClipRects) {
+      __DRIdrawablePrivate *dPriv = intel->driDrawable;
+      GLint srcx, srcy;
+      GLint dstx, dsty;
+
+      dstx = x;
+      dsty = dPriv->h - (y + height);
+
+      srcx = 0;                 /* skiprows/pixels already done */
+      srcy = 0;
+
+      if (0) {
+         const GLint orig_x = dstx;
+         const GLint orig_y = dsty;
+
+         if (!_mesa_clip_to_region(0, 0, dst->pitch, dst->height,
+                                   &dstx, &dsty, &width, &height))
+            goto out;
+
+         srcx += dstx - orig_x;
+         srcy += dsty - orig_y;
+      }
+
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("draw %d,%d %dx%d\n", dstx, dsty, width, height);
+
+      /* Must use the regular cliprect mechanism in order to get the
+       * drawing origin set correctly.  Otherwise scissor state is in
+       * incorrect coordinate space.  Does this even need to hold the
+       * lock???
+       */
+      intel->vtbl.meta_draw_quad(intel,
+				 dstx, dstx + width * ctx->Pixel.ZoomX,
+				 dPriv->h - (y + height * ctx->Pixel.ZoomY),
+				 dPriv->h - (y),
+				 -ctx->Current.RasterPos[2] * .5,
+				 0x00ff00ff,
+				 srcx, srcx + width, srcy + height, srcy);
+    out:
+      intel->vtbl.leave_meta_state(intel);
+      intel_batchbuffer_flush(intel->batch);
+   }
+   else {
+      /* No cliprects: nothing drawn, but still undo install_meta_state. */
+      intel->vtbl.leave_meta_state(intel);
+   }
+   UNLOCK_HARDWARE(intel);
+   return GL_TRUE;
+}
+
+
+
+
+
+/* Pros:  
+ *   - no waiting for idle before updating framebuffer.
+ *   
+ * Cons:
+ *   - if upload is by memcpy, this may actually be slower than fallback path.
+ *   - uploads the whole image even if destination is clipped
+ *   
+ * Need to benchmark.
+ *
+ * Given the questions about performance, implement for pbo's only.
+ * This path is definitely a win if the pbo is already in agp.  If it
+ * turns out otherwise, we can add the code necessary to upload client
+ * data to agp space before performing the blit.  (Though it may turn
+ * out to be better/simpler just to use the texture engine).
+ */
+static GLboolean
+do_blit_drawpixels(GLcontext * ctx,
+                   GLint x, GLint y,
+                   GLsizei width, GLsizei height,
+                   GLenum format, GLenum type,
+                   const struct gl_pixelstore_attrib *unpack,
+                   const GLvoid * pixels)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_region *dest = intel_drawbuf_region(intel);
+   struct intel_buffer_object *src = intel_buffer_object(unpack->BufferObj);
+   GLuint src_offset;
+   GLuint rowLength;
+   dri_fence *fence = NULL;
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      _mesa_printf("%s\n", __FUNCTION__);
+
+
+   if (!dest) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - no dest\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   if (src) {
+      /* This validation should be done by core mesa:
+       */
+      if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
+                                     format, type, pixels)) {
+         _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels");
+         return GL_TRUE;
+      }
+   }
+   else {
+      /* PBO only for now:
+       */
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - not PBO\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   if (!intel_check_blit_format(dest, format, type)) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - bad format for blit\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   if (!intel_check_blit_fragment_ops(ctx, GL_FALSE)) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - bad GL fragment state for blitter\n",
+                      __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   if (ctx->Pixel.ZoomX != 1.0F) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - bad PixelZoomX for blit\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+
+   if (unpack->RowLength > 0)
+      rowLength = unpack->RowLength;
+   else
+      rowLength = width;
+
+   if (ctx->Pixel.ZoomY == -1.0F) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - bad PixelZoomY for blit\n", __FUNCTION__);
+      return GL_FALSE;          /* later: would also need y -= height */
+   }
+   else if (ctx->Pixel.ZoomY == 1.0F) {
+      rowLength = -rowLength;
+   }
+   else {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         _mesa_printf("%s - bad PixelZoomY for blit\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   src_offset = (GLuint) _mesa_image_address(2, unpack, pixels, width, height,
+                                             format, type, 0, 0, 0);
+
+   intelFlush(&intel->ctx);
+   LOCK_HARDWARE(intel);
+
+   if (intel->driDrawable->numClipRects) {
+      __DRIdrawablePrivate *dPriv = intel->driDrawable;
+      int nbox = dPriv->numClipRects;
+      drm_clip_rect_t *box = dPriv->pClipRects;
+      drm_clip_rect_t rect;
+      drm_clip_rect_t dest_rect;
+      dri_bo *src_buffer = intel_bufferobj_buffer(intel, src, INTEL_READ);
+      int i;
+
+      dest_rect.x1 = dPriv->x + x;
+      dest_rect.y1 = dPriv->y + dPriv->h - (y + height);
+      dest_rect.x2 = dest_rect.x1 + width;
+      dest_rect.y2 = dest_rect.y1 + height;
+
+      for (i = 0; i < nbox; i++) {
+         if (!intel_intersect_cliprects(&rect, &dest_rect, &box[i]))
+            continue;
+
+         intelEmitCopyBlit(intel,
+                           dest->cpp,
+                           rowLength, src_buffer, src_offset, GL_FALSE,
+                           dest->pitch, dest->buffer, 0, dest->tiled,
+                           rect.x1 - dest_rect.x1,
+                           rect.y2 - dest_rect.y2,
+                           rect.x1,
+                           rect.y1, rect.x2 - rect.x1, rect.y2 - rect.y1,
+			   ctx->Color.ColorLogicOpEnabled ?
+			   ctx->Color.LogicOp : GL_COPY);
+      }
+      intel_batchbuffer_flush(intel->batch);
+      fence = intel->batch->last_fence;
+      if (fence)   /* NOTE(review): assumes last_fence may still be NULL */
+         dri_fence_reference(fence);
+   }
+   UNLOCK_HARDWARE(intel);
+
+   if (fence) {
+      dri_fence_wait(fence);
+      dri_fence_unreference(fence);
+   }
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      _mesa_printf("%s - DONE\n", __FUNCTION__);
+
+   return GL_TRUE;
+}
+
+
+
+void
+intelDrawPixels(GLcontext * ctx,
+                GLint x, GLint y,
+                GLsizei width, GLsizei height,
+                GLenum format,
+                GLenum type,
+                const struct gl_pixelstore_attrib *unpack,
+                const GLvoid * pixels)
+{
+   struct gl_fragment_program *saved_fp;
+   GLboolean hide_texenv_prog;
+
+   /* Hardware paths first: blitter, then textured quad. */
+   if (do_blit_drawpixels(ctx, x, y, width, height, format, type,
+                          unpack, pixels) ||
+       do_texture_drawpixels(ctx, x, y, width, height, format, type,
+                             unpack, pixels))
+      return;
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      _mesa_printf("%s: fallback to swrast\n", __FUNCTION__);
+
+   /* Keep swrast from running the i915 texenv fragment program on every
+    * pixel of the image; this is only a performance optimization.
+    */
+   saved_fp = ctx->FragmentProgram._Current;
+   hide_texenv_prog = (saved_fp == ctx->FragmentProgram._TexEnvProgram);
+
+   if (hide_texenv_prog) {
+      /* Zeroing _Current alone would segfault in the state checks that a
+       * buffer resize triggers, hence the extra flags.  Remains a hack. */
+      ctx->FragmentProgram._Current = NULL;
+      ctx->FragmentProgram._UseTexEnvProgram = GL_FALSE;
+      ctx->FragmentProgram._Active = GL_FALSE;
+   }
+
+   _swrast_DrawPixels(ctx, x, y, width, height, format, type, unpack, pixels);
+
+   if (hide_texenv_prog) {
+      ctx->FragmentProgram._Current = saved_fp;
+      ctx->FragmentProgram._UseTexEnvProgram = GL_TRUE;
+      ctx->FragmentProgram._Active = GL_TRUE;
+      _swrast_InvalidateState(ctx, _NEW_PROGRAM);
+   }
+}
diff --git a/i915/intel_reg.h b/shared/intel_reg.h
index 1ec1532..37629c0 100644
--- a/i915/intel_reg.h
+++ b/shared/intel_reg.h
@@ -25,16 +25,19 @@
  * 
  **************************************************************************/
 
+#define CMD_MI				(0x0 << 29)
+#define CMD_2D				(0x2 << 29)
+#define CMD_3D				(0x3 << 29)
 
-#ifndef _INTEL_REG_H_
-#define _INTEL_REG_H_
+#define MI_BATCH_BUFFER_END		(CMD_MI | 0xA << 23)
 
+/* Stalls command execution waiting for the given events to have occurred. */
+#define MI_WAIT_FOR_EVENT               (CMD_MI | (0x3 << 23))
+#define MI_WAIT_FOR_PLANE_B_FLIP        (1<<6)
+#define MI_WAIT_FOR_PLANE_A_FLIP        (1<<2)
 
-
-#define CMD_3D (0x3<<29)
-
-
-#define _3DPRIMITIVE         ((0x3<<29)|(0x1f<<24))
+/* Primitive dispatch on 830-945 */
+#define _3DPRIMITIVE			(CMD_3D | (0x1f << 24))
 #define PRIM_INDIRECT            (1<<23)
 #define PRIM_INLINE              (0<<23)
 #define PRIM_INDIRECT_SEQUENTIAL (0<<17)
@@ -52,33 +55,25 @@
 #define PRIM3D_DIB		(0x9<<18)
 #define PRIM3D_MASK		(0x1f<<18)
 
-#define I915PACKCOLOR4444(r,g,b,a) \
-  ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4))
-
-#define I915PACKCOLOR1555(r,g,b,a) \
-  ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \
-    ((a) ? 0x8000 : 0))
-
-#define I915PACKCOLOR565(r,g,b) \
-  ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3))
-
-#define I915PACKCOLOR8888(r,g,b,a) \
-  ((a<<24) | (r<<16) | (g<<8) | b)
-
+#define XY_SETUP_BLT_CMD		(CMD_2D | (0x01 << 22) | 6)
 
+#define XY_COLOR_BLT_CMD		(CMD_2D | (0x50 << 22) | 4)
 
+#define XY_SRC_COPY_BLT_CMD             (CMD_2D | (0x53 << 22) | 6)
 
-#define BR00_BITBLT_CLIENT   0x40000000
-#define BR00_OP_COLOR_BLT    0x10000000
-#define BR00_OP_SRC_COPY_BLT 0x10C00000
-#define BR13_SOLID_PATTERN   0x80000000
+#define XY_TEXT_IMMEDIATE_BLIT_CMD	(CMD_2D | (0x31 << 22))
+# define XY_TEXT_BYTE_PACKED		(1 << 16)
 
-#define XY_COLOR_BLT_CMD		((2<<29)|(0x50<<22)|0x4)
-#define XY_COLOR_BLT_WRITE_ALPHA	(1<<21)
-#define XY_COLOR_BLT_WRITE_RGB		(1<<20)
+/* BR00 */
+#define XY_BLT_WRITE_ALPHA	(1 << 21)
+#define XY_BLT_WRITE_RGB	(1 << 20)
+#define XY_SRC_TILED		(1 << 15)
+#define XY_DST_TILED		(1 << 11)
 
-#define XY_SRC_COPY_BLT_CMD             ((2<<29)|(0x53<<22)|6)
-#define XY_SRC_COPY_BLT_WRITE_ALPHA     (1<<21)
-#define XY_SRC_COPY_BLT_WRITE_RGB       (1<<20)
+/* BR13 */
+#define BR13_565		(0x1 << 24)
+#define BR13_8888		(0x3 << 24)
 
-#endif
+#define FENCE_LINEAR 0
+#define FENCE_XMAJOR 1
+#define FENCE_YMAJOR 2
diff --git a/shared/intel_regions.c b/shared/intel_regions.c
new file mode 100644
index 0000000..35ab46a
--- /dev/null
+++ b/shared/intel_regions.c
@@ -0,0 +1,502 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/* Provide additional functionality on top of bufmgr buffers:
+ *   - 2d semantics and blit operations
+ *   - refcounting of buffers for multiple images in a buffer.
+ *   - refcounting of buffer mappings.
+ *   - some logic for moving the buffers to the best memory pools for
+ *     given operations.
+ *
+ * Most of this is to make it easier to implement the fixed-layout
+ * mipmap tree required by intel hardware in the face of GL's
+ * programming interface where each image can be specified in random
+ * order and it isn't clear what layout the tree should have until the
+ * last moment.
+ */
+
+#include "intel_context.h"
+#include "intel_regions.h"
+#include "intel_blit.h"
+#include "intel_buffer_objects.h"
+#include "dri_bufmgr.h"
+#include "intel_bufmgr_ttm.h"
+#include "intel_batchbuffer.h"
+
+#define FILE_DEBUG_FLAG DEBUG_REGION
+
+/* Map the region's buffer (refcounted) and return the mapping.
+ * XXX: Thread safety? */
+GLubyte *
+intel_region_map(struct intel_context *intel, struct intel_region *region)
+{
+   DBG("%s\n", __FUNCTION__);
+   if (!region->map_refcount++) {       /* first mapper does the real map */
+      if (region->pbo)
+         intel_region_cow(intel, region);       /* detach from the pbo first */
+
+      dri_bo_map(region->buffer, GL_TRUE);
+      region->map = region->buffer->virtual;
+   }
+
+   return region->map;
+}
+
+void
+intel_region_unmap(struct intel_context *intel, struct intel_region *region)
+{
+   DBG("%s\n", __FUNCTION__);
+   if (!--region->map_refcount) {       /* last mapper: really unmap */
+      dri_bo_unmap(region->buffer);
+      region->map = NULL;
+   }
+}
+
+static struct intel_region *
+intel_region_alloc_internal(struct intel_context *intel,
+			    GLuint cpp, GLuint pitch, GLuint height,
+			    GLuint tiled, dri_bo *buffer)
+{
+   struct intel_region *region;
+
+   DBG("%s\n", __FUNCTION__);
+
+   if (buffer == NULL)          /* no buffer to wrap: report failure */
+      return NULL;
+
+   region = calloc(sizeof(*region), 1); /* NOTE(review): result is not checked */
+   region->cpp = cpp;
+   region->pitch = pitch;
+   region->height = height;     /* needed? */
+   region->refcount = 1;        /* initial reference, returned to the caller */
+   region->tiled = tiled;
+   region->buffer = buffer;     /* stored without taking an extra reference */
+
+   return region;
+}
+
+struct intel_region *
+intel_region_alloc(struct intel_context *intel,
+                   GLuint cpp, GLuint pitch, GLuint height)
+{
+   dri_bo *buffer;
+
+   buffer = dri_bo_alloc(intel->bufmgr, "region",
+			 pitch * cpp * height, 64,
+			 DRM_BO_FLAG_MEM_LOCAL |
+			 DRM_BO_FLAG_CACHED |
+			 DRM_BO_FLAG_CACHED_MAPPED);
+
+   return intel_region_alloc_internal(intel, cpp, pitch, height, 0, buffer);
+}
+
+struct intel_region *
+intel_region_alloc_for_handle(struct intel_context *intel,
+			      GLuint cpp, GLuint pitch, GLuint height,
+			      GLuint tiled, GLuint handle)
+{
+   dri_bo *buffer;
+
+   buffer = intel_ttm_bo_create_from_handle(intel->bufmgr, "region", handle);
+
+   return intel_region_alloc_internal(intel,
+				      cpp, pitch, height, tiled, buffer);
+}
+
+void
+intel_region_reference(struct intel_region **dst, struct intel_region *src)
+{
+   assert(*dst == NULL);        /* *dst must not already hold a region */
+   if (src) {
+      src->refcount++;
+      *dst = src;
+   }
+}
+
+void
+intel_region_release(struct intel_region **region)
+{
+   if (!*region)                /* nothing held: no-op */
+      return;
+
+   DBG("%s %d\n", __FUNCTION__, (*region)->refcount - 1);
+
+   ASSERT((*region)->refcount > 0);
+   (*region)->refcount--;
+
+   if ((*region)->refcount == 0) {      /* last reference: tear down */
+      assert((*region)->map_refcount == 0);
+
+      if ((*region)->pbo)
+	 (*region)->pbo->region = NULL;  /* break the pbo's back-pointer */
+      (*region)->pbo = NULL;
+      dri_bo_unreference((*region)->buffer);
+      free(*region);
+   }
+   *region = NULL;              /* caller's pointer is always cleared */
+}
+
+/* Copy a width x height rectangle between two linear buffers.  Pitches,
+ * positions and width are given in units of cpp bytes (i.e. pixels).
+ * XXX Move this into core Mesa? */
+void
+_mesa_copy_rect(GLubyte * dst,
+                GLuint cpp,
+                GLuint dst_pitch,
+                GLuint dst_x,
+                GLuint dst_y,
+                GLuint width,
+                GLuint height,
+                const GLubyte * src,
+                GLuint src_pitch, GLuint src_x, GLuint src_y)
+{
+   GLuint i;
+
+   dst_pitch *= cpp;            /* from here on everything is in bytes */
+   src_pitch *= cpp;
+   dst += dst_x * cpp;
+   src += src_x * cpp;
+   dst += dst_y * dst_pitch;
+   src += src_y * src_pitch;    /* source rows are src_pitch apart, not dst_pitch */
+   width *= cpp;
+
+   if (width == dst_pitch && width == src_pitch)
+      memcpy(dst, src, height * width); /* both tightly packed: one copy */
+   else {
+      for (i = 0; i < height; i++) {
+         memcpy(dst, src, width);
+         dst += dst_pitch;
+         src += src_pitch;
+      }
+   }
+}
+
+
+/* Upload data to a rectangular sub-region.  Lots of choices how to do this:
+ *
+ * - memcpy by span to current destination
+ * - upload data as new buffer and blit
+ *
+ * Currently always memcpy.
+ */
+void
+intel_region_data(struct intel_context *intel,
+                  struct intel_region *dst,
+                  GLuint dst_offset,
+                  GLuint dstx, GLuint dsty,
+                  const void *src, GLuint src_pitch,
+                  GLuint srcx, GLuint srcy, GLuint width, GLuint height)
+{
+   GLboolean locked = GL_FALSE;
+
+   DBG("%s\n", __FUNCTION__);
+
+   if (intel == NULL)
+      return;
+
+   if (dst->pbo) {
+      if (dstx == 0 &&
+          dsty == 0 && width == dst->pitch && height == dst->height)
+         intel_region_release_pbo(intel, dst);
+      else
+         intel_region_cow(intel, dst);
+   }
+
+   if (!intel->locked) {
+      LOCK_HARDWARE(intel);
+      locked = GL_TRUE;
+   }
+
+   _mesa_copy_rect(intel_region_map(intel, dst) + dst_offset,
+                   dst->cpp,
+                   dst->pitch,
+                   dstx, dsty, width, height, src, src_pitch, srcx, srcy);
+
+   intel_region_unmap(intel, dst);
+
+   if (locked)
+      UNLOCK_HARDWARE(intel);
+
+}
+
+/* Copy rectangular sub-regions. Need better logic about when to
+ * push buffers into AGP - will currently do so whenever possible.
+ */
+void
+intel_region_copy(struct intel_context *intel,
+                  struct intel_region *dst,
+                  GLuint dst_offset,
+                  GLuint dstx, GLuint dsty,
+                  struct intel_region *src,
+                  GLuint src_offset,
+                  GLuint srcx, GLuint srcy, GLuint width, GLuint height)
+{
+   DBG("%s\n", __FUNCTION__);
+
+   if (intel == NULL)
+      return;
+
+   if (dst->pbo) {
+      if (dstx == 0 &&
+          dsty == 0 && width == dst->pitch && height == dst->height)
+         intel_region_release_pbo(intel, dst);
+      else
+         intel_region_cow(intel, dst);
+   }
+
+   assert(src->cpp == dst->cpp);
+
+   intelEmitCopyBlit(intel,
+                     dst->cpp,
+                     src->pitch, src->buffer, src_offset, src->tiled,
+                     dst->pitch, dst->buffer, dst_offset, dst->tiled,
+                     srcx, srcy, dstx, dsty, width, height,
+		     GL_COPY);
+}
+
+/* Fill a rectangular sub-region.  Need better logic about when to
+ * push buffers into AGP - will currently do so whenever possible.
+ */
+void
+intel_region_fill(struct intel_context *intel,
+                  struct intel_region *dst,
+                  GLuint dst_offset,
+                  GLuint dstx, GLuint dsty,
+                  GLuint width, GLuint height, GLuint color)
+{
+   DBG("%s\n", __FUNCTION__);
+
+   if (intel == NULL)
+      return;   
+
+   if (dst->pbo) {
+      if (dstx == 0 &&
+          dsty == 0 && width == dst->pitch && height == dst->height)
+         intel_region_release_pbo(intel, dst);
+      else
+         intel_region_cow(intel, dst);
+   }
+
+   intelEmitFillBlit(intel,
+                     dst->cpp,
+                     dst->pitch, dst->buffer, dst_offset, dst->tiled,
+                     dstx, dsty, width, height, color);
+}
+
+/* Attach to a pbo, discarding our data.  Effectively zero-copy upload
+ * the pbo's data.
+ */
+void
+intel_region_attach_pbo(struct intel_context *intel,
+                        struct intel_region *region,
+                        struct intel_buffer_object *pbo)
+{
+   if (region->pbo == pbo)
+      return;
+
+   /* If there is already a pbo attached, break the cow tie now.
+    * Don't call intel_region_release_pbo() as that would
+    * unnecessarily allocate a new buffer we would have to immediately
+    * discard.
+    */
+   if (region->pbo) {
+      region->pbo->region = NULL;
+      region->pbo = NULL;
+   }
+
+   if (region->buffer) {
+      dri_bo_unreference(region->buffer);
+      region->buffer = NULL;
+   }
+
+   region->pbo = pbo;
+   region->pbo->region = region;
+   dri_bo_reference(pbo->buffer);
+   region->buffer = pbo->buffer;
+}
+
+
+/* Break the COW tie to the pbo and allocate a new buffer.
+ * The pbo gets to keep the data.
+ */
+void
+intel_region_release_pbo(struct intel_context *intel,
+                         struct intel_region *region)
+{
+   assert(region->buffer == region->pbo->buffer);
+   region->pbo->region = NULL;
+   region->pbo = NULL;
+   dri_bo_unreference(region->buffer);
+   region->buffer = NULL;
+
+   region->buffer = dri_bo_alloc(intel->bufmgr, "region",
+				 region->pitch * region->cpp * region->height,
+				 64,
+				 DRM_BO_FLAG_MEM_LOCAL |
+				 DRM_BO_FLAG_CACHED |
+				 DRM_BO_FLAG_CACHED_MAPPED);
+}
+
+/* Break the COW tie to the pbo.  Both the pbo and the region end up
+ * with a copy of the data.
+ */
+void
+intel_region_cow(struct intel_context *intel, struct intel_region *region)
+{
+   struct intel_buffer_object *pbo = region->pbo;
+   GLboolean was_locked;        /* read only after the NULL check below */
+
+   if (intel == NULL)
+      return;
+
+   intel_region_release_pbo(intel, region);
+
+   assert(region->cpp * region->pitch * region->height == pbo->Base.Size);
+
+   DBG("%s (%d bytes)\n", __FUNCTION__, pbo->Base.Size);
+
+   /* Now blit from the pbo's buffer (source) to the region's new buffer:
+    */
+
+   intel_batchbuffer_flush(intel->batch);
+
+   was_locked = intel->locked;
+   if (!was_locked)             /* take the lock only if not already held */
+      LOCK_HARDWARE(intel);
+
+   intelEmitCopyBlit(intel,
+		     region->cpp,
+		     region->pitch, pbo->buffer, 0, region->tiled,
+		     region->pitch, region->buffer, 0, region->tiled,
+		     0, 0, 0, 0,
+		     region->pitch, region->height,
+		     GL_COPY);
+
+   intel_batchbuffer_flush(intel->batch);
+
+   if (!was_locked)             /* drop the lock only if we took it */
+      UNLOCK_HARDWARE(intel);
+}
+
+dri_bo *
+intel_region_buffer(struct intel_context *intel,
+                    struct intel_region *region, GLuint flag)
+{
+   if (region->pbo) {           /* buffer is shared with a pbo: detach first */
+      if (flag == INTEL_WRITE_PART)
+         intel_region_cow(intel, region);       /* copy, then detach */
+      else if (flag == INTEL_WRITE_FULL)
+         intel_region_release_pbo(intel, region);       /* detach, no copy */
+   }
+
+   return region->buffer;
+}
+
+static struct intel_region *
+intel_recreate_static(struct intel_context *intel,
+		      const char *name,
+		      struct intel_region *region,
+		      intelRegion *region_desc)
+{
+   intelScreenPrivate *intelScreen = intel->intelScreen;
+
+   if (region == NULL) {
+      region = calloc(sizeof(*region), 1);
+      region->refcount = 1;
+   }
+
+   if (intel->ctx.Visual.rgbBits == 24)
+      region->cpp = 4;
+   else
+      region->cpp = intel->ctx.Visual.rgbBits / 8;
+   region->pitch = intelScreen->pitch;
+   region->height = intelScreen->height;     /* needed? */
+   region->tiled = region_desc->tiled;
+
+   if (intel->ttm) {
+      assert(region_desc->bo_handle != -1);
+      region->buffer = intel_ttm_bo_create_from_handle(intel->bufmgr,
+						       name,
+						       region_desc->bo_handle);
+   } else {
+      region->buffer = dri_bo_alloc_static(intel->bufmgr,
+					   name,
+					   region_desc->offset,
+					   intelScreen->pitch *
+					   intelScreen->height,
+					   region_desc->map,
+					   DRM_BO_FLAG_MEM_TT);
+   }
+
+   assert(region->buffer != NULL);
+
+   return region;
+}
+
+/**
+ * Create intel_region structs to describe the static front, back, and depth
+ * buffers created by the xserver.
+ *
+ * Although FBO's mean we now no longer use these as render targets in
+ * all circumstances, they won't go away until the back and depth
+ * buffers become private, and the front buffer will remain even then.
+ *
+ * Note that these don't allocate video memory, just describe
+ * allocations already made by the X server.
+ */
+void
+intel_recreate_static_regions(struct intel_context *intel)
+{
+   intelScreenPrivate *intelScreen = intel->intelScreen;
+
+   intel->front_region =
+      intel_recreate_static(intel, "front",
+			    intel->front_region,
+			    &intelScreen->front);
+
+   intel->back_region =
+      intel_recreate_static(intel, "back",
+			    intel->back_region,
+			    &intelScreen->back);
+
+#ifdef I915
+   if (intelScreen->third.handle) {
+      intel->third_region =
+	 intel_recreate_static(intel, "third",
+			       intel->third_region,
+			       &intelScreen->third);
+   }
+#endif /* I915 */
+
+   /* Still assumes front.cpp == depth.cpp.  We can kill this when we move to
+    * private buffers.
+    */
+   intel->depth_region =
+      intel_recreate_static(intel, "depth",
+			    intel->depth_region,
+			    &intelScreen->depth);
+}
diff --git a/shared/intel_regions.h b/shared/intel_regions.h
new file mode 100644
index 0000000..229f79a
--- /dev/null
+++ b/shared/intel_regions.h
@@ -0,0 +1,140 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_REGIONS_H
+#define INTEL_REGIONS_H
+
+#include "mtypes.h"
+#include "dri_bufmgr.h"
+
+struct intel_context;
+struct intel_buffer_object;
+
+/**
+ * A layer on top of the bufmgr buffers that adds a few useful things:
+ *
+ * - Refcounting for local buffer references.
+ * - Refcounting for buffer maps
+ * - Buffer dimensions - pitch and height.
+ * - Blitter commands for copying 2D regions between buffers. (really???)
+ */
+struct intel_region
+{
+   dri_bo *buffer;  /**< buffer manager's buffer */
+   GLuint refcount; /**< Reference count for region */
+   GLuint cpp;      /**< bytes per pixel */
+   GLuint pitch;    /**< in pixels */
+   GLuint height;   /**< in pixels */
+   GLubyte *map;    /**< only non-NULL when region is actually mapped */
+   GLuint map_refcount;  /**< Reference count for mapping */
+
+   GLuint draw_offset; /**< Offset of drawing address within the region */
+   GLboolean tiled; /**< True if the region is X or Y-tiled.  Used on 965. */
+
+   struct intel_buffer_object *pbo;     /* zero-copy uploads */
+};
+
+
+/* Allocate a refcounted region.  Pointers to regions should only be
+ * copied by calling intel_region_reference().
+ */
+struct intel_region *intel_region_alloc(struct intel_context *intel,
+                                        GLuint cpp,
+                                        GLuint pitch, GLuint height);
+
+struct intel_region *
+intel_region_alloc_for_handle(struct intel_context *intel,
+			      GLuint cpp, GLuint pitch, GLuint height,
+			      GLuint tiled, unsigned int handle);
+
+void intel_region_reference(struct intel_region **dst,
+                            struct intel_region *src);
+
+void intel_region_release(struct intel_region **ib);
+
+void intel_recreate_static_regions(struct intel_context *intel);
+
+/* Map/unmap regions.  This is refcounted also: 
+ */
+GLubyte *intel_region_map(struct intel_context *intel,
+                          struct intel_region *ib);
+
+void intel_region_unmap(struct intel_context *intel, struct intel_region *ib);
+
+
+/* Upload data to a rectangular sub-region
+ */
+void intel_region_data(struct intel_context *intel,
+                       struct intel_region *dest,
+                       GLuint dest_offset,
+                       GLuint destx, GLuint desty,
+                       const void *src, GLuint src_stride,
+                       GLuint srcx, GLuint srcy, GLuint width, GLuint height);
+
+/* Copy rectangular sub-regions
+ */
+void intel_region_copy(struct intel_context *intel,
+                       struct intel_region *dest,
+                       GLuint dest_offset,
+                       GLuint destx, GLuint desty,
+                       struct intel_region *src,
+                       GLuint src_offset,
+                       GLuint srcx, GLuint srcy, GLuint width, GLuint height);
+
+/* Fill a rectangular sub-region
+ */
+void intel_region_fill(struct intel_context *intel,
+                       struct intel_region *dest,
+                       GLuint dest_offset,
+                       GLuint destx, GLuint desty,
+                       GLuint width, GLuint height, GLuint color);
+
+/* Helpers for zerocopy uploads, particularly texture image uploads:
+ */
+void intel_region_attach_pbo(struct intel_context *intel,
+                             struct intel_region *region,
+                             struct intel_buffer_object *pbo);
+void intel_region_release_pbo(struct intel_context *intel,
+                              struct intel_region *region);
+void intel_region_cow(struct intel_context *intel,
+                      struct intel_region *region);
+
+dri_bo *intel_region_buffer(struct intel_context *intel,
+			    struct intel_region *region,
+			    GLuint flag);
+
+void _mesa_copy_rect(GLubyte * dst,
+                GLuint cpp,
+                GLuint dst_pitch,
+                GLuint dst_x,
+                GLuint dst_y,
+                GLuint width,
+                GLuint height,
+                const GLubyte * src,
+                GLuint src_pitch, GLuint src_x, GLuint src_y);
+
+#endif
diff --git a/shared/intel_screen.c b/shared/intel_screen.c
new file mode 100644
index 0000000..5dded4b
--- /dev/null
+++ b/shared/intel_screen.c
@@ -0,0 +1,882 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "context.h"
+#include "framebuffer.h"
+#include "matrix.h"
+#include "renderbuffer.h"
+#include "simple_list.h"
+#include "utils.h"
+#include "vblank.h"
+#include "xmlpool.h"
+
+
+#include "intel_screen.h"
+
+#include "intel_buffers.h"
+#include "intel_tex.h"
+#include "intel_span.h"
+#include "intel_ioctl.h"
+#include "intel_fbo.h"
+#include "intel_chipset.h"
+
+#include "i915_drm.h"
+#include "i830_dri.h"
+#include "intel_regions.h"
+#include "intel_batchbuffer.h"
+#include "intel_bufmgr_ttm.h"
+
+PUBLIC const char __driConfigOptions[] =
+   DRI_CONF_BEGIN
+   DRI_CONF_SECTION_PERFORMANCE
+      DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS)
+      DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_ALWAYS_SYNC)
+      /* Options correspond to DRI_CONF_BO_REUSE_DISABLED,
+       * DRI_CONF_BO_REUSE_ALL
+       */
+      DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 0, "0:1")
+	 DRI_CONF_DESC_BEGIN(en, "Buffer object reuse")
+	    DRI_CONF_ENUM(0, "Disable buffer object reuse")
+	    DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects")
+	 DRI_CONF_DESC_END
+      DRI_CONF_OPT_END
+   DRI_CONF_SECTION_END
+   DRI_CONF_SECTION_QUALITY
+      DRI_CONF_FORCE_S3TC_ENABLE(false)
+      DRI_CONF_ALLOW_LARGE_TEXTURES(2)
+   DRI_CONF_SECTION_END
+   DRI_CONF_SECTION_DEBUG
+     DRI_CONF_NO_RAST(false)
+   DRI_CONF_SECTION_END
+DRI_CONF_END;
+
+const GLuint __driNConfigOptions = 6;
+
+#ifdef USE_NEW_INTERFACE
+static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;
+#endif /*USE_NEW_INTERFACE */
+
+/**
+ * Map all the memory regions described by the screen.
+ * \return GL_TRUE if success, GL_FALSE if error.
+ */
+GLboolean
+intelMapScreenRegions(__DRIscreenPrivate * sPriv)
+{
+   intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private;
+
+   if (intelScreen->front.handle) {
+      if (drmMap(sPriv->fd,
+                 intelScreen->front.handle,
+                 intelScreen->front.size,
+                 (drmAddress *) & intelScreen->front.map) != 0) {
+         _mesa_problem(NULL, "drmMap(frontbuffer) failed!");
+         return GL_FALSE;
+      }
+   }
+   else {
+      _mesa_warning(NULL, "no front buffer handle in intelMapScreenRegions!");
+   }
+
+   if (0)
+      _mesa_printf("Back 0x%08x ", intelScreen->back.handle);
+   if (drmMap(sPriv->fd,
+              intelScreen->back.handle,
+              intelScreen->back.size,
+              (drmAddress *) & intelScreen->back.map) != 0) {
+      intelUnmapScreenRegions(intelScreen);
+      return GL_FALSE;
+   }
+
+   if (intelScreen->third.handle) {
+      if (0)
+	 _mesa_printf("Third 0x%08x ", intelScreen->third.handle);
+      if (drmMap(sPriv->fd,
+		 intelScreen->third.handle,
+		 intelScreen->third.size,
+		 (drmAddress *) & intelScreen->third.map) != 0) {
+	 intelUnmapScreenRegions(intelScreen);
+	 return GL_FALSE;
+      }
+   }
+
+   if (0)
+      _mesa_printf("Depth 0x%08x ", intelScreen->depth.handle);
+   if (drmMap(sPriv->fd,
+              intelScreen->depth.handle,
+              intelScreen->depth.size,
+              (drmAddress *) & intelScreen->depth.map) != 0) {
+      intelUnmapScreenRegions(intelScreen);
+      return GL_FALSE;
+   }
+
+   if (0)
+      _mesa_printf("TEX 0x%08x ", intelScreen->tex.handle);
+   if (intelScreen->tex.size != 0) {
+      if (drmMap(sPriv->fd,
+		 intelScreen->tex.handle,
+		 intelScreen->tex.size,
+		 (drmAddress *) & intelScreen->tex.map) != 0) {
+	 intelUnmapScreenRegions(intelScreen);
+	 return GL_FALSE;
+      }
+   }
+
+   if (0)
+      printf("Mappings:  front: %p  back: %p  third: %p  depth: %p  tex: %p\n",
+             intelScreen->front.map,
+             intelScreen->back.map, intelScreen->third.map,
+             intelScreen->depth.map, intelScreen->tex.map);
+   return GL_TRUE;
+}
+
+void
+intelUnmapScreenRegions(intelScreenPrivate * intelScreen)
+{
+#define REALLY_UNMAP 1
+   if (intelScreen->front.map) {
+#if REALLY_UNMAP
+      if (drmUnmap(intelScreen->front.map, intelScreen->front.size) != 0)
+         printf("drmUnmap front failed!\n");
+#endif
+      intelScreen->front.map = NULL;
+   }
+   if (intelScreen->back.map) {
+#if REALLY_UNMAP
+      if (drmUnmap(intelScreen->back.map, intelScreen->back.size) != 0)
+         printf("drmUnmap back failed!\n");
+#endif
+      intelScreen->back.map = NULL;
+   }
+   if (intelScreen->third.map) {
+#if REALLY_UNMAP
+      if (drmUnmap(intelScreen->third.map, intelScreen->third.size) != 0)
+         printf("drmUnmap third failed!\n");
+#endif
+      intelScreen->third.map = NULL;
+   }
+   if (intelScreen->depth.map) {
+#if REALLY_UNMAP
+      drmUnmap(intelScreen->depth.map, intelScreen->depth.size);
+      intelScreen->depth.map = NULL;
+#endif
+   }
+   if (intelScreen->tex.map) {
+#if REALLY_UNMAP
+      drmUnmap(intelScreen->tex.map, intelScreen->tex.size);
+      intelScreen->tex.map = NULL;
+#endif
+   }
+}
+
+
+static void
+intelPrintDRIInfo(intelScreenPrivate * intelScreen,
+                  __DRIscreenPrivate * sPriv, I830DRIPtr gDRIPriv)
+{
+   fprintf(stderr, "*** Front size:   0x%x  offset: 0x%x  pitch: %d\n",
+           intelScreen->front.size, intelScreen->front.offset,
+           intelScreen->pitch);
+   fprintf(stderr, "*** Back size:    0x%x  offset: 0x%x  pitch: %d\n",
+           intelScreen->back.size, intelScreen->back.offset,
+           intelScreen->pitch);
+   fprintf(stderr, "*** Depth size:   0x%x  offset: 0x%x  pitch: %d\n",
+           intelScreen->depth.size, intelScreen->depth.offset,
+           intelScreen->pitch);
+   fprintf(stderr, "*** Texture size: 0x%x  offset: 0x%x\n",
+           intelScreen->tex.size, intelScreen->tex.offset);
+   fprintf(stderr, "*** Memory : 0x%x\n", gDRIPriv->mem);
+}
+
+
+static void
+intelPrintSAREA(const struct drm_i915_sarea * sarea)
+{
+   fprintf(stderr, "SAREA: sarea width %d  height %d\n", sarea->width,
+           sarea->height);
+   fprintf(stderr, "SAREA: pitch: %d\n", sarea->pitch);
+   fprintf(stderr,
+           "SAREA: front offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
+           sarea->front_offset, sarea->front_size,
+           (unsigned) sarea->front_handle);
+   fprintf(stderr,
+           "SAREA: back  offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
+           sarea->back_offset, sarea->back_size,
+           (unsigned) sarea->back_handle);
+   fprintf(stderr, "SAREA: depth offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
+           sarea->depth_offset, sarea->depth_size,
+           (unsigned) sarea->depth_handle);
+   fprintf(stderr, "SAREA: tex   offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
+           sarea->tex_offset, sarea->tex_size, (unsigned) sarea->tex_handle);
+}
+
+
+/**
+ * A number of the screen parameters are obtained/computed from
+ * information in the SAREA.  This function updates those parameters.
+ */
+void
+intelUpdateScreenFromSAREA(intelScreenPrivate * intelScreen,
+                           struct drm_i915_sarea * sarea)
+{
+   intelScreen->width = sarea->width;
+   intelScreen->height = sarea->height;
+   intelScreen->pitch = sarea->pitch;
+
+   intelScreen->front.offset = sarea->front_offset;
+   intelScreen->front.handle = sarea->front_handle;
+   intelScreen->front.size = sarea->front_size;
+   intelScreen->front.tiled = sarea->front_tiled;
+
+   intelScreen->back.offset = sarea->back_offset;
+   intelScreen->back.handle = sarea->back_handle;
+   intelScreen->back.size = sarea->back_size;
+   intelScreen->back.tiled = sarea->back_tiled;
+
+   if (intelScreen->driScrnPriv->ddx_version.minor >= 8) {
+      intelScreen->third.offset = sarea->third_offset;
+      intelScreen->third.handle = sarea->third_handle;
+      intelScreen->third.size = sarea->third_size;
+      intelScreen->third.tiled = sarea->third_tiled;
+   }
+
+   intelScreen->depth.offset = sarea->depth_offset;
+   intelScreen->depth.handle = sarea->depth_handle;
+   intelScreen->depth.size = sarea->depth_size;
+   intelScreen->depth.tiled = sarea->depth_tiled;
+
+   if (intelScreen->driScrnPriv->ddx_version.minor >= 9) {
+      intelScreen->front.bo_handle = sarea->front_bo_handle;
+      intelScreen->back.bo_handle = sarea->back_bo_handle;
+      intelScreen->third.bo_handle = sarea->third_bo_handle;
+      intelScreen->depth.bo_handle = sarea->depth_bo_handle;
+   } else {
+      intelScreen->front.bo_handle = -1;
+      intelScreen->back.bo_handle = -1;
+      intelScreen->third.bo_handle = -1;
+      intelScreen->depth.bo_handle = -1;
+   }
+
+   intelScreen->tex.offset = sarea->tex_offset;
+   intelScreen->logTextureGranularity = sarea->log_tex_granularity;
+   intelScreen->tex.handle = sarea->tex_handle;
+   intelScreen->tex.size = sarea->tex_size;
+
+   if (0)
+      intelPrintSAREA(sarea);
+}
+
+
+/**
+ * DRI2 entrypoint
+ */
+static void
+intelHandleDrawableConfig(__DRIdrawablePrivate *dPriv,
+			  __DRIcontextPrivate *pcp,
+			  __DRIDrawableConfigEvent *event)
+{
+   struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
+   struct intel_region *region = NULL;
+   struct intel_renderbuffer *rb, *depth_rb, *stencil_rb;
+   struct intel_context *intel = pcp->driverPrivate;
+   int cpp, pitch;
+
+   cpp = intel->ctx.Visual.rgbBits / 8;
+   pitch = ((cpp * dPriv->w + 63) & ~63) / cpp;
+
+   rb = intel_fb->color_rb[1];
+   if (rb) {
+      region = intel_region_alloc(intel, cpp, pitch, dPriv->h);
+      intel_renderbuffer_set_region(rb, region);
+   }
+
+   rb = intel_fb->color_rb[2];
+   if (rb) {
+      region = intel_region_alloc(intel, cpp, pitch, dPriv->h);
+      intel_renderbuffer_set_region(rb, region);
+   }
+
+   depth_rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH);
+   stencil_rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL);
+   if (depth_rb || stencil_rb)
+      region = intel_region_alloc(intel, cpp, pitch, dPriv->h);
+   if (depth_rb)
+      intel_renderbuffer_set_region(depth_rb, region);
+   if (stencil_rb)
+      intel_renderbuffer_set_region(stencil_rb, region);
+
+   /* FIXME: Tell the X server about the regions we just allocated and
+    * attached. */
+}
+
+#define BUFFER_FLAG_TILED 0x0100
+
+/**
+ * DRI2 entrypoint: wrap the attached buffer handle in a region for its rb.
+ */
+static void
+intelHandleBufferAttach(__DRIdrawablePrivate *dPriv,
+			__DRIcontextPrivate *pcp,
+			__DRIBufferAttachEvent *ba)
+{
+   struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
+   struct intel_renderbuffer *rb;
+   struct intel_region *region;
+   struct intel_context *intel = pcp->driverPrivate;
+   GLuint tiled;
+
+   switch (ba->buffer.attachment) {
+   case DRI_DRAWABLE_BUFFER_FRONT_LEFT:
+      rb = intel_fb->color_rb[0];
+      break;
+
+   case DRI_DRAWABLE_BUFFER_BACK_LEFT:
+      rb = intel_fb->color_rb[1];       /* [0] is the front buffer */
+      break;
+
+   case DRI_DRAWABLE_BUFFER_DEPTH:
+     rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH);
+     break;
+
+   case DRI_DRAWABLE_BUFFER_STENCIL:
+     rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL);
+     break;
+
+   case DRI_DRAWABLE_BUFFER_ACCUM:
+   default:
+      fprintf(stderr, "unhandled buffer attach event, attacment type %d\n",
+	      ba->buffer.attachment);
+      return;
+   }
+
+#if 0
+   /* FIXME: Add this so we can filter out when the X server sends us
+    * attachment events for the buffers we just allocated.  Need to
+    * get the BO handle for a render buffer. */
+   if (intel_renderbuffer_get_region_handle(rb) == ba->buffer.handle)
+      return;
+#endif
+
+   tiled = (ba->buffer.flags & BUFFER_FLAG_TILED) > 0;
+   region = intel_region_alloc_for_handle(intel, ba->buffer.cpp,
+					  ba->buffer.pitch / ba->buffer.cpp,
+					  dPriv->h, tiled,
+					  ba->buffer.handle);
+
+   intel_renderbuffer_set_region(rb, region);
+}
+
+/* Texture-offset extension record: the base header names __DRI_TEX_OFFSET
+ * and the single hook is intelSetTexOffset. */
+static const __DRItexOffsetExtension intelTexOffsetExtension = {
+   { __DRI_TEX_OFFSET },
+   intelSetTexOffset,
+};
+
+/* Texture-buffer extension record: the base header carries the name and
+ * version, the single hook is intelSetTexBuffer. */
+static const __DRItexBufferExtension intelTexBufferExtension = {
+    { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
+   intelSetTexBuffer,
+};
+
+/* NULL-terminated list of the extensions exposed by this screen; it is
+ * stored in the screen's `extensions` field during screen initialisation. */
+static const __DRIextension *intelScreenExtensions[] = {
+    &driReadDrawableExtension,
+    &driCopySubBufferExtension.base,
+    &driSwapControlExtension.base,
+    &driFrameTrackingExtension.base,
+    &driMediaStreamCounterExtension.base,
+    &intelTexOffsetExtension.base,
+    &intelTexBufferExtension.base,
+    NULL
+};
+
+/**
+ * Ask the i915 DRM driver for one integer parameter.
+ *
+ * Issues DRM_I915_GETPARAM on the screen's file descriptor with \p value
+ * as the destination pointer handed to the kernel.
+ *
+ * \param psp    screen whose DRM fd is queried
+ * \param param  I915_PARAM_* selector
+ * \param value  where the result is expected to be stored
+ * \return GL_TRUE on success; GL_FALSE after logging the ioctl's return
+ *         code on failure
+ */
+static GLboolean
+intel_get_param(__DRIscreenPrivate *psp, int param, int *value)
+{
+   struct drm_i915_getparam request;
+   int err;
+
+   request.param = param;
+   request.value = value;
+
+   err = drmCommandWriteRead(psp->fd, DRM_I915_GETPARAM,
+                             &request, sizeof(request));
+   if (err == 0)
+      return GL_TRUE;
+
+   fprintf(stderr, "drm_i915_getparam: %d\n", err);
+   return GL_FALSE;
+}
+
+/**
+ * Legacy-DRI screen initialisation.
+ *
+ * Validates the size of the device-private record passed by the DDX,
+ * allocates the intelScreenPrivate, parses the driver's option table,
+ * copies screen state out of the SAREA, maps the screen regions and
+ * queries the kernel for IRQ and batch-buffer support.
+ *
+ * On any failure after the private area has been allocated, everything
+ * acquired so far is released and sPriv->private is reset to NULL, so the
+ * caller never sees a half-initialised screen.
+ *
+ * \param sPriv  DRI screen being initialised
+ * \return GL_TRUE on success, GL_FALSE on failure
+ */
+static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv)
+{
+   intelScreenPrivate *intelScreen;
+   I830DRIPtr gDRIPriv = (I830DRIPtr) sPriv->pDevPriv;
+   struct drm_i915_sarea *sarea;
+
+   if (sPriv->devPrivSize != sizeof(I830DRIRec)) {
+      fprintf(stderr,
+              "\nERROR!  sizeof(I830DRIRec) does not match passed size from device driver\n");
+      return GL_FALSE;
+   }
+
+   /* Allocate the private area */
+   intelScreen = (intelScreenPrivate *) CALLOC(sizeof(intelScreenPrivate));
+   if (!intelScreen) {
+      fprintf(stderr, "\nERROR!  Allocating private area failed\n");
+      return GL_FALSE;
+   }
+   /* parse information in __driConfigOptions */
+   driParseOptionInfo(&intelScreen->optionCache,
+                      __driConfigOptions, __driNConfigOptions);
+
+   intelScreen->driScrnPriv = sPriv;
+   sPriv->private = (void *) intelScreen;
+   intelScreen->sarea_priv_offset = gDRIPriv->sarea_priv_offset;
+   /* The driver's SAREA block sits at a DDX-supplied offset in the SAREA. */
+   sarea = (struct drm_i915_sarea *)
+      (((GLubyte *) sPriv->pSAREA) + intelScreen->sarea_priv_offset);
+
+   intelScreen->deviceID = gDRIPriv->deviceID;
+
+   intelUpdateScreenFromSAREA(intelScreen, sarea);
+
+   if (!intelMapScreenRegions(sPriv)) {
+      fprintf(stderr, "\nERROR!  mapping regions\n");
+      goto fail_free;
+   }
+
+   if (0)
+      intelPrintDRIInfo(intelScreen, sPriv, gDRIPriv);
+
+   intelScreen->drmMinor = sPriv->drm_version.minor;
+
+   /* Determine if IRQs are active? */
+   if (!intel_get_param(sPriv, I915_PARAM_IRQ_ACTIVE,
+                        &intelScreen->irq_active))
+      goto fail_unmap;
+
+   /* Determine if batchbuffers are allowed */
+   if (!intel_get_param(sPriv, I915_PARAM_ALLOW_BATCHBUFFER,
+                        &intelScreen->allow_batchbuffer))
+      goto fail_unmap;
+
+   sPriv->extensions = intelScreenExtensions;
+
+   return GL_TRUE;
+
+fail_unmap:
+   /* Same teardown order as intelDestroyScreen(). */
+   intelUnmapScreenRegions(intelScreen);
+fail_free:
+   _mesa_free(intelScreen);
+   sPriv->private = NULL;
+   return GL_FALSE;
+}
+
+
+/**
+ * Release the driver-private screen state: unmap the screen regions, free
+ * the intelScreenPrivate and clear the pointer held by the DRI screen.
+ */
+static void
+intelDestroyScreen(__DRIscreenPrivate * sPriv)
+{
+   intelScreenPrivate *screen = (intelScreenPrivate *) sPriv->private;
+
+   intelUnmapScreenRegions(screen);
+   FREE(screen);
+
+   sPriv->private = NULL;
+}
+
+
+/**
+ * Build the framebuffer object for a new X window.
+ *
+ * Creates an intel_framebuffer with a front colour buffer, a back buffer
+ * for double-buffered visuals (plus a third colour buffer when the screen
+ * reports a third-buffer handle), a hardware depth or depth/stencil buffer
+ * chosen from the visual's depth/stencil bits, and whatever software
+ * renderbuffers (stencil, accumulation) are still needed.  The result is
+ * stored in driDrawPriv->driverPrivate.
+ *
+ * \return GL_TRUE on success; GL_FALSE for pixmaps (not implemented) or
+ *         when the framebuffer struct cannot be allocated
+ */
+static GLboolean
+intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
+                  __DRIdrawablePrivate * driDrawPriv,
+                  const __GLcontextModes * mesaVis, GLboolean isPixmap)
+{
+   intelScreenPrivate *screen = (intelScreenPrivate *) driScrnPriv->private;
+   struct intel_framebuffer *intel_fb;
+   GLboolean swStencil;
+   GLenum rgbFormat;
+
+   if (isPixmap)
+      return GL_FALSE;          /* not implemented */
+
+   /* Stencil is only done in hardware alongside a 24-bit depth buffer. */
+   swStencil = (mesaVis->stencilBits > 0 && mesaVis->depthBits != 24);
+   rgbFormat = (mesaVis->redBits == 5) ? GL_RGB5 : GL_RGBA8;
+
+   intel_fb = CALLOC_STRUCT(intel_framebuffer);
+   if (intel_fb == NULL)
+      return GL_FALSE;
+
+   _mesa_initialize_framebuffer(&intel_fb->Base, mesaVis);
+
+   /* hardware colour buffers: the front buffer always exists */
+   intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat);
+   _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT,
+                          &intel_fb->color_rb[0]->Base);
+
+   if (mesaVis->doubleBufferMode) {
+      intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat);
+      _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT,
+                             &intel_fb->color_rb[1]->Base);
+
+      if (screen->third.handle) {
+         /* The third buffer is not attached to the framebuffer; take a
+          * reference on it through a throw-away pointer instead. */
+         struct gl_renderbuffer *held = NULL;
+
+         intel_fb->color_rb[2] = intel_create_renderbuffer(rgbFormat);
+         _mesa_reference_renderbuffer(&held, &intel_fb->color_rb[2]->Base);
+      }
+   }
+
+   /* hardware depth / stencil */
+   if (mesaVis->depthBits == 24 && mesaVis->stencilBits == 8) {
+      /* combined depth/stencil buffer, bound to two attachment points */
+      struct intel_renderbuffer *packedRb
+         = intel_create_renderbuffer(GL_DEPTH24_STENCIL8_EXT);
+      _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH,
+                             &packedRb->Base);
+      _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_STENCIL,
+                             &packedRb->Base);
+   }
+   else if (mesaVis->depthBits == 24) {
+      struct intel_renderbuffer *zRb
+         = intel_create_renderbuffer(GL_DEPTH_COMPONENT24);
+      _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &zRb->Base);
+   }
+   else if (mesaVis->depthBits == 16) {
+      /* just 16-bit depth buffer, no hw stencil */
+      struct intel_renderbuffer *zRb
+         = intel_create_renderbuffer(GL_DEPTH_COMPONENT16);
+      _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &zRb->Base);
+   }
+
+   /* now add any/all software-based renderbuffers we may need */
+   _mesa_add_soft_renderbuffers(&intel_fb->Base,
+                                GL_FALSE, /* never sw color */
+                                GL_FALSE, /* never sw depth */
+                                swStencil, mesaVis->accumRedBits > 0,
+                                GL_FALSE, /* never sw alpha */
+                                GL_FALSE  /* never sw aux */ );
+
+   driDrawPriv->driverPrivate = (void *) intel_fb;
+   return GL_TRUE;
+}
+
+/**
+ * Drop the drawable's reference on its framebuffer; the pointer stored in
+ * driverPrivate is handed to Mesa by address.
+ */
+static void
+intelDestroyBuffer(__DRIdrawablePrivate * driDrawPriv)
+{
+   GLframebuffer **fbSlot = (GLframebuffer **) &driDrawPriv->driverPrivate;
+
+   _mesa_unreference_framebuffer(fbSlot);
+}
+
+
+/**
+ * Report statistics about previous buffer swaps of a drawable.
+ *
+ * Copies the swap counters kept in the drawable's intel_framebuffer into
+ * \p sInfo and derives the missed-swap usage figure when at least one swap
+ * was missed.
+ *
+ * \return 0 on success, -1 when the drawable, its framebuffer or \p sInfo
+ *         is NULL
+ */
+static int
+intelGetSwapInfo(__DRIdrawablePrivate * dPriv, __DRIswapInfo * sInfo)
+{
+   struct intel_framebuffer *fb;
+
+   if (dPriv == NULL || sInfo == NULL)
+      return -1;
+
+   fb = dPriv->driverPrivate;
+   if (fb == NULL)
+      return -1;
+
+   sInfo->swap_count = fb->swap_count;
+   sInfo->swap_ust = fb->swap_ust;
+   sInfo->swap_missed_count = fb->swap_missed_count;
+
+   if (sInfo->swap_missed_count != 0)
+      sInfo->swap_missed_usage =
+         driCalculateSwapUsage(dPriv, 0, fb->swap_missed_ust);
+   else
+      sInfo->swap_missed_usage = 0.0;
+
+   return 0;
+}
+
+
+/* There are probably better ways to do this, such as an
+ * init-designated function to register chipids and createcontext
+ * functions.
+ */
+extern GLboolean i830CreateContext(const __GLcontextModes * mesaVis,
+                                   __DRIcontextPrivate * driContextPriv,
+                                   void *sharedContextPrivate);
+
+extern GLboolean i915CreateContext(const __GLcontextModes * mesaVis,
+                                   __DRIcontextPrivate * driContextPriv,
+                                   void *sharedContextPrivate);
+extern GLboolean brwCreateContext(const __GLcontextModes * mesaVis,
+				  __DRIcontextPrivate * driContextPriv,
+				  void *sharedContextPrivate);
+
+/**
+ * Dispatch context creation to the chipset-specific constructor.
+ *
+ * In an I915 build, non-9xx devices go to i830CreateContext and 9xx
+ * devices that are not 965 go to i915CreateContext.  In the other build,
+ * only 965 devices are accepted and go to brwCreateContext.  Any device
+ * that matches none of these is reported on stderr.
+ *
+ * \return the chosen constructor's result, or GL_FALSE for an
+ *         unrecognised device
+ */
+static GLboolean
+intelCreateContext(const __GLcontextModes * mesaVis,
+                   __DRIcontextPrivate * driContextPriv,
+                   void *sharedContextPrivate)
+{
+   __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
+   intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private;
+
+#ifdef I915
+   if (!IS_9XX(intelScreen->deviceID))
+      return i830CreateContext(mesaVis, driContextPriv, sharedContextPrivate);
+
+   if (!IS_965(intelScreen->deviceID))
+      return i915CreateContext(mesaVis, driContextPriv,
+                               sharedContextPrivate);
+#else
+   if (IS_965(intelScreen->deviceID))
+      return brwCreateContext(mesaVis, driContextPriv, sharedContextPrivate);
+#endif
+
+   /* No constructor in this build handles the device. */
+   fprintf(stderr, "Unrecognized deviceID %x\n", intelScreen->deviceID);
+   return GL_FALSE;
+}
+
+
+/**
+ * Build the list of configs for one pixel format.
+ *
+ * Three depth/stencil combinations are offered when the format has depth
+ * or stencil bits (none; depth with stencil only for 24-bit depth; depth
+ * with stencil always), otherwise one.  Three back-buffer modes are offered
+ * when a back buffer is available, otherwise one.  Configs whose stencil
+ * size differs from \p stencil_bits are marked GLX_SLOW_CONFIG.
+ *
+ * \param psp               not used
+ * \param pixel_bits        16 selects RGB565, anything else BGRA8888
+ * \param depth_bits        hardware depth size
+ * \param stencil_bits      hardware stencil size (0 = none)
+ * \param have_back_buffer  whether double-buffered modes are offered
+ * \return config list from driCreateConfigs, or NULL on failure
+ */
+static __DRIconfig **
+intelFillInModes(__DRIscreenPrivate *psp,
+                 unsigned pixel_bits, unsigned depth_bits,
+                 unsigned stencil_bits, GLboolean have_back_buffer)
+{
+   /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't
+    * support pageflipping at all.
+    */
+   static const GLenum back_buffer_modes[] = {
+      GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML
+   };
+
+   /* Just like with the accumulation buffer, always provide some modes
+    * with a stencil buffer.  It will be a sw fallback, but some apps won't
+    * care about that.
+    */
+   const u_int8_t offered_stencil = (stencil_bits == 0) ? 8 : stencil_bits;
+   const unsigned depth_buffer_factor =
+      (depth_bits != 0 || stencil_bits != 0) ? 3 : 1;
+   const unsigned back_buffer_factor = have_back_buffer ? 3 : 1;
+
+   u_int8_t depth_bits_array[3];
+   u_int8_t stencil_bits_array[3];
+   __DRIconfig **configs;
+   __DRIconfig **cur;
+   GLenum fb_format;
+   GLenum fb_type;
+
+   depth_bits_array[0] = 0;
+   depth_bits_array[1] = depth_bits;
+   depth_bits_array[2] = depth_bits;
+
+   stencil_bits_array[0] = 0;
+   stencil_bits_array[1] = (depth_bits == 24) ? offered_stencil : 0;
+   stencil_bits_array[2] = offered_stencil;
+
+   if (pixel_bits != 16) {
+      fb_format = GL_BGRA;
+      fb_type = GL_UNSIGNED_INT_8_8_8_8_REV;
+   }
+   else {
+      fb_format = GL_RGB;
+      fb_type = GL_UNSIGNED_SHORT_5_6_5;
+   }
+
+   configs = driCreateConfigs(fb_format, fb_type,
+                              depth_bits_array, stencil_bits_array,
+                              depth_buffer_factor, back_buffer_modes,
+                              back_buffer_factor);
+   if (configs == NULL) {
+      fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
+              __LINE__);
+      return NULL;
+   }
+
+   /* Mark the visual as slow if there are "fake" stencil bits.
+    */
+   for (cur = configs; *cur != NULL; cur++) {
+      __GLcontextModes *modes = &(*cur)->modes;
+
+      if (modes->stencilBits != 0 && modes->stencilBits != stencil_bits)
+         modes->visualRating = GLX_SLOW_CONFIG;
+   }
+
+   return configs;
+}
+
+
+/**
+ * This is the driver specific part of the createNewScreen entry point.
+ * Called when using legacy DRI.
+ *
+ * Checks the DRI/DDX/DRM versions, registers extension dispatch offsets,
+ * runs intelInitDriver() and builds the config list from the bytes per
+ * pixel reported in the device-private record: 2 bytes gives 16-bit depth
+ * without stencil, anything else 24-bit depth with 8-bit stencil.
+ *
+ * \todo maybe fold this into intelInitDriver
+ *
+ * \return the configs supported by this driver, or NULL on failure
+ */
+static const __DRIconfig **intelInitScreen(__DRIscreenPrivate *psp)
+{
+   static const __DRIversion dri_expected = { 4, 0, 0 };
+#ifdef I915
+   static const __DRIversion ddx_expected = { 1, 5, 0 };
+#else
+   static const __DRIversion ddx_expected = { 1, 6, 0 };
+#endif
+   static const __DRIversion drm_expected = { 1, 5, 0 };
+   I830DRIPtr dri_priv = (I830DRIPtr) psp->pDevPriv;
+   unsigned depth_bits;
+   unsigned stencil_bits;
+
+   if (!driCheckDriDdxDrmVersions2("i915",
+                                   &psp->dri_version, &dri_expected,
+                                   &psp->ddx_version, &ddx_expected,
+                                   &psp->drm_version, &drm_expected))
+      return NULL;
+
+   /* Calling driInitExtensions here, with a NULL context pointer,
+    * does not actually enable the extensions.  It just makes sure
+    * that all the dispatch offsets for all the extensions that
+    * *might* be enabled are known.  This is needed because the
+    * dispatch offsets need to be known when _mesa_context_create is
+    * called, but we can't enable the extensions until we have a
+    * context pointer.
+    *
+    * Hello chicken.  Hello egg.  How are you two today?
+    */
+   intelInitExtensions(NULL, GL_TRUE);
+
+   if (!intelInitDriver(psp))
+      return NULL;
+
+   psp->extensions = intelScreenExtensions;
+
+   if (dri_priv->cpp == 2) {
+      depth_bits = 16;
+      stencil_bits = 0;
+   }
+   else {
+      depth_bits = 24;
+      stencil_bits = 8;
+   }
+
+   return (const __DRIconfig **)
+      intelFillInModes(psp, dri_priv->cpp * 8, depth_bits, stencil_bits, 1);
+}
+
+/**
+ * Return an intel_context usable for screen-level work.
+ *
+ * This should probably change to have the screen allocate a dummy
+ * context at screen creation.  For now the current GL context is used and
+ * \p intelScreen is not consulted.
+ *
+ * \return the current context, or NULL (after reporting a Mesa problem)
+ *         when no context is current
+ */
+struct intel_context *intelScreenContext(intelScreenPrivate *intelScreen)
+{
+  GET_CURRENT_CONTEXT(ctx);
+
+  if (ctx != NULL)
+     return intel_context(ctx);
+
+  _mesa_problem(NULL, "No current context in intelScreenContext\n");
+  return NULL;
+}
+
+/**
+ * This is the driver specific part of the createNewScreen entry point.
+ * Called when using DRI2.
+ *
+ * Allocates the intelScreenPrivate, parses the driver's option table and
+ * queries the kernel for the chipset id, IRQ state and batch-buffer
+ * permission.  Batch buffers must be allowed.  On any failure the private
+ * area is freed, psp->private is reset and NULL is returned.
+ *
+ * \return the configs supported by this driver (16-bit and 32-bit sets
+ *         concatenated), or NULL on failure
+ */
+static const
+__DRIconfig **intelInitScreen2(__DRIscreenPrivate *psp)
+{
+   intelScreenPrivate *intelScreen;
+
+   /* Calling driInitExtensions here, with a NULL context pointer,
+    * does not actually enable the extensions.  It just makes sure
+    * that all the dispatch offsets for all the extensions that
+    * *might* be enabled are known.  This is needed because the
+    * dispatch offsets need to be known when _mesa_context_create is
+    * called, but we can't enable the extensions until we have a
+    * context pointer.
+    *
+    * Hello chicken.  Hello egg.  How are you two today?
+    */
+   intelInitExtensions(NULL, GL_TRUE);
+
+   /* Allocate the private area */
+   intelScreen = (intelScreenPrivate *) CALLOC(sizeof(intelScreenPrivate));
+   if (!intelScreen) {
+      fprintf(stderr, "\nERROR!  Allocating private area failed\n");
+      return NULL;
+   }
+   /* parse information in __driConfigOptions */
+   driParseOptionInfo(&intelScreen->optionCache,
+                      __driConfigOptions, __driNConfigOptions);
+
+   intelScreen->driScrnPriv = psp;
+   psp->private = (void *) intelScreen;
+
+   intelScreen->drmMinor = psp->drm_version.minor;
+
+   /* Determine chipset ID? */
+   if (!intel_get_param(psp, I915_PARAM_CHIPSET_ID,
+                        &intelScreen->deviceID))
+      goto fail;
+
+   /* Determine if IRQs are active? */
+   if (!intel_get_param(psp, I915_PARAM_IRQ_ACTIVE,
+                        &intelScreen->irq_active))
+      goto fail;
+
+   /* Determine if batchbuffers are allowed */
+   if (!intel_get_param(psp, I915_PARAM_ALLOW_BATCHBUFFER,
+                        &intelScreen->allow_batchbuffer))
+      goto fail;
+
+   if (!intelScreen->allow_batchbuffer) {
+      fprintf(stderr, "batch buffer not allowed\n");
+      goto fail;
+   }
+
+   psp->extensions = intelScreenExtensions;
+
+   /* NOTE(review): driConcatConfigs is handed the two results unchecked;
+    * confirm it tolerates a NULL list if intelFillInModes fails. */
+   return driConcatConfigs(intelFillInModes(psp, 16, 16, 0, 1),
+                           intelFillInModes(psp, 32, 24, 8, 1));
+
+fail:
+   /* No regions were mapped on this path, so freeing is all that is
+    * needed to undo the setup above. */
+   FREE(intelScreen);
+   psp->private = NULL;
+   return NULL;
+}
+
+/* Driver entry-point table.  The first group wires up the screen,
+ * context, drawable and swap hooks; the second group holds the hooks
+ * documented in this file as DRI2 entry points. */
+const struct __DriverAPIRec driDriverAPI = {
+   .InitScreen		 = intelInitScreen,
+   .DestroyScreen	 = intelDestroyScreen,
+   .CreateContext	 = intelCreateContext,
+   .DestroyContext	 = intelDestroyContext,
+   .CreateBuffer	 = intelCreateBuffer,
+   .DestroyBuffer	 = intelDestroyBuffer,
+   .SwapBuffers		 = intelSwapBuffers,
+   .MakeCurrent		 = intelMakeCurrent,
+   .UnbindContext	 = intelUnbindContext,
+   .GetSwapInfo		 = intelGetSwapInfo,
+   .GetDrawableMSC	 = driDrawableGetMSC32,
+   .WaitForMSC		 = driWaitForMSC32,
+   .CopySubBuffer	 = intelCopySubBuffer,
+
+   /* DRI2 */
+   .InitScreen2		 = intelInitScreen2,
+   .HandleDrawableConfig = intelHandleDrawableConfig,
+   .HandleBufferAttach	 = intelHandleBufferAttach,
+};
diff --git a/i965/intel_screen.h b/shared/intel_screen.h
index bf9a716..e62b2d7 100644
--- a/i965/intel_screen.h
+++ b/shared/intel_screen.h
@@ -30,39 +30,42 @@
 
 #include <sys/time.h>
 #include "dri_util.h"
+#include "i915_drm.h"
 #include "xmlconfig.h"
-#include "i830_common.h"
 
 /* XXX: change name or eliminate to avoid conflict with "struct
  * intel_region"!!!
  */
-typedef struct {
+typedef struct
+{
    drm_handle_t handle;
-   drmSize size;        /* region size in bytes */
-   char *map;           /* memory map */
-   int offset;          /* from start of video mem, in bytes */
-   int pitch;           /* row stride, in pixels */
-   unsigned int tiled; 
+   drmSize size;                /* region size in bytes */
+   char *map;                   /* memory map */
+   int offset;                  /* from start of video mem, in bytes */
+   unsigned int bo_handle;	/* buffer object id if available, or -1 */
+   /**
+    * Flags if the region is tiled.
+    *
+    * Not included is Y versus X tiling.
+    */
+   GLboolean tiled;
 } intelRegion;
 
-typedef struct 
+typedef struct
 {
    intelRegion front;
    intelRegion back;
-   intelRegion rotated;
+   intelRegion third;
    intelRegion depth;
    intelRegion tex;
-   
+
    int deviceID;
    int width;
    int height;
-   int mem;         /* unused */
-
-   int cpp;         /* for front and back buffers */
-   int fbFormat;
+   int pitch;                   /* common row stride, in pixels */
 
    int logTextureGranularity;
-   
+
    __DRIscreenPrivate *driScrnPriv;
    unsigned int sarea_priv_offset;
 
@@ -71,44 +74,36 @@ typedef struct
    int irq_active;
    int allow_batchbuffer;
 
-/*    struct matrix23 rotMatrix; */
-
-   int current_rotation;  /* 0, 90, 180 or 270 */
-   int rotatedWidth, rotatedHeight;
-
    /**
-    * Configuration cache with default values for all contexts 
-    */
+   * Configuration cache with default values for all contexts
+   */
    driOptionCache optionCache;
 } intelScreenPrivate;
 
 
-extern GLboolean
-intelMapScreenRegions(__DRIscreenPrivate *sPriv);
 
-extern void
-intelUnmapScreenRegions(intelScreenPrivate *intelScreen);
+extern GLboolean intelMapScreenRegions(__DRIscreenPrivate * sPriv);
 
-extern void
-intelUpdateScreenFromSAREA(intelScreenPrivate *intelScreen,
-                           volatile drmI830Sarea *sarea);
+extern void intelUnmapScreenRegions(intelScreenPrivate * intelScreen);
 
 extern void
-intelDestroyContext(__DRIcontextPrivate *driContextPriv);
+intelUpdateScreenFromSAREA(intelScreenPrivate * intelScreen,
+                           struct drm_i915_sarea * sarea);
 
-extern GLboolean
-intelUnbindContext(__DRIcontextPrivate *driContextPriv);
+extern void intelDestroyContext(__DRIcontextPrivate * driContextPriv);
+
+extern GLboolean intelUnbindContext(__DRIcontextPrivate * driContextPriv);
 
 extern GLboolean
-intelMakeCurrent(__DRIcontextPrivate *driContextPriv,
-                 __DRIdrawablePrivate *driDrawPriv,
-                 __DRIdrawablePrivate *driReadPriv);
+intelMakeCurrent(__DRIcontextPrivate * driContextPriv,
+                 __DRIdrawablePrivate * driDrawPriv,
+                 __DRIdrawablePrivate * driReadPriv);
+
+extern void intelSwapBuffers(__DRIdrawablePrivate * dPriv);
 
 extern void
-intelSwapBuffers(__DRIdrawablePrivate *dPriv);
+intelCopySubBuffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h);
 
-extern void 
-intelCopySubBuffer( __DRIdrawablePrivate *dPriv,
-		    int x, int y, int w, int h );
+extern struct intel_context *intelScreenContext(intelScreenPrivate *intelScreen);
 
 #endif
diff --git a/shared/intel_span.c b/shared/intel_span.c
new file mode 100644
index 0000000..742b1b8
--- /dev/null
+++ b/shared/intel_span.c
@@ -0,0 +1,409 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "glheader.h"
+#include "macros.h"
+#include "mtypes.h"
+#include "colormac.h"
+
+#include "intel_fbo.h"
+#include "intel_screen.h"
+#include "intel_span.h"
+#include "intel_regions.h"
+#include "intel_ioctl.h"
+#include "intel_tex.h"
+
+#include "swrast/swrast.h"
+
+/*
+  break intelWriteRGBASpan_ARGB8888
+*/
+
+#undef DBG
+#define DBG 0
+
+/* Locals declared at the top of every colour span function generated from
+ * the span templates included below.  They rely on `ctx` and `rb` being in
+ * scope at the expansion site.
+ *
+ * yScale/yBias feed Y_FLIP(): for render-to-texture buffers y is used
+ * unchanged, otherwise it is mirrored to (Height - 1 - y).
+ * buf points at the pixel at (drawX, drawY) inside the mapped buffer;
+ * pfPitch is treated as a pixel count and scaled by the region's cpp.
+ * p is declared for the templates and voided to silence unused warnings.
+ */
+#define LOCAL_VARS							\
+   struct intel_context *intel = intel_context(ctx);			\
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);		\
+   const GLint yScale = irb->RenderToTexture ? 1 : -1;			\
+   const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1;	\
+   GLubyte *buf = (GLubyte *) irb->pfMap				\
+      + (intel->drawY * irb->pfPitch + intel->drawX) * irb->region->cpp;\
+   GLuint p;								\
+   assert(irb->pfMap);\
+   (void) p;
+
+/* XXX FBO: this is identical to the macro in spantmp2.h except we get
+ * the cliprect info from the context, not the driDrawable.
+ * Move this into spantmp2.h someday.
+ *
+ * Walks the context's clip rectangles from last to first and exposes each
+ * one as minx/miny/maxx/maxy, translated by (drawX, drawY).
+ * The `do {` and `while` scopes opened here are intentionally left
+ * unclosed; presumably the template supplies the matching end-of-loop
+ * macro -- TODO confirm against spantmp2.h.
+ */
+#define HW_CLIPLOOP()							\
+   do {									\
+      int _nc = intel->numClipRects;					\
+      while ( _nc-- ) {							\
+	 int minx = intel->pClipRects[_nc].x1 - intel->drawX;		\
+	 int miny = intel->pClipRects[_nc].y1 - intel->drawY;		\
+	 int maxx = intel->pClipRects[_nc].x2 - intel->drawX;		\
+	 int maxy = intel->pClipRects[_nc].y2 - intel->drawY;
+
+
+
+
+#define Y_FLIP(_y) ((_y) * yScale + yBias)
+
+#define HW_LOCK()
+
+#define HW_UNLOCK()
+
+/* 16 bit, RGB565 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+
+#define TAG(x)    intel##x##_RGB565
+#define TAG2(x,y) intel##x##_RGB565##y
+#define GET_PTR(X,Y) (buf + ((Y) * irb->pfPitch + (X)) * 2)
+#include "spantmp2.h"
+
+/* 32 bit, ARGB8888 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x)    intel##x##_ARGB8888
+#define TAG2(x,y) intel##x##_ARGB8888##y
+#define GET_PTR(X,Y) (buf + ((Y) * irb->pfPitch + (X)) * 4)
+#include "spantmp2.h"
+
+/* Locals declared at the top of every depth span function generated from
+ * depthtmp.h; relies on `ctx` and `rb` at the expansion site.
+ * Same y-flip convention as LOCAL_VARS.  `pitch` is the renderbuffer's
+ * pfPitch in pixels, and buf points at the element at (drawX, drawY)
+ * inside the mapped buffer.
+ */
+#define LOCAL_DEPTH_VARS						\
+   struct intel_context *intel = intel_context(ctx);			\
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);		\
+   const GLuint pitch = irb->pfPitch/***XXX region->pitch*/; /* in pixels */ \
+   const GLint yScale = irb->RenderToTexture ? 1 : -1;			\
+   const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1;	\
+   char *buf = (char *) irb->pfMap/*XXX use region->map*/ +             \
+      (intel->drawY * pitch + intel->drawX) * irb->region->cpp;
+
+
+/* The stencil span functions use exactly the same locals. */
+#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
+
+/**
+ ** 16-bit depthbuffer functions.
+ **
+ ** Each depth value is one GLushort, addressed as buf[x + y * pitch] with
+ ** pitch counted in elements.  The trailing semicolon is part of each
+ ** macro body.
+ **/
+#define VALUE_TYPE GLushort
+
+#define WRITE_DEPTH( _x, _y, d ) \
+   ((GLushort *)buf)[(_x) + (_y) * pitch] = d;
+
+#define READ_DEPTH( d, _x, _y )	\
+   d = ((GLushort *)buf)[(_x) + (_y) * pitch];
+
+
+#define TAG(x) intel##x##_z16
+#include "depthtmp.h"
+
+
+/**
+ ** 24/8-bit interleaved depth/stencil functions
+ ** Note: we're actually reading back combined depth+stencil values.
+ ** The wrappers in main/depthstencil.c are used to extract the depth
+ ** and stencil values.
+ **
+ ** Values passed in and out keep stencil in the low 8 bits (ZZZS); the
+ ** buffer keeps stencil in the high 8 bits (SZZZ).  The macros convert
+ ** with an 8-bit rotate in each direction.
+ **/
+#define VALUE_TYPE GLuint
+
+/* Change ZZZS -> SZZZ */
+/* (rotate right by 8 bits, then store) */
+#define WRITE_DEPTH( _x, _y, d ) {				\
+   GLuint tmp = ((d) >> 8) | ((d) << 24);			\
+   ((GLuint *)buf)[(_x) + (_y) * pitch] = tmp;			\
+}
+
+/* Change SZZZ -> ZZZS */
+/* (load, then rotate left by 8 bits) */
+#define READ_DEPTH( d, _x, _y ) {				\
+   GLuint tmp = ((GLuint *)buf)[(_x) + (_y) * pitch];		\
+   d = (tmp << 8) | (tmp >> 24);				\
+}
+
+#define TAG(x) intel##x##_z24_s8
+#include "depthtmp.h"
+
+
+/**
+ ** 8-bit stencil function (XXX FBO: This is obsolete)
+ **
+ ** Stencil occupies the top byte of each 32-bit element.  A write keeps
+ ** the low 24 bits already in the buffer and replaces the top byte; a read
+ ** returns the top byte.
+ **/
+#define WRITE_STENCIL( _x, _y, d ) {				\
+   GLuint tmp = ((GLuint *)buf)[(_x) + (_y) * pitch];		\
+   tmp &= 0xffffff;						\
+   tmp |= ((d) << 24);						\
+   ((GLuint *) buf)[(_x) + (_y) * pitch] = tmp;			\
+}
+
+#define READ_STENCIL( d, _x, _y )				\
+   d = ((GLuint *)buf)[(_x) + (_y) * pitch] >> 24;
+
+/* Same suffix as the depth functions above; the stencil template is
+ * presumably what keeps the generated names distinct -- TODO confirm. */
+#define TAG(x) intel##x##_z24_s8
+#include "stenciltmp.h"
+
+
+
+/**
+ * Map or unmap the region behind one renderbuffer and refresh the
+ * pfMap/pfPitch fields the span routines read.
+ *
+ * Does nothing when \p irb is NULL or has no region.  The fields are
+ * refreshed from the region after both mapping and unmapping, so they
+ * always mirror what the region currently reports.
+ */
+static void
+intel_map_unmap_renderbuffer(struct intel_context *intel,
+                             struct intel_renderbuffer *irb,
+                             GLboolean map)
+{
+   if (!irb || !irb->region)
+      return;
+
+   if (map)
+      intel_region_map(intel, irb->region);
+   else
+      intel_region_unmap(intel, irb->region);
+
+   irb->pfMap = irb->region->map;
+   irb->pfPitch = irb->region->pitch;
+}
+
+/**
+ * Map or unmap all the renderbuffers which we may need during
+ * software rendering.
+ * XXX in the future, we could probably convey extra information to
+ * reduce the number of mappings needed.  I.e. if doing a glReadPixels
+ * from the depth buffer, we really only need one mapping.
+ *
+ * XXX Rewrite this function someday.
+ * We can probably just loop over all the renderbuffer attachments,
+ * map/unmap all of them, and not worry about the _ColorDrawBuffers
+ * _ColorReadBuffer, _DepthBuffer or _StencilBuffer fields.
+ *
+ * Order of work: colour draw buffers, textures attached to the draw
+ * framebuffer, the colour read buffer, then the depth and stencil buffers
+ * (through their wrappers).
+ *
+ * \param intel  context whose current draw/read framebuffers are used
+ * \param map    GL_TRUE to map, GL_FALSE to unmap
+ */
+static void
+intel_map_unmap_buffers(struct intel_context *intel, GLboolean map)
+{
+   GLcontext *ctx = &intel->ctx;
+   GLuint i, j;
+
+   /* color draw buffers */
+   for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++) {
+      struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[j];
+      intel_map_unmap_renderbuffer(intel, intel_renderbuffer(rb), map);
+   }
+
+   /* check for render to textures */
+   for (i = 0; i < BUFFER_COUNT; i++) {
+      struct gl_renderbuffer_attachment *att =
+         ctx->DrawBuffer->Attachment + i;
+      struct gl_texture_object *tex = att->Texture;
+      if (tex) {
+         /* render to texture */
+         ASSERT(att->Renderbuffer);
+         if (map)
+            intel_tex_map_images(intel, intel_texture_object(tex));
+         else
+            intel_tex_unmap_images(intel, intel_texture_object(tex));
+      }
+   }
+
+   /* color read buffers */
+   intel_map_unmap_renderbuffer(intel,
+                                intel_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer),
+                                map);
+
+   /* Account for front/back color page flipping.
+    * The span routines use the pfMap and pfPitch fields which will
+    * swap the front/back region map/pitch if we're page flipped.
+    * Do this after mapping, above, so the map field is valid.
+    */
+#if 0
+   if (map && ctx->DrawBuffer->Name == 0) {
+      struct intel_renderbuffer *irbFront
+         = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_FRONT_LEFT);
+      struct intel_renderbuffer *irbBack
+         = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_BACK_LEFT);
+      if (irbBack) {
+         /* double buffered */
+         if (intel->sarea->pf_current_page == 0) {
+            irbFront->pfMap = irbFront->region->map;
+            irbFront->pfPitch = irbFront->region->pitch;
+            irbBack->pfMap = irbBack->region->map;
+            irbBack->pfPitch = irbBack->region->pitch;
+         }
+         else {
+            irbFront->pfMap = irbBack->region->map;
+            irbFront->pfPitch = irbBack->region->pitch;
+            irbBack->pfMap = irbFront->region->map;
+            irbBack->pfPitch = irbFront->region->pitch;
+         }
+      }
+   }
+#endif
+
+   /* depth buffer (Note wrapper!) */
+   if (ctx->DrawBuffer->_DepthBuffer)
+      intel_map_unmap_renderbuffer(intel,
+                                   intel_renderbuffer(ctx->DrawBuffer->_DepthBuffer->Wrapped),
+                                   map);
+
+   /* stencil buffer (Note wrapper!) */
+   if (ctx->DrawBuffer->_StencilBuffer)
+      intel_map_unmap_renderbuffer(intel,
+                                   intel_renderbuffer(ctx->DrawBuffer->_StencilBuffer->Wrapped),
+                                   map);
+}
+
+
+
+/**
+ * Prepare for software rendering.  Map the current read/draw framebuffers'
+ * renderbuffers and the images of every enabled texture unit's current
+ * texture object.
+ *
+ * The hardware lock is taken here and held until intelSpanRenderFinish().
+ *
+ * Old note: Moved locking out to get reasonable span performance.
+ */
+void
+intelSpanRenderStart(GLcontext * ctx)
+{
+   struct intel_context *intel = intel_context(ctx);
+   GLuint unit;
+
+   intelFinish(&intel->ctx);
+   LOCK_HARDWARE(intel);
+
+   /* The front/back/depth regions are no longer mapped wholesale here;
+    * intel_map_unmap_buffers() below maps what the framebuffers need.
+    */
+
+   for (unit = 0; unit < ctx->Const.MaxTextureCoordUnits; unit++) {
+      struct gl_texture_object *texObj;
+
+      if (!ctx->Texture.Unit[unit]._ReallyEnabled)
+         continue;
+
+      texObj = ctx->Texture.Unit[unit]._Current;
+      intel_tex_map_images(intel, intel_texture_object(texObj));
+   }
+
+   intel_map_unmap_buffers(intel, GL_TRUE);
+}
+
+/**
+ * Called when done with software rendering.  Flushes swrast, unmaps the
+ * textures and buffers mapped by intelSpanRenderStart() and releases the
+ * hardware lock taken there.
+ */
+void
+intelSpanRenderFinish(GLcontext * ctx)
+{
+   struct intel_context *intel = intel_context(ctx);
+   GLuint unit;
+
+   _swrast_flush(ctx);
+
+   /* Now unmap everything; the framebuffer regions are handled by
+    * intel_map_unmap_buffers() rather than one by one.
+    */
+   for (unit = 0; unit < ctx->Const.MaxTextureCoordUnits; unit++) {
+      struct gl_texture_object *texObj;
+
+      if (!ctx->Texture.Unit[unit]._ReallyEnabled)
+         continue;
+
+      texObj = ctx->Texture.Unit[unit]._Current;
+      intel_tex_unmap_images(intel, intel_texture_object(texObj));
+   }
+
+   intel_map_unmap_buffers(intel, GL_FALSE);
+
+   UNLOCK_HARDWARE(intel);
+}
+
+
+/**
+ * Install the span start/finish hooks into the context's swrast device
+ * driver table.
+ */
+void
+intelInitSpanFuncs(GLcontext * ctx)
+{
+   struct swrast_device_driver *hooks = _swrast_GetDeviceDriverReference(ctx);
+
+   hooks->SpanRenderStart = intelSpanRenderStart;
+   hooks->SpanRenderFinish = intelSpanRenderFinish;
+}
+
+
+/**
+ * Plug in appropriate span read/write functions for the given renderbuffer.
+ * These are used for the software fallbacks.
+ *
+ * The choice is made on rb->_ActualFormat; an unexpected format is
+ * reported as a Mesa problem and the renderbuffer is left untouched.
+ */
+void
+intel_set_span_functions(struct gl_renderbuffer *rb)
+{
+   switch (rb->_ActualFormat) {
+   case GL_RGB5:
+      /* 565 RGB */
+      intelInitPointers_RGB565(rb);
+      break;
+
+   case GL_RGBA8:
+      /* 8888 RGBA */
+      intelInitPointers_ARGB8888(rb);
+      break;
+
+   case GL_DEPTH_COMPONENT16:
+      intelInitDepthPointers_z16(rb);
+      break;
+
+   case GL_DEPTH_COMPONENT24:          /* XXX FBO remove */
+   case GL_DEPTH24_STENCIL8_EXT:
+      intelInitDepthPointers_z24_s8(rb);
+      break;
+
+   case GL_STENCIL_INDEX8_EXT:         /* XXX FBO remove */
+      intelInitStencilPointers_z24_s8(rb);
+      break;
+
+   default:
+      _mesa_problem(NULL,
+                    "Unexpected _ActualFormat in intelSetSpanFunctions");
+      break;
+   }
+}
diff --git a/i965/intel_span.h b/shared/intel_span.h
index 2d4f858..5201f6d 100644
--- a/i965/intel_span.h
+++ b/shared/intel_span.h
@@ -28,14 +28,11 @@
 #ifndef _INTEL_SPAN_H
 #define _INTEL_SPAN_H
 
-#include "drirenderbuffer.h"
+extern void intelInitSpanFuncs(GLcontext * ctx);
 
-extern void intelInitSpanFuncs( GLcontext *ctx );
+extern void intelSpanRenderFinish(GLcontext * ctx);
+extern void intelSpanRenderStart(GLcontext * ctx);
 
-extern void intelSpanRenderFinish( GLcontext *ctx );
-extern void intelSpanRenderStart( GLcontext *ctx );
-
-extern void
-intelSetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis);
+extern void intel_set_span_functions(struct gl_renderbuffer *rb);
 
 #endif
diff --git a/shared/intel_tex.c b/shared/intel_tex.c
new file mode 100644
index 0000000..4fa18e2
--- /dev/null
+++ b/shared/intel_tex.c
@@ -0,0 +1,256 @@
+#include "swrast/swrast.h"
+#include "texobj.h"
+#include "teximage.h"
+#include "mipmap.h"
+#include "intel_context.h"
+#include "intel_mipmap_tree.h"
+#include "intel_tex.h"
+
+#define FILE_DEBUG_FLAG DEBUG_TEXTURE
+
+static GLboolean
+intelIsTextureResident(GLcontext * ctx, struct gl_texture_object *texObj)
+{
+#if 0
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_texture_object *intelObj = intel_texture_object(texObj);
+
+   return
+      intelObj->mt &&
+      intelObj->mt->region &&
+      intel_is_region_resident(intel, intelObj->mt->region);
+#endif
+   return 1;
+}
+
+
+
+static struct gl_texture_image *
+intelNewTextureImage(GLcontext * ctx)
+{
+   DBG("%s\n", __FUNCTION__);
+   (void) ctx;
+   return (struct gl_texture_image *) CALLOC_STRUCT(intel_texture_image);
+}
+
+/* Allocate and initialize a driver texture object; NULL if out of memory. */
+static struct gl_texture_object *
+intelNewTextureObject(GLcontext * ctx, GLuint name, GLenum target)
+{
+   struct intel_texture_object *obj = CALLOC_STRUCT(intel_texture_object);
+
+   DBG("%s\n", __FUNCTION__);
+   if (!obj)
+      return NULL;              /* never initialize through a NULL pointer */
+   _mesa_initialize_texture_object(&obj->base, name, target);
+   return &obj->base;
+}
+
+static void 
+intelDeleteTextureObject(GLcontext *ctx,
+			 struct gl_texture_object *texObj)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_texture_object *intelObj = intel_texture_object(texObj);
+
+   if (intelObj->mt)
+      intel_miptree_release(intel, &intelObj->mt);
+
+   _mesa_delete_texture_object(ctx, texObj);
+}
+
+
+static void
+intelFreeTextureImageData(GLcontext * ctx, struct gl_texture_image *texImage)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_texture_image *intelImage = intel_texture_image(texImage);
+
+   DBG("%s\n", __FUNCTION__);
+
+   if (intelImage->mt) {
+      intel_miptree_release(intel, &intelImage->mt);
+   }
+
+   if (texImage->Data) {
+      _mesa_free_texmemory(texImage->Data);
+      texImage->Data = NULL;
+   }
+}
+
+
+/* The system memcpy (at least on ubuntu 5.10) has problems copying
+ * to agp (writecombined) memory from a source which isn't 64-byte
+ * aligned - there is a 4x performance falloff.
+ *
+ * The x86 __memcpy is immune to this but is slightly slower
+ * (10%-ish) than the system memcpy.
+ *
+ * The sse_memcpy seems to have a slight cliff at 64/32 bytes, but
+ * isn't much faster than x86_memcpy for agp copies.
+ * 
+ * TODO: switch dynamically.
+ */
+static void *
+do_memcpy(void *dest, const void *src, size_t n)
+{
+   /* Use __memcpy unless both pointers are 64-byte aligned.  Test through
+    * unsigned long so the pointers are not truncated on 64-bit builds. */
+   if ((((unsigned long) src) & 63) || (((unsigned long) dest) & 63))
+      return __memcpy(dest, src, n);
+   return memcpy(dest, src, n);
+}
+
+
+#if DO_DEBUG && !defined(__ia64__)
+
+#ifndef __x86_64__
+static unsigned
+fastrdtsc(void)
+{
+   unsigned eax;
+   __asm__ volatile ("\t"
+                     "pushl  %%ebx\n\t"
+                     "cpuid\n\t" ".byte 0x0f, 0x31\n\t"
+                     "popl %%ebx\n":"=a" (eax)
+                     :"0"(0)
+                     :"ecx", "edx", "cc");
+
+   return eax;
+}
+#else
+static unsigned
+fastrdtsc(void)
+{
+   unsigned eax;
+   __asm__ volatile ("\t" "cpuid\n\t" ".byte 0x0f, 0x31\n\t":"=a" (eax)
+                     :"0"(0)
+                     :"ecx", "edx", "ebx", "cc");
+
+   return eax;
+}
+#endif
+
+static unsigned
+time_diff(unsigned t, unsigned t2)
+{
+   return ((t < t2) ? t2 - t : 0xFFFFFFFFU - (t - t2 - 1));
+}
+
+
+static void *
+timed_memcpy(void *dest, const void *src, size_t n)
+{
+   void *ret;
+   unsigned t1, t2;
+   double rate;
+
+   /* Warn about copies that are not 64-byte aligned.  The test goes through
+    * unsigned long so pointers are not truncated on 64-bit builds. */
+   if ((((unsigned long) src) & 63) || (((unsigned long) dest) & 63))
+      _mesa_printf("Warning - non-aligned texture copy!\n");
+
+   t1 = fastrdtsc();
+   ret = do_memcpy(dest, src, n);
+   t2 = fastrdtsc();
+   rate = (double) time_diff(t1, t2) / (double) n;   /* clocks per byte */
+   _mesa_printf("timed_memcpy: %u %u --> %f clocks/byte\n", t1, t2, rate);
+   return ret;
+}
+#endif /* DO_DEBUG */
+
+/**
+ * Generate new mipmap data from BASE+1 to BASE+p (the minimally-sized mipmap
+ * level).
+ *
+ * The texture object's miptree must be mapped.
+ *
+ * It would be really nice if this was just called by Mesa whenever mipmaps
+ * needed to be regenerated, rather than us having to remember to do so in
+ * each texture image modification path.
+ *
+ * This function should also include an accelerated path.
+ */
+void
+intel_generate_mipmap(GLcontext *ctx, GLenum target,
+                      struct gl_texture_object *texObj)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_texture_object *intelObj = intel_texture_object(texObj);
+   GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+   int face, i;
+
+   _mesa_generate_mipmap(ctx, target, texObj);
+
+   /* Update the level information in our private data in the new images, since
+    * it didn't get set as part of a normal TexImage path.
+    */
+   for (face = 0; face < nr_faces; face++) {
+      for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) {
+         struct intel_texture_image *intelImage;
+
+	 intelImage = intel_texture_image(texObj->Image[face][i]);
+	 if (intelImage == NULL)
+	    break;
+
+	 intelImage->level = i;
+	 intelImage->face = face;
+	 /* Unreference the miptree to signal that the new Data is a bare
+	  * pointer from mesa.
+	  */
+	 intel_miptree_release(intel, &intelImage->mt);
+      }
+   }
+}
+
+static void intelGenerateMipmap(GLcontext *ctx, GLenum target, struct gl_texture_object *texObj)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_texture_object *intelObj = intel_texture_object(texObj);
+
+   intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel);
+   intel_generate_mipmap(ctx, target, texObj);
+   intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel);
+}
+
+void
+intelInitTextureFuncs(struct dd_function_table *functions)
+{
+   functions->ChooseTextureFormat = intelChooseTextureFormat;
+   functions->TexImage1D = intelTexImage1D;
+   functions->TexImage2D = intelTexImage2D;
+   functions->TexImage3D = intelTexImage3D;
+   functions->TexSubImage1D = intelTexSubImage1D;
+   functions->TexSubImage2D = intelTexSubImage2D;
+   functions->TexSubImage3D = intelTexSubImage3D;
+#ifdef I915
+   functions->CopyTexImage1D = intelCopyTexImage1D;
+   functions->CopyTexImage2D = intelCopyTexImage2D;
+   functions->CopyTexSubImage1D = intelCopyTexSubImage1D;
+   functions->CopyTexSubImage2D = intelCopyTexSubImage2D;
+#else
+   functions->CopyTexImage1D = _swrast_copy_teximage1d;
+   functions->CopyTexImage2D = _swrast_copy_teximage2d;
+   functions->CopyTexSubImage1D = _swrast_copy_texsubimage1d;
+   functions->CopyTexSubImage2D = _swrast_copy_texsubimage2d;
+#endif
+   functions->GetTexImage = intelGetTexImage;
+   functions->GenerateMipmap = intelGenerateMipmap;
+
+   /* compressed texture functions */
+   functions->CompressedTexImage2D = intelCompressedTexImage2D;
+   functions->GetCompressedTexImage = intelGetCompressedTexImage;
+
+   functions->NewTextureObject = intelNewTextureObject;
+   functions->NewTextureImage = intelNewTextureImage;
+   functions->DeleteTexture = intelDeleteTextureObject;
+   functions->FreeTexImageData = intelFreeTextureImageData;
+   functions->UpdateTexturePalette = 0;
+   functions->IsTextureResident = intelIsTextureResident;
+
+#if DO_DEBUG && !defined(__ia64__)
+   if (INTEL_DEBUG & DEBUG_BUFMGR)
+      functions->TextureMemCpy = timed_memcpy;
+   else
+#endif
+      functions->TextureMemCpy = do_memcpy;
+}
diff --git a/shared/intel_tex.h b/shared/intel_tex.h
new file mode 100644
index 0000000..fe7a8ba
--- /dev/null
+++ b/shared/intel_tex.h
@@ -0,0 +1,164 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTELTEX_INC
+#define INTELTEX_INC
+
+#include "mtypes.h"
+#include "intel_context.h"
+#include "texmem.h"
+
+
+void intelInitTextureFuncs(struct dd_function_table *functions);
+
+const struct gl_texture_format *intelChooseTextureFormat(GLcontext * ctx,
+                                                         GLint internalFormat,
+                                                         GLenum format,
+                                                         GLenum type);
+
+
+void intelTexImage3D(GLcontext * ctx,
+                     GLenum target, GLint level,
+                     GLint internalFormat,
+                     GLint width, GLint height, GLint depth,
+                     GLint border,
+                     GLenum format, GLenum type, const void *pixels,
+                     const struct gl_pixelstore_attrib *packing,
+                     struct gl_texture_object *texObj,
+                     struct gl_texture_image *texImage);
+
+void intelTexSubImage3D(GLcontext * ctx,
+                        GLenum target,
+                        GLint level,
+                        GLint xoffset, GLint yoffset, GLint zoffset,
+                        GLsizei width, GLsizei height, GLsizei depth,
+                        GLenum format, GLenum type,
+                        const GLvoid * pixels,
+                        const struct gl_pixelstore_attrib *packing,
+                        struct gl_texture_object *texObj,
+                        struct gl_texture_image *texImage);
+
+void intelTexImage2D(GLcontext * ctx,
+                     GLenum target, GLint level,
+                     GLint internalFormat,
+                     GLint width, GLint height, GLint border,
+                     GLenum format, GLenum type, const void *pixels,
+                     const struct gl_pixelstore_attrib *packing,
+                     struct gl_texture_object *texObj,
+                     struct gl_texture_image *texImage);
+
+void intelTexSubImage2D(GLcontext * ctx,
+                        GLenum target,
+                        GLint level,
+                        GLint xoffset, GLint yoffset,
+                        GLsizei width, GLsizei height,
+                        GLenum format, GLenum type,
+                        const GLvoid * pixels,
+                        const struct gl_pixelstore_attrib *packing,
+                        struct gl_texture_object *texObj,
+                        struct gl_texture_image *texImage);
+
+void intelTexImage1D(GLcontext * ctx,
+                     GLenum target, GLint level,
+                     GLint internalFormat,
+                     GLint width, GLint border,
+                     GLenum format, GLenum type, const void *pixels,
+                     const struct gl_pixelstore_attrib *packing,
+                     struct gl_texture_object *texObj,
+                     struct gl_texture_image *texImage);
+
+void intelTexSubImage1D(GLcontext * ctx,
+                        GLenum target,
+                        GLint level,
+                        GLint xoffset,
+                        GLsizei width,
+                        GLenum format, GLenum type,
+                        const GLvoid * pixels,
+                        const struct gl_pixelstore_attrib *packing,
+                        struct gl_texture_object *texObj,
+                        struct gl_texture_image *texImage);
+
+void intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level,
+                         GLenum internalFormat,
+                         GLint x, GLint y, GLsizei width, GLint border);
+
+void intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level,
+                         GLenum internalFormat,
+                         GLint x, GLint y, GLsizei width, GLsizei height,
+                         GLint border);
+
+void intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
+                            GLint xoffset, GLint x, GLint y, GLsizei width);
+
+void intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
+                            GLint xoffset, GLint yoffset,
+                            GLint x, GLint y, GLsizei width, GLsizei height);
+
+void intelGetTexImage(GLcontext * ctx, GLenum target, GLint level,
+                      GLenum format, GLenum type, GLvoid * pixels,
+                      struct gl_texture_object *texObj,
+                      struct gl_texture_image *texImage);
+
+void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
+				GLint internalFormat,
+				GLint width, GLint height, GLint border,
+				GLsizei imageSize, const GLvoid *data,
+				struct gl_texture_object *texObj,
+				struct gl_texture_image *texImage );
+
+void intelGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level,
+				GLvoid *pixels,
+				struct gl_texture_object *texObj,
+				struct gl_texture_image *texImage);
+
+void intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname,
+		       unsigned long long offset, GLint depth, GLuint pitch);
+void intelSetTexBuffer(__DRIcontext *pDRICtx,
+		       GLint target, __DRIdrawable *pDraw);
+
+GLuint intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit);
+
+void intel_tex_map_level_images(struct intel_context *intel,
+				struct intel_texture_object *intelObj,
+				int level);
+
+void intel_tex_unmap_level_images(struct intel_context *intel,
+				  struct intel_texture_object *intelObj,
+				  int level);
+
+void intel_tex_map_images(struct intel_context *intel,
+                          struct intel_texture_object *intelObj);
+
+void intel_tex_unmap_images(struct intel_context *intel,
+                            struct intel_texture_object *intelObj);
+
+int intel_compressed_num_bytes(GLuint mesaFormat);
+
+void intel_generate_mipmap(GLcontext *ctx, GLenum target,
+			   struct gl_texture_object *texObj);
+
+#endif
diff --git a/shared/intel_tex_copy.c b/shared/intel_tex_copy.c
new file mode 100644
index 0000000..1add7c6
--- /dev/null
+++ b/shared/intel_tex_copy.c
@@ -0,0 +1,300 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "mtypes.h"
+#include "enums.h"
+#include "image.h"
+#include "teximage.h"
+#include "mipmap.h"
+#include "swrast/swrast.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_batchbuffer.h"
+#include "intel_buffers.h"
+#include "intel_mipmap_tree.h"
+#include "intel_regions.h"
+#include "intel_fbo.h"
+#include "intel_tex.h"
+#include "intel_blit.h"
+
+#define FILE_DEBUG_FLAG DEBUG_TEXTURE
+
+/**
+ * Get the intel_region which is the source for any glCopyTex[Sub]Image call.
+ *
+ * Do the best we can using the blitter.  A future project is to use
+ * the texture engine and fragment programs for these copies.
+ */
+static const struct intel_region *
+get_teximage_source(struct intel_context *intel, GLenum internalFormat)
+{
+   struct intel_renderbuffer *irb;
+
+   DBG("%s %s\n", __FUNCTION__,
+       _mesa_lookup_enum_by_nr(internalFormat));
+
+   switch (internalFormat) {
+   case GL_DEPTH_COMPONENT:
+   case GL_DEPTH_COMPONENT16_ARB:
+      irb = intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH);
+      if (irb && irb->region && irb->region->cpp == 2)
+         return irb->region;
+      return NULL;
+   case GL_DEPTH24_STENCIL8_EXT:
+   case GL_DEPTH_STENCIL_EXT:
+      irb = intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH);
+      if (irb && irb->region && irb->region->cpp == 4)
+         return irb->region;
+      return NULL;
+   case GL_RGBA:
+   case GL_RGBA8:
+      return intel_readbuf_region(intel);
+   case GL_RGB:
+      if (intel->ctx.Visual.rgbBits == 16)
+         return intel_readbuf_region(intel);
+      return NULL;
+   default:
+      return NULL;
+   }
+}
+
+
+static GLboolean
+do_copy_texsubimage(struct intel_context *intel,
+		    GLenum target,
+                    struct intel_texture_image *intelImage,
+                    GLenum internalFormat,
+                    GLint dstx, GLint dsty,
+                    GLint x, GLint y, GLsizei width, GLsizei height)
+{
+   GLcontext *ctx = &intel->ctx;
+   struct gl_texture_object *texObj = intelImage->base.TexObject;
+   const struct intel_region *src =
+      get_teximage_source(intel, internalFormat);
+
+   if (!intelImage->mt || !src) {
+      DBG("%s fail %p %p\n", __FUNCTION__, intelImage->mt, src);
+      return GL_FALSE;
+   }
+
+   intelFlush(ctx);
+   LOCK_HARDWARE(intel);
+   {
+      GLuint image_offset = intel_miptree_image_offset(intelImage->mt,
+                                                       intelImage->face,
+                                                       intelImage->level);
+      const GLint orig_x = x;
+      const GLint orig_y = y;
+      const struct gl_framebuffer *fb = ctx->DrawBuffer;
+
+      if (_mesa_clip_to_region(fb->_Xmin, fb->_Ymin, fb->_Xmax, fb->_Ymax,
+                               &x, &y, &width, &height)) {
+         /* Update dst for clipped src.  Need to also clip the source rect.
+          */
+         dstx += x - orig_x;
+         dsty += y - orig_y;
+
+         if (ctx->ReadBuffer->Name == 0) {
+            /* reading from a window, adjust x, y */
+            __DRIdrawablePrivate *dPriv = intel->driDrawable;
+            GLuint window_y;
+            /* window_y = position of window on screen if y=0=bottom */
+            window_y = intel->intelScreen->height - (dPriv->y + dPriv->h);
+            y = window_y + y;
+            x += dPriv->x;
+         }
+         else {
+            /* reading from a FBO */
+            /* invert Y */
+            y = ctx->ReadBuffer->Height - y - 1;
+         }
+
+
+         /* A bit of fiddling to get the blitter to work with -ve
+          * pitches.  But we get a nice inverted blit this way, so it's
+          * worth it:
+          */
+         intelEmitCopyBlit(intel,
+                           intelImage->mt->cpp,
+                           -src->pitch,
+                           src->buffer,
+                           src->height * src->pitch * src->cpp,
+			   GL_FALSE,
+                           intelImage->mt->pitch,
+                           intelImage->mt->region->buffer,
+                           image_offset,
+			   intelImage->mt->region->tiled,
+                           x, y + height, dstx, dsty, width, height,
+			   GL_COPY); /* ? */
+
+         intel_batchbuffer_flush(intel->batch);
+      }
+   }
+
+
+   UNLOCK_HARDWARE(intel);
+
+   /* GL_SGIS_generate_mipmap */
+   if (intelImage->level == texObj->BaseLevel && texObj->GenerateMipmap) {
+      intel_generate_mipmap(ctx, target, texObj);
+   }
+
+   return GL_TRUE;
+}
+
+
+
+
+
+void
+intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level,
+                    GLenum internalFormat,
+                    GLint x, GLint y, GLsizei width, GLint border)
+{
+   struct gl_texture_unit *texUnit =
+      &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+   struct gl_texture_object *texObj =
+      _mesa_select_tex_object(ctx, texUnit, target);
+   struct gl_texture_image *texImage =
+      _mesa_select_tex_image(ctx, texObj, target, level);
+
+   if (border)
+      goto fail;
+
+   /* Setup or redefine the texture object, mipmap tree and texture
+    * image.  Don't populate yet.  
+    */
+   ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
+                          width, border,
+                          GL_RGBA, CHAN_TYPE, NULL,
+                          &ctx->DefaultPacking, texObj, texImage);
+
+   if (!do_copy_texsubimage(intel_context(ctx), target,
+                            intel_texture_image(texImage),
+                            internalFormat, 0, 0, x, y, width, 1))
+      goto fail;
+
+   return;
+
+ fail:
+   _swrast_copy_teximage1d(ctx, target, level, internalFormat, x, y,
+                           width, border);
+}
+
+void
+intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level,
+                    GLenum internalFormat,
+                    GLint x, GLint y, GLsizei width, GLsizei height,
+                    GLint border)
+{
+   struct gl_texture_unit *texUnit =
+      &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+   struct gl_texture_object *texObj =
+      _mesa_select_tex_object(ctx, texUnit, target);
+   struct gl_texture_image *texImage =
+      _mesa_select_tex_image(ctx, texObj, target, level);
+
+   if (border)
+      goto fail;
+
+   /* Setup or redefine the texture object, mipmap tree and texture
+    * image.  Don't populate yet.  
+    */
+   ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
+                          width, height, border,
+                          GL_RGBA, CHAN_TYPE, NULL,
+                          &ctx->DefaultPacking, texObj, texImage);
+
+
+   if (!do_copy_texsubimage(intel_context(ctx), target,
+                            intel_texture_image(texImage),
+                            internalFormat, 0, 0, x, y, width, height))
+      goto fail;
+
+   return;
+
+ fail:
+   _swrast_copy_teximage2d(ctx, target, level, internalFormat, x, y,
+                           width, height, border);
+}
+
+
+void
+intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
+                       GLint xoffset, GLint x, GLint y, GLsizei width)
+{
+   struct gl_texture_unit *texUnit =
+      &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+   struct gl_texture_object *texObj =
+      _mesa_select_tex_object(ctx, texUnit, target);
+   struct gl_texture_image *texImage =
+      _mesa_select_tex_image(ctx, texObj, target, level);
+   GLenum internalFormat = texImage->InternalFormat;
+
+   /* XXX need to check <border> as in above function? */
+
+   /* Need to check texture is compatible with source format. 
+    */
+
+   if (!do_copy_texsubimage(intel_context(ctx), target,
+                            intel_texture_image(texImage),
+                            internalFormat, xoffset, 0, x, y, width, 1)) {
+      _swrast_copy_texsubimage1d(ctx, target, level, xoffset, x, y, width);
+   }
+}
+
+
+
+void
+intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
+                       GLint xoffset, GLint yoffset,
+                       GLint x, GLint y, GLsizei width, GLsizei height)
+{
+   struct gl_texture_unit *texUnit =
+      &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+   struct gl_texture_object *texObj =
+      _mesa_select_tex_object(ctx, texUnit, target);
+   struct gl_texture_image *texImage =
+      _mesa_select_tex_image(ctx, texObj, target, level);
+   GLenum internalFormat = texImage->InternalFormat;
+
+
+   /* Need to check texture is compatible with source format. 
+    */
+
+   if (!do_copy_texsubimage(intel_context(ctx), target,
+                            intel_texture_image(texImage),
+                            internalFormat,
+                            xoffset, yoffset, x, y, width, height)) {
+
+      DBG("%s - fallback to swrast\n", __FUNCTION__);
+
+      _swrast_copy_texsubimage2d(ctx, target, level,
+                                 xoffset, yoffset, x, y, width, height);
+   }
+}
diff --git a/shared/intel_tex_format.c b/shared/intel_tex_format.c
new file mode 100644
index 0000000..349a29b
--- /dev/null
+++ b/shared/intel_tex_format.c
@@ -0,0 +1,193 @@
+#include "intel_context.h"
+#include "intel_tex.h"
+#include "texformat.h"
+#include "enums.h"
+
+/* It works out that this function is fine for all the supported
+ * hardware.  However, there is still a need to map the formats onto
+ * hardware descriptors.
+ */
+/* Note that the i915 can actually support many more formats than
+ * these if we take the step of simply swizzling the colors
+ * immediately after sampling...
+ */
+const struct gl_texture_format *
+intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
+                         GLenum format, GLenum type)
+{
+   struct intel_context *intel = intel_context(ctx);
+   const GLboolean do32bpt = (intel->ctx.Visual.rgbBits == 32);
+
+   switch (internalFormat) {
+   case 4:
+   case GL_RGBA:
+   case GL_COMPRESSED_RGBA:
+      if (format == GL_BGRA) {
+         if (type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) {
+            return &_mesa_texformat_argb8888;
+         }
+         else if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV) {
+            return &_mesa_texformat_argb4444;
+         }
+         else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV) {
+            return &_mesa_texformat_argb1555;
+         }
+      }
+      return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444;
+
+   case 3:
+   case GL_RGB:
+   case GL_COMPRESSED_RGB:
+      if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) {
+         return &_mesa_texformat_rgb565;
+      }
+      return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_rgb565;
+
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+      return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444;
+
+   case GL_RGBA4:
+   case GL_RGBA2:
+      return &_mesa_texformat_argb4444;
+
+   case GL_RGB5_A1:
+      return &_mesa_texformat_argb1555;
+
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      return &_mesa_texformat_argb8888;
+
+   case GL_RGB5:
+   case GL_RGB4:
+   case GL_R3_G3_B2:
+      return &_mesa_texformat_rgb565;
+
+   case GL_ALPHA:
+   case GL_ALPHA4:
+   case GL_ALPHA8:
+   case GL_ALPHA12:
+   case GL_ALPHA16:
+   case GL_COMPRESSED_ALPHA:
+      return &_mesa_texformat_a8;
+
+   case 1:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE4:
+   case GL_LUMINANCE8:
+   case GL_LUMINANCE12:
+   case GL_LUMINANCE16:
+   case GL_COMPRESSED_LUMINANCE:
+      return &_mesa_texformat_l8;
+
+   case 2:
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE4_ALPHA4:
+   case GL_LUMINANCE6_ALPHA2:
+   case GL_LUMINANCE8_ALPHA8:
+   case GL_LUMINANCE12_ALPHA4:
+   case GL_LUMINANCE12_ALPHA12:
+   case GL_LUMINANCE16_ALPHA16:
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
+      return &_mesa_texformat_al88;
+
+   case GL_INTENSITY:
+   case GL_INTENSITY4:
+   case GL_INTENSITY8:
+   case GL_INTENSITY12:
+   case GL_INTENSITY16:
+   case GL_COMPRESSED_INTENSITY:
+      return &_mesa_texformat_i8;
+
+   case GL_YCBCR_MESA:
+      if (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE)
+         return &_mesa_texformat_ycbcr;
+      else
+         return &_mesa_texformat_ycbcr_rev;
+
+   case GL_COMPRESSED_RGB_FXT1_3DFX:
+      return &_mesa_texformat_rgb_fxt1;
+   case GL_COMPRESSED_RGBA_FXT1_3DFX:
+      return &_mesa_texformat_rgba_fxt1;
+
+   case GL_RGB_S3TC:
+   case GL_RGB4_S3TC:
+   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+      return &_mesa_texformat_rgb_dxt1;
+
+   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+      return &_mesa_texformat_rgba_dxt1;
+
+   case GL_RGBA_S3TC:
+   case GL_RGBA4_S3TC:
+   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+      return &_mesa_texformat_rgba_dxt3;
+
+   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+      return &_mesa_texformat_rgba_dxt5;
+
+   case GL_DEPTH_COMPONENT:
+   case GL_DEPTH_COMPONENT16:
+   case GL_DEPTH_COMPONENT24:
+   case GL_DEPTH_COMPONENT32:
+      return &_mesa_texformat_z16;
+
+   case GL_DEPTH_STENCIL_EXT:
+   case GL_DEPTH24_STENCIL8_EXT:
+      return &_mesa_texformat_z24_s8;
+
+#ifndef I915
+   case GL_SRGB_EXT:
+   case GL_SRGB8_EXT:
+   case GL_SRGB_ALPHA_EXT:
+   case GL_SRGB8_ALPHA8_EXT:
+   case GL_SLUMINANCE_EXT:
+   case GL_SLUMINANCE8_EXT:
+   case GL_SLUMINANCE_ALPHA_EXT:
+   case GL_SLUMINANCE8_ALPHA8_EXT:
+   case GL_COMPRESSED_SRGB_EXT:
+   case GL_COMPRESSED_SRGB_ALPHA_EXT:
+   case GL_COMPRESSED_SLUMINANCE_EXT:
+   case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT:
+       return &_mesa_texformat_srgba8;
+   case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
+   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
+   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
+   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
+     return &_mesa_texformat_srgb_dxt1;
+#endif
+
+   default:
+      fprintf(stderr, "unexpected texture format %s in %s\n",
+              _mesa_lookup_enum_by_nr(internalFormat), __FUNCTION__);
+      return NULL;
+   }
+
+   return NULL;                 /* never get here */
+}
+
+/* Return 2 for FXT1/DXT1 formats, 4 for DXT3/DXT5, and 0 for anything else.
+ * NOTE(review): what the byte count is measured per is not shown here.
+ */
+int intel_compressed_num_bytes(GLuint mesaFormat)
+{
+   switch (mesaFormat) {
+   case MESA_FORMAT_RGB_FXT1:
+   case MESA_FORMAT_RGBA_FXT1:
+   case MESA_FORMAT_RGB_DXT1:
+   case MESA_FORMAT_RGBA_DXT1:
+      return 2;
+
+   case MESA_FORMAT_RGBA_DXT3:
+   case MESA_FORMAT_RGBA_DXT5:
+      return 4;
+
+   default:
+      /* Not one of the compressed formats handled here. */
+      return 0;
+   }
+}
diff --git a/shared/intel_tex_image.c b/shared/intel_tex_image.c
new file mode 100644
index 0000000..f261034
--- /dev/null
+++ b/shared/intel_tex_image.c
@@ -0,0 +1,760 @@
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "glheader.h"
+#include "macros.h"
+#include "mtypes.h"
+#include "enums.h"
+#include "colortab.h"
+#include "convolve.h"
+#include "context.h"
+#include "simple_list.h"
+#include "texcompress.h"
+#include "texformat.h"
+#include "texobj.h"
+#include "texstore.h"
+#include "teximage.h"
+
+#include "intel_context.h"
+#include "intel_mipmap_tree.h"
+#include "intel_buffer_objects.h"
+#include "intel_batchbuffer.h"
+#include "intel_tex.h"
+#include "intel_ioctl.h"
+#include "intel_blit.h"
+#include "intel_fbo.h"
+
+#define FILE_DEBUG_FLAG DEBUG_TEXTURE
+
+/* Functions to store texture images.  Where possible, mipmap_tree's
+ * will be created or further instantiated with image data, otherwise
+ * images will be stored in malloc'd memory.  A validation step is
+ * required to pull those images into a mipmap tree, or otherwise
+ * decide a fallback is required.
+ */
+
+/* Smallest k such that (1 << k) >= n; returns 0 when n <= 1. */
+static int
+logbase2(int n)
+{
+   int exponent = 0;
+   int power;
+
+   /* Double 'power' until it reaches n, counting the doublings. */
+   for (power = 1; n > power; power *= 2) {
+      exponent++;
+   }
+
+   return exponent;
+}
+
+
+/* Try to allocate a mipmap tree for intelObj, sized from this image.
+ *
+ * Returns without creating a tree when the image has a border, when it
+ * is a depth texture whose S or T wrap mode is GL_CLAMP_TO_BORDER, or
+ * when it sits above BaseLevel and its width (or, depending on the
+ * target, its height or depth) is already 1.
+ * Otherwise the tree starts at BaseLevel (or level 0 if this image lies
+ * below BaseLevel) and runs to a guessed last level.  Consider pruning
+ * this tree at a validation if the saving is worth it.
+ */
+static void
+guess_and_alloc_mipmap_tree(struct intel_context *intel,
+                            struct intel_texture_object *intelObj,
+                            struct intel_texture_image *intelImage)
+{
+   GLuint firstLevel;
+   GLuint lastLevel;
+   GLuint width = intelImage->base.Width;
+   GLuint height = intelImage->base.Height;
+   GLuint depth = intelImage->base.Depth;
+   GLuint l2width, l2height, l2depth;
+   GLuint i, comp_byte = 0;
+
+   DBG("%s\n", __FUNCTION__);
+
+   if (intelImage->base.Border ||
+       ((intelImage->base._BaseFormat == GL_DEPTH_COMPONENT) && 
+        ((intelObj->base.WrapS == GL_CLAMP_TO_BORDER) ||
+         (intelObj->base.WrapT == GL_CLAMP_TO_BORDER))))
+      return;
+
+   if (intelImage->level > intelObj->base.BaseLevel &&
+       (intelImage->base.Width == 1 ||
+        (intelObj->base.Target != GL_TEXTURE_1D &&
+         intelImage->base.Height == 1) ||
+        (intelObj->base.Target == GL_TEXTURE_3D &&
+         intelImage->base.Depth == 1)))
+      return;
+
+   /* If this image disrespects BaseLevel, allocate from level zero.
+    * Usually BaseLevel == 0, so it's unlikely to happen.
+    */
+   if (intelImage->level < intelObj->base.BaseLevel)
+      firstLevel = 0;
+   else
+      firstLevel = intelObj->base.BaseLevel;
+
+
+   /* Figure out image dimensions at start level. 
+    */
+   for (i = intelImage->level; i > firstLevel; i--) {
+      width <<= 1;
+      if (height != 1)
+         height <<= 1;
+      if (depth != 1)
+         depth <<= 1;
+   }
+
+   /* Guess a reasonable value for lastLevel.  This is probably going
+    * to be wrong fairly often and might mean that we have to look at
+    * resizable buffers, or require that buffers implement lazy
+    * pagetable arrangements.
+    */
+   if ((intelObj->base.MinFilter == GL_NEAREST ||
+        intelObj->base.MinFilter == GL_LINEAR) &&
+       intelImage->level == firstLevel) {
+      lastLevel = firstLevel;
+   }
+   else {
+      l2width = logbase2(width);
+      l2height = logbase2(height);
+      l2depth = logbase2(depth);
+      lastLevel = firstLevel + MAX2(MAX2(l2width, l2height), l2depth);
+   }
+
+   assert(!intelObj->mt);
+   if (intelImage->base.IsCompressed)
+      comp_byte = intel_compressed_num_bytes(intelImage->base.TexFormat->MesaFormat);
+   intelObj->mt = intel_miptree_create(intel,
+                                       intelObj->base.Target,
+                                       intelImage->base.InternalFormat,
+                                       firstLevel,
+                                       lastLevel,
+                                       width,
+                                       height,
+                                       depth,
+                                       intelImage->base.TexFormat->TexelBytes,
+                                       comp_byte);
+
+   DBG("%s - success\n", __FUNCTION__);
+}
+
+
+
+/* Index (0..5) of a cube-map face target; 0 for every other target. */
+static GLuint
+target_to_face(GLenum target)
+{
+   const GLuint first = (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
+   const GLuint last = (GLuint) GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB;
+   const GLuint value = (GLuint) target;
+
+   /* The six face enums are consecutive, so one range test covers the
+    * same targets the per-face case labels would. */
+   if (value < first || value > last) {
+      return 0;
+   }
+
+   return value - first;
+}
+
+/* Gate for the PBO fast paths: accepts only the combinations of internal
+ * format, format, type and texture format listed here (more would work).
+ */
+static GLboolean
+check_pbo_format(GLint internalFormat,
+                 GLenum format, GLenum type,
+                 const struct gl_texture_format *mesa_format)
+{
+   if (internalFormat == 4 || internalFormat == GL_RGBA) {
+      if (format != GL_BGRA || mesa_format != &_mesa_texformat_argb8888)
+         return GL_FALSE;
+      return (type == GL_UNSIGNED_BYTE ||
+              type == GL_UNSIGNED_INT_8_8_8_8_REV);
+   }
+
+   if (internalFormat == 3 || internalFormat == GL_RGB) {
+      return (format == GL_RGB &&
+              type == GL_UNSIGNED_SHORT_5_6_5 &&
+              mesa_format == &_mesa_texformat_rgb565);
+   }
+
+   if (internalFormat == GL_YCBCR_MESA)
+      return (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE);
+
+   return GL_FALSE;
+}
+
+/* Copy a texture image out of a PBO with the blitter.  Returns GL_FALSE,
+ * without blitting, when there is no PBO or the unpack state rules it out.
+ * XXX: Do this for TexSubImage also. */
+static GLboolean
+try_pbo_upload(struct intel_context *intel,
+               struct intel_texture_image *intelImage,
+               const struct gl_pixelstore_attrib *unpack,
+               GLint internalFormat,
+               GLint width, GLint height,
+               GLenum format, GLenum type, const void *pixels)
+{
+   struct intel_buffer_object *pbo = intel_buffer_object(unpack->BufferObj);
+   GLuint src_offset, src_stride;
+   GLuint dst_offset, dst_stride;
+
+   if (!pbo ||
+       intel->ctx._ImageTransferState ||
+       unpack->SkipPixels || unpack->SkipRows) {
+      DBG("%s: failure 1\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   /* 'pixels' is used as an offset into the PBO, not as an address. */
+   src_offset = (GLuint) (unsigned long) pixels;
+
+   if (unpack->RowLength > 0)
+      src_stride = unpack->RowLength;
+   else
+      src_stride = width;
+
+   dst_offset = intel_miptree_image_offset(intelImage->mt,
+                                           intelImage->face,
+                                           intelImage->level);
+
+   dst_stride = intelImage->mt->pitch;
+
+   intelFlush(&intel->ctx);
+   LOCK_HARDWARE(intel);
+   {
+      dri_bo *src_buffer = intel_bufferobj_buffer(intel, pbo, INTEL_READ);
+      dri_bo *dst_buffer = intel_region_buffer(intel,
+					       intelImage->mt->region,
+					       INTEL_WRITE_FULL);
+
+      intelEmitCopyBlit(intel,
+                        intelImage->mt->cpp,
+                        src_stride, src_buffer, src_offset, GL_FALSE,
+                        dst_stride, dst_buffer, dst_offset, GL_FALSE,
+                        0, 0, 0, 0, width, height,
+			GL_COPY);
+
+      intel_batchbuffer_flush(intel->batch);
+   }
+   UNLOCK_HARDWARE(intel);
+
+   return GL_TRUE;
+}
+
+/* Zero-copy PBO upload: hands the PBO to intel_region_attach_pbo() instead
+ * of copying.  Refused unless the strides match and both offsets are 0. */
+static GLboolean
+try_pbo_zcopy(struct intel_context *intel,
+              struct intel_texture_image *intelImage,
+              const struct gl_pixelstore_attrib *unpack,
+              GLint internalFormat,
+              GLint width, GLint height,
+              GLenum format, GLenum type, const void *pixels)
+{
+   struct intel_buffer_object *pbo = intel_buffer_object(unpack->BufferObj);
+   GLuint src_offset, src_stride;
+   GLuint dst_offset, dst_stride;
+
+   if (!pbo ||
+       intel->ctx._ImageTransferState ||
+       unpack->SkipPixels || unpack->SkipRows) {
+      DBG("%s: failure 1\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   src_offset = (GLuint) (unsigned long) pixels;  /* PBO offset, not an address */
+
+   if (unpack->RowLength > 0)
+      src_stride = unpack->RowLength;
+   else
+      src_stride = width;
+
+   dst_offset = intel_miptree_image_offset(intelImage->mt,
+                                           intelImage->face,
+                                           intelImage->level);
+
+   dst_stride = intelImage->mt->pitch;
+
+   if (src_stride != dst_stride || dst_offset != 0 || src_offset != 0) {
+      DBG("%s: failure 2\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   intel_region_attach_pbo(intel, intelImage->mt->region, pbo);
+
+   return GL_TRUE;
+}
+
+/* Common implementation behind intelTexImage1D/2D/3D and
+ * intelCompressedTexImage2D.  Chooses the texture format, finds or guesses
+ * a miptree for the image (falling back to malloc'ed storage), then
+ * uploads 'pixels' through a PBO fast path or a CPU copy.  'compressed'
+ * is non-zero for compressed uploads, in which case imageSize is used. */
+static void
+intelTexImage(GLcontext * ctx,
+              GLint dims,
+              GLenum target, GLint level,
+              GLint internalFormat,
+              GLint width, GLint height, GLint depth,
+              GLint border,
+              GLenum format, GLenum type, const void *pixels,
+              const struct gl_pixelstore_attrib *unpack,
+              struct gl_texture_object *texObj,
+              struct gl_texture_image *texImage, GLsizei imageSize, int compressed)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_texture_object *intelObj = intel_texture_object(texObj);
+   struct intel_texture_image *intelImage = intel_texture_image(texImage);
+   GLint postConvWidth = width;
+   GLint postConvHeight = height;
+   GLint texelBytes, sizeInBytes;
+   GLuint dstRowStride, srcRowStride = texImage->RowStride;
+
+
+   DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__,
+       _mesa_lookup_enum_by_nr(target), level, width, height, depth, border);
+
+   intelFlush(ctx);
+
+   intelImage->face = target_to_face(target);
+   intelImage->level = level;
+
+   if (ctx->_ImageTransferState & IMAGE_CONVOLUTION_BIT) {
+      _mesa_adjust_image_for_convolution(ctx, dims, &postConvWidth,
+                                         &postConvHeight);
+   }
+
+   /* choose the texture format */
+   texImage->TexFormat = intelChooseTextureFormat(ctx, internalFormat,
+                                                  format, type);
+
+   _mesa_set_fetch_functions(texImage, dims);
+
+   if (texImage->TexFormat->TexelBytes == 0) {
+      /* must be a compressed format */
+      texelBytes = 0;
+      texImage->IsCompressed = GL_TRUE;
+      texImage->CompressedSize =
+	 ctx->Driver.CompressedTextureSize(ctx, texImage->Width,
+					   texImage->Height, texImage->Depth,
+					   texImage->TexFormat->MesaFormat);
+   } else {
+      texelBytes = texImage->TexFormat->TexelBytes;
+
+      /* Minimum pitch of 32 bytes */
+      if (postConvWidth * texelBytes < 32) {
+	 postConvWidth = 32 / texelBytes;
+	 texImage->RowStride = postConvWidth;
+      }
+
+      if (!intelImage->mt) {
+	  assert(texImage->RowStride == postConvWidth);
+      }
+   }
+
+   /* Release the reference to a potentially orphaned buffer.
+    * Release any old malloced memory.
+    */
+   if (intelImage->mt) {
+      intel_miptree_release(intel, &intelImage->mt);
+      assert(!texImage->Data);
+   }
+   else if (texImage->Data) {
+      _mesa_free_texmemory(texImage->Data);
+      texImage->Data = NULL;
+   }
+
+   /* If this is the only texture image in the tree, could call
+    * bmBufferData with NULL data to free the old block and avoid
+    * waiting on any outstanding fences.
+    */
+   if (intelObj->mt &&
+       intelObj->mt->first_level == level &&
+       intelObj->mt->last_level == level &&
+       intelObj->mt->target != GL_TEXTURE_CUBE_MAP_ARB &&
+       !intel_miptree_match_image(intelObj->mt, &intelImage->base,
+                                  intelImage->face, intelImage->level)) {
+
+      DBG("release it\n");
+      intel_miptree_release(intel, &intelObj->mt);
+      assert(!intelObj->mt);
+   }
+
+   if (!intelObj->mt) {
+      guess_and_alloc_mipmap_tree(intel, intelObj, intelImage);
+      if (!intelObj->mt) {
+	 DBG("guess_and_alloc_mipmap_tree: failed\n");
+      }
+   }
+
+   assert(!intelImage->mt);
+
+   if (intelObj->mt &&
+       intel_miptree_match_image(intelObj->mt, &intelImage->base,
+                                 intelImage->face, intelImage->level)) {
+
+      intel_miptree_reference(&intelImage->mt, intelObj->mt);
+      assert(intelImage->mt);
+   }
+
+   if (!intelImage->mt)
+      DBG("XXX: Image did not fit into tree - storing in local memory!\n");
+
+   /* PBO fastpaths:
+    */
+   if (dims <= 2 &&
+       intelImage->mt &&
+       intel_buffer_object(unpack->BufferObj) &&
+       check_pbo_format(internalFormat, format,
+                        type, intelImage->base.TexFormat)) {
+
+      DBG("trying pbo upload\n");
+
+      /* Attempt to texture directly from PBO data (zero copy upload).
+       *
+       * Note this can lead to worse as well as better performance (in
+       * particular when intel_region_cow() is required); it is only
+       * tried when this image is the miptree's single level.
+       */
+      if (intelObj->mt == intelImage->mt &&
+          intelObj->mt->first_level == level &&
+          intelObj->mt->last_level == level) {
+
+         if (try_pbo_zcopy(intel, intelImage, unpack,
+                           internalFormat,
+                           width, height, format, type, pixels)) {
+
+            DBG("pbo zcopy upload succeeded\n");
+            return;
+         }
+      }
+
+
+      /* Otherwise, attempt to use the blitter for PBO image uploads.
+       */
+      if (try_pbo_upload(intel, intelImage, unpack,
+                         internalFormat,
+                         width, height, format, type, pixels)) {
+         DBG("pbo upload succeeded\n");
+         return;
+      }
+
+      DBG("pbo upload failed\n");
+   }
+
+
+   /* intelCopyTexImage calls this function with pixels == NULL, with
+    * the expectation that the mipmap tree will be set up but nothing
+    * more will be done.  This is where those calls return:
+    */
+   if (compressed) {
+      pixels = _mesa_validate_pbo_compressed_teximage(ctx, imageSize, pixels,
+						      unpack,
+						      "glCompressedTexImage");
+   } else {
+      /* Pass the real depth so every slice of a 3D image is validated. */
+      pixels = _mesa_validate_pbo_teximage(ctx, dims, width, height, depth,
+					   format, type,
+					   pixels, unpack, "glTexImage");
+   }
+
+   LOCK_HARDWARE(intel);
+
+   if (intelImage->mt) {
+      texImage->Data = intel_miptree_image_map(intel,
+                                               intelImage->mt,
+                                               intelImage->face,
+                                               intelImage->level,
+                                               &dstRowStride,
+                                               intelImage->base.ImageOffsets);
+      texImage->RowStride = dstRowStride / intelImage->mt->cpp;
+   }
+   else {
+      /* Allocate regular memory and store the image there temporarily.   */
+      if (texImage->IsCompressed) {
+         sizeInBytes = texImage->CompressedSize;
+         dstRowStride =
+            _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width);
+         assert(dims != 3);
+      }
+      else {
+         dstRowStride = postConvWidth * texelBytes;
+         sizeInBytes = depth * dstRowStride * postConvHeight;
+      }
+
+      texImage->Data = _mesa_alloc_texmemory(sizeInBytes);
+   }
+
+   DBG("Upload image %dx%dx%d row_len %d "
+       "pitch %d\n",
+       width, height, depth, width * texelBytes, dstRowStride);
+
+   /* Copy data.  Would like to know when it's ok for us to eg. use
+    * the blitter to copy.  Or, use the hardware to do the format
+    * conversion and copy:
+    */
+   if (pixels) {
+       if (compressed) {
+	   if (intelImage->mt) {
+	       struct intel_region *dst = intelImage->mt->region;
+	       _mesa_copy_rect(texImage->Data, dst->cpp, dst->pitch,
+			       0, 0,
+			       intelImage->mt->level[level].width,
+			       MAX2(1, intelImage->mt->level[level].height / 4), /* never 0 rows */
+			       pixels,
+			       srcRowStride,
+			       0, 0);
+	   } else
+	       memcpy(texImage->Data, pixels, imageSize);
+       } else if (!texImage->TexFormat->StoreImage(ctx, dims,
+						   texImage->_BaseFormat,
+						   texImage->TexFormat,
+						   texImage->Data, 0, 0, 0, /* dstX/Y/Zoffset */
+						   dstRowStride,
+						   texImage->ImageOffsets,
+						   width, height, depth,
+						   format, type, pixels, unpack)) {
+	   _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
+       }
+   }
+
+   /* GL_SGIS_generate_mipmap */
+   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
+      intel_generate_mipmap(ctx, target, texObj);
+   }
+
+   _mesa_unmap_teximage_pbo(ctx, unpack);
+
+   if (intelImage->mt) {
+      intel_miptree_image_unmap(intel, intelImage->mt);
+      texImage->Data = NULL;
+   }
+
+   UNLOCK_HARDWARE(intel);
+}
+/* 3D entry point: intelTexImage() with dims 3, imageSize 0, compressed 0. */
+void
+intelTexImage3D(GLcontext * ctx,
+                GLenum target, GLint level,
+                GLint internalFormat,
+                GLint width, GLint height, GLint depth,
+                GLint border,
+                GLenum format, GLenum type, const void *pixels,
+                const struct gl_pixelstore_attrib *unpack,
+                struct gl_texture_object *texObj,
+                struct gl_texture_image *texImage)
+{
+   intelTexImage(ctx, 3, target, level,
+                 internalFormat, width, height, depth, border,
+                 format, type, pixels, unpack, texObj, texImage, 0, 0);
+}
+
+/* 2D entry point: intelTexImage() with dims 2, depth 1, not compressed. */
+void
+intelTexImage2D(GLcontext * ctx,
+                GLenum target, GLint level,
+                GLint internalFormat,
+                GLint width, GLint height, GLint border,
+                GLenum format, GLenum type, const void *pixels,
+                const struct gl_pixelstore_attrib *unpack,
+                struct gl_texture_object *texObj,
+                struct gl_texture_image *texImage)
+{
+   intelTexImage(ctx, 2, target, level,
+                 internalFormat, width, height, 1, border,
+                 format, type, pixels, unpack, texObj, texImage, 0, 0);
+}
+/* 1D entry point: intelTexImage() with dims 1, height and depth both 1. */
+void
+intelTexImage1D(GLcontext * ctx,
+                GLenum target, GLint level,
+                GLint internalFormat,
+                GLint width, GLint border,
+                GLenum format, GLenum type, const void *pixels,
+                const struct gl_pixelstore_attrib *unpack,
+                struct gl_texture_object *texObj,
+                struct gl_texture_image *texImage)
+{
+   intelTexImage(ctx, 1, target, level,
+                 internalFormat, width, 1, 1, border,
+                 format, type, pixels, unpack, texObj, texImage, 0, 0);
+}
+/* Compressed 2D entry: format/type 0, &ctx->Unpack, compressed flag 1. */
+void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
+				GLint internalFormat,
+				GLint width, GLint height, GLint border,
+				GLsizei imageSize, const GLvoid *data,
+				struct gl_texture_object *texObj,
+				struct gl_texture_image *texImage )
+{
+   intelTexImage(ctx, 2, target, level,
+		 internalFormat, width, height, 1, border,
+		 0, 0, data, &ctx->Unpack, texObj, texImage, imageSize, 1);
+}
+
+/**
+ * Map the image if it lives in a miptree, read it back through the core
+ * Mesa getter selected by 'compressed' (non-zero: compressed), then unmap.
+ */
+static void
+intel_get_tex_image(GLcontext * ctx, GLenum target, GLint level,
+		    GLenum format, GLenum type, GLvoid * pixels,
+		    struct gl_texture_object *texObj,
+		    struct gl_texture_image *texImage, int compressed)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_texture_image *intelImage = intel_texture_image(texImage);
+
+   /* Map */
+   if (intelImage->mt) {
+      /* Image is stored in hardware format in a buffer managed by the
+       * kernel.  Need to explicitly map and unmap it.
+       */
+      intelImage->base.Data =
+         intel_miptree_image_map(intel,
+                                 intelImage->mt,
+                                 intelImage->face,
+                                 intelImage->level,
+                                 &intelImage->base.RowStride,
+                                 intelImage->base.ImageOffsets);
+      intelImage->base.RowStride /= intelImage->mt->cpp;
+   }
+   else {
+      /* Otherwise, the image should actually be stored in
+       * intelImage->base.Data.  This is pretty confusing for
+       * everybody, I'd much prefer to separate the two functions of
+       * texImage->Data - storage for texture images in main memory
+       * and access (ie mappings) of images.  In other words, we'd
+       * create a new texImage->Map field and leave Data simply for
+       * storage.
+       */
+      assert(intelImage->base.Data);
+   }
+
+
+   if (compressed) {
+      _mesa_get_compressed_teximage(ctx, target, level, pixels,
+				    texObj, texImage);
+   } else {
+      _mesa_get_teximage(ctx, target, level, format, type, pixels,
+			 texObj, texImage);
+   }
+     
+
+   /* Unmap */
+   if (intelImage->mt) {
+      intel_miptree_image_unmap(intel, intelImage->mt);
+      intelImage->base.Data = NULL;
+   }
+}
+/* Uncompressed read-back: intel_get_tex_image() with compressed = 0. */
+void
+intelGetTexImage(GLcontext * ctx, GLenum target, GLint level,
+                 GLenum format, GLenum type, GLvoid * pixels,
+                 struct gl_texture_object *texObj,
+                 struct gl_texture_image *texImage)
+{
+   intel_get_tex_image(ctx, target, level, format, type, pixels,
+		       texObj, texImage, 0);
+
+
+}
+/* Compressed read-back: format and type passed as 0, compressed = 1. */
+void
+intelGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level,
+			   GLvoid *pixels,
+			   struct gl_texture_object *texObj,
+			   struct gl_texture_image *texImage)
+{
+   intel_get_tex_image(ctx, target, level, 0, 0, pixels,
+		       texObj, texImage, 1);
+}
+/* Store override depth/pitch (and a non-zero offset) on texture texname. */
+void
+intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname,
+		  unsigned long long offset, GLint depth, GLuint pitch)
+{
+   struct intel_context *intel = pDRICtx->driverPrivate;
+   struct intel_texture_object *intelObj =
+      intel_texture_object(_mesa_lookup_texture(&intel->ctx, texname));
+
+   if (intelObj == NULL)
+      return;
+
+   if (intelObj->mt != NULL)
+      intel_miptree_release(intel, &intelObj->mt);
+
+   /* A zero offset leaves the stored textureOffset as it was. */
+   if (offset != 0)
+      intelObj->textureOffset = offset;
+   intelObj->imageOverride = GL_TRUE;
+   intelObj->depthOverride = depth;
+   intelObj->pitchOverride = pitch;
+}
+/* Make color renderbuffer 0 of drawable dPriv the level-0 image of the
+ * texture bound to 'target' on the current texture unit. */
+void
+intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
+{
+   struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
+   struct intel_context *intel = pDRICtx->driverPrivate;
+   struct intel_texture_object *intelObj;
+   struct intel_texture_image *intelImage;
+   struct intel_mipmap_tree *mt;
+   struct intel_renderbuffer *rb;
+   struct gl_texture_unit *texUnit;
+   struct gl_texture_object *texObj;
+   struct gl_texture_image *texImage;
+   int level = 0, type, format, internalFormat;
+
+   texUnit = &intel->ctx.Texture.Unit[intel->ctx.Texture.CurrentUnit];
+   texObj = _mesa_select_tex_object(&intel->ctx, texUnit, target);
+   intelObj = intel_texture_object(texObj);
+   if (!intelObj)
+      return;
+
+   __driParseEvents(pDRICtx, dPriv);
+   rb = intel_fb->color_rb[0];
+   format = GL_BGRA;
+   type = GL_UNSIGNED_BYTE;
+   internalFormat = (rb->region->cpp == 3 ? 3 : 4);
+
+   mt = intel_miptree_create_for_region(intel, target,
+					internalFormat,
+					0, 0, rb->region, 1, 0);
+   if (mt == NULL)
+       return;
+
+   _mesa_lock_texture(&intel->ctx, texObj);
+   if (intelObj->mt)
+      intel_miptree_release(intel, &intelObj->mt);
+   intelObj->mt = mt;
+
+   texImage = _mesa_get_tex_image(&intel->ctx, texObj, target, level);
+   _mesa_init_teximage_fields(&intel->ctx, target, texImage,
+			      rb->region->pitch, rb->region->height, 1,
+			      0, internalFormat);
+
+   intelImage = intel_texture_image(texImage);
+   intelImage->face = target_to_face(target);
+   intelImage->level = level;
+   texImage->TexFormat = intelChooseTextureFormat(&intel->ctx, internalFormat,
+                                                  format, type);
+   _mesa_set_fetch_functions(texImage, 2);
+   texImage->RowStride = rb->region->pitch;
+   /* Drop any tree the image still references so it is not leaked. */
+   if (intelImage->mt)
+      intel_miptree_release(intel, &intelImage->mt);
+   intel_miptree_reference(&intelImage->mt, intelObj->mt);
+
+   if (!intel_miptree_match_image(intelObj->mt, &intelImage->base,
+				  intelImage->face, intelImage->level)) {
+	   fprintf(stderr, "miptree doesn't match image\n");
+   }
+   _mesa_unlock_texture(&intel->ctx, texObj);
+}
diff --git a/shared/intel_tex_layout.c b/shared/intel_tex_layout.c
index 39a443c..edc3a2e 100644
--- a/shared/intel_tex_layout.c
+++ b/shared/intel_tex_layout.c
@@ -32,12 +32,24 @@
 
 #include "intel_mipmap_tree.h"
 #include "intel_tex_layout.h"
+#include "intel_context.h"
 #include "macros.h"
 
-
-static int align(int value, int alignment)
+GLuint intel_compressed_alignment(GLenum internalFormat)
 {
-   return (value + alignment - 1) & ~(alignment - 1);
+    GLuint alignment = 4;
+
+    switch (internalFormat) {
+    case GL_COMPRESSED_RGB_FXT1_3DFX:
+    case GL_COMPRESSED_RGBA_FXT1_3DFX:
+        alignment = 8;
+        break;
+
+    default:
+        break;
+    }
+
+    return alignment;
 }
 
 void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt )
@@ -51,17 +63,30 @@ void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tr
 
    mt->pitch = mt->width0;
 
+   if (mt->compressed) {
+       align_w = intel_compressed_alignment(mt->internal_format);
+       mt->pitch = ALIGN(mt->width0, align_w);
+   }
+
    /* May need to adjust pitch to accomodate the placement of
     * the 2nd mipmap.  This occurs when the alignment
     * constraints of mipmap placement push the right edge of the
     * 2nd mipmap out past the width of its parent.
     */
    if (mt->first_level != mt->last_level) {
-      GLuint mip1_width = align(minify(mt->width0), align_w)
-			+ minify(minify(mt->width0));
+       GLuint mip1_width;
+
+       if (mt->compressed) {
+           mip1_width = ALIGN(minify(mt->width0), align_w)
+               + ALIGN(minify(minify(mt->width0)), align_w);
+       } else {
+           mip1_width = ALIGN(minify(mt->width0), align_w)
+               + minify(minify(mt->width0));
+       }
 
-      if (mip1_width > mt->width0)
-	 mt->pitch = mip1_width;
+       if (mip1_width > mt->pitch) {
+           mt->pitch = mip1_width;
+       }
    }
 
    /* Pitch must be a whole number of dwords, even though we
@@ -79,7 +104,7 @@ void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tr
       if (mt->compressed)
 	 img_height = MAX2(1, height/4);
       else
-	 img_height = align(height, align_h);
+	 img_height = ALIGN(height, align_h);
 
 
       /* Because the images are packed better, the final offset
@@ -90,7 +115,7 @@ void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tr
       /* Layout_below: step right after second mipmap.
        */
       if (level == mt->first_level + 1) {
-	 x += align(width, align_w);
+	 x += ALIGN(width, align_w);
       }
       else {
 	 y += img_height;
diff --git a/shared/intel_tex_layout.h b/shared/intel_tex_layout.h
index 46151db..193699d 100644
--- a/shared/intel_tex_layout.h
+++ b/shared/intel_tex_layout.h
@@ -39,3 +39,4 @@ static GLuint minify( GLuint d )
 }
 
 extern void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt );
+extern GLuint intel_compressed_alignment(GLenum);
diff --git a/i915/intel_tex.h b/shared/intel_tex_obj.h
index 9b7e550..5a93461 100644
--- a/i915/intel_tex.h
+++ b/shared/intel_tex_obj.h
@@ -25,21 +25,59 @@
  * 
  **************************************************************************/
 
-#ifndef INTELTEX_INC
-#define INTELTEX_INC
+#ifndef _INTEL_TEX_OBJ_H
+#define _INTEL_TEX_OBJ_H
 
-#include "mtypes.h"
-#include "intel_context.h"
-#include "texmem.h"
+struct intel_texture_object
+{
+   struct gl_texture_object base;       /* The "parent" object */
 
+   /* The mipmap tree must include at least these levels once
+    * validated:
+    */
+   GLuint firstLevel;
+   GLuint lastLevel;
 
-void intelInitTextureFuncs( struct dd_function_table *functions );
+   /* Offset for firstLevel image:
+    */
+   GLuint textureOffset;
 
-void intelDestroyTexObj( intelContextPtr intel, intelTextureObjectPtr t );
-int intelUploadTexImages( intelContextPtr intel, intelTextureObjectPtr t,
-			  GLuint face );
+   /* On validation any active images held in main memory or in other
+    * regions will be copied to this region and the old storage freed.
+    */
+   struct intel_mipmap_tree *mt;
 
-GLboolean 
-intel_driReinitTextureHeap( driTexHeap *heap,
-			    unsigned size );
-#endif
+   GLboolean imageOverride;
+   GLint depthOverride;
+   GLuint pitchOverride;
+};
+
+struct intel_texture_image
+{
+   struct gl_texture_image base;
+
+   /* These aren't stored in gl_texture_image 
+    */
+   GLuint level;
+   GLuint face;
+
+   /* If intelImage->mt != NULL, image data is stored here.
+    * Else if intelImage->base.Data != NULL, image is stored there.
+    * Else there is no image data.
+    */
+   struct intel_mipmap_tree *mt;
+};
+
+static INLINE struct intel_texture_object *
+intel_texture_object(struct gl_texture_object *obj)
+{
+   return (struct intel_texture_object *) obj;
+}
+
+static INLINE struct intel_texture_image *
+intel_texture_image(struct gl_texture_image *img)
+{
+   return (struct intel_texture_image *) img;
+}
+
+#endif /* _INTEL_TEX_OBJ_H */
diff --git a/shared/intel_tex_subimage.c b/shared/intel_tex_subimage.c
new file mode 100644
index 0000000..5428a1d
--- /dev/null
+++ b/shared/intel_tex_subimage.c
@@ -0,0 +1,186 @@
+
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "mtypes.h"
+#include "texobj.h"
+#include "texstore.h"
+#include "texcompress.h"
+#include "enums.h"
+
+#include "intel_context.h"
+#include "intel_tex.h"
+#include "intel_mipmap_tree.h"
+
+#define FILE_DEBUG_FLAG DEBUG_TEXTURE
+
+static void
+intelTexSubimage(GLcontext * ctx,
+                 GLint dims,
+                 GLenum target, GLint level,
+                 GLint xoffset, GLint yoffset, GLint zoffset,
+                 GLint width, GLint height, GLint depth,
+                 GLenum format, GLenum type, const void *pixels,
+                 const struct gl_pixelstore_attrib *packing,
+                 struct gl_texture_object *texObj,
+                 struct gl_texture_image *texImage)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_texture_image *intelImage = intel_texture_image(texImage);
+   GLuint dstRowStride = 0;
+   /* Shared worker for the 1D/2D/3D entry points: stores 'pixels' into texImage. */
+   DBG("%s target %s level %d offset %d,%d %dx%d\n", __FUNCTION__,
+       _mesa_lookup_enum_by_nr(target),
+       level, xoffset, yoffset, width, height);
+   /* intelFlush() is called before the texture storage is written. */
+   intelFlush(ctx);
+   /* NOTE(review): the string passed below is always "glTexSubImage2D", whatever dims is. */
+   pixels =
+      _mesa_validate_pbo_teximage(ctx, dims, width, height, depth, format,
+                                  type, pixels, packing, "glTexSubImage2D");
+   if (!pixels)
+      return;
+
+   LOCK_HARDWARE(intel);
+
+   /* Map buffer if necessary.  Need to lock to prevent other contexts
+    * from uploading the buffer under us.
+    */
+   if (intelImage->mt)  /* image lives in a miptree: map it, which also yields the stride */
+      texImage->Data = intel_miptree_image_map(intel,
+                                               intelImage->mt,
+                                               intelImage->face,
+                                               intelImage->level,
+                                               &dstRowStride,
+                                               texImage->ImageOffsets);
+   else {               /* no miptree: texImage->Data is used as it stands */
+      if (texImage->IsCompressed) {     /* NOTE(review): stride uses the sub-image 'width' - confirm */
+         dstRowStride =
+            _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width);
+         assert(dims != 3);
+      }
+      else {
+         dstRowStride = texImage->RowStride * texImage->TexFormat->TexelBytes;
+      }
+   }
+   /* Either path above must have produced a non-zero stride. */
+   assert(dstRowStride);
+
+   if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat,
+                                        texImage->TexFormat,
+                                        texImage->Data,
+                                        xoffset, yoffset, zoffset,
+                                        dstRowStride,
+                                        texImage->ImageOffsets,
+                                        width, height, depth,
+                                        format, type, pixels, packing)) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage");
+   }
+
+   /* GL_SGIS_generate_mipmap */
+   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
+      intel_generate_mipmap(ctx, target, texObj);
+   }
+   /* Presumably releases whatever _mesa_validate_pbo_teximage() mapped - TODO confirm. */
+   _mesa_unmap_teximage_pbo(ctx, packing);
+   /* Undo the miptree mapping made above and clear the pointer it returned. */
+   if (intelImage->mt) {
+      intel_miptree_image_unmap(intel, intelImage->mt);
+      texImage->Data = NULL;
+   }
+
+   UNLOCK_HARDWARE(intel);
+}
+
+
+
+
+
+void
+intelTexSubImage3D(GLcontext * ctx,
+                   GLenum target,
+                   GLint level,
+                   GLint xoffset, GLint yoffset, GLint zoffset,
+                   GLsizei width, GLsizei height, GLsizei depth,
+                   GLenum format, GLenum type,
+                   const GLvoid * pixels,
+                   const struct gl_pixelstore_attrib *packing,
+                   struct gl_texture_object *texObj,
+                   struct gl_texture_image *texImage)
+{
+   /* 3D entry point: forwards every offset and size unchanged with dims = 3. */
+   intelTexSubimage(ctx, 3,
+                    target, level,
+                    xoffset, yoffset, zoffset,
+                    width, height, depth,
+                    format, type, pixels, packing, texObj, texImage);
+
+}
+
+
+
+void
+intelTexSubImage2D(GLcontext * ctx,
+                   GLenum target,
+                   GLint level,
+                   GLint xoffset, GLint yoffset,
+                   GLsizei width, GLsizei height,
+                   GLenum format, GLenum type,
+                   const GLvoid * pixels,
+                   const struct gl_pixelstore_attrib *packing,
+                   struct gl_texture_object *texObj,
+                   struct gl_texture_image *texImage)
+{
+   /* 2D entry point: dims = 2, with the unused third dimension pinned. */
+   intelTexSubimage(ctx, 2,
+                    target, level,
+                    xoffset, yoffset, 0,        /* zoffset = 0 */
+                    width, height, 1,           /* depth = 1 */
+                    format, type, pixels, packing, texObj, texImage);
+
+}
+
+
+void
+intelTexSubImage1D(GLcontext * ctx,
+                   GLenum target,
+                   GLint level,
+                   GLint xoffset,
+                   GLsizei width,
+                   GLenum format, GLenum type,
+                   const GLvoid * pixels,
+                   const struct gl_pixelstore_attrib *packing,
+                   struct gl_texture_object *texObj,
+                   struct gl_texture_image *texImage)
+{
+   intelTexSubimage(ctx, 1,                     /* 1D entry point: dims = 1 */
+                    target, level,
+                    xoffset, 0, 0,              /* yoffset = zoffset = 0 */
+                    width, 1, 1,                /* height = depth = 1 */
+                    format, type, pixels, packing, texObj, texImage);
+
+}
diff --git a/shared/intel_tex_validate.c b/shared/intel_tex_validate.c
new file mode 100644
index 0000000..1b3aa89
--- /dev/null
+++ b/shared/intel_tex_validate.c
@@ -0,0 +1,314 @@
+#include "mtypes.h"
+#include "macros.h"
+
+#include "intel_context.h"
+#include "intel_batchbuffer.h"
+#include "intel_mipmap_tree.h"
+#include "intel_tex.h"
+
+#define FILE_DEBUG_FLAG DEBUG_TEXTURE
+
+/**
+ * Compute which mipmap levels really need to be sent to the hardware.
+ * This depends on the base image size, GL_TEXTURE_MIN_LOD,
+ * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, and GL_TEXTURE_MAX_LEVEL.
+ */
+static void
+intel_calculate_first_last_level(struct intel_texture_object *intelObj)
+{
+   struct gl_texture_object *tObj = &intelObj->base;
+   const struct gl_texture_image *const baseImage =
+      tObj->Image[0][tObj->BaseLevel];
+
+   /* These must be signed values.  MinLod and MaxLod can be negative numbers,
+    * and having firstLevel and lastLevel as signed prevents the need for
+    * extra sign checks.
+    */
+   int firstLevel;
+   int lastLevel;
+
+   /* Yes, this looks overly complicated, but it's all needed.
+    */
+   switch (tObj->Target) {
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_3D:
+   case GL_TEXTURE_CUBE_MAP:
+      if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) {
+         /* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL.
+          */
+         firstLevel = lastLevel = tObj->BaseLevel;
+      }
+      else {
+#ifdef I915     /* both levels are clamped to [BaseLevel, BaseLevel + MaxLog2] */
+         firstLevel = tObj->BaseLevel + (GLint) (tObj->MinLod + 0.5);
+         firstLevel = MAX2(firstLevel, tObj->BaseLevel);
+         firstLevel = MIN2(firstLevel, tObj->BaseLevel + baseImage->MaxLog2);
+         lastLevel = tObj->BaseLevel + (GLint) (tObj->MaxLod + 0.5);
+         lastLevel = MAX2(lastLevel, tObj->BaseLevel);
+         lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2);
+         lastLevel = MIN2(lastLevel, tObj->MaxLevel);
+         lastLevel = MAX2(firstLevel, lastLevel);       /* need at least one level */
+#else
+	 /* Currently not taking min/max lod into account here, those
+	  * values are programmed as sampler state elsewhere and we
+	  * upload the same mipmap levels regardless.  Not sure if
+	  * this makes sense as it means it isn't possible for the app
+	  * to use min/max lod to reduce texture memory pressure:
+	  */
+	 firstLevel = tObj->BaseLevel;
+	 lastLevel = MIN2(tObj->BaseLevel + baseImage->MaxLog2,
+			  tObj->MaxLevel);
+	 lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
+#endif
+      }
+      break;
+   case GL_TEXTURE_RECTANGLE_NV:
+   case GL_TEXTURE_4D_SGIS:
+      firstLevel = lastLevel = 0;       /* only level 0 is used for these targets */
+      break;
+   default:
+      return;                           /* other targets: intelObj is left untouched */
+   }
+
+   /* save these values (the object stores them as GLuint) */
+   intelObj->firstLevel = firstLevel;
+   intelObj->lastLevel = lastLevel;
+}
+
+/**
+ * Copies the image's contents at its level into the object's miptree,
+ * and updates the image to point at the object's miptree.
+ */
+static void
+copy_image_data_to_tree(struct intel_context *intel,
+                        struct intel_texture_object *intelObj,
+                        struct intel_texture_image *intelImage)
+{
+   if (intelImage->mt) {
+      /* Copy potentially with the blitter:
+       */
+      intel_miptree_image_copy(intel,
+                               intelObj->mt,
+                               intelImage->face,
+                               intelImage->level, intelImage->mt);
+      /* The image's reference to its previous tree is dropped after the copy. */
+      intel_miptree_release(intel, &intelImage->mt);
+   }
+   else {
+      assert(intelImage->base.Data != NULL);
+
+      /* More straightforward upload.  
+       */
+      intel_miptree_image_data(intel,
+                               intelObj->mt,
+                               intelImage->face,
+                               intelImage->level,
+                               intelImage->base.Data,
+                               intelImage->base.RowStride,
+                               intelImage->base.RowStride *
+                               intelImage->base.Height);
+      _mesa_align_free(intelImage->base.Data);  /* main-memory copy is freed after the upload */
+      intelImage->base.Data = NULL;
+   }
+   /* On both paths the image ends up referencing the object's tree. */
+   intel_miptree_reference(&intelImage->mt, intelObj->mt);
+}
+
+
+/* Bring the miptree of the texture currently bound to 'unit' up to date.
+ * Returns GL_FALSE on the fallback path or if no tree could be created. */
+GLuint
+intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
+{
+   struct gl_texture_object *tObj = intel->ctx.Texture.Unit[unit]._Current;
+   struct intel_texture_object *intelObj = intel_texture_object(tObj);
+   int comp_byte = 0;
+   int cpp;
+
+   GLuint face, i;
+   GLuint nr_faces = 0;
+   struct intel_texture_image *firstImage;
+
+   GLboolean need_flush = GL_FALSE;
+
+   /* We know/require this is true by now:
+    */
+   assert(intelObj->base._Complete);
+
+   /* What levels must the tree include at a minimum?
+    */
+   intel_calculate_first_last_level(intelObj);
+   firstImage =
+      intel_texture_image(intelObj->base.Image[0][intelObj->firstLevel]);
+
+   /* Fallback case:
+    */
+   if (firstImage->base.Border ||
+       ((firstImage->base._BaseFormat == GL_DEPTH_COMPONENT) &&
+        ((tObj->WrapS == GL_CLAMP_TO_BORDER) ||
+         (tObj->WrapT == GL_CLAMP_TO_BORDER)))) {
+      if (intelObj->mt) {
+         intel_miptree_release(intel, &intelObj->mt);
+      }
+      return GL_FALSE;
+   }
+
+
+   /* If both firstImage and intelObj have a tree which can contain
+    * all active images, favour firstImage.  Note that because of the
+    * completeness requirement, we know that the image dimensions
+    * will match.
+    */
+   if (firstImage->mt &&
+       firstImage->mt != intelObj->mt &&
+       firstImage->mt->first_level <= intelObj->firstLevel &&
+       firstImage->mt->last_level >= intelObj->lastLevel) {
+
+      if (intelObj->mt)
+         intel_miptree_release(intel, &intelObj->mt);
+
+      intel_miptree_reference(&intelObj->mt, firstImage->mt);
+   }
+
+   if (firstImage->base.IsCompressed) {
+      comp_byte = intel_compressed_num_bytes(firstImage->base.TexFormat->MesaFormat);
+      cpp = comp_byte;
+   }
+   else cpp = firstImage->base.TexFormat->TexelBytes;
+
+   /* Check tree can hold all active levels.  Check tree matches
+    * target, imageFormat, etc.
+    * 
+    * XXX: For some layouts (eg i945?), the test might have to be
+    * first_level == firstLevel, as the tree isn't valid except at the
+    * original start level.  Hope to get around this by
+    * programming minLod, maxLod, baseLevel into the hardware and
+    * leaving the tree alone.
+    */
+   if (intelObj->mt &&
+       (intelObj->mt->target != intelObj->base.Target ||
+	intelObj->mt->internal_format != firstImage->base.InternalFormat ||
+	intelObj->mt->first_level != intelObj->firstLevel ||
+	intelObj->mt->last_level != intelObj->lastLevel ||
+	intelObj->mt->width0 != firstImage->base.Width ||
+	intelObj->mt->height0 != firstImage->base.Height ||
+	intelObj->mt->depth0 != firstImage->base.Depth ||
+	intelObj->mt->cpp != cpp ||
+	intelObj->mt->compressed != firstImage->base.IsCompressed)) {
+      intel_miptree_release(intel, &intelObj->mt);
+   }
+
+   /* May need to create a new tree:
+    */
+   if (!intelObj->mt) {
+      intelObj->mt = intel_miptree_create(intel,
+                                          intelObj->base.Target,
+                                          firstImage->base.InternalFormat,
+                                          intelObj->firstLevel,
+                                          intelObj->lastLevel,
+                                          firstImage->base.Width,
+                                          firstImage->base.Height,
+                                          firstImage->base.Depth,
+                                          cpp, comp_byte);
+      if (!intelObj->mt)        /* no tree: bail out before the copy loop uses it */
+         return GL_FALSE;
+   }
+
+   /* Pull in any images not in the object's tree:
+    */
+   nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+   for (face = 0; face < nr_faces; face++) {
+      for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) {
+         struct intel_texture_image *intelImage =
+            intel_texture_image(intelObj->base.Image[face][i]);
+
+         /* Need to import images in main memory or held in other trees.
+          */
+         if (intelObj->mt != intelImage->mt) {
+            copy_image_data_to_tree(intel, intelObj, intelImage);
+	    need_flush = GL_TRUE;
+         }
+      }
+   }
+
+#ifdef I915
+   /* XXX: what is this flush about?
+    * On 965, it causes a batch flush in the middle of the state relocation
+    * emits, which means that the eventual rendering doesn't have all of the
+    * required relocations in place.
+    */
+   if (need_flush)
+      intel_batchbuffer_flush(intel->batch);
+#endif
+
+   return GL_TRUE;
+}
+
+void
+intel_tex_map_level_images(struct intel_context *intel,
+			   struct intel_texture_object *intelObj,
+			   int level)
+{
+   GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+   GLuint face;
+   /* Map 'level' of every face (6 for cube maps, otherwise 1). */
+   for (face = 0; face < nr_faces; face++) {
+      struct intel_texture_image *intelImage =
+	 intel_texture_image(intelObj->base.Image[face][level]);
+      /* Images without a miptree are skipped; their base.Data is left alone. */
+      if (intelImage->mt) {
+	 intelImage->base.Data =
+	    intel_miptree_image_map(intel,
+				    intelImage->mt,
+				    intelImage->face,
+				    intelImage->level,
+				    &intelImage->base.RowStride,
+				    intelImage->base.ImageOffsets);
+	 /* convert stride to texels, not bytes */
+	 intelImage->base.RowStride /= intelImage->mt->cpp;
+	 /* intelImage->base.ImageStride /= intelImage->mt->cpp; */
+      }
+   }
+}
+
+void
+intel_tex_unmap_level_images(struct intel_context *intel,
+			     struct intel_texture_object *intelObj,
+			     int level)
+{
+   GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+   GLuint face;
+   /* Unmap 'level' of every face (6 for cube maps, otherwise 1). */
+   for (face = 0; face < nr_faces; face++) {
+      struct intel_texture_image *intelImage =
+	 intel_texture_image(intelObj->base.Image[face][level]);
+      /* Only images held in a miptree are unmapped and have base.Data cleared. */
+      if (intelImage->mt) {
+	 intel_miptree_image_unmap(intel, intelImage->mt);
+	 intelImage->base.Data = NULL;
+      }
+   }
+}
+
+void
+intel_tex_map_images(struct intel_context *intel,
+                     struct intel_texture_object *intelObj)
+{
+   int i;
+
+   DBG("%s\n", __FUNCTION__);
+   /* Map every level in the object's [firstLevel, lastLevel] range. */
+   for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++)
+      intel_tex_map_level_images(intel, intelObj, i);
+}
+
+void
+intel_tex_unmap_images(struct intel_context *intel,
+                       struct intel_texture_object *intelObj)
+{
+   int i;
+   /* Unmap every level in the object's [firstLevel, lastLevel] range. */
+   for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++)
+      intel_tex_unmap_level_images(intel, intelObj, i);
+}
diff --git a/i965/server/i830_dri.h b/shared/server/i830_dri.h
index 2295181..def049e 100644
--- a/i965/server/i830_dri.h
+++ b/shared/server/i830_dri.h
@@ -1,15 +1,14 @@
-/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/i810/i830_dri.h,v 1.4 2002/10/30 12:52:18 alanh Exp $ */
+/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/i810/i830_dri.h,v 1.6 2003/09/28 20:15:59 alanh Exp $ */
 
 #ifndef _I830_DRI_H
 #define _I830_DRI_H
 
 #include "xf86drm.h"
-#include "i830_common.h"
 
 #define I830_MAX_DRAWABLES 256
 
 #define I830_MAJOR_VERSION 1
-#define I830_MINOR_VERSION 3
+#define I830_MINOR_VERSION 9
 #define I830_PATCHLEVEL 0
 
 #define I830_REG_SIZE 0x80000
@@ -24,7 +23,7 @@ typedef struct _I830DRIRec {
    drmSize unused3; /* depthbufferSize */
    drm_handle_t unused4; /* depthbuffer */
 
-   drmSize unused5; /* rotatedSize /*/
+   drmSize unused5; /* rotatedSize */
    drm_handle_t unused6; /* rotatedbuffer */
 
    drm_handle_t unused7; /* textures */
diff --git a/i915/server/intel.h b/shared/server/intel.h
index d7858a2..6ea7249 100644
--- a/i915/server/intel.h
+++ b/shared/server/intel.h
@@ -75,6 +75,9 @@
 
 #define I830_GMCH_CTRL		0x52
 
+#define I830_GMCH_MEM_MASK      0x1
+#define I830_GMCH_MEM_64M       0x1
+#define I830_GMCH_MEM_128M      0
 
 #define I830_GMCH_GMS_MASK			0x70
 #define I830_GMCH_GMS_DISABLED		0x00
@@ -141,7 +144,7 @@ typedef struct _I830Rec {
    unsigned char *MMIOBase;
    unsigned char *FbBase;
    int cpp;
-
+   uint32_t aper_size;
    unsigned int bios_version;
 
    /* These are set in PreInit and never changed. */
diff --git a/i915/server/intel_dri.c b/shared/server/intel_dri.c
index b6946b7..e49c421 100644
--- a/i915/server/intel_dri.c
+++ b/shared/server/intel_dri.c
@@ -292,15 +292,43 @@ static void I830SetupMemoryTiling(const DRIDriverContext *ctx, I830Rec *pI830)
 
 static int I830DetectMemory(const DRIDriverContext *ctx, I830Rec *pI830)
 {
-  struct pci_device host_bridge;
+  struct pci_device host_bridge, ig_dev;
   uint32_t gmch_ctrl;
   int memsize = 0;
   int range;
-
+  uint32_t aper_size;
+  uint32_t membase2 = 0;
+      
   memset(&host_bridge, 0, sizeof(host_bridge));
+  memset(&ig_dev, 0, sizeof(ig_dev));
+
+  ig_dev.dev = 2;
 
   pci_device_cfg_read_u32(&host_bridge, &gmch_ctrl, I830_GMCH_CTRL);
-  
+
+  if (IS_I830(pI830) || IS_845G(pI830)) {      /* i830/845G: size comes from GMCH_CTRL */
+    if ((gmch_ctrl & I830_GMCH_MEM_MASK) == I830_GMCH_MEM_128M) {
+      aper_size = 0x8000000;                    /* 128MB aperture */
+    } else {
+      aper_size = 0x4000000;                    /* 64MB aperture */
+    }
+  } else {
+    if (IS_I9XX(pI830)) {                       /* i9xx: size comes from config dword 0x18 */
+      int ret;
+      ret = pci_device_cfg_read_u32(&ig_dev, &membase2, 0x18);
+      if (membase2 & 0x08000000)
+	aper_size = 0x8000000;                  /* 128MB aperture */
+      else
+	aper_size = 0x10000000;                 /* 256MB aperture */
+
+      fprintf(stderr,"aper size is %08X %08x %d\n", aper_size, membase2, ret);
+    } else
+      aper_size = 0x8000000;                    /* 128MB aperture */
+  }
+
+  pI830->aper_size = aper_size;
+
+
   /* We need to reduce the stolen size, by the GTT and the popup.
    * The GTT varying according the the FbMapSize and the popup is 4KB */
   range = (ctx->shared.fbSize / (1024*1024)) + 4;
@@ -577,7 +605,8 @@ I830AllocateMemory(const DRIDriverContext *ctx, I830Rec *pI830)
     fprintf(stderr,"unable to allocate context buffer %ld\n", ret);
     return FALSE;
   }
-  
+
+#if 0  
   memset(&(pI830->TexMem), 0, sizeof(pI830->TexMem));
   pI830->TexMem.Key = -1;
 
@@ -588,6 +617,7 @@ I830AllocateMemory(const DRIDriverContext *ctx, I830Rec *pI830)
     fprintf(stderr,"unable to allocate texture memory %ld\n", ret);
     return FALSE;
   }
+#endif
 
   return TRUE;
 }
@@ -605,12 +635,29 @@ I830BindMemory(const DRIDriverContext *ctx, I830Rec *pI830)
     return FALSE;
   if (!BindAgpRange(ctx, &pI830->ContextMem))
     return FALSE;
+#if 0
   if (!BindAgpRange(ctx, &pI830->TexMem))
     return FALSE;
-
+#endif
   return TRUE;
 }
 
+static void SetupDRIMM(const DRIDriverContext *ctx, I830Rec *pI830)
+{
+  unsigned long aperEnd = ROUND_DOWN_TO(pI830->aper_size, GTT_PAGE_SIZE) / GTT_PAGE_SIZE;
+  unsigned long aperStart = ROUND_TO(pI830->aper_size - KB(32768), GTT_PAGE_SIZE) / GTT_PAGE_SIZE;
+  /* Give the aperture's last KB(32768) bytes, counted in GTT pages, to drmMMInit(). */
+  fprintf(stderr, "aper size is %08X\n", ctx->shared.fbSize); /* NOTE(review): prints fbSize, not aper_size */
+  if (drmMMInit(ctx->drmFD, aperStart, aperEnd - aperStart, DRM_BO_MEM_TT)) {
+      fprintf(stderr,
+	      "DRM MM Initialization Failed\n");
+  } else {
+    fprintf(stderr, /* both values are unsigned long, hence %lx / %lu */
+	    "DRM MM Initialized at offset 0x%lx length %lu page\n", aperStart, aperEnd-aperStart);
+  }
+
+}
+
 static Bool
 I830CleanupDma(const DRIDriverContext *ctx)
 {
@@ -810,6 +857,7 @@ I830DRIMapScreenRegions(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sar
    fprintf(stderr, "[drm] Depth Buffer = 0x%08x\n",
               sarea->depth_handle);
 
+#if 0
    if (drmAddMap(ctx->drmFD,
 		 (drm_handle_t)sarea->tex_offset,
 		 sarea->tex_size, DRM_AGP, 0,
@@ -820,7 +868,7 @@ I830DRIMapScreenRegions(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sar
    }
    fprintf(stderr, "[drm] textures = 0x%08x\n",
 	      sarea->tex_handle);
-
+#endif
    return TRUE;
 }
 
@@ -848,29 +896,6 @@ I830DRIUnmapScreenRegions(const DRIDriverContext *ctx, I830Rec *pI830, drmI830Sa
    }
 }
 
-static void
-I830InitTextureHeap(const DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea)
-{
-   /* Start up the simple memory manager for agp space */
-   drmI830MemInitHeap drmHeap;
-   drmHeap.region = I830_MEM_REGION_AGP;
-   drmHeap.start  = 0;
-   drmHeap.size   = sarea->tex_size;
-      
-   if (drmCommandWrite(ctx->drmFD, DRM_I830_INIT_HEAP,
-			  &drmHeap, sizeof(drmHeap))) {
-      fprintf(stderr,
-		    "[drm] Failed to initialized agp heap manager\n");
-   } else {
-      fprintf(stderr,
-		    "[drm] Initialized kernel agp heap manager, %d\n",
-		    sarea->tex_size);
-
-      I830SetParam(ctx, I830_SETPARAM_TEX_LRU_LOG_GRANULARITY, 
-		      sarea->log_tex_granularity);
-   }
-}
-
 static Bool
 I830DRIDoMappings(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea)
 {
@@ -892,7 +917,7 @@ I830DRIDoMappings(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea)
    /* init to zero to be safe */
 
   I830DRIMapScreenRegions(ctx, pI830, sarea);
-  I830InitTextureHeap(ctx, pI830, sarea);
+  SetupDRIMM(ctx, pI830);
 
    if (ctx->pciDevice != PCI_CHIP_845_G &&
        ctx->pciDevice != PCI_CHIP_I830_M) {
@@ -1084,6 +1109,10 @@ I830ScreenInit(DRIDriverContext *ctx, I830Rec *pI830)
      return FALSE;
    }
 
+   pSAREAPriv->rotated_offset = -1;
+   pSAREAPriv->rotated_size = 0;
+   pSAREAPriv->rotated_pitch = ctx->shared.virtualWidth;
+
    pSAREAPriv->front_offset = pI830->FrontBuffer.Start;
    pSAREAPriv->front_size = pI830->FrontBuffer.Size;
    pSAREAPriv->width = ctx->shared.virtualWidth;
@@ -1095,8 +1124,10 @@ I830ScreenInit(DRIDriverContext *ctx, I830Rec *pI830)
    pSAREAPriv->back_size = pI830->BackBuffer.Size;
    pSAREAPriv->depth_offset = pI830->DepthBuffer.Start;
    pSAREAPriv->depth_size = pI830->DepthBuffer.Size;
+#if 0
    pSAREAPriv->tex_offset = pI830->TexMem.Start;
    pSAREAPriv->tex_size = pI830->TexMem.Size;
+#endif
    pSAREAPriv->log_tex_granularity = pI830->TexGranularity;
 
    ctx->driverClientMsg = malloc(sizeof(I830DRIRec));
@@ -1108,14 +1139,6 @@ I830ScreenInit(DRIDriverContext *ctx, I830Rec *pI830)
    pI830DRI->height = ctx->shared.virtualHeight;
    pI830DRI->mem = ctx->shared.fbSize;
    pI830DRI->cpp = ctx->cpp;
-   pI830DRI->backOffset = pI830->BackBuffer.Start;
-   pI830DRI->backPitch = pI830->BackBuffer.Pitch; 
-
-   pI830DRI->depthOffset = pI830->DepthBuffer.Start;
-   pI830DRI->depthPitch = pI830->DepthBuffer.Pitch; 
-
-   pI830DRI->fbOffset = pI830->FrontBuffer.Start;
-   pI830DRI->fbStride = pI830->FrontBuffer.Pitch;
 
    pI830DRI->bitsPerPixel = ctx->bpp;
    pI830DRI->sarea_priv_offset = sizeof(drm_sarea_t);
author	Luc Verhaegen <libv@skynet.be>	2010-03-13 02:36:00 +0100
committer	Luc Verhaegen <libv@skynet.be>	2010-03-13 02:36:00 +0100
commit	fedcb3219e8f9a587c693bbb2178ec3e83bf0320 (patch)
tree	b37f142039934c27eb13d9ff2344776d7f92bff6
parent	6e23622cb869c14d82f8c901c4bbea80ded6220e (diff)