Import i915 and i965 dri drivers from mesa 7.8-rc1.7.8-rc1

author: Luc Verhaegen <libv@skynet.be> 2010-03-16 20:19:03 +0100
committer: Luc Verhaegen <libv@skynet.be> 2010-03-16 20:19:03 +0100
commit: 54a8e9cc3988d908b5b846a752679127cacefd3b (patch)
tree: ee289172f13f246903151b12db7808b7cb0651e8
parent: 6b263d1e2c599ddcc0ce4c84425dbc1ac8de020c (diff)
128 files changed, 4787 insertions, 5747 deletions
diff --git a/configure.ac b/configure.ac
index d339ca1..93954e0 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,7 +1,7 @@
 # Process this file with autoconf to produce a configure script
 
 AC_PREREQ(2.57)
-AC_INIT([mesa-dri-i9xx], 7.7.0, [], mesa-dri-i9xx)
+AC_INIT([mesa-dri-i9xx], 7.8.0, [], mesa-dri-i9xx)
 
 AM_INIT_AUTOMAKE([dist-bzip2])
 
@@ -16,8 +16,8 @@ AC_PROG_CC
 AC_HEADER_STDC
 
 PKG_CHECK_MODULES([DRM], [libdrm >= 2.4.15 libdrm_intel])
-PKG_CHECK_MODULES([DRI], [libmesadri >= 7.7.0 libmesadri < 7.8.0
-			  libmesadricommon >= 7.7.0 libmesadricommon < 7.8.0])
+PKG_CHECK_MODULES([DRI], [libmesadri >= 7.8.0 libmesadri < 7.9.0
+			  libmesadricommon >= 7.8.0 libmesadricommon < 7.9.0])
 
 AC_OUTPUT([
 	Makefile
diff --git a/i915/Makefile.am b/i915/Makefile.am
index c994f96..609d661 100644
--- a/i915/Makefile.am
+++ b/i915/Makefile.am
@@ -9,7 +9,6 @@ i915_dri_la_LDFLAGS = -module -noprefix -avoid-version -lm -ldl \
 i915_dri_ladir = @libdir@/dri
 i915_dri_la_SOURCES = \
 	i830_context.c \
-	i830_metaops.c \
 	i830_state.c \
 	i830_texblend.c \
 	i830_texstate.c \
@@ -35,14 +34,12 @@ i915_dri_la_SOURCES = \
 	../shared/intel_pixel_read.c \
 	../shared/intel_buffers.c \
 	../shared/intel_blit.c \
-	../shared/intel_swapbuffers.c \
 	i915_tex_layout.c \
 	i915_texstate.c \
 	i915_context.c \
 	i915_debug.c \
 	i915_debug_fp.c \
 	i915_fragprog.c \
-	i915_metaops.c \
 	i915_program.c \
 	i915_state.c \
 	i915_vtbl.c \
diff --git a/i915/i830_context.c b/i915/i830_context.c
index 840946f..ebe8b15 100644
--- a/i915/i830_context.c
+++ b/i915/i830_context.c
@@ -28,14 +28,11 @@
 #include "i830_context.h"
 #include "main/imports.h"
 #include "texmem.h"
-#include "intel_tex.h"
 #include "tnl/tnl.h"
 #include "tnl/t_vertex.h"
 #include "tnl/t_context.h"
 #include "tnl/t_pipeline.h"
-#include "utils.h"
 #include "intel_span.h"
-#include "intel_pixel.h"
 #include "intel_tris.h"
 
 /***************************************
@@ -53,7 +50,7 @@ extern const struct tnl_pipeline_stage *intel_pipeline[];
 
 GLboolean
 i830CreateContext(const __GLcontextModes * mesaVis,
-                  __DRIcontextPrivate * driContextPriv,
+                  __DRIcontext * driContextPriv,
                   void *sharedContextPrivate)
 {
    struct dd_function_table functions;
@@ -108,7 +105,6 @@ i830CreateContext(const __GLcontextModes * mesaVis,
    intel->verts = TNL_CONTEXT(ctx)->clipspace.vertex_buf;
 
    i830InitState(i830);
-   i830InitMetaFuncs(i830);
 
    _tnl_allow_vertex_fog(ctx, 1);
    _tnl_allow_pixel_fog(ctx, 0);
diff --git a/i915/i830_context.h b/i915/i830_context.h
index f73cbbf..d7eb9c2 100644
--- a/i915/i830_context.h
+++ b/i915/i830_context.h
@@ -34,7 +34,8 @@
 #define I830_FALLBACK_COLORMASK		 0x2000
 #define I830_FALLBACK_STENCIL		 0x4000
 #define I830_FALLBACK_STIPPLE		 0x8000
-#define I830_FALLBACK_LOGICOP		 0x10000
+#define I830_FALLBACK_LOGICOP		 0x20000
+#define I830_FALLBACK_DRAW_OFFSET	 0x200000
 
 #define I830_UPLOAD_CTX              0x1
 #define I830_UPLOAD_BUFFERS          0x2
@@ -144,7 +145,7 @@ struct i830_context
    GLuint lodbias_tm0s3[MAX_TEXTURE_UNITS];
      DECLARE_RENDERINPUTS(last_index_bitset);
 
-   struct i830_hw_state meta, initial, state, *current;
+   struct i830_hw_state state;
 };
 
 
@@ -178,7 +179,7 @@ i830_state_draw_region(struct intel_context *intel,
  */
 extern GLboolean
 i830CreateContext(const __GLcontextModes * mesaVis,
-                  __DRIcontextPrivate * driContextPriv,
+                  __DRIcontext * driContextPriv,
                   void *sharedContextPrivate);
 
 /* i830_tex.c, i830_texstate.c
@@ -206,10 +207,6 @@ extern void i830EmitState(struct i830_context *i830);
 extern void i830InitState(struct i830_context *i830);
 extern void i830_update_provoking_vertex(GLcontext *ctx);
 
-/* i830_metaops.c
- */
-extern void i830InitMetaFuncs(struct i830_context *i830);
-
 /*======================================================================
  * Inline conversion functions.  These are better-typed than the
  * macros used previously:
diff --git a/i915/i830_metaops.c b/i915/i830_metaops.c
deleted file mode 100644
index 2cce661..0000000
--- a/i915/i830_metaops.c
+++ /dev/null
@@ -1,456 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "main/glheader.h"
-#include "main/enums.h"
-#include "main/mtypes.h"
-#include "main/macros.h"
-#include "utils.h"
-
-#include "intel_screen.h"
-#include "intel_batchbuffer.h"
-#include "intel_regions.h"
-
-#include "i830_context.h"
-#include "i830_reg.h"
-
-/* A large amount of state doesn't need to be uploaded.
- */
-#define ACTIVE (I830_UPLOAD_INVARIENT |         \
-		I830_UPLOAD_CTX |		\
-		I830_UPLOAD_BUFFERS |		\
-		I830_UPLOAD_STIPPLE |		\
-		I830_UPLOAD_TEXBLEND(0) |	\
-		I830_UPLOAD_TEX(0))
-
-
-#define SET_STATE( i830, STATE )		\
-do {						\
-   i830->current->emitted &= ~ACTIVE;			\
-   i830->current = &i830->STATE;		\
-   i830->current->emitted &= ~ACTIVE;			\
-} while (0)
-
-
-static void
-set_no_stencil_write(struct intel_context *intel)
-{
-   struct i830_context *i830 = i830_context(&intel->ctx);
-
-   /* ctx->Driver.Enable( ctx, GL_STENCIL_TEST, GL_FALSE )
-    */
-   i830->meta.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_STENCIL_TEST;
-   i830->meta.Ctx[I830_CTXREG_ENABLES_2] &= ~ENABLE_STENCIL_WRITE;
-   i830->meta.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_STENCIL_TEST;
-   i830->meta.Ctx[I830_CTXREG_ENABLES_2] |= DISABLE_STENCIL_WRITE;
-
-   i830->meta.emitted &= ~I830_UPLOAD_CTX;
-}
-
-static void
-set_no_depth_write(struct intel_context *intel)
-{
-   struct i830_context *i830 = i830_context(&intel->ctx);
-
-   /* ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_FALSE )
-    */
-   i830->meta.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_DIS_DEPTH_TEST_MASK;
-   i830->meta.Ctx[I830_CTXREG_ENABLES_2] &= ~ENABLE_DIS_DEPTH_WRITE_MASK;
-   i830->meta.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_DEPTH_TEST;
-   i830->meta.Ctx[I830_CTXREG_ENABLES_2] |= DISABLE_DEPTH_WRITE;
-
-   i830->meta.emitted &= ~I830_UPLOAD_CTX;
-}
-
-/* Set depth unit to replace.
- */
-static void
-set_depth_replace(struct intel_context *intel)
-{
-   struct i830_context *i830 = i830_context(&intel->ctx);
-
-   /* ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_FALSE )
-    * ctx->Driver.DepthMask( ctx, GL_TRUE )
-    */
-   i830->meta.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_DIS_DEPTH_TEST_MASK;
-   i830->meta.Ctx[I830_CTXREG_ENABLES_2] &= ~ENABLE_DIS_DEPTH_WRITE_MASK;
-   i830->meta.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_DEPTH_TEST;
-   i830->meta.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_DEPTH_WRITE;
-
-   /* ctx->Driver.DepthFunc( ctx, GL_ALWAYS )
-    */
-   i830->meta.Ctx[I830_CTXREG_STATE3] &= ~DEPTH_TEST_FUNC_MASK;
-   i830->meta.Ctx[I830_CTXREG_STATE3] |= (ENABLE_DEPTH_TEST_FUNC |
-                                          DEPTH_TEST_FUNC
-                                          (COMPAREFUNC_ALWAYS));
-
-   i830->meta.emitted &= ~I830_UPLOAD_CTX;
-}
-
-
-/* Set stencil unit to replace always with the reference value.
- */
-static void
-set_stencil_replace(struct intel_context *intel,
-                    GLuint s_mask, GLuint s_clear)
-{
-   struct i830_context *i830 = i830_context(&intel->ctx);
-
-   /* ctx->Driver.Enable( ctx, GL_STENCIL_TEST, GL_TRUE )
-    */
-   i830->meta.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_STENCIL_TEST;
-   i830->meta.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_STENCIL_WRITE;
-
-   /* ctx->Driver.StencilMask( ctx, s_mask )
-    */
-   i830->meta.Ctx[I830_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_WRITE_MASK;
-   i830->meta.Ctx[I830_CTXREG_STATE4] |= (ENABLE_STENCIL_WRITE_MASK |
-                                          STENCIL_WRITE_MASK((s_mask &
-                                                              0xff)));
-
-   /* ctx->Driver.StencilOp( ctx, GL_REPLACE, GL_REPLACE, GL_REPLACE )
-    */
-   i830->meta.Ctx[I830_CTXREG_STENCILTST] &= ~(STENCIL_OPS_MASK);
-   i830->meta.Ctx[I830_CTXREG_STENCILTST] |=
-      (ENABLE_STENCIL_PARMS |
-       STENCIL_FAIL_OP(STENCILOP_REPLACE) |
-       STENCIL_PASS_DEPTH_FAIL_OP(STENCILOP_REPLACE) |
-       STENCIL_PASS_DEPTH_PASS_OP(STENCILOP_REPLACE));
-
-   /* ctx->Driver.StencilFunc( ctx, GL_ALWAYS, s_clear, ~0 )
-    */
-   i830->meta.Ctx[I830_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_TEST_MASK;
-   i830->meta.Ctx[I830_CTXREG_STATE4] |= (ENABLE_STENCIL_TEST_MASK |
-                                          STENCIL_TEST_MASK(0xff));
-
-   i830->meta.Ctx[I830_CTXREG_STENCILTST] &= ~(STENCIL_REF_VALUE_MASK |
-                                               ENABLE_STENCIL_TEST_FUNC_MASK);
-   i830->meta.Ctx[I830_CTXREG_STENCILTST] |=
-      (ENABLE_STENCIL_REF_VALUE |
-       ENABLE_STENCIL_TEST_FUNC |
-       STENCIL_REF_VALUE((s_clear & 0xff)) |
-       STENCIL_TEST_FUNC(COMPAREFUNC_ALWAYS));
-
-
-
-   i830->meta.emitted &= ~I830_UPLOAD_CTX;
-}
-
-
-static void
-set_color_mask(struct intel_context *intel, GLboolean state)
-{
-   struct i830_context *i830 = i830_context(&intel->ctx);
-
-   const GLuint mask = ((1 << WRITEMASK_RED_SHIFT) |
-                        (1 << WRITEMASK_GREEN_SHIFT) |
-                        (1 << WRITEMASK_BLUE_SHIFT) |
-                        (1 << WRITEMASK_ALPHA_SHIFT));
-
-   i830->meta.Ctx[I830_CTXREG_ENABLES_2] &= ~mask;
-
-   if (state) {
-      i830->meta.Ctx[I830_CTXREG_ENABLES_2] |=
-         (i830->state.Ctx[I830_CTXREG_ENABLES_2] & mask);
-   }
-
-   i830->meta.emitted &= ~I830_UPLOAD_CTX;
-}
-
-/* Installs a one-stage passthrough texture blend pipeline.  Is there
- * more that can be done to turn off texturing?
- */
-static void
-set_no_texture(struct intel_context *intel)
-{
-   struct i830_context *i830 = i830_context(&intel->ctx);
-   static const struct gl_tex_env_combine_state comb = {
-      GL_NONE, GL_NONE,
-      {GL_TEXTURE, 0, 0,}, {GL_TEXTURE, 0, 0,},
-      {GL_SRC_COLOR, 0, 0}, {GL_SRC_ALPHA, 0, 0},
-      0, 0, 0, 0
-   };
-
-   i830->meta.TexBlendWordsUsed[0] =
-      i830SetTexEnvCombine(i830, &comb, 0, TEXBLENDARG_TEXEL0,
-                           i830->meta.TexBlend[0], NULL);
-
-   i830->meta.TexBlend[0][0] |= TEXOP_LAST_STAGE;
-   i830->meta.emitted &= ~I830_UPLOAD_TEXBLEND(0);
-}
-
-/* Set up a single element blend stage for 'replace' texturing with no
- * funny ops.
- */
-static void
-set_texture_blend_replace(struct intel_context *intel)
-{
-   struct i830_context *i830 = i830_context(&intel->ctx);
-   static const struct gl_tex_env_combine_state comb = {
-      GL_REPLACE, GL_REPLACE,
-      {GL_TEXTURE, GL_TEXTURE, GL_TEXTURE,}, {GL_TEXTURE, GL_TEXTURE,
-                                              GL_TEXTURE,},
-      {GL_SRC_COLOR, GL_SRC_COLOR, GL_SRC_COLOR}, {GL_SRC_ALPHA, GL_SRC_ALPHA,
-                                                   GL_SRC_ALPHA},
-      0, 0, 1, 1
-   };
-
-   i830->meta.TexBlendWordsUsed[0] =
-      i830SetTexEnvCombine(i830, &comb, 0, TEXBLENDARG_TEXEL0,
-                           i830->meta.TexBlend[0], NULL);
-
-   i830->meta.TexBlend[0][0] |= TEXOP_LAST_STAGE;
-   i830->meta.emitted &= ~I830_UPLOAD_TEXBLEND(0);
-
-/*    fprintf(stderr, "%s: TexBlendWordsUsed[0]: %d\n",  */
-/* 	   __FUNCTION__, i830->meta.TexBlendWordsUsed[0]); */
-}
-
-
-
-/* Set up an arbitary piece of memory as a rectangular texture
- * (including the front or back buffer).
- */
-static GLboolean
-set_tex_rect_source(struct intel_context *intel,
-                    dri_bo *buffer,
-                    GLuint offset,
-                    GLuint pitch, GLuint height, GLenum format, GLenum type)
-{
-   struct i830_context *i830 = i830_context(&intel->ctx);
-   GLuint *setup = i830->meta.Tex[0];
-   GLint numLevels = 1;
-   GLuint textureFormat;
-   GLuint cpp;
-
-   /* A full implementation of this would do the upload through
-    * glTexImage2d, and get all the conversion operations at that
-    * point.  We are restricted, but still at least have access to the
-    * fragment program swizzle.
-    */
-   switch (format) {
-   case GL_BGRA:
-      switch (type) {
-      case GL_UNSIGNED_INT_8_8_8_8_REV:
-      case GL_UNSIGNED_BYTE:
-         textureFormat = (MAPSURF_32BIT | MT_32BIT_ARGB8888);
-         cpp = 4;
-         break;
-      default:
-         return GL_FALSE;
-      }
-      break;
-   case GL_RGBA:
-      switch (type) {
-      case GL_UNSIGNED_INT_8_8_8_8_REV:
-      case GL_UNSIGNED_BYTE:
-         textureFormat = (MAPSURF_32BIT | MT_32BIT_ABGR8888);
-         cpp = 4;
-         break;
-      default:
-         return GL_FALSE;
-      }
-      break;
-   case GL_BGR:
-      switch (type) {
-      case GL_UNSIGNED_SHORT_5_6_5_REV:
-         textureFormat = (MAPSURF_16BIT | MT_16BIT_RGB565);
-         cpp = 2;
-         break;
-      default:
-         return GL_FALSE;
-      }
-      break;
-   case GL_RGB:
-      switch (type) {
-      case GL_UNSIGNED_SHORT_5_6_5:
-         textureFormat = (MAPSURF_16BIT | MT_16BIT_RGB565);
-         cpp = 2;
-         break;
-      default:
-         return GL_FALSE;
-      }
-      break;
-
-   default:
-      return GL_FALSE;
-   }
-
-   i830->meta.tex_buffer[0] = buffer;
-   i830->meta.tex_offset[0] = offset;
-
-   setup[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
-                               (LOAD_TEXTURE_MAP0 << 0) | 4);
-   setup[I830_TEXREG_TM0S1] = (((height - 1) << TM0S1_HEIGHT_SHIFT) |
-                               ((pitch - 1) << TM0S1_WIDTH_SHIFT) |
-                               textureFormat);
-   setup[I830_TEXREG_TM0S2] =
-      (((((pitch * cpp) / 4) -
-         1) << TM0S2_PITCH_SHIFT) | TM0S2_CUBE_FACE_ENA_MASK);
-
-   setup[I830_TEXREG_TM0S3] =
-      ((((numLevels -
-          1) *
-         4) << TM0S3_MIN_MIP_SHIFT) | (FILTER_NEAREST <<
-                                       TM0S3_MIN_FILTER_SHIFT) |
-       (MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT) | (FILTER_NEAREST <<
-                                                     TM0S3_MAG_FILTER_SHIFT));
-
-   setup[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(0));
-
-   setup[I830_TEXREG_MCS] = (_3DSTATE_MAP_COORD_SET_CMD |
-                             MAP_UNIT(0) |
-                             ENABLE_TEXCOORD_PARAMS |
-                             TEXCOORDS_ARE_IN_TEXELUNITS |
-                             TEXCOORDTYPE_CARTESIAN |
-                             ENABLE_ADDR_V_CNTL |
-                             TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_WRAP) |
-                             ENABLE_ADDR_U_CNTL |
-                             TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_WRAP));
-
-   i830->meta.emitted &= ~I830_UPLOAD_TEX(0);
-   return GL_TRUE;
-}
-
-
-static void
-set_vertex_format(struct intel_context *intel)
-{
-   struct i830_context *i830 = i830_context(&intel->ctx);
-   i830->meta.Ctx[I830_CTXREG_VF] = (_3DSTATE_VFT0_CMD |
-                                     VFT0_TEX_COUNT(1) |
-                                     VFT0_DIFFUSE | VFT0_XYZ);
-   i830->meta.Ctx[I830_CTXREG_VF2] = (_3DSTATE_VFT1_CMD |
-                                      VFT1_TEX0_FMT(TEXCOORDFMT_2D) |
-                                      VFT1_TEX1_FMT(TEXCOORDFMT_2D) |
-                                      VFT1_TEX2_FMT(TEXCOORDFMT_2D) |
-                                      VFT1_TEX3_FMT(TEXCOORDFMT_2D));
-   i830->meta.emitted &= ~I830_UPLOAD_CTX;
-}
-
-
-static void
-meta_import_pixel_state(struct intel_context *intel)
-{
-   struct i830_context *i830 = i830_context(&intel->ctx);
-
-   i830->meta.Ctx[I830_CTXREG_STATE1] = i830->state.Ctx[I830_CTXREG_STATE1];
-   i830->meta.Ctx[I830_CTXREG_STATE2] = i830->state.Ctx[I830_CTXREG_STATE2];
-   i830->meta.Ctx[I830_CTXREG_STATE3] = i830->state.Ctx[I830_CTXREG_STATE3];
-   i830->meta.Ctx[I830_CTXREG_STATE4] = i830->state.Ctx[I830_CTXREG_STATE4];
-   i830->meta.Ctx[I830_CTXREG_STATE5] = i830->state.Ctx[I830_CTXREG_STATE5];
-   i830->meta.Ctx[I830_CTXREG_IALPHAB] = i830->state.Ctx[I830_CTXREG_IALPHAB];
-   i830->meta.Ctx[I830_CTXREG_STENCILTST] =
-      i830->state.Ctx[I830_CTXREG_STENCILTST];
-   i830->meta.Ctx[I830_CTXREG_ENABLES_1] =
-      i830->state.Ctx[I830_CTXREG_ENABLES_1];
-   i830->meta.Ctx[I830_CTXREG_ENABLES_2] =
-      i830->state.Ctx[I830_CTXREG_ENABLES_2];
-   i830->meta.Ctx[I830_CTXREG_AA] = i830->state.Ctx[I830_CTXREG_AA];
-   i830->meta.Ctx[I830_CTXREG_FOGCOLOR] =
-      i830->state.Ctx[I830_CTXREG_FOGCOLOR];
-   i830->meta.Ctx[I830_CTXREG_BLENDCOLOR0] =
-      i830->state.Ctx[I830_CTXREG_BLENDCOLOR0];
-   i830->meta.Ctx[I830_CTXREG_BLENDCOLOR1] =
-      i830->state.Ctx[I830_CTXREG_BLENDCOLOR1];
-   i830->meta.Ctx[I830_CTXREG_MCSB0] = i830->state.Ctx[I830_CTXREG_MCSB0];
-   i830->meta.Ctx[I830_CTXREG_MCSB1] = i830->state.Ctx[I830_CTXREG_MCSB1];
-
-
-   i830->meta.Ctx[I830_CTXREG_STATE3] &= ~CULLMODE_MASK;
-   i830->meta.Stipple[I830_STPREG_ST1] &= ~ST1_ENABLE;
-   i830->meta.emitted &= ~I830_UPLOAD_CTX;
-
-
-   i830->meta.Buffer[I830_DESTREG_SENABLE] =
-      i830->state.Buffer[I830_DESTREG_SENABLE];
-   i830->meta.Buffer[I830_DESTREG_SR1] = i830->state.Buffer[I830_DESTREG_SR1];
-   i830->meta.Buffer[I830_DESTREG_SR2] = i830->state.Buffer[I830_DESTREG_SR2];
-   i830->meta.emitted &= ~I830_UPLOAD_BUFFERS;
-}
-
-
-
-/* Select between front and back draw buffers.
- */
-static void
-meta_draw_region(struct intel_context *intel,
-                 struct intel_region *color_region,
-                 struct intel_region *depth_region)
-{
-   struct i830_context *i830 = i830_context(&intel->ctx);
-
-   i830_state_draw_region(intel, &i830->meta, color_region, depth_region);
-}
-
-
-/* Operations where the 3D engine is decoupled temporarily from the
- * current GL state and used for other purposes than simply rendering
- * incoming triangles.
- */
-static void
-install_meta_state(struct intel_context *intel)
-{
-   struct i830_context *i830 = i830_context(&intel->ctx);
-   memcpy(&i830->meta, &i830->initial, sizeof(i830->meta));
-
-   i830->meta.active = ACTIVE;
-   i830->meta.emitted = 0;
-
-   SET_STATE(i830, meta);
-   set_vertex_format(intel);
-   set_no_texture(intel);
-}
-
-static void
-leave_meta_state(struct intel_context *intel)
-{
-   struct i830_context *i830 = i830_context(&intel->ctx);
-   intel_region_release(&i830->meta.draw_region);
-   intel_region_release(&i830->meta.depth_region);
-/*    intel_region_release(intel, &i830->meta.tex_region[0]); */
-   SET_STATE(i830, state);
-}
-
-
-
-void
-i830InitMetaFuncs(struct i830_context *i830)
-{
-   i830->intel.vtbl.install_meta_state = install_meta_state;
-   i830->intel.vtbl.leave_meta_state = leave_meta_state;
-   i830->intel.vtbl.meta_no_depth_write = set_no_depth_write;
-   i830->intel.vtbl.meta_no_stencil_write = set_no_stencil_write;
-   i830->intel.vtbl.meta_stencil_replace = set_stencil_replace;
-   i830->intel.vtbl.meta_depth_replace = set_depth_replace;
-   i830->intel.vtbl.meta_color_mask = set_color_mask;
-   i830->intel.vtbl.meta_no_texture = set_no_texture;
-   i830->intel.vtbl.meta_texture_blend_replace = set_texture_blend_replace;
-   i830->intel.vtbl.meta_tex_rect_source = set_tex_rect_source;
-   i830->intel.vtbl.meta_draw_region = meta_draw_region;
-   i830->intel.vtbl.meta_import_pixel_state = meta_import_pixel_state;
-}
diff --git a/i915/i830_state.c b/i915/i830_state.c
index 645ebe3..3b9b3ae 100644
--- a/i915/i830_state.c
+++ b/i915/i830_state.c
@@ -620,7 +620,7 @@ i830LineWidth(GLcontext * ctx, GLfloat widthf)
    DBG("%s\n", __FUNCTION__);
    
    width = (int) (widthf * 2);
-   CLAMP_SELF(width, 1, 15);
+   width = CLAMP(width, 1, 15);
 
    state5 = i830->state.Ctx[I830_CTXREG_STATE5] & ~FIXED_LINE_WIDTH_MASK;
    state5 |= (ENABLE_FIXED_LINE_WIDTH | FIXED_LINE_WIDTH(width));
@@ -639,7 +639,7 @@ i830PointSize(GLcontext * ctx, GLfloat size)
 
    DBG("%s\n", __FUNCTION__);
    
-   CLAMP_SELF(point_size, 1, 256);
+   point_size = CLAMP(point_size, 1, 256);
    I830_STATECHANGE(i830, I830_UPLOAD_CTX);
    i830->state.Ctx[I830_CTXREG_STATE5] &= ~FIXED_POINT_WIDTH_MASK;
    i830->state.Ctx[I830_CTXREG_STATE5] |= (ENABLE_FIXED_POINT_WIDTH |
@@ -1127,9 +1127,6 @@ i830InitState(struct i830_context *i830)
 
    _mesa_init_driver_state(ctx);
 
-   memcpy(&i830->initial, &i830->state, sizeof(i830->state));
-
-   i830->current = &i830->state;
    i830->state.emitted = 0;
    i830->state.active = (I830_UPLOAD_INVARIENT |
                          I830_UPLOAD_RASTER_RULES |
diff --git a/i915/i830_texstate.c b/i915/i830_texstate.c
index ce409b3..e8f7e37 100644
--- a/i915/i830_texstate.c
+++ b/i915/i830_texstate.c
@@ -27,6 +27,7 @@
 
 #include "main/mtypes.h"
 #include "main/enums.h"
+#include "main/colormac.h"
 
 #include "intel_mipmap_tree.h"
 #include "intel_tex.h"
@@ -121,6 +122,7 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
    GLuint *state = i830->state.Tex[unit], format, pitch;
    GLint lodbias;
    GLubyte border[4];
+   GLuint dst_x, dst_y;
 
    memset(state, 0, sizeof(state));
 
@@ -131,7 +133,7 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
        i830->state.tex_buffer[unit] = NULL;
    }
 
-   if (!intelObj->imageOverride && !intel_finalize_mipmap_tree(intel, unit))
+   if (!intel_finalize_mipmap_tree(intel, unit))
       return GL_FALSE;
 
    /* Get first image here, since intelObj->firstLevel will get set in
@@ -139,42 +141,20 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
     */
    firstImage = tObj->Image[0][intelObj->firstLevel];
 
-   if (intelObj->imageOverride) {
-      i830->state.tex_buffer[unit] = NULL;
-      i830->state.tex_offset[unit] = intelObj->textureOffset;
+   intel_miptree_get_image_offset(intelObj->mt, intelObj->firstLevel, 0, 0,
+				  &dst_x, &dst_y);
 
-      switch (intelObj->depthOverride) {
-      case 32:
-	 format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
-	 break;
-      case 24:
-      default:
-	 format = MAPSURF_32BIT | MT_32BIT_XRGB8888;
-	 break;
-      case 16:
-	 format = MAPSURF_16BIT | MT_16BIT_RGB565;
-	 break;
-      }
-
-      pitch = intelObj->pitchOverride;
-   } else {
-      GLuint dst_x, dst_y;
-
-      intel_miptree_get_image_offset(intelObj->mt, intelObj->firstLevel, 0, 0,
-				     &dst_x, &dst_y);
-
-      dri_bo_reference(intelObj->mt->region->buffer);
-      i830->state.tex_buffer[unit] = intelObj->mt->region->buffer;
-      /* XXX: This calculation is probably broken for tiled images with
-       * a non-page-aligned offset.
-       */
-      i830->state.tex_offset[unit] = (dst_x + dst_y * intelObj->mt->pitch) *
-	 intelObj->mt->cpp;
+   dri_bo_reference(intelObj->mt->region->buffer);
+   i830->state.tex_buffer[unit] = intelObj->mt->region->buffer;
+   /* XXX: This calculation is probably broken for tiled images with
+    * a non-page-aligned offset.
+    */
+   i830->state.tex_offset[unit] = (dst_x + dst_y * intelObj->mt->pitch) *
+      intelObj->mt->cpp;
 
-      format = translate_texture_format(firstImage->TexFormat,
-					firstImage->InternalFormat);
-      pitch = intelObj->mt->pitch * intelObj->mt->cpp;
-   }
+   format = translate_texture_format(firstImage->TexFormat,
+				     firstImage->InternalFormat);
+   pitch = intelObj->mt->pitch * intelObj->mt->cpp;
 
    state[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
                                (LOAD_TEXTURE_MAP0 << unit) | 4);
@@ -303,16 +283,15 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
    }
 
    /* convert border color from float to ubyte */
-   CLAMPED_FLOAT_TO_UBYTE(border[0], tObj->BorderColor[0]);
-   CLAMPED_FLOAT_TO_UBYTE(border[1], tObj->BorderColor[1]);
-   CLAMPED_FLOAT_TO_UBYTE(border[2], tObj->BorderColor[2]);
-   CLAMPED_FLOAT_TO_UBYTE(border[3], tObj->BorderColor[3]);
-
-   state[I830_TEXREG_TM0S4] = INTEL_PACKCOLOR8888(border[0],
-                                                  border[1],
-                                                  border[2],
-                                                  border[3]);
-
+   CLAMPED_FLOAT_TO_UBYTE(border[0], tObj->BorderColor.f[0]);
+   CLAMPED_FLOAT_TO_UBYTE(border[1], tObj->BorderColor.f[1]);
+   CLAMPED_FLOAT_TO_UBYTE(border[2], tObj->BorderColor.f[2]);
+   CLAMPED_FLOAT_TO_UBYTE(border[3], tObj->BorderColor.f[3]);
+
+   state[I830_TEXREG_TM0S4] = PACK_COLOR_8888(border[3],
+					      border[0],
+					      border[1],
+					      border[2]);
 
    I830_ACTIVESTATE(i830, I830_UPLOAD_TEX(unit), GL_TRUE);
    /* memcmp was already disabled, but definitely won't work as the
diff --git a/i915/i830_vtbl.c b/i915/i830_vtbl.c
index e8c8d5a..be96419 100644
--- a/i915/i830_vtbl.c
+++ b/i915/i830_vtbl.c
@@ -25,8 +25,6 @@
  * 
  **************************************************************************/
 
-#include "glapi/glapi.h"
-
 #include "i830_context.h"
 #include "i830_reg.h"
 #include "intel_batchbuffer.h"
@@ -126,7 +124,7 @@ i830_render_start(struct intel_context *intel)
 
       for (i = 0; i < I830_TEX_UNITS; i++) {
          if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_TEX(i))) {
-            GLuint sz = VB->TexCoordPtr[i]->size;
+            GLuint sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size;
             GLuint emit;
             GLuint mcs = (i830->state.Tex[i][I830_TEXREG_MCS] &
                           ~TEXCOORDTYPE_MASK);
@@ -237,8 +235,8 @@ static GLboolean
 i830_check_vertex_size(struct intel_context *intel, GLuint expected)
 {
    struct i830_context *i830 = i830_context(&intel->ctx);
-   int vft0 = i830->current->Ctx[I830_CTXREG_VF];
-   int vft1 = i830->current->Ctx[I830_CTXREG_VF2];
+   int vft0 = i830->state.Ctx[I830_CTXREG_VF];
+   int vft1 = i830->state.Ctx[I830_CTXREG_VF2];
    int nrtex = (vft0 & VFT0_TEX_COUNT_MASK) >> VFT0_TEX_COUNT_SHIFT;
    int i, sz = 0;
 
@@ -298,7 +296,7 @@ i830_emit_invarient_state(struct intel_context *intel)
 {
    BATCH_LOCALS;
 
-   BEGIN_BATCH(29, IGNORE_CLIPRECTS);
+   BEGIN_BATCH(29);
 
    OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
    OUT_BATCH(0);
@@ -366,7 +364,7 @@ i830_emit_invarient_state(struct intel_context *intel)
 
 
 #define emit( intel, state, size )			\
-   intel_batchbuffer_data(intel->batch, state, size, IGNORE_CLIPRECTS )
+   intel_batchbuffer_data(intel->batch, state, size )
 
 static GLuint
 get_dirty(struct i830_hw_state *state)
@@ -414,7 +412,7 @@ static void
 i830_emit_state(struct intel_context *intel)
 {
    struct i830_context *i830 = i830_context(&intel->ctx);
-   struct i830_hw_state *state = i830->current;
+   struct i830_hw_state *state = &i830->state;
    int i, count;
    GLuint dirty;
    dri_bo *aper_array[3 + I830_TEX_UNITS];
@@ -429,13 +427,9 @@ i830_emit_state(struct intel_context *intel)
     * It might be better to talk about explicit places where
     * scheduling is allowed, rather than assume that it is whenever a
     * batchbuffer fills up.
-    *
-    * Set the space as LOOP_CLIPRECTS now, since that's what our primitives
-    * will be emitted under.
     */
    intel_batchbuffer_require_space(intel->batch,
-				   get_state_size(state) + INTEL_PRIM_EMIT_SIZE,
-				   LOOP_CLIPRECTS);
+				   get_state_size(state) + INTEL_PRIM_EMIT_SIZE);
    count = 0;
  again:
    aper_count = 0;
@@ -491,29 +485,24 @@ i830_emit_state(struct intel_context *intel)
    }
 
    if (dirty & I830_UPLOAD_BUFFERS) {
-      GLuint count = 9; 
+      GLuint count = 15;
 
       DBG("I830_UPLOAD_BUFFERS:\n");
 
       if (state->depth_region)
           count += 3;
 
-      if (intel->constant_cliprect)
-          count += 6;
-
-      BEGIN_BATCH(count, IGNORE_CLIPRECTS);
+      BEGIN_BATCH(count);
       OUT_BATCH(state->Buffer[I830_DESTREG_CBUFADDR0]);
       OUT_BATCH(state->Buffer[I830_DESTREG_CBUFADDR1]);
       OUT_RELOC(state->draw_region->buffer,
-		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                state->draw_region->draw_offset);
+		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
 
       if (state->depth_region) {
          OUT_BATCH(state->Buffer[I830_DESTREG_DBUFADDR0]);
          OUT_BATCH(state->Buffer[I830_DESTREG_DBUFADDR1]);
          OUT_RELOC(state->depth_region->buffer,
-		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                   state->depth_region->draw_offset);
+		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
       }
 
       OUT_BATCH(state->Buffer[I830_DESTREG_DV0]);
@@ -523,15 +512,13 @@ i830_emit_state(struct intel_context *intel)
       OUT_BATCH(state->Buffer[I830_DESTREG_SR1]);
       OUT_BATCH(state->Buffer[I830_DESTREG_SR2]);
 
-      if (intel->constant_cliprect) {
-	 assert(state->Buffer[I830_DESTREG_DRAWRECT0] != MI_NOOP);
-	 OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT0]);
-	 OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT1]);
-	 OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT2]);
-	 OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT3]);
-	 OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT4]);
-	 OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT5]);
-      }
+      assert(state->Buffer[I830_DESTREG_DRAWRECT0] != MI_NOOP);
+      OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT0]);
+      OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT1]);
+      OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT2]);
+      OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT3]);
+      OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT4]);
+      OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT5]);
       ADVANCE_BATCH();
    }
    
@@ -544,7 +531,7 @@ i830_emit_state(struct intel_context *intel)
       if ((dirty & I830_UPLOAD_TEX(i))) {
          DBG("I830_UPLOAD_TEX(%d):\n", i);
 
-         BEGIN_BATCH(I830_TEX_SETUP_SIZE + 1, IGNORE_CLIPRECTS);
+         BEGIN_BATCH(I830_TEX_SETUP_SIZE + 1);
          OUT_BATCH(state->Tex[i][I830_TEXREG_TM0LI]);
 
          if (state->tex_buffer[i]) {
@@ -552,10 +539,6 @@ i830_emit_state(struct intel_context *intel)
 		      I915_GEM_DOMAIN_SAMPLER, 0,
                       state->tex_offset[i]);
          }
-	 else if (state == &i830->meta) {
-	    assert(i == 0);
-	    OUT_BATCH(0);
-	 }
 	 else {
 	    OUT_BATCH(state->tex_offset[i]);
 	 }
@@ -590,10 +573,6 @@ i830_destroy_context(struct intel_context *intel)
 
    intel_region_release(&i830->state.draw_region);
    intel_region_release(&i830->state.depth_region);
-   intel_region_release(&i830->meta.draw_region);
-   intel_region_release(&i830->meta.depth_region);
-   intel_region_release(&i830->initial.draw_region);
-   intel_region_release(&i830->initial.depth_region);
 
    for (i = 0; i < I830_TEX_UNITS; i++) {
       if (i830->state.tex_buffer[i] != NULL) {
@@ -605,24 +584,23 @@ i830_destroy_context(struct intel_context *intel)
    _tnl_free_vertices(&intel->ctx);
 }
 
-
-void
-i830_state_draw_region(struct intel_context *intel,
-		       struct i830_hw_state *state,
-		       struct intel_region *color_region,
-		       struct intel_region *depth_region)
+static void
+i830_set_draw_region(struct intel_context *intel,
+                     struct intel_region *color_regions[],
+                     struct intel_region *depth_region,
+		     GLuint num_regions)
 {
    struct i830_context *i830 = i830_context(&intel->ctx);
    GLcontext *ctx = &intel->ctx;
    struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
    GLuint value;
+   struct i830_hw_state *state = &i830->state;
+   uint32_t draw_x, draw_y;
 
-   ASSERT(state == &i830->state || state == &i830->meta);
-
-   if (state->draw_region != color_region) {
+   if (state->draw_region != color_regions[0]) {
       intel_region_release(&state->draw_region);
-      intel_region_reference(&state->draw_region, color_region);
+      intel_region_reference(&state->draw_region, color_regions[0]);
    }
    if (state->depth_region != depth_region) {
       intel_region_release(&state->depth_region);
@@ -633,7 +611,7 @@ i830_state_draw_region(struct intel_context *intel,
     * Set stride/cpp values
     */
    i915_set_buf_info_for_region(&state->Buffer[I830_DESTREG_CBUFADDR0],
-				color_region, BUF_3D_ID_COLOR_BACK);
+				color_regions[0], BUF_3D_ID_COLOR_BACK);
 
    i915_set_buf_info_for_region(&state->Buffer[I830_DESTREG_DBUFADDR0],
 				depth_region, BUF_3D_ID_DEPTH);
@@ -673,38 +651,42 @@ i830_state_draw_region(struct intel_context *intel,
    }
    state->Buffer[I830_DESTREG_DV1] = value;
 
-   if (intel->constant_cliprect) {
-      state->Buffer[I830_DESTREG_DRAWRECT0] = _3DSTATE_DRAWRECT_INFO;
-      state->Buffer[I830_DESTREG_DRAWRECT1] = 0;
-      state->Buffer[I830_DESTREG_DRAWRECT2] = 0; /* xmin, ymin */
-      state->Buffer[I830_DESTREG_DRAWRECT3] =
-	 (ctx->DrawBuffer->Width & 0xffff) |
-	 (ctx->DrawBuffer->Height << 16);
-      state->Buffer[I830_DESTREG_DRAWRECT4] = 0; /* xoff, yoff */
-      state->Buffer[I830_DESTREG_DRAWRECT5] = 0;
+   /* We set up the drawing rectangle to be offset into the color
+    * region's location in the miptree.  If it doesn't match with
+    * depth's offsets, we can't render to it.
+    *
+    * (Well, not actually true -- the hw grew a bit to let depth's
+    * offset get forced to 0,0.  We may want to use that if people are
+    * hitting that case.  Also, some configurations may be supportable
+    * by tweaking the start offset of the buffers around, which we
+    * can't do in general due to tiling)
+    */
+   FALLBACK(intel, I830_FALLBACK_DRAW_OFFSET,
+	    (depth_region && color_regions[0]) &&
+	    (depth_region->draw_x != color_regions[0]->draw_x ||
+	     depth_region->draw_y != color_regions[0]->draw_y));
+
+   if (color_regions[0]) {
+      draw_x = color_regions[0]->draw_x;
+      draw_y = color_regions[0]->draw_y;
+   } else if (depth_region) {
+      draw_x = depth_region->draw_x;
+      draw_y = depth_region->draw_y;
    } else {
-      state->Buffer[I830_DESTREG_DRAWRECT0] = MI_NOOP;
-      state->Buffer[I830_DESTREG_DRAWRECT1] = MI_NOOP;
-      state->Buffer[I830_DESTREG_DRAWRECT2] = MI_NOOP;
-      state->Buffer[I830_DESTREG_DRAWRECT3] = MI_NOOP;
-      state->Buffer[I830_DESTREG_DRAWRECT4] = MI_NOOP;
-      state->Buffer[I830_DESTREG_DRAWRECT5] = MI_NOOP;
+      draw_x = 0;
+      draw_y = 0;
    }
 
-   I830_STATECHANGE(i830, I830_UPLOAD_BUFFERS);
-
-
-}
+   state->Buffer[I830_DESTREG_DRAWRECT0] = _3DSTATE_DRAWRECT_INFO;
+   state->Buffer[I830_DESTREG_DRAWRECT1] = 0;
+   state->Buffer[I830_DESTREG_DRAWRECT2] = (draw_y << 16) | draw_x;
+   state->Buffer[I830_DESTREG_DRAWRECT3] =
+      ((ctx->DrawBuffer->Width + draw_x) & 0xffff) |
+      ((ctx->DrawBuffer->Height + draw_y) << 16);
+   state->Buffer[I830_DESTREG_DRAWRECT4] = (draw_y << 16) | draw_x;
+   state->Buffer[I830_DESTREG_DRAWRECT5] = MI_NOOP;
 
-
-static void
-i830_set_draw_region(struct intel_context *intel,
-                     struct intel_region *color_regions[],
-                     struct intel_region *depth_region,
-		     GLuint num_regions)
-{
-   struct i830_context *i830 = i830_context(&intel->ctx);
-   i830_state_draw_region(intel, &i830->state, color_regions[0], depth_region);
+   I830_STATECHANGE(i830, I830_UPLOAD_BUFFERS);
 }
 
 /* This isn't really handled at the moment.
@@ -714,17 +696,13 @@ i830_new_batch(struct intel_context *intel)
 {
    struct i830_context *i830 = i830_context(&intel->ctx);
    i830->state.emitted = 0;
-
-   /* Check that we didn't just wrap our batchbuffer at a bad time. */
-   assert(!intel->no_batch_wrap);
 }
 
 static void 
 i830_assert_not_dirty( struct intel_context *intel )
 {
    struct i830_context *i830 = i830_context(&intel->ctx);
-   struct i830_hw_state *state = i830->current;
-   assert(!get_dirty(state));
+   assert(!get_dirty(&i830->state));
 }
 
 static void
diff --git a/i915/i915_context.c b/i915/i915_context.c
index 7d4c7cf..4d86aae 100644
--- a/i915/i915_context.c
+++ b/i915/i915_context.c
@@ -28,7 +28,6 @@
 #include "i915_context.h"
 #include "main/imports.h"
 #include "main/macros.h"
-#include "intel_tex.h"
 #include "intel_tris.h"
 #include "tnl/t_context.h"
 #include "tnl/t_pipeline.h"
@@ -38,15 +37,11 @@
 #include "swrast_setup/swrast_setup.h"
 #include "tnl/tnl.h"
 
-#include "utils.h"
 #include "i915_reg.h"
 #include "i915_program.h"
 
-#include "intel_regions.h"
-#include "intel_batchbuffer.h"
 #include "intel_tris.h"
 #include "intel_span.h"
-#include "intel_pixel.h"
 
 /***************************************
  * Mesa's Driver Functions
@@ -100,7 +95,7 @@ extern const struct tnl_pipeline_stage *intel_pipeline[];
 
 GLboolean
 i915CreateContext(const __GLcontextModes * mesaVis,
-                  __DRIcontextPrivate * driContextPriv,
+                  __DRIcontext * driContextPriv,
                   void *sharedContextPrivate)
 {
    struct dd_function_table functions;
@@ -113,10 +108,9 @@ i915CreateContext(const __GLcontextModes * mesaVis,
       return GL_FALSE;
 
    if (0)
-      _mesa_printf("\ntexmem-0-3 branch\n\n");
+      printf("\ntexmem-0-3 branch\n\n");
 
    i915InitVtbl(i915);
-   i915InitMetaFuncs(i915);
 
    i915InitDriverFunctions(&functions);
 
@@ -143,6 +137,9 @@ i915CreateContext(const __GLcontextModes * mesaVis,
    ctx->Const.MaxTextureImageUnits = I915_TEX_UNITS;
    ctx->Const.MaxTextureCoordUnits = I915_TEX_UNITS;
    ctx->Const.MaxVarying = I915_TEX_UNITS;
+   ctx->Const.MaxCombinedTextureImageUnits =
+      ctx->Const.MaxVertexTextureImageUnits +
+      ctx->Const.MaxTextureImageUnits;
 
    /* Advertise the full hardware capabilities.  The new memory
     * manager should cope much better with overload situations:
diff --git a/i915/i915_context.h b/i915/i915_context.h
index 25418d5..b516928 100644
--- a/i915/i915_context.h
+++ b/i915/i915_context.h
@@ -40,6 +40,7 @@
 #define I915_FALLBACK_POLYGON_SMOOTH	 0x40000
 #define I915_FALLBACK_POINT_SMOOTH	 0x80000
 #define I915_FALLBACK_POINT_SPRITE_COORD_ORIGIN	 0x100000
+#define I915_FALLBACK_DRAW_OFFSET	 0x200000
 
 #define I915_UPLOAD_CTX              0x1
 #define I915_UPLOAD_BUFFERS          0x2
@@ -259,7 +260,7 @@ struct i915_context
 
    struct i915_fragment_program *current_program;
 
-   struct i915_hw_state meta, initial, state, *current;
+   struct i915_hw_state state;
 };
 
 
@@ -318,7 +319,7 @@ do {									\
  * i915_context.c
  */
 extern GLboolean i915CreateContext(const __GLcontextModes * mesaVis,
-                                   __DRIcontextPrivate * driContextPriv,
+                                   __DRIcontext * driContextPriv,
                                    void *sharedContextPrivate);
 
 
@@ -346,12 +347,6 @@ extern void i915UpdateTextureState(struct intel_context *intel);
 extern void i915InitTextureFuncs(struct dd_function_table *functions);
 
 /*======================================================================
- * i915_metaops.c
- */
-void i915InitMetaFuncs(struct i915_context *i915);
-
-
-/*======================================================================
  * i915_fragprog.c
  */
 extern void i915ValidateFragmentProgram(struct i915_context *i915);
diff --git a/i915/i915_debug.c b/i915/i915_debug.c
index fecfac3..4569fb9 100644
--- a/i915/i915_debug.c
+++ b/i915/i915_debug.c
@@ -31,27 +31,25 @@
 #include "i915_context.h"
 #include "i915_debug.h"
 
-#define PRINTF( ... ) _mesa_printf( __VA_ARGS__ )
-
 static GLboolean debug( struct debug_stream *stream, const char *name, GLuint len )
 {
    GLuint i;
    GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
    
    if (len == 0) {
-      PRINTF("Error - zero length packet (0x%08x)\n", stream->ptr[0]);
+      printf("Error - zero length packet (0x%08x)\n", stream->ptr[0]);
       assert(0);
       return GL_FALSE;
    }
 
    if (stream->print_addresses)
-      PRINTF("%08x:  ", stream->offset);
+      printf("%08x:  ", stream->offset);
 
 
-   PRINTF("%s (%d dwords):\n", name, len);
+   printf("%s (%d dwords):\n", name, len);
    for (i = 0; i < len; i++)
-      PRINTF("\t0x%08x\n",  ptr[i]);   
-   PRINTF("\n");
+      printf("\t0x%08x\n",  ptr[i]);   
+   printf("\n");
 
    stream->offset += len * sizeof(GLuint);
    
@@ -88,17 +86,17 @@ static GLboolean debug_prim( struct debug_stream *stream, const char *name,
    
 
 
-   PRINTF("%s %s (%d dwords):\n", name, prim, len);
-   PRINTF("\t0x%08x\n",  ptr[0]);   
+   printf("%s %s (%d dwords):\n", name, prim, len);
+   printf("\t0x%08x\n",  ptr[0]);   
    for (i = 1; i < len; i++) {
       if (dump_floats)
-	 PRINTF("\t0x%08x // %f\n",  ptr[i], *(GLfloat *)&ptr[i]);   
+	 printf("\t0x%08x // %f\n",  ptr[i], *(GLfloat *)&ptr[i]);   
       else
-	 PRINTF("\t0x%08x\n",  ptr[i]);   
+	 printf("\t0x%08x\n",  ptr[i]);   
    }
 
       
-   PRINTF("\n");
+   printf("\n");
 
    stream->offset += len * sizeof(GLuint);
    
@@ -113,15 +111,15 @@ static GLboolean debug_program( struct debug_stream *stream, const char *name, G
    GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
 
    if (len == 0) {
-      PRINTF("Error - zero length packet (0x%08x)\n", stream->ptr[0]);
+      printf("Error - zero length packet (0x%08x)\n", stream->ptr[0]);
       assert(0);
       return GL_FALSE;
    }
 
    if (stream->print_addresses)
-      PRINTF("%08x:  ", stream->offset);
+      printf("%08x:  ", stream->offset);
 
-   PRINTF("%s (%d dwords):\n", name, len);
+   printf("%s (%d dwords):\n", name, len);
    i915_disassemble_program( ptr, len );
 
    stream->offset += len * sizeof(GLuint);
@@ -135,17 +133,17 @@ static GLboolean debug_chain( struct debug_stream *stream, const char *name, GLu
    GLuint old_offset = stream->offset + len * sizeof(GLuint);
    GLuint i;
 
-   PRINTF("%s (%d dwords):\n", name, len);
+   printf("%s (%d dwords):\n", name, len);
    for (i = 0; i < len; i++)
-      PRINTF("\t0x%08x\n",  ptr[i]);
+      printf("\t0x%08x\n",  ptr[i]);
 
    stream->offset = ptr[1] & ~0x3;
    
    if (stream->offset < old_offset)
-      PRINTF("\n... skipping backwards from 0x%x --> 0x%x ...\n\n", 
+      printf("\n... skipping backwards from 0x%x --> 0x%x ...\n\n", 
 		   old_offset, stream->offset );
    else
-      PRINTF("\n... skipping from 0x%x --> 0x%x ...\n\n", 
+      printf("\n... skipping from 0x%x --> 0x%x ...\n\n", 
 		   old_offset, stream->offset );
 
 
@@ -165,10 +163,10 @@ static GLboolean debug_variable_length_prim( struct debug_stream *stream )
 
    len = 1+(i+2)/2;
 
-   PRINTF("3DPRIM, %s variable length %d indicies (%d dwords):\n", prim, i, len);
+   printf("3DPRIM, %s variable length %d indicies (%d dwords):\n", prim, i, len);
    for (i = 0; i < len; i++)
-      PRINTF("\t0x%08x\n",  ptr[i]);
-   PRINTF("\n");
+      printf("\t0x%08x\n",  ptr[i]);
+   printf("\n");
 
    stream->offset += len * sizeof(GLuint);
    return GL_TRUE;
@@ -178,9 +176,9 @@ static GLboolean debug_variable_length_prim( struct debug_stream *stream )
 #define BITS( dw, hi, lo, ... )				\
 do {							\
    unsigned himask = 0xffffffffU >> (31 - (hi));		\
-   PRINTF("\t\t ");				\
-   PRINTF(__VA_ARGS__);			\
-   PRINTF(": 0x%x\n", ((dw) & himask) >> (lo));	\
+   printf("\t\t ");				\
+   printf(__VA_ARGS__);			\
+   printf(": 0x%x\n", ((dw) & himask) >> (lo));	\
 } while (0)
 
 #define MBZ( dw, hi, lo) do {							\
@@ -194,9 +192,9 @@ do {							\
 #define FLAG( dw, bit, ... )			\
 do {							\
    if (((dw) >> (bit)) & 1) {				\
-      PRINTF("\t\t ");				\
-      PRINTF(__VA_ARGS__);			\
-      PRINTF("\n");				\
+      printf("\t\t ");				\
+      printf(__VA_ARGS__);			\
+      printf("\n");				\
    }							\
 } while (0)
 
@@ -208,17 +206,17 @@ static GLboolean debug_load_immediate( struct debug_stream *stream,
    GLuint bits = (ptr[0] >> 4) & 0xff;
    GLuint j = 0;
    
-   PRINTF("%s (%d dwords, flags: %x):\n", name, len, bits);
-   PRINTF("\t0x%08x\n",  ptr[j++]);
+   printf("%s (%d dwords, flags: %x):\n", name, len, bits);
+   printf("\t0x%08x\n",  ptr[j++]);
 
    if (bits & (1<<0)) {
-      PRINTF("\t  LIS0: 0x%08x\n", ptr[j]);
-      PRINTF("\t vb address: 0x%08x\n", (ptr[j] & ~0x3));
+      printf("\t  LIS0: 0x%08x\n", ptr[j]);
+      printf("\t vb address: 0x%08x\n", (ptr[j] & ~0x3));
       BITS(ptr[j], 0, 0, "vb invalidate disable");
       j++;
    }
    if (bits & (1<<1)) {
-      PRINTF("\t  LIS1: 0x%08x\n", ptr[j]);
+      printf("\t  LIS1: 0x%08x\n", ptr[j]);
       BITS(ptr[j], 29, 24, "vb dword width");
       BITS(ptr[j], 21, 16, "vb dword pitch");
       BITS(ptr[j], 15, 0, "vb max index");
@@ -226,7 +224,7 @@ static GLboolean debug_load_immediate( struct debug_stream *stream,
    }
    if (bits & (1<<2)) {
       int i;
-      PRINTF("\t  LIS2: 0x%08x\n", ptr[j]);
+      printf("\t  LIS2: 0x%08x\n", ptr[j]);
       for (i = 0; i < 8; i++) {
 	 unsigned tc = (ptr[j] >> (i * 4)) & 0xf;
 	 if (tc != 0xf)
@@ -235,11 +233,11 @@ static GLboolean debug_load_immediate( struct debug_stream *stream,
       j++;
    }
    if (bits & (1<<3)) {
-      PRINTF("\t  LIS3: 0x%08x\n", ptr[j]);
+      printf("\t  LIS3: 0x%08x\n", ptr[j]);
       j++;
    }
    if (bits & (1<<4)) {
-      PRINTF("\t  LIS4: 0x%08x\n", ptr[j]);
+      printf("\t  LIS4: 0x%08x\n", ptr[j]);
       BITS(ptr[j], 31, 23, "point width");
       BITS(ptr[j], 22, 19, "line width");
       FLAG(ptr[j], 18, "alpha flatshade");
@@ -261,7 +259,7 @@ static GLboolean debug_load_immediate( struct debug_stream *stream,
       j++;
    }
    if (bits & (1<<5)) {
-      PRINTF("\t  LIS5: 0x%08x\n", ptr[j]);
+      printf("\t  LIS5: 0x%08x\n", ptr[j]);
       BITS(ptr[j], 31, 28, "rgba write disables");
       FLAG(ptr[j], 27,     "force dflt point width");
       FLAG(ptr[j], 26,     "last pixel enable");
@@ -279,7 +277,7 @@ static GLboolean debug_load_immediate( struct debug_stream *stream,
       j++;
    }
    if (bits & (1<<6)) {
-      PRINTF("\t  LIS6: 0x%08x\n", ptr[j]);
+      printf("\t  LIS6: 0x%08x\n", ptr[j]);
       FLAG(ptr[j], 31,      "alpha test enable");
       BITS(ptr[j], 30, 28,  "alpha func");
       BITS(ptr[j], 27, 20,  "alpha ref");
@@ -296,7 +294,7 @@ static GLboolean debug_load_immediate( struct debug_stream *stream,
    }
 
 
-   PRINTF("\n");
+   printf("\n");
 
    assert(j == len);
 
@@ -315,34 +313,34 @@ static GLboolean debug_load_indirect( struct debug_stream *stream,
    GLuint bits = (ptr[0] >> 8) & 0x3f;
    GLuint i, j = 0;
    
-   PRINTF("%s (%d dwords):\n", name, len);
-   PRINTF("\t0x%08x\n",  ptr[j++]);
+   printf("%s (%d dwords):\n", name, len);
+   printf("\t0x%08x\n",  ptr[j++]);
 
    for (i = 0; i < 6; i++) {
       if (bits & (1<<i)) {
 	 switch (1<<(8+i)) {
 	 case LI0_STATE_STATIC_INDIRECT:
-	    PRINTF("        STATIC: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
-	    PRINTF("                0x%08x\n", ptr[j++]);
+	    printf("        STATIC: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
+	    printf("                0x%08x\n", ptr[j++]);
 	    break;
 	 case LI0_STATE_DYNAMIC_INDIRECT:
-	    PRINTF("       DYNAMIC: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
+	    printf("       DYNAMIC: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
 	    break;
 	 case LI0_STATE_SAMPLER:
-	    PRINTF("       SAMPLER: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
-	    PRINTF("                0x%08x\n", ptr[j++]);
+	    printf("       SAMPLER: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
+	    printf("                0x%08x\n", ptr[j++]);
 	    break;
 	 case LI0_STATE_MAP:
-	    PRINTF("           MAP: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
-	    PRINTF("                0x%08x\n", ptr[j++]);
+	    printf("           MAP: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
+	    printf("                0x%08x\n", ptr[j++]);
 	    break;
 	 case LI0_STATE_PROGRAM:
-	    PRINTF("       PROGRAM: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
-	    PRINTF("                0x%08x\n", ptr[j++]);
+	    printf("       PROGRAM: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
+	    printf("                0x%08x\n", ptr[j++]);
 	    break;
 	 case LI0_STATE_CONSTANTS:
-	    PRINTF("     CONSTANTS: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
-	    PRINTF("                0x%08x\n", ptr[j++]);
+	    printf("     CONSTANTS: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++;
+	    printf("                0x%08x\n", ptr[j++]);
 	    break;
 	 default:
 	    assert(0);
@@ -352,10 +350,10 @@ static GLboolean debug_load_indirect( struct debug_stream *stream,
    }
 
    if (bits == 0) {
-      PRINTF("\t  DUMMY: 0x%08x\n", ptr[j++]);
+      printf("\t  DUMMY: 0x%08x\n", ptr[j++]);
    }
 
-   PRINTF("\n");
+   printf("\n");
 
 
    assert(j == len);
@@ -368,7 +366,7 @@ static GLboolean debug_load_indirect( struct debug_stream *stream,
 static void BR13( struct debug_stream *stream,
 		  GLuint val )
 {
-   PRINTF("\t0x%08x\n",  val);
+   printf("\t0x%08x\n",  val);
    FLAG(val, 30, "clipping enable");
    BITS(val, 25, 24, "color depth (3==32bpp)");
    BITS(val, 23, 16, "raster op");
@@ -384,11 +382,11 @@ static void BR2223( struct debug_stream *stream,
    BR22.val = val22;
    BR23.val = val23;
 
-   PRINTF("\t0x%08x\n",  val22);
+   printf("\t0x%08x\n",  val22);
    BITS(val22, 31, 16, "dest y1");
    BITS(val22, 15, 0,  "dest x1");
 
-   PRINTF("\t0x%08x\n",  val23);
+   printf("\t0x%08x\n",  val23);
    BITS(val23, 31, 16, "dest y2");
    BITS(val23, 15, 0,  "dest x2");
 
@@ -400,13 +398,13 @@ static void BR2223( struct debug_stream *stream,
 static void BR09( struct debug_stream *stream,
 		  GLuint val )
 {
-   PRINTF("\t0x%08x -- dest address\n",  val);
+   printf("\t0x%08x -- dest address\n",  val);
 }
 
 static void BR26( struct debug_stream *stream,
 		  GLuint val )
 {
-   PRINTF("\t0x%08x\n",  val);
+   printf("\t0x%08x\n",  val);
    BITS(val, 31, 16, "src y1");
    BITS(val, 15, 0,  "src x1");
 }
@@ -414,20 +412,20 @@ static void BR26( struct debug_stream *stream,
 static void BR11( struct debug_stream *stream,
 		  GLuint val )
 {
-   PRINTF("\t0x%08x\n",  val);
+   printf("\t0x%08x\n",  val);
    BITS(val, 15, 0,  "src pitch");
 }
 
 static void BR12( struct debug_stream *stream,
 		  GLuint val )
 {
-   PRINTF("\t0x%08x -- src address\n",  val);
+   printf("\t0x%08x -- src address\n",  val);
 }
 
 static void BR16( struct debug_stream *stream,
 		  GLuint val )
 {
-   PRINTF("\t0x%08x -- color\n",  val);
+   printf("\t0x%08x -- color\n",  val);
 }
    
 static GLboolean debug_copy_blit( struct debug_stream *stream,
@@ -437,8 +435,8 @@ static GLboolean debug_copy_blit( struct debug_stream *stream,
    GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
    int j = 0;
 
-   PRINTF("%s (%d dwords):\n", name, len);
-   PRINTF("\t0x%08x\n",  ptr[j++]);
+   printf("%s (%d dwords):\n", name, len);
+   printf("\t0x%08x\n",  ptr[j++]);
    
    BR13(stream, ptr[j++]);
    BR2223(stream, ptr[j], ptr[j+1]);
@@ -460,8 +458,8 @@ static GLboolean debug_color_blit( struct debug_stream *stream,
    GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
    int j = 0;
 
-   PRINTF("%s (%d dwords):\n", name, len);
-   PRINTF("\t0x%08x\n",  ptr[j++]);
+   printf("%s (%d dwords):\n", name, len);
+   printf("\t0x%08x\n",  ptr[j++]);
 
    BR13(stream, ptr[j++]);
    BR2223(stream, ptr[j], ptr[j+1]);
@@ -481,8 +479,8 @@ static GLboolean debug_modes4( struct debug_stream *stream,
    GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
    int j = 0;
 
-   PRINTF("%s (%d dwords):\n", name, len);
-   PRINTF("\t0x%08x\n",  ptr[j]);
+   printf("%s (%d dwords):\n", name, len);
+   printf("\t0x%08x\n",  ptr[j]);
    BITS(ptr[j], 21, 18, "logicop func");
    FLAG(ptr[j], 17, "stencil test mask modify-enable");
    FLAG(ptr[j], 16, "stencil write mask modify-enable");
@@ -502,26 +500,26 @@ static GLboolean debug_map_state( struct debug_stream *stream,
    GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
    int j = 0;
 
-   PRINTF("%s (%d dwords):\n", name, len);
-   PRINTF("\t0x%08x\n",  ptr[j++]);
+   printf("%s (%d dwords):\n", name, len);
+   printf("\t0x%08x\n",  ptr[j++]);
    
    {
-      PRINTF("\t0x%08x\n",  ptr[j]);
+      printf("\t0x%08x\n",  ptr[j]);
       BITS(ptr[j], 15, 0,   "map mask");
       j++;
    }
 
    while (j < len) {
       {
-	 PRINTF("\t  TMn.0: 0x%08x\n", ptr[j]);
-	 PRINTF("\t map address: 0x%08x\n", (ptr[j] & ~0x3));
+	 printf("\t  TMn.0: 0x%08x\n", ptr[j]);
+	 printf("\t map address: 0x%08x\n", (ptr[j] & ~0x3));
 	 FLAG(ptr[j], 1, "vertical line stride");
 	 FLAG(ptr[j], 0, "vertical line stride offset");
 	 j++;
       }
 
       {
-	 PRINTF("\t  TMn.1: 0x%08x\n", ptr[j]);
+	 printf("\t  TMn.1: 0x%08x\n", ptr[j]);
 	 BITS(ptr[j], 31, 21, "height");
 	 BITS(ptr[j], 20, 10, "width");
 	 BITS(ptr[j], 9, 7, "surface format");
@@ -532,7 +530,7 @@ static GLboolean debug_map_state( struct debug_stream *stream,
 	 j++;
       }
       {
-	 PRINTF("\t  TMn.2: 0x%08x\n", ptr[j]);
+	 printf("\t  TMn.2: 0x%08x\n", ptr[j]);
 	 BITS(ptr[j], 31, 21, "dword pitch");
 	 BITS(ptr[j], 20, 15, "cube face enables");
 	 BITS(ptr[j], 14, 9, "max lod");
@@ -554,18 +552,18 @@ static GLboolean debug_sampler_state( struct debug_stream *stream,
    GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
    int j = 0;
 
-   PRINTF("%s (%d dwords):\n", name, len);
-   PRINTF("\t0x%08x\n",  ptr[j++]);
+   printf("%s (%d dwords):\n", name, len);
+   printf("\t0x%08x\n",  ptr[j++]);
    
    {
-      PRINTF("\t0x%08x\n",  ptr[j]);
+      printf("\t0x%08x\n",  ptr[j]);
       BITS(ptr[j], 15, 0,   "sampler mask");
       j++;
    }
 
    while (j < len) {
       {
-	 PRINTF("\t  TSn.0: 0x%08x\n", ptr[j]);
+	 printf("\t  TSn.0: 0x%08x\n", ptr[j]);
 	 FLAG(ptr[j], 31, "reverse gamma");
 	 FLAG(ptr[j], 30, "planar to packed");
 	 FLAG(ptr[j], 29, "yuv->rgb");
@@ -582,7 +580,7 @@ static GLboolean debug_sampler_state( struct debug_stream *stream,
       }
 
       {
-	 PRINTF("\t  TSn.1: 0x%08x\n", ptr[j]);
+	 printf("\t  TSn.1: 0x%08x\n", ptr[j]);
 	 BITS(ptr[j], 31, 24, "min lod");
 	 MBZ( ptr[j], 23, 18 );
 	 FLAG(ptr[j], 17,     "kill pixel enable");
@@ -597,7 +595,7 @@ static GLboolean debug_sampler_state( struct debug_stream *stream,
 	 j++;
       }
       {
-	 PRINTF("\t  TSn.2: 0x%08x  (default color)\n", ptr[j]);
+	 printf("\t  TSn.2: 0x%08x  (default color)\n", ptr[j]);
 	 j++;
       }
    }
@@ -614,11 +612,11 @@ static GLboolean debug_dest_vars( struct debug_stream *stream,
    GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
    int j = 0;
 
-   PRINTF("%s (%d dwords):\n", name, len);
-   PRINTF("\t0x%08x\n",  ptr[j++]);
+   printf("%s (%d dwords):\n", name, len);
+   printf("\t0x%08x\n",  ptr[j++]);
 
    {
-      PRINTF("\t0x%08x\n",  ptr[j]);
+      printf("\t0x%08x\n",  ptr[j]);
       FLAG(ptr[j], 31,     "early classic ztest");
       FLAG(ptr[j], 30,     "opengl tex default color");
       FLAG(ptr[j], 29,     "bypass iz");
@@ -649,11 +647,11 @@ static GLboolean debug_buf_info( struct debug_stream *stream,
    GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
    int j = 0;
 
-   PRINTF("%s (%d dwords):\n", name, len);
-   PRINTF("\t0x%08x\n",  ptr[j++]);
+   printf("%s (%d dwords):\n", name, len);
+   printf("\t0x%08x\n",  ptr[j++]);
 
    {
-      PRINTF("\t0x%08x\n",  ptr[j]);
+      printf("\t0x%08x\n",  ptr[j]);
       BITS(ptr[j], 28, 28, "aux buffer id");
       BITS(ptr[j], 27, 24, "buffer id (7=depth, 3=back)");
       FLAG(ptr[j], 23,     "use fence regs");
@@ -665,7 +663,7 @@ static GLboolean debug_buf_info( struct debug_stream *stream,
       j++;
    }
    
-   PRINTF("\t0x%08x -- buffer base address\n",  ptr[j++]);
+   printf("\t0x%08x -- buffer base address\n",  ptr[j++]);
 
    stream->offset += len * sizeof(GLuint);
    assert(j == len);
@@ -826,7 +824,7 @@ i915_dump_batchbuffer( GLuint *start,
    GLuint bytes = (end - start) * 4;
    GLboolean done = GL_FALSE;
 
-   PRINTF("\n\nBATCH: (%d)\n", bytes / 4);
+   printf("\n\nBATCH: (%d)\n", bytes / 4);
 
    stream.offset = 0;
    stream.ptr = (char *)start;
@@ -843,7 +841,7 @@ i915_dump_batchbuffer( GLuint *start,
 	     stream.offset >= 0);
    }
 
-   PRINTF("END-BATCH\n\n\n");
+   printf("END-BATCH\n\n\n");
 }
 
 
diff --git a/i915/i915_debug_fp.c b/i915/i915_debug_fp.c
index 84347a0..adfc9e8 100644
--- a/i915/i915_debug_fp.c
+++ b/i915/i915_debug_fp.c
@@ -30,11 +30,6 @@
 #include "i915_reg.h"
 #include "i915_debug.h"
 #include "main/imports.h"
-#include "shader/program.h"
-#include "shader/prog_instruction.h"
-#include "shader/prog_print.h"
-
-#define PRINTF( ... ) _mesa_printf( __VA_ARGS__ )
 
 static const char *opcodes[0x20] = {
    "NOP",
@@ -126,27 +121,27 @@ print_reg_type_nr(GLuint type, GLuint nr)
    case REG_TYPE_T:
       switch (nr) {
       case T_DIFFUSE:
-         PRINTF("T_DIFFUSE");
+         printf("T_DIFFUSE");
          return;
       case T_SPECULAR:
-         PRINTF("T_SPECULAR");
+         printf("T_SPECULAR");
          return;
       case T_FOG_W:
-         PRINTF("T_FOG_W");
+         printf("T_FOG_W");
          return;
       default:
-         PRINTF("T_TEX%d", nr);
+         printf("T_TEX%d", nr);
          return;
       }
    case REG_TYPE_OC:
       if (nr == 0) {
-         PRINTF("oC");
+         printf("oC");
          return;
       }
       break;
    case REG_TYPE_OD:
       if (nr == 0) {
-         PRINTF("oD");
+         printf("oD");
          return;
       }
       break;
@@ -154,7 +149,7 @@ print_reg_type_nr(GLuint type, GLuint nr)
       break;
    }
 
-   PRINTF("%s[%d]", regname[type], nr);
+   printf("%s[%d]", regname[type], nr);
 }
 
 #define REG_SWIZZLE_MASK 0x7777
@@ -175,33 +170,33 @@ print_reg_neg_swizzle(GLuint reg)
        (reg & REG_NEGATE_MASK) == 0)
       return;
 
-   PRINTF(".");
+   printf(".");
 
    for (i = 3; i >= 0; i--) {
       if (reg & (1 << ((i * 4) + 3)))
-         PRINTF("-");
+         printf("-");
 
       switch ((reg >> (i * 4)) & 0x7) {
       case 0:
-         PRINTF("x");
+         printf("x");
          break;
       case 1:
-         PRINTF("y");
+         printf("y");
          break;
       case 2:
-         PRINTF("z");
+         printf("z");
          break;
       case 3:
-         PRINTF("w");
+         printf("w");
          break;
       case 4:
-         PRINTF("0");
+         printf("0");
          break;
       case 5:
-         PRINTF("1");
+         printf("1");
          break;
       default:
-         PRINTF("?");
+         printf("?");
          break;
       }
    }
@@ -226,15 +221,15 @@ print_dest_reg(GLuint dword)
    print_reg_type_nr(type, nr);
    if ((dword & A0_DEST_CHANNEL_ALL) == A0_DEST_CHANNEL_ALL)
       return;
-   PRINTF(".");
+   printf(".");
    if (dword & A0_DEST_CHANNEL_X)
-      PRINTF("x");
+      printf("x");
    if (dword & A0_DEST_CHANNEL_Y)
-      PRINTF("y");
+      printf("y");
    if (dword & A0_DEST_CHANNEL_Z)
-      PRINTF("z");
+      printf("z");
    if (dword & A0_DEST_CHANNEL_W)
-      PRINTF("w");
+      printf("w");
 }
 
 
@@ -249,29 +244,29 @@ print_arith_op(GLuint opcode, const GLuint * program)
    if (opcode != A0_NOP) {
       print_dest_reg(program[0]);
       if (program[0] & A0_DEST_SATURATE)
-         PRINTF(" = SATURATE ");
+         printf(" = SATURATE ");
       else
-         PRINTF(" = ");
+         printf(" = ");
    }
 
-   PRINTF("%s ", opcodes[opcode]);
+   printf("%s ", opcodes[opcode]);
 
    print_src_reg(GET_SRC0_REG(program[0], program[1]));
    if (args[opcode] == 1) {
-      PRINTF("\n");
+      printf("\n");
       return;
    }
 
-   PRINTF(", ");
+   printf(", ");
    print_src_reg(GET_SRC1_REG(program[1], program[2]));
    if (args[opcode] == 2) {
-      PRINTF("\n");
+      printf("\n");
       return;
    }
 
-   PRINTF(", ");
+   printf(", ");
    print_src_reg(GET_SRC2_REG(program[2]));
-   PRINTF("\n");
+   printf("\n");
    return;
 }
 
@@ -280,24 +275,24 @@ static void
 print_tex_op(GLuint opcode, const GLuint * program)
 {
    print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL);
-   PRINTF(" = ");
+   printf(" = ");
 
-   PRINTF("%s ", opcodes[opcode]);
+   printf("%s ", opcodes[opcode]);
 
-   PRINTF("S[%d],", program[0] & T0_SAMPLER_NR_MASK);
+   printf("S[%d],", program[0] & T0_SAMPLER_NR_MASK);
 
    print_reg_type_nr((program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) &
                      REG_TYPE_MASK,
                      (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK);
-   PRINTF("\n");
+   printf("\n");
 }
 
 static void
 print_dcl_op(GLuint opcode, const GLuint * program)
 {
-   PRINTF("%s ", opcodes[opcode]);
+   printf("%s ", opcodes[opcode]);
    print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL);
-   PRINTF("\n");
+   printf("\n");
 }
 
 
@@ -307,7 +302,7 @@ i915_disassemble_program(const GLuint * program, GLuint sz)
    GLuint size = program[0] & 0x1ff;
    GLint i;
 
-   PRINTF("\t\tBEGIN\n");
+   printf("\t\tBEGIN\n");
 
    assert(size + 2 == sz);
 
@@ -315,7 +310,7 @@ i915_disassemble_program(const GLuint * program, GLuint sz)
    for (i = 1; i < sz; i += 3, program += 3) {
       GLuint opcode = program[0] & (0x1f << 24);
 
-      PRINTF("\t\t");
+      printf("\t\t");
 
       if ((GLint) opcode >= A0_NOP && opcode <= A0_SLT)
          print_arith_op(opcode >> 24, program);
@@ -324,10 +319,10 @@ i915_disassemble_program(const GLuint * program, GLuint sz)
       else if (opcode == D0_DCL)
          print_dcl_op(opcode >> 24, program);
       else
-         PRINTF("Unknown opcode 0x%x\n", opcode);
+         printf("Unknown opcode 0x%x\n", opcode);
    }
 
-   PRINTF("\t\tEND\n\n");
+   printf("\t\tEND\n\n");
 }
 
 
diff --git a/i915/i915_fragprog.c b/i915/i915_fragprog.c
index d9c6144..15e3b87 100644
--- a/i915/i915_fragprog.c
+++ b/i915/i915_fragprog.c
@@ -663,7 +663,7 @@ upload_program(struct i915_fragment_program *p)
 			 A0_MOV,
 			 get_result_vector(p, inst),
 			 get_result_flags(inst), 0,
-			 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0);
+			 swizzle(tmp, ZERO, ZERO, ZERO, ZERO), 0, 0);
 
       case OPCODE_POW:
          src0 = src_vector(p, &inst->SrcReg[0], program);
@@ -1205,7 +1205,7 @@ i915IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
       return GL_TRUE;
 }
 
-static void
+static GLboolean
 i915ProgramStringNotify(GLcontext * ctx,
                         GLenum target, struct gl_program *prog)
 {
@@ -1223,7 +1223,10 @@ i915ProgramStringNotify(GLcontext * ctx,
       }
    }
 
-   _tnl_program_string(ctx, target, prog);
+   (void) _tnl_program_string(ctx, target, prog);
+
+   /* XXX check if program is legal, within limits */
+   return GL_TRUE;
 }
 
 void
@@ -1301,7 +1304,7 @@ i915ValidateFragmentProgram(struct i915_context *i915)
 
    for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
       if (inputsRead & FRAG_BIT_TEX(i)) {
-         int sz = VB->TexCoordPtr[i]->size;
+         int sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size;
 
          s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
          s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
diff --git a/i915/i915_metaops.c b/i915/i915_metaops.c
deleted file mode 100644
index 90a78c6..0000000
--- a/i915/i915_metaops.c
+++ /dev/null
@@ -1,507 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "main/glheader.h"
-#include "main/enums.h"
-#include "main/mtypes.h"
-#include "main/macros.h"
-#include "utils.h"
-
-#include "intel_screen.h"
-#include "intel_batchbuffer.h"
-#include "intel_regions.h"
-
-#include "i915_context.h"
-#include "i915_reg.h"
-
-/* We touch almost everything:
- */
-#define ACTIVE (I915_UPLOAD_INVARIENT | 	\
-		I915_UPLOAD_CTX |		\
-		I915_UPLOAD_BUFFERS |		\
-		I915_UPLOAD_STIPPLE |		\
-                I915_UPLOAD_PROGRAM | 		\
-                I915_UPLOAD_FOG | 		\
-		I915_UPLOAD_TEX(0))
-
-#define SET_STATE( i915, STATE )		\
-do {						\
-   i915->current->emitted &= ~ACTIVE;		\
-   i915->current = &i915->STATE;		\
-   i915->current->emitted &= ~ACTIVE;		\
-} while (0)
-
-
-static void
-meta_no_stencil_write(struct intel_context *intel)
-{
-   struct i915_context *i915 = i915_context(&intel->ctx);
-
-   /* ctx->Driver.Enable( ctx, GL_STENCIL_TEST, GL_FALSE )
-    */
-   i915->meta.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_TEST_ENABLE |
-                                         S5_STENCIL_WRITE_ENABLE);
-
-   i915->meta.emitted &= ~I915_UPLOAD_CTX;
-}
-
-static void
-meta_no_depth_write(struct intel_context *intel)
-{
-   struct i915_context *i915 = i915_context(&intel->ctx);
-
-   /* ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_FALSE )
-    */
-   i915->meta.Ctx[I915_CTXREG_LIS6] &= ~(S6_DEPTH_TEST_ENABLE |
-                                         S6_DEPTH_WRITE_ENABLE);
-
-   i915->meta.emitted &= ~I915_UPLOAD_CTX;
-}
-
-static void
-meta_depth_replace(struct intel_context *intel)
-{
-   struct i915_context *i915 = i915_context(&intel->ctx);
-
-   /* ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_TRUE )
-    * ctx->Driver.DepthMask( ctx, GL_TRUE )
-    */
-   i915->meta.Ctx[I915_CTXREG_LIS6] |= (S6_DEPTH_TEST_ENABLE |
-                                        S6_DEPTH_WRITE_ENABLE);
-
-   /* ctx->Driver.DepthFunc( ctx, GL_ALWAYS )
-    */
-   i915->meta.Ctx[I915_CTXREG_LIS6] &= ~S6_DEPTH_TEST_FUNC_MASK;
-   i915->meta.Ctx[I915_CTXREG_LIS6] |=
-      COMPAREFUNC_ALWAYS << S6_DEPTH_TEST_FUNC_SHIFT;
-
-   i915->meta.emitted &= ~I915_UPLOAD_CTX;
-}
-
-
-/* Set stencil unit to replace always with the reference value.
- */
-static void
-meta_stencil_replace(struct intel_context *intel,
-                     GLuint s_mask, GLuint s_clear)
-{
-   struct i915_context *i915 = i915_context(&intel->ctx);
-   GLuint op = STENCILOP_REPLACE;
-   GLuint func = COMPAREFUNC_ALWAYS;
-
-   /* ctx->Driver.Enable( ctx, GL_STENCIL_TEST, GL_TRUE )
-    */
-   i915->meta.Ctx[I915_CTXREG_LIS5] |= (S5_STENCIL_TEST_ENABLE |
-                                        S5_STENCIL_WRITE_ENABLE);
-
-   /* ctx->Driver.StencilMask( ctx, s_mask )
-    */
-   i915->meta.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_WRITE_MASK;
-
-   i915->meta.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_WRITE_MASK |
-                                          STENCIL_WRITE_MASK(s_mask));
-
-   /* ctx->Driver.StencilOp( ctx, GL_REPLACE, GL_REPLACE, GL_REPLACE )
-    */
-   i915->meta.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_FAIL_MASK |
-                                         S5_STENCIL_PASS_Z_FAIL_MASK |
-                                         S5_STENCIL_PASS_Z_PASS_MASK);
-
-   i915->meta.Ctx[I915_CTXREG_LIS5] |= ((op << S5_STENCIL_FAIL_SHIFT) |
-                                        (op << S5_STENCIL_PASS_Z_FAIL_SHIFT) |
-                                        (op << S5_STENCIL_PASS_Z_PASS_SHIFT));
-
-
-   /* ctx->Driver.StencilFunc( ctx, GL_ALWAYS, s_ref, ~0 )
-    */
-   i915->meta.Ctx[I915_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_TEST_MASK;
-   i915->meta.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_TEST_MASK |
-                                          STENCIL_TEST_MASK(0xff));
-
-   i915->meta.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_REF_MASK |
-                                         S5_STENCIL_TEST_FUNC_MASK);
-
-   i915->meta.Ctx[I915_CTXREG_LIS5] |= ((s_clear << S5_STENCIL_REF_SHIFT) |
-                                        (func << S5_STENCIL_TEST_FUNC_SHIFT));
-
-
-   i915->meta.emitted &= ~I915_UPLOAD_CTX;
-}
-
-
-static void
-meta_color_mask(struct intel_context *intel, GLboolean state)
-{
-   struct i915_context *i915 = i915_context(&intel->ctx);
-   const GLuint mask = (S5_WRITEDISABLE_RED |
-                        S5_WRITEDISABLE_GREEN |
-                        S5_WRITEDISABLE_BLUE | S5_WRITEDISABLE_ALPHA);
-
-   /* Copy colormask state from "regular" hw context.
-    */
-   if (state) {
-      i915->meta.Ctx[I915_CTXREG_LIS5] &= ~mask;
-      i915->meta.Ctx[I915_CTXREG_LIS5] |=
-         (i915->state.Ctx[I915_CTXREG_LIS5] & mask);
-   }
-   else
-      i915->meta.Ctx[I915_CTXREG_LIS5] |= mask;
-
-   i915->meta.emitted &= ~I915_UPLOAD_CTX;
-}
-
-
-
-static void
-meta_import_pixel_state(struct intel_context *intel)
-{
-   struct i915_context *i915 = i915_context(&intel->ctx);
-   memcpy(i915->meta.Fog, i915->state.Fog, I915_FOG_SETUP_SIZE * 4);
-
-   i915->meta.Ctx[I915_CTXREG_LIS5] = i915->state.Ctx[I915_CTXREG_LIS5];
-   i915->meta.Ctx[I915_CTXREG_LIS6] = i915->state.Ctx[I915_CTXREG_LIS6];
-   i915->meta.Ctx[I915_CTXREG_STATE4] = i915->state.Ctx[I915_CTXREG_STATE4];
-   i915->meta.Ctx[I915_CTXREG_BLENDCOLOR1] =
-      i915->state.Ctx[I915_CTXREG_BLENDCOLOR1];
-   i915->meta.Ctx[I915_CTXREG_IAB] = i915->state.Ctx[I915_CTXREG_IAB];
-
-   i915->meta.Buffer[I915_DESTREG_SENABLE] =
-      i915->state.Buffer[I915_DESTREG_SENABLE];
-   i915->meta.Buffer[I915_DESTREG_SR1] = i915->state.Buffer[I915_DESTREG_SR1];
-   i915->meta.Buffer[I915_DESTREG_SR2] = i915->state.Buffer[I915_DESTREG_SR2];
-
-   i915->meta.emitted &= ~I915_UPLOAD_FOG;
-   i915->meta.emitted &= ~I915_UPLOAD_BUFFERS;
-   i915->meta.emitted &= ~I915_UPLOAD_CTX;
-}
-
-
-
-
-#define REG( type, nr ) (((type)<<5)|(nr))
-
-#define REG_R(x)       REG(REG_TYPE_R, x)
-#define REG_T(x)       REG(REG_TYPE_T, x)
-#define REG_CONST(x)   REG(REG_TYPE_CONST, x)
-#define REG_S(x)       REG(REG_TYPE_S, x)
-#define REG_OC         REG(REG_TYPE_OC, 0)
-#define REG_OD	       REG(REG_TYPE_OD, 0)
-#define REG_U(x)       REG(REG_TYPE_U, x)
-
-#define REG_T_DIFFUSE  REG(REG_TYPE_T, T_DIFFUSE)
-#define REG_T_SPECULAR REG(REG_TYPE_T, T_SPECULAR)
-#define REG_T_FOG_W    REG(REG_TYPE_T, T_FOG_W)
-#define REG_T_TEX(x)   REG(REG_TYPE_T, x)
-
-
-#define A0_DEST_REG( reg ) ( (reg) << A0_DEST_NR_SHIFT )
-#define A0_SRC0_REG( reg ) ( (reg) << A0_SRC0_NR_SHIFT )
-#define A1_SRC1_REG( reg ) ( (reg) << A1_SRC1_NR_SHIFT )
-#define A1_SRC2_REG( reg ) ( (reg) << A1_SRC2_NR_SHIFT )
-#define A2_SRC2_REG( reg ) ( (reg) << A2_SRC2_NR_SHIFT )
-#define D0_DECL_REG( reg ) ( (reg) << D0_NR_SHIFT )
-#define T0_DEST_REG( reg ) ( (reg) << T0_DEST_NR_SHIFT )
-
-#define T0_SAMPLER( unit )     ((unit)<<T0_SAMPLER_NR_SHIFT)
-
-#define T1_ADDRESS_REG( type, nr ) (((type)<<T1_ADDRESS_REG_TYPE_SHIFT)| \
-				    ((nr)<<T1_ADDRESS_REG_NR_SHIFT))
-
-
-#define A1_SRC0_XYZW ((SRC_X << A1_SRC0_CHANNEL_X_SHIFT) |	\
-		      (SRC_Y << A1_SRC0_CHANNEL_Y_SHIFT) |	\
-		      (SRC_Z << A1_SRC0_CHANNEL_Z_SHIFT) |	\
-		      (SRC_W << A1_SRC0_CHANNEL_W_SHIFT))
-
-#define A1_SRC1_XY   ((SRC_X << A1_SRC1_CHANNEL_X_SHIFT) |	\
-		      (SRC_Y << A1_SRC1_CHANNEL_Y_SHIFT))
-
-#define A2_SRC1_ZW   ((SRC_Z << A2_SRC1_CHANNEL_Z_SHIFT) |	\
-		      (SRC_W << A2_SRC1_CHANNEL_W_SHIFT))
-
-#define A2_SRC2_XYZW ((SRC_X << A2_SRC2_CHANNEL_X_SHIFT) |	\
-		      (SRC_Y << A2_SRC2_CHANNEL_Y_SHIFT) |	\
-		      (SRC_Z << A2_SRC2_CHANNEL_Z_SHIFT) |	\
-		      (SRC_W << A2_SRC2_CHANNEL_W_SHIFT))
-
-
-
-
-
-static void
-meta_no_texture(struct intel_context *intel)
-{
-   struct i915_context *i915 = i915_context(&intel->ctx);
-
-   static const GLuint prog[] = {
-      _3DSTATE_PIXEL_SHADER_PROGRAM,
-
-      /* Declare incoming diffuse color:
-       */
-      (D0_DCL | D0_DECL_REG(REG_T_DIFFUSE) | D0_CHANNEL_ALL),
-      D1_MBZ,
-      D2_MBZ,
-
-      /* output-color = mov(t_diffuse)
-       */
-      (A0_MOV |
-       A0_DEST_REG(REG_OC) |
-       A0_DEST_CHANNEL_ALL | A0_SRC0_REG(REG_T_DIFFUSE)),
-      (A1_SRC0_XYZW),
-      0,
-   };
-
-
-   memcpy(i915->meta.Program, prog, sizeof(prog));
-   i915->meta.ProgramSize = sizeof(prog) / sizeof(*prog);
-   i915->meta.Program[0] |= i915->meta.ProgramSize - 2;
-   i915->meta.emitted &= ~I915_UPLOAD_PROGRAM;
-}
-
-static void
-meta_texture_blend_replace(struct intel_context *intel)
-{
-   struct i915_context *i915 = i915_context(&intel->ctx);
-
-   static const GLuint prog[] = {
-      _3DSTATE_PIXEL_SHADER_PROGRAM,
-
-      /* Declare the sampler:
-       */
-      (D0_DCL | D0_DECL_REG(REG_S(0)) | D0_SAMPLE_TYPE_2D | D0_CHANNEL_NONE),
-      D1_MBZ,
-      D2_MBZ,
-
-      /* Declare the interpolated texture coordinate:
-       */
-      (D0_DCL | D0_DECL_REG(REG_T_TEX(0)) | D0_CHANNEL_ALL),
-      D1_MBZ,
-      D2_MBZ,
-
-      /* output-color = texld(sample0, texcoord0) 
-       */
-      (T0_TEXLD | T0_DEST_REG(REG_OC) | T0_SAMPLER(0)),
-      T1_ADDRESS_REG(REG_TYPE_T, 0),
-      T2_MBZ
-   };
-
-   memcpy(i915->meta.Program, prog, sizeof(prog));
-   i915->meta.ProgramSize = sizeof(prog) / sizeof(*prog);
-   i915->meta.Program[0] |= i915->meta.ProgramSize - 2;
-   i915->meta.emitted &= ~I915_UPLOAD_PROGRAM;
-}
-
-
-
-
-
-/* Set up an arbitary piece of memory as a rectangular texture
- * (including the front or back buffer).
- */
-static GLboolean
-meta_tex_rect_source(struct intel_context *intel,
-                     dri_bo *buffer,
-                     GLuint offset,
-                     GLuint pitch, GLuint height, GLenum format, GLenum type)
-{
-   struct i915_context *i915 = i915_context(&intel->ctx);
-   GLuint unit = 0;
-   GLint numLevels = 1;
-   GLuint *state = i915->meta.Tex[0];
-   GLuint textureFormat;
-   GLuint cpp;
-
-   /* A full implementation of this would do the upload through
-    * glTexImage2d, and get all the conversion operations at that
-    * point.  We are restricted, but still at least have access to the
-    * fragment program swizzle.
-    */
-   switch (format) {
-   case GL_BGRA:
-      switch (type) {
-      case GL_UNSIGNED_INT_8_8_8_8_REV:
-      case GL_UNSIGNED_BYTE:
-         textureFormat = (MAPSURF_32BIT | MT_32BIT_ARGB8888);
-         cpp = 4;
-         break;
-      default:
-         return GL_FALSE;
-      }
-      break;
-   case GL_RGBA:
-      switch (type) {
-      case GL_UNSIGNED_INT_8_8_8_8_REV:
-      case GL_UNSIGNED_BYTE:
-         textureFormat = (MAPSURF_32BIT | MT_32BIT_ABGR8888);
-         cpp = 4;
-         break;
-      default:
-         return GL_FALSE;
-      }
-      break;
-   case GL_BGR:
-      switch (type) {
-      case GL_UNSIGNED_SHORT_5_6_5_REV:
-         textureFormat = (MAPSURF_16BIT | MT_16BIT_RGB565);
-         cpp = 2;
-         break;
-      default:
-         return GL_FALSE;
-      }
-      break;
-   case GL_RGB:
-      switch (type) {
-      case GL_UNSIGNED_SHORT_5_6_5:
-         textureFormat = (MAPSURF_16BIT | MT_16BIT_RGB565);
-         cpp = 2;
-         break;
-      default:
-         return GL_FALSE;
-      }
-      break;
-
-   default:
-      return GL_FALSE;
-   }
-
-
-   if ((pitch * cpp) & 3) {
-      _mesa_printf("%s: texture is not dword pitch\n", __FUNCTION__);
-      return GL_FALSE;
-   }
-
-/*    intel_region_release(&i915->meta.tex_region[0]); */
-/*    intel_region_reference(&i915->meta.tex_region[0], region); */
-   i915->meta.tex_buffer[0] = buffer;
-   i915->meta.tex_offset[0] = offset;
-
-   state[I915_TEXREG_MS3] = (((height - 1) << MS3_HEIGHT_SHIFT) |
-                             ((pitch - 1) << MS3_WIDTH_SHIFT) |
-                             textureFormat | MS3_USE_FENCE_REGS);
-
-   state[I915_TEXREG_MS4] = (((((pitch * cpp) / 4) - 1) << MS4_PITCH_SHIFT) |
-                             MS4_CUBE_FACE_ENA_MASK |
-                             ((((numLevels - 1) * 4)) << MS4_MAX_LOD_SHIFT));
-
-   state[I915_TEXREG_SS2] = ((FILTER_NEAREST << SS2_MIN_FILTER_SHIFT) |
-                             (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) |
-                             (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT));
-
-   state[I915_TEXREG_SS3] = ((TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT) |
-                             (TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT) |
-                             (TEXCOORDMODE_WRAP << SS3_TCZ_ADDR_MODE_SHIFT) |
-                             (unit << SS3_TEXTUREMAP_INDEX_SHIFT));
-
-   state[I915_TEXREG_SS4] = 0;
-
-   i915->meta.emitted &= ~I915_UPLOAD_TEX(0);
-   return GL_TRUE;
-}
-
-
-/**
- * Set the color and depth drawing region for meta ops.
- */
-static void
-meta_draw_region(struct intel_context *intel,
-                 struct intel_region *color_region,
-                 struct intel_region *depth_region)
-{
-   struct i915_context *i915 = i915_context(&intel->ctx);
-   i915_state_draw_region(intel, &i915->meta, color_region, depth_region);
-}
-
-
-static void
-set_vertex_format(struct intel_context *intel)
-{
-   struct i915_context *i915 = i915_context(&intel->ctx);
-
-   i915->meta.Ctx[I915_CTXREG_LIS2] =
-      (S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
-       S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
-       S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
-       S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
-       S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
-       S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
-       S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
-       S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
-
-   i915->meta.Ctx[I915_CTXREG_LIS4] &= ~S4_VFMT_MASK;
-
-   i915->meta.Ctx[I915_CTXREG_LIS4] |= (S4_VFMT_COLOR | S4_VFMT_XYZ);
-
-   i915->meta.emitted &= ~I915_UPLOAD_CTX;
-}
-
-
-
-/* Operations where the 3D engine is decoupled temporarily from the
- * current GL state and used for other purposes than simply rendering
- * incoming triangles.
- */
-static void
-install_meta_state(struct intel_context *intel)
-{
-   struct i915_context *i915 = i915_context(&intel->ctx);
-   memcpy(&i915->meta, &i915->initial, sizeof(i915->meta));
-   i915->meta.active = ACTIVE;
-   i915->meta.emitted = 0;
-
-   SET_STATE(i915, meta);
-   set_vertex_format(intel);
-   meta_no_texture(intel);
-}
-
-static void
-leave_meta_state(struct intel_context *intel)
-{
-   struct i915_context *i915 = i915_context(&intel->ctx);
-   intel_region_release(&i915->meta.draw_region);
-   intel_region_release(&i915->meta.depth_region);
-/*    intel_region_release(&i915->meta.tex_region[0]); */
-   SET_STATE(i915, state);
-}
-
-
-
-void
-i915InitMetaFuncs(struct i915_context *i915)
-{
-   i915->intel.vtbl.install_meta_state = install_meta_state;
-   i915->intel.vtbl.leave_meta_state = leave_meta_state;
-   i915->intel.vtbl.meta_no_depth_write = meta_no_depth_write;
-   i915->intel.vtbl.meta_no_stencil_write = meta_no_stencil_write;
-   i915->intel.vtbl.meta_stencil_replace = meta_stencil_replace;
-   i915->intel.vtbl.meta_depth_replace = meta_depth_replace;
-   i915->intel.vtbl.meta_color_mask = meta_color_mask;
-   i915->intel.vtbl.meta_no_texture = meta_no_texture;
-   i915->intel.vtbl.meta_texture_blend_replace = meta_texture_blend_replace;
-   i915->intel.vtbl.meta_tex_rect_source = meta_tex_rect_source;
-   i915->intel.vtbl.meta_draw_region = meta_draw_region;
-   i915->intel.vtbl.meta_import_pixel_state = meta_import_pixel_state;
-}
diff --git a/i915/i915_program.c b/i915/i915_program.c
index e7908bd..3902c69 100644
--- a/i915/i915_program.c
+++ b/i915/i915_program.c
@@ -245,7 +245,7 @@ GLuint i915_emit_texld( struct i915_fragment_program *p,
    }
    else {
       assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);
-      assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)));
+      assert(dest == UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)));
       /* Can't use unsaved temps for coords, as the phase boundary would result
        * in the contents becoming undefined.
        */
diff --git a/i915/i915_state.c b/i915/i915_state.c
index cc98d12..7275617 100644
--- a/i915/i915_state.c
+++ b/i915/i915_state.c
@@ -571,7 +571,7 @@ i915LineWidth(GLcontext * ctx, GLfloat widthf)
    DBG("%s\n", __FUNCTION__);
    
    width = (int) (widthf * 2);
-   CLAMP_SELF(width, 1, 0xf);
+   width = CLAMP(width, 1, 0xf);
    lis4 |= width << S4_LINE_WIDTH_SHIFT;
 
    if (lis4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
@@ -589,7 +589,7 @@ i915PointSize(GLcontext * ctx, GLfloat size)
 
    DBG("%s\n", __FUNCTION__);
    
-   CLAMP_SELF(point_size, 1, 255);
+   point_size = CLAMP(point_size, 1, 255);
    lis4 |= point_size << S4_POINT_WIDTH_SHIFT;
 
    if (lis4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
@@ -1157,7 +1157,4 @@ i915InitState(struct i915_context *i915)
    i915_init_packets(i915);
 
    _mesa_init_driver_state(ctx);
-
-   memcpy(&i915->initial, &i915->state, sizeof(i915->state));
-   i915->current = &i915->state;
 }
diff --git a/i915/i915_tex_layout.c b/i915/i915_tex_layout.c
index d9588e5..fe3908f 100644
--- a/i915/i915_tex_layout.c
+++ b/i915/i915_tex_layout.c
@@ -145,8 +145,8 @@ i915_miptree_layout_cube(struct intel_context *intel,
 	 intel_miptree_set_image_offset(mt, level, face, x, y);
 
 	 if (d == 0)
-	    _mesa_printf("cube mipmap %d/%d (%d..%d) is 0x0\n",
-			 face, level, mt->first_level, mt->last_level);
+	    printf("cube mipmap %d/%d (%d..%d) is 0x0\n",
+		   face, level, mt->first_level, mt->last_level);
 
 	 d >>= 1;
 	 x += step_offsets[face][0] * d;
diff --git a/i915/i915_texstate.c b/i915/i915_texstate.c
index de25848..a1ab8f8 100644
--- a/i915/i915_texstate.c
+++ b/i915/i915_texstate.c
@@ -28,6 +28,7 @@
 #include "main/mtypes.h"
 #include "main/enums.h"
 #include "main/macros.h"
+#include "main/colormac.h"
 
 #include "intel_mipmap_tree.h"
 #include "intel_tex.h"
@@ -149,7 +150,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
        i915->state.tex_buffer[unit] = NULL;
    }
 
-   if (!intelObj->imageOverride && !intel_finalize_mipmap_tree(intel, unit))
+   if (!intel_finalize_mipmap_tree(intel, unit))
       return GL_FALSE;
 
    /* Get first image here, since intelObj->firstLevel will get set in
@@ -157,34 +158,14 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
     */
    firstImage = tObj->Image[0][intelObj->firstLevel];
 
-   if (intelObj->imageOverride) {
-      i915->state.tex_buffer[unit] = NULL;
-      i915->state.tex_offset[unit] = intelObj->textureOffset;
+   dri_bo_reference(intelObj->mt->region->buffer);
+   i915->state.tex_buffer[unit] = intelObj->mt->region->buffer;
+   i915->state.tex_offset[unit] = 0; /* Always the origin of the miptree */
 
-      switch (intelObj->depthOverride) {
-      case 32:
-	 format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
-	 break;
-      case 24:
-      default:
-	 format = MAPSURF_32BIT | MT_32BIT_XRGB8888;
-	 break;
-      case 16:
-	 format = MAPSURF_16BIT | MT_16BIT_RGB565;
-	 break;
-      }
-
-      pitch = intelObj->pitchOverride;
-   } else {
-      dri_bo_reference(intelObj->mt->region->buffer);
-      i915->state.tex_buffer[unit] = intelObj->mt->region->buffer;
-      i915->state.tex_offset[unit] = 0; /* Always the origin of the miptree */
-
-      format = translate_texture_format(firstImage->TexFormat,
-					firstImage->InternalFormat,
-					tObj->DepthMode);
-      pitch = intelObj->mt->pitch * intelObj->mt->cpp;
-   }
+   format = translate_texture_format(firstImage->TexFormat,
+				     firstImage->InternalFormat,
+				     tObj->DepthMode);
+   pitch = intelObj->mt->pitch * intelObj->mt->cpp;
 
    state[I915_TEXREG_MS3] =
       (((firstImage->Height - 1) << MS3_HEIGHT_SHIFT) |
@@ -196,10 +177,11 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
 	 state[I915_TEXREG_MS3] |= MS3_TILE_WALK;
    }
 
-   /* We get one field with fraction bits to cover the maximum addressable (smallest
-    * resolution) LOD.  Use it to cover both MAX_LEVEL and MAX_LOD.
+   /* We get one field with fraction bits for the maximum addressable
+    * (lowest resolution) LOD.  Use it to cover both MAX_LEVEL and
+    * MAX_LOD.
     */
-   maxlod = MIN2(tObj->MaxLod, tObj->MaxLevel - tObj->BaseLevel);
+   maxlod = MIN2(tObj->MaxLod, tObj->_MaxLevel - tObj->BaseLevel);
    state[I915_TEXREG_MS4] =
       ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) |
        MS4_CUBE_FACE_ENA_MASK |
@@ -346,25 +328,25 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
    }
 
    /* convert border color from float to ubyte */
-   CLAMPED_FLOAT_TO_UBYTE(border[0], tObj->BorderColor[0]);
-   CLAMPED_FLOAT_TO_UBYTE(border[1], tObj->BorderColor[1]);
-   CLAMPED_FLOAT_TO_UBYTE(border[2], tObj->BorderColor[2]);
-   CLAMPED_FLOAT_TO_UBYTE(border[3], tObj->BorderColor[3]);
+   CLAMPED_FLOAT_TO_UBYTE(border[0], tObj->BorderColor.f[0]);
+   CLAMPED_FLOAT_TO_UBYTE(border[1], tObj->BorderColor.f[1]);
+   CLAMPED_FLOAT_TO_UBYTE(border[2], tObj->BorderColor.f[2]);
+   CLAMPED_FLOAT_TO_UBYTE(border[3], tObj->BorderColor.f[3]);
 
    if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
       /* GL specs that border color for depth textures is taken from the
        * R channel, while the hardware uses A.  Spam R into all the channels
        * for safety.
        */
-      state[I915_TEXREG_SS4] = INTEL_PACKCOLOR8888(border[0],
-						   border[0],
-						   border[0],
-						   border[0]);
+      state[I915_TEXREG_SS4] = PACK_COLOR_8888(border[0],
+					       border[0],
+					       border[0],
+					       border[0]);
    } else {
-      state[I915_TEXREG_SS4] = INTEL_PACKCOLOR8888(border[0],
-						   border[1],
-						   border[2],
-						   border[3]);
+      state[I915_TEXREG_SS4] = PACK_COLOR_8888(border[3],
+					       border[0],
+					       border[1],
+					       border[2]);
    }
 
 
diff --git a/i915/i915_vtbl.c b/i915/i915_vtbl.c
index ff97e5a..0a93e64 100644
--- a/i915/i915_vtbl.c
+++ b/i915/i915_vtbl.c
@@ -37,17 +37,13 @@
 #include "tnl/t_vertex.h"
 
 #include "intel_batchbuffer.h"
-#include "intel_tex.h"
 #include "intel_regions.h"
 #include "intel_tris.h"
 #include "intel_fbo.h"
-#include "intel_chipset.h"
 
 #include "i915_reg.h"
 #include "i915_context.h"
 
-#include "glapi/glapi.h"
-
 static void
 i915_render_prevalidate(struct intel_context *intel)
 {
@@ -59,6 +55,7 @@ i915_render_prevalidate(struct intel_context *intel)
 static void
 i915_render_start(struct intel_context *intel)
 {
+   intel_prepare_render(intel);
 }
 
 
@@ -100,8 +97,8 @@ static GLboolean
 i915_check_vertex_size(struct intel_context *intel, GLuint expected)
 {
    struct i915_context *i915 = i915_context(&intel->ctx);
-   int lis2 = i915->current->Ctx[I915_CTXREG_LIS2];
-   int lis4 = i915->current->Ctx[I915_CTXREG_LIS4];
+   int lis2 = i915->state.Ctx[I915_CTXREG_LIS2];
+   int lis4 = i915->state.Ctx[I915_CTXREG_LIS4];
    int i, sz = 0;
 
    switch (lis4 & S4_VFMT_XYZW_MASK) {
@@ -174,7 +171,7 @@ i915_emit_invarient_state(struct intel_context *intel)
 {
    BATCH_LOCALS;
 
-   BEGIN_BATCH(17, IGNORE_CLIPRECTS);
+   BEGIN_BATCH(17);
 
    OUT_BATCH(_3DSTATE_AA_CMD |
              AA_LINE_ECAAR_WIDTH_ENABLE |
@@ -220,7 +217,7 @@ i915_emit_invarient_state(struct intel_context *intel)
 
 
 #define emit(intel, state, size )		     \
-   intel_batchbuffer_data(intel->batch, state, size, IGNORE_CLIPRECTS )
+   intel_batchbuffer_data(intel->batch, state, size)
 
 static GLuint
 get_dirty(struct i915_hw_state *state)
@@ -287,7 +284,7 @@ static void
 i915_emit_state(struct intel_context *intel)
 {
    struct i915_context *i915 = i915_context(&intel->ctx);
-   struct i915_hw_state *state = i915->current;
+   struct i915_hw_state *state = &i915->state;
    int i, count, aper_count;
    GLuint dirty;
    dri_bo *aper_array[3 + I915_TEX_UNITS];
@@ -301,13 +298,9 @@ i915_emit_state(struct intel_context *intel)
     * It might be better to talk about explicit places where
     * scheduling is allowed, rather than assume that it is whenever a
     * batchbuffer fills up.
-    *
-    * Set the space as LOOP_CLIPRECTS now, since that's what our primitives
-    * will be emitted under.
     */
    intel_batchbuffer_require_space(intel->batch,
-				   get_state_size(state) + INTEL_PRIM_EMIT_SIZE,
-				   LOOP_CLIPRECTS);
+				   get_state_size(state) + INTEL_PRIM_EMIT_SIZE);
    count = 0;
  again:
    aper_count = 0;
@@ -373,7 +366,7 @@ i915_emit_state(struct intel_context *intel)
    }
 
    if (dirty & I915_UPLOAD_BUFFERS) {
-      GLuint count = 9;
+      GLuint count = 15;
 
       if (INTEL_DEBUG & DEBUG_STATE)
          fprintf(stderr, "I915_UPLOAD_BUFFERS:\n");
@@ -381,22 +374,17 @@ i915_emit_state(struct intel_context *intel)
       if (state->depth_region)
           count += 3;
 
-      if (intel->constant_cliprect)
-          count += 6;
-
-      BEGIN_BATCH(count, IGNORE_CLIPRECTS);
+      BEGIN_BATCH(count);
       OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]);
       OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR1]);
       OUT_RELOC(state->draw_region->buffer,
-		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                state->draw_region->draw_offset);
+		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
 
       if (state->depth_region) {
          OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR0]);
          OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR1]);
          OUT_RELOC(state->depth_region->buffer,
-		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                   state->depth_region->draw_offset);
+		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
       }
 
       OUT_BATCH(state->Buffer[I915_DESTREG_DV0]);
@@ -406,15 +394,13 @@ i915_emit_state(struct intel_context *intel)
       OUT_BATCH(state->Buffer[I915_DESTREG_SR1]);
       OUT_BATCH(state->Buffer[I915_DESTREG_SR2]);
 
-      if (intel->constant_cliprect) {
-	 assert(state->Buffer[I915_DESTREG_DRAWRECT0] != MI_NOOP);
-	 OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT0]);
-	 OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT1]);
-	 OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT2]);
-	 OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT3]);
-	 OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT4]);
-	 OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT5]);
-      }
+      assert(state->Buffer[I915_DESTREG_DRAWRECT0] != MI_NOOP);
+      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT0]);
+      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT1]);
+      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT2]);
+      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT3]);
+      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT4]);
+      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT5]);
 
       ADVANCE_BATCH();
    }
@@ -441,7 +427,7 @@ i915_emit_state(struct intel_context *intel)
          if (dirty & I915_UPLOAD_TEX(i))
             nr++;
 
-      BEGIN_BATCH(2 + nr * 3, IGNORE_CLIPRECTS);
+      BEGIN_BATCH(2 + nr * 3);
       OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
       OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
       for (i = 0; i < I915_TEX_UNITS; i++)
@@ -452,10 +438,6 @@ i915_emit_state(struct intel_context *intel)
 			 I915_GEM_DOMAIN_SAMPLER, 0,
                          state->tex_offset[i]);
             }
-            else if (state == &i915->meta) {
-               assert(i == 0);
-               OUT_BATCH(0);
-            }
             else {
                OUT_BATCH(state->tex_offset[i]);
             }
@@ -465,7 +447,7 @@ i915_emit_state(struct intel_context *intel)
          }
       ADVANCE_BATCH();
 
-      BEGIN_BATCH(2 + nr * 3, IGNORE_CLIPRECTS);
+      BEGIN_BATCH(2 + nr * 3);
       OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * nr));
       OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
       for (i = 0; i < I915_TEX_UNITS; i++)
@@ -509,10 +491,6 @@ i915_destroy_context(struct intel_context *intel)
 
    intel_region_release(&i915->state.draw_region);
    intel_region_release(&i915->state.depth_region);
-   intel_region_release(&i915->meta.draw_region);
-   intel_region_release(&i915->meta.depth_region);
-   intel_region_release(&i915->initial.draw_region);
-   intel_region_release(&i915->initial.depth_region);
 
    for (i = 0; i < I915_TEX_UNITS; i++) {
       if (i915->state.tex_buffer[i] != NULL) {
@@ -542,29 +520,23 @@ i915_set_buf_info_for_region(uint32_t *state, struct intel_region *region,
    }
 }
 
-/**
- * Set the drawing regions for the color and depth/stencil buffers.
- * This involves setting the pitch, cpp and buffer ID/location.
- * Also set pixel format for color and Z rendering
- * Used for setting both regular and meta state.
- */
-void
-i915_state_draw_region(struct intel_context *intel,
-                       struct i915_hw_state *state,
-                       struct intel_region *color_region,
-                       struct intel_region *depth_region)
+static void
+i915_set_draw_region(struct intel_context *intel,
+                     struct intel_region *color_regions[],
+                     struct intel_region *depth_region,
+		     GLuint num_regions)
 {
    struct i915_context *i915 = i915_context(&intel->ctx);
    GLcontext *ctx = &intel->ctx;
    struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
    GLuint value;
+   struct i915_hw_state *state = &i915->state;
+   uint32_t draw_x, draw_y;
 
-   ASSERT(state == &i915->state || state == &i915->meta);
-
-   if (state->draw_region != color_region) {
+   if (state->draw_region != color_regions[0]) {
       intel_region_release(&state->draw_region);
-      intel_region_reference(&state->draw_region, color_region);
+      intel_region_reference(&state->draw_region, color_regions[0]);
    }
    if (state->depth_region != depth_region) {
       intel_region_release(&state->depth_region);
@@ -575,7 +547,7 @@ i915_state_draw_region(struct intel_context *intel,
     * Set stride/cpp values
     */
    i915_set_buf_info_for_region(&state->Buffer[I915_DESTREG_CBUFADDR0],
-				color_region, BUF_3D_ID_COLOR_BACK);
+				color_regions[0], BUF_3D_ID_COLOR_BACK);
 
    i915_set_buf_info_for_region(&state->Buffer[I915_DESTREG_DBUFADDR0],
 				depth_region, BUF_3D_ID_DEPTH);
@@ -611,7 +583,7 @@ i915_state_draw_region(struct intel_context *intel,
     * the value of this bit, the pipeline needs to be MI_FLUSHed.  And it
     * can only be set when a depth buffer is already defined.
     */
-   if (IS_945(intel->intelScreen->deviceID) && intel->use_early_z &&
+   if (intel->is_945 && intel->use_early_z &&
        depth_region->tiling != I915_TILING_NONE)
       value |= CLASSIC_EARLY_DEPTH;
 
@@ -623,36 +595,43 @@ i915_state_draw_region(struct intel_context *intel,
    }
    state->Buffer[I915_DESTREG_DV1] = value;
 
-   if (intel->constant_cliprect) {
-      state->Buffer[I915_DESTREG_DRAWRECT0] = _3DSTATE_DRAWRECT_INFO;
-      state->Buffer[I915_DESTREG_DRAWRECT1] = 0;
-      state->Buffer[I915_DESTREG_DRAWRECT2] = 0; /* xmin, ymin */
-      state->Buffer[I915_DESTREG_DRAWRECT3] =
-	 (ctx->DrawBuffer->Width & 0xffff) |
-	 (ctx->DrawBuffer->Height << 16);
-      state->Buffer[I915_DESTREG_DRAWRECT4] = 0; /* xoff, yoff */
-      state->Buffer[I915_DESTREG_DRAWRECT5] = 0;
+   /* We set up the drawing rectangle to be offset into the color
+    * region's location in the miptree.  If it doesn't match with
+    * depth's offsets, we can't render to it.
+    *
+    * (Well, not actually true -- the hw grew a bit to let depth's
+    * offset get forced to 0,0.  We may want to use that if people are
+    * hitting that case.  Also, some configurations may be supportable
+    * by tweaking the start offset of the buffers around, which we
+    * can't do in general due to tiling)
+    */
+   FALLBACK(intel, I915_FALLBACK_DRAW_OFFSET,
+	    (depth_region && color_regions[0]) &&
+	    (depth_region->draw_x != color_regions[0]->draw_x ||
+	     depth_region->draw_y != color_regions[0]->draw_y));
+
+   if (color_regions[0]) {
+      draw_x = color_regions[0]->draw_x;
+      draw_y = color_regions[0]->draw_y;
+   } else if (depth_region) {
+      draw_x = depth_region->draw_x;
+      draw_y = depth_region->draw_y;
    } else {
-      state->Buffer[I915_DESTREG_DRAWRECT0] = MI_NOOP;
-      state->Buffer[I915_DESTREG_DRAWRECT1] = MI_NOOP;
-      state->Buffer[I915_DESTREG_DRAWRECT2] = MI_NOOP;
-      state->Buffer[I915_DESTREG_DRAWRECT3] = MI_NOOP;
-      state->Buffer[I915_DESTREG_DRAWRECT4] = MI_NOOP;
-      state->Buffer[I915_DESTREG_DRAWRECT5] = MI_NOOP;
+      draw_x = 0;
+      draw_y = 0;
    }
 
-   I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS);
-}
-
+   /* When changing drawing rectangle offset, an MI_FLUSH is first required. */
+   state->Buffer[I915_DESTREG_DRAWRECT0] = MI_FLUSH;
+   state->Buffer[I915_DESTREG_DRAWRECT1] = _3DSTATE_DRAWRECT_INFO;
+   state->Buffer[I915_DESTREG_DRAWRECT2] = 0;
+   state->Buffer[I915_DESTREG_DRAWRECT3] = (draw_y << 16) | draw_x;
+   state->Buffer[I915_DESTREG_DRAWRECT4] =
+      ((ctx->DrawBuffer->Width + draw_x) & 0xffff) |
+      ((ctx->DrawBuffer->Height + draw_y) << 16);
+   state->Buffer[I915_DESTREG_DRAWRECT5] = (draw_y << 16) | draw_x;
 
-static void
-i915_set_draw_region(struct intel_context *intel,
-                     struct intel_region *color_regions[],
-                     struct intel_region *depth_region,
-		     GLuint num_regions)
-{
-   struct i915_context *i915 = i915_context(&intel->ctx);
-   i915_state_draw_region(intel, &i915->state, color_regions[0], depth_region);
+   I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS);
 }
 
 
@@ -667,17 +646,13 @@ i915_new_batch(struct intel_context *intel)
     * difficulties associated with them (physical address requirements).
     */
    i915->state.emitted = 0;
-
-   /* Check that we didn't just wrap our batchbuffer at a bad time. */
-   assert(!intel->no_batch_wrap);
 }
 
 static void 
 i915_assert_not_dirty( struct intel_context *intel )
 {
    struct i915_context *i915 = i915_context(&intel->ctx);
-   struct i915_hw_state *state = i915->current;
-   GLuint dirty = get_dirty(state);
+   GLuint dirty = get_dirty(&i915->state);
    assert(!dirty);
 }
 
diff --git a/i915/intel_render.c b/i915/intel_render.c
index 410052b..ec20939 100644
--- a/i915/intel_render.c
+++ b/i915/intel_render.c
@@ -117,7 +117,7 @@ intelDmaPrimitive(struct intel_context *intel, GLenum prim)
    intel_set_prim(intel, hw_prim[prim]);
 }
 
-static inline GLuint intel_get_vb_max(struct intel_context *intel)
+static INLINE GLuint intel_get_vb_max(struct intel_context *intel)
 {
    GLuint ret;
 
@@ -129,7 +129,7 @@ static inline GLuint intel_get_vb_max(struct intel_context *intel)
    return ret;
 }
 
-static inline GLuint intel_get_current_max(struct intel_context *intel)
+static INLINE GLuint intel_get_current_max(struct intel_context *intel)
 {
 
    if (intel->intelScreen->no_vbo)
diff --git a/i915/intel_tris.c b/i915/intel_tris.c
index bc527aa..fb191fe 100644
--- a/i915/intel_tris.c
+++ b/i915/intel_tris.c
@@ -52,8 +52,6 @@
 #include "intel_buffers.h"
 #include "intel_reg.h"
 #include "intel_span.h"
-#include "intel_tex.h"
-#include "intel_chipset.h"
 #include "i830_context.h"
 #include "i830_reg.h"
 
@@ -68,7 +66,7 @@ intel_flush_inline_primitive(struct intel_context *intel)
 
    assert(intel->prim.primitive != ~0);
 
-/*    _mesa_printf("/\n"); */
+/*    printf("/\n"); */
 
    if (used < 8)
       goto do_discard;
@@ -89,20 +87,18 @@ intel_flush_inline_primitive(struct intel_context *intel)
 
 static void intel_start_inline(struct intel_context *intel, uint32_t prim)
 {
-   uint32_t batch_flags = LOOP_CLIPRECTS;
    BATCH_LOCALS;
 
    intel->vtbl.emit_state(intel);
 
    intel->no_batch_wrap = GL_TRUE;
 
-   /*_mesa_printf("%s *", __progname);*/
+   /*printf("%s *", __progname);*/
 
    /* Emit a slot which will be filled with the inline primitive
     * command later.
     */
-   BEGIN_BATCH(2, batch_flags);
-   OUT_BATCH(0);
+   BEGIN_BATCH(1);
 
    assert((intel->batch->dirty_state & (1<<1)) == 0);
 
@@ -114,7 +110,7 @@ static void intel_start_inline(struct intel_context *intel, uint32_t prim)
    ADVANCE_BATCH();
 
    intel->no_batch_wrap = GL_FALSE;
-/*    _mesa_printf(">"); */
+/*    printf(">"); */
 }
 
 static void intel_wrap_inline(struct intel_context *intel)
@@ -136,7 +132,7 @@ static GLuint *intel_extend_inline(struct intel_context *intel, GLuint dwords)
    if (intel_batchbuffer_space(intel->batch) < sz)
       intel_wrap_inline(intel);
 
-/*    _mesa_printf("."); */
+/*    printf("."); */
 
    intel->vtbl.assert_not_dirty(intel);
 
@@ -221,7 +217,7 @@ void intel_flush_prim(struct intel_context *intel)
    intel->prim.count = 0;
    offset = intel->prim.start_offset;
    intel->prim.start_offset = intel->prim.current_offset;
-   if (!IS_9XX(intel->intelScreen->deviceID))
+   if (intel->gen < 3)
       intel->prim.start_offset = ALIGN(intel->prim.start_offset, 128);
    intel->prim.flush = NULL;
 
@@ -251,8 +247,8 @@ void intel_flush_prim(struct intel_context *intel)
 	  intel->vertex_size * 4);
 #endif
 
-   if (IS_9XX(intel->intelScreen->deviceID)) {
-      BEGIN_BATCH(5, LOOP_CLIPRECTS);
+   if (intel->gen >= 3) {
+      BEGIN_BATCH(5);
       OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
 		I1_LOAD_S(0) | I1_LOAD_S(1) | 1);
       assert((offset & !S0_VB_OFFSET_MASK) == 0);
@@ -270,7 +266,7 @@ void intel_flush_prim(struct intel_context *intel)
    } else {
       struct i830_context *i830 = i830_context(&intel->ctx);
 
-      BEGIN_BATCH(5, LOOP_CLIPRECTS);
+      BEGIN_BATCH(5);
       OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
 		I1_LOAD_S(0) | I1_LOAD_S(2) | 1);
       /* S0 */
@@ -607,7 +603,6 @@ static struct
 #define DO_POINTS    1
 #define DO_FULL_QUAD 1
 
-#define HAVE_RGBA         1
 #define HAVE_SPEC         1
 #define HAVE_BACK_COLORS  0
 #define HAVE_HW_FLATSHADE 1
@@ -1181,6 +1176,8 @@ static char *fallbackStrings[] = {
    [17] = "Logic op",
    [18] = "Smooth polygon",
    [19] = "Smooth point",
+   [20] = "point sprite coord origin",
+   [21] = "depth/color drawing offset",
 };
 
 
@@ -1250,81 +1247,6 @@ union fi
    GLint i;
 };
 
-
-/**********************************************************************/
-/*             Used only with the metaops callbacks.                  */
-/**********************************************************************/
-static void
-intel_meta_draw_poly(struct intel_context *intel,
-                     GLuint n,
-                     GLfloat xy[][2],
-                     GLfloat z, GLuint color, GLfloat tex[][2])
-{
-   union fi *vb;
-   GLint i;
-   unsigned int saved_vertex_size = intel->vertex_size;
-
-   LOCK_HARDWARE(intel);
-
-   intel->vertex_size = 6;
-
-   /* All 3d primitives should be emitted with LOOP_CLIPRECTS,
-    * otherwise the drawing origin (DR4) might not be set correctly.
-    */
-   intel_set_prim(intel, PRIM3D_TRIFAN);
-   vb = (union fi *) intel_get_prim_space(intel, n);
-
-   for (i = 0; i < n; i++) {
-      vb[0].f = xy[i][0];
-      vb[1].f = xy[i][1];
-      vb[2].f = z;
-      vb[3].i = color;
-      vb[4].f = tex[i][0];
-      vb[5].f = tex[i][1];
-      vb += 6;
-   }
-
-   INTEL_FIREVERTICES(intel);
-
-   intel->vertex_size = saved_vertex_size;
-
-   UNLOCK_HARDWARE(intel);
-}
-
-static void
-intel_meta_draw_quad(struct intel_context *intel,
-                     GLfloat x0, GLfloat x1,
-                     GLfloat y0, GLfloat y1,
-                     GLfloat z,
-                     GLuint color,
-                     GLfloat s0, GLfloat s1, GLfloat t0, GLfloat t1)
-{
-   GLfloat xy[4][2];
-   GLfloat tex[4][2];
-
-   xy[0][0] = x0;
-   xy[0][1] = y0;
-   xy[1][0] = x1;
-   xy[1][1] = y0;
-   xy[2][0] = x1;
-   xy[2][1] = y1;
-   xy[3][0] = x0;
-   xy[3][1] = y1;
-
-   tex[0][0] = s0;
-   tex[0][1] = t0;
-   tex[1][0] = s1;
-   tex[1][1] = t0;
-   tex[2][0] = s1;
-   tex[2][1] = t1;
-   tex[3][0] = s0;
-   tex[3][1] = t1;
-
-   intel_meta_draw_poly(intel, 4, xy, z, color, tex);
-}
-
-
-
 /**********************************************************************/
 /*                            Initialization.                         */
 /**********************************************************************/
@@ -1333,7 +1255,6 @@ intel_meta_draw_quad(struct intel_context *intel,
 void
 intelInitTriFuncs(GLcontext * ctx)
 {
-   struct intel_context *intel = intel_context(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    static int firsttime = 1;
 
@@ -1350,6 +1271,4 @@ intelInitTriFuncs(GLcontext * ctx)
    tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
    tnl->Driver.Render.CopyPV = _tnl_copy_pv;
    tnl->Driver.Render.Interp = _tnl_interp;
-
-   intel->vtbl.meta_draw_quad = intel_meta_draw_quad;
 }
diff --git a/i965/Makefile.am b/i965/Makefile.am
index 8a61dab..d9c82d5 100644
--- a/i965/Makefile.am
+++ b/i965/Makefile.am
@@ -27,7 +27,6 @@ i965_dri_la_SOURCES = \
 	../shared/intel_pixel_draw.c \
 	../shared/intel_pixel_read.c \
 	../shared/intel_state.c \
-	../shared/intel_swapbuffers.c \
 	../shared/intel_syncobj.c \
 	../shared/intel_tex.c \
 	../shared/intel_tex_copy.c \
@@ -88,4 +87,15 @@ i965_dri_la_SOURCES = \
 	brw_wm_pass2.c \
 	brw_wm_sampler_state.c \
 	brw_wm_state.c \
-	brw_wm_surface_state.c
+	brw_wm_surface_state.c \
+	gen6_cc.c \
+	gen6_clip_state.c \
+	gen6_depthstencil.c \
+	gen6_gs_state.c \
+	gen6_sampler_state.c \
+	gen6_scissor_state.c \
+	gen6_sf_state.c \
+	gen6_urb.c \
+	gen6_viewport_state.c \
+	gen6_vs_state.c \
+	gen6_wm_state.c
diff --git a/i965/brw_cc.c b/i965/brw_cc.c
index bac1c3a..fa2d394 100644
--- a/i965/brw_cc.c
+++ b/i965/brw_cc.c
@@ -34,9 +34,7 @@
 #include "brw_state.h"
 #include "brw_defines.h"
 #include "brw_util.h"
-#include "intel_fbo.h"
 #include "main/macros.h"
-#include "main/enums.h"
 
 static void prepare_cc_vp( struct brw_context *brw )
 {
@@ -295,8 +293,7 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
    bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT,
 			 key, sizeof(*key),
 			 &brw->cc.vp_bo, 1,
-			 &cc, sizeof(cc),
-			 NULL, NULL);
+			 &cc, sizeof(cc));
 
    /* Emit CC viewport relocation */
    dri_bo_emit_reloc(bo,
diff --git a/i965/brw_clip.c b/i965/brw_clip.c
index dbd10a5..d3275c7 100644
--- a/i965/brw_clip.c
+++ b/i965/brw_clip.c
@@ -50,6 +50,7 @@
 static void compile_clip_prog( struct brw_context *brw,
 			     struct brw_clip_prog_key *key )
 {
+   struct intel_context *intel = &brw->intel;
    struct brw_clip_compile c;
    const GLuint *program;
    GLuint program_size;
@@ -65,14 +66,13 @@ static void compile_clip_prog( struct brw_context *brw,
    c.func.single_program_flow = 1;
 
    c.key = *key;
-   c.need_ff_sync = BRW_IS_IGDNG(brw);
 
    /* Need to locate the two positions present in vertex + header.
     * These are currently hardcoded:
     */
    c.header_position_offset = ATTR_SIZE;
 
-   if (BRW_IS_IGDNG(brw))
+   if (intel->is_ironlake)
        delta = 3 * REG_SIZE;
    else
        delta = REG_SIZE;
@@ -85,7 +85,7 @@ static void compile_clip_prog( struct brw_context *brw,
 
    c.nr_attrs = brw_count_bits(c.key.attrs);
    
-   if (BRW_IS_IGDNG(brw))
+   if (intel->is_ironlake)
        c.nr_regs = (c.nr_attrs + 1) / 2 + 3;  /* are vertices packed, or reg-aligned? */
    else
        c.nr_regs = (c.nr_attrs + 1) / 2 + 1;  /* are vertices packed, or reg-aligned? */
@@ -130,20 +130,22 @@ static void compile_clip_prog( struct brw_context *brw,
    /* Upload
     */
    dri_bo_unreference(brw->clip.prog_bo);
-   brw->clip.prog_bo = brw_upload_cache( &brw->cache,
-					 BRW_CLIP_PROG,
-					 &c.key, sizeof(c.key),
-					 NULL, 0,
-					 program, program_size,
-					 &c.prog_data,
-					 &brw->clip.prog_data );
+   brw->clip.prog_bo = brw_upload_cache_with_auxdata(&brw->cache,
+						     BRW_CLIP_PROG,
+						     &c.key, sizeof(c.key),
+						     NULL, 0,
+						     program, program_size,
+						     &c.prog_data,
+						     sizeof(c.prog_data),
+						     &brw->clip.prog_data);
 }
 
 /* Calculate interpolants for triangle and line rasterization.
  */
 static void upload_clip_prog(struct brw_context *brw)
 {
-   GLcontext *ctx = &brw->intel.ctx;
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
    struct brw_clip_prog_key key;
 
    memset(&key, 0, sizeof(key));
@@ -160,7 +162,7 @@ static void upload_clip_prog(struct brw_context *brw)
    /* _NEW_TRANSFORM */
    key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
 
-   if (BRW_IS_IGDNG(brw))
+   if (intel->is_ironlake)
        key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP;
    else
        key.clip_mode = BRW_CLIPMODE_NORMAL;
diff --git a/i965/brw_clip.h b/i965/brw_clip.h
index 1c68255..d71bac7 100644
--- a/i965/brw_clip.h
+++ b/i965/brw_clip.h
@@ -118,7 +118,6 @@ struct brw_clip_compile {
 
    GLuint header_position_offset;
    GLuint offset[VERT_ATTRIB_MAX];
-   GLboolean need_ff_sync;
 };
 
 #define ATTR_SIZE  (4*4)
diff --git a/i965/brw_clip_line.c b/i965/brw_clip_line.c
index fa9648f..ceb62a3 100644
--- a/i965/brw_clip_line.c
+++ b/i965/brw_clip_line.c
@@ -39,13 +39,13 @@
 #include "brw_defines.h"
 #include "brw_context.h"
 #include "brw_eu.h"
-#include "brw_util.h"
 #include "brw_clip.h"
 
 
 
 static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
 {
+   struct intel_context *intel = &c->func.brw->intel;
    GLuint i = 0,j;
 
    /* Register usage is static, precompute here:
@@ -85,7 +85,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
       i++;
    }
 
-   if (c->need_ff_sync) {
+   if (intel->needs_ff_sync) {
       c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
       i++;
    }
@@ -126,6 +126,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
 static void clip_and_emit_line( struct brw_clip_compile *c )
 {
    struct brw_compile *p = &c->func;
+   struct brw_context *brw = p->brw;
    struct brw_indirect vtx0     = brw_indirect(0, 0);
    struct brw_indirect vtx1      = brw_indirect(1, 0);
    struct brw_indirect newvtx0   = brw_indirect(2, 0);
@@ -152,7 +153,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
    brw_clip_init_clipmask(c);
 
    /* -ve rhw workaround */
-   if (BRW_IS_965(p->brw)) {
+   if (brw->has_negative_rhw_bug) {
       brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
       brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
               brw_imm_ud(1<<20));
@@ -189,7 +190,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
               * Both can be negative on GM965/G965 due to RHW workaround
               * if so, this object should be rejected.
               */
-             if (BRW_IS_965(p->brw)) {
+             if (brw->has_negative_rhw_bug) {
                  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0));
                  is_neg2 = brw_IF(p, BRW_EXECUTE_1);
                  {
@@ -214,7 +215,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
 
              /* If both are positive, do nothing */
              /* Only on GM965/G965 */
-             if (BRW_IS_965(p->brw)) {
+             if (brw->has_negative_rhw_bug) {
                  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
                  is_neg2 = brw_IF(p, BRW_EXECUTE_1);
              }
@@ -229,7 +230,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
                  brw_set_predicate_control(p, BRW_PREDICATE_NONE);
              }
 
-             if (BRW_IS_965(p->brw)) {
+             if (brw->has_negative_rhw_bug) {
                  brw_ENDIF(p, is_neg2);
              }
          }
diff --git a/i965/brw_clip_point.c b/i965/brw_clip_point.c
index 8458f61..7f47634 100644
--- a/i965/brw_clip_point.c
+++ b/i965/brw_clip_point.c
@@ -39,7 +39,6 @@
 #include "brw_defines.h"
 #include "brw_context.h"
 #include "brw_eu.h"
-#include "brw_util.h"
 #include "brw_clip.h"
 
 
diff --git a/i965/brw_clip_state.c b/i965/brw_clip_state.c
index 234b374..424c9a1 100644
--- a/i965/brw_clip_state.c
+++ b/i965/brw_clip_state.c
@@ -32,7 +32,6 @@
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
-#include "main/macros.h"
 
 struct brw_clip_unit_key {
    unsigned int total_grf;
@@ -74,6 +73,7 @@ static dri_bo *
 clip_unit_create_from_key(struct brw_context *brw,
 			  struct brw_clip_unit_key *key)
 {
+   struct intel_context *intel = &brw->intel;
    struct brw_clip_unit_state clip;
    dri_bo *bo;
 
@@ -105,7 +105,7 @@ clip_unit_create_from_key(struct brw_context *brw,
       /* Although up to 16 concurrent Clip threads are allowed on IGDNG, 
        * only 2 threads can output VUEs at a time.
        */
-      if (BRW_IS_IGDNG(brw))
+      if (intel->is_ironlake)
          clip.thread4.max_threads = 16 - 1;        
       else
          clip.thread4.max_threads = 2 - 1;
@@ -130,7 +130,7 @@ clip_unit_create_from_key(struct brw_context *brw,
    clip.clip5.api_mode = BRW_CLIP_API_OGL;
    clip.clip5.clip_mode = key->clip_mode;
 
-   if (BRW_IS_G4X(brw))
+   if (intel->is_g4x)
       clip.clip5.negative_w_clip_test = 1;
 
    clip.clip6.clipper_viewport_state_ptr = 0;
@@ -142,8 +142,7 @@ clip_unit_create_from_key(struct brw_context *brw,
    bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
 			 key, sizeof(*key),
 			 &brw->clip.prog_bo, 1,
-			 &clip, sizeof(clip),
-			 NULL, NULL);
+			 &clip, sizeof(clip));
 
    /* Emit clip program relocation */
    assert(brw->clip.prog_bo);
diff --git a/i965/brw_clip_tri.c b/i965/brw_clip_tri.c
index cf79224..815211a 100644
--- a/i965/brw_clip_tri.c
+++ b/i965/brw_clip_tri.c
@@ -39,7 +39,6 @@
 #include "brw_defines.h"
 #include "brw_context.h"
 #include "brw_eu.h"
-#include "brw_util.h"
 #include "brw_clip.h"
 
 static void release_tmps( struct brw_clip_compile *c )
@@ -51,6 +50,7 @@ static void release_tmps( struct brw_clip_compile *c )
 void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, 
 			      GLuint nr_verts )
 {
+   struct intel_context *intel = &c->func.brw->intel;
    GLuint i = 0,j;
 
    /* Register usage is static, precompute here:
@@ -78,7 +78,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
       for (j = 0; j < 3; j++) {
 	 GLuint delta = c->nr_attrs*16 + 32;
 
-         if (BRW_IS_IGDNG(c->func.brw))
+         if (intel->is_ironlake)
              delta = c->nr_attrs * 16 + 32 * 3;
 
 	 brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
@@ -119,7 +119,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
       i++;
    }
 
-   if (c->need_ff_sync) {
+   if (intel->needs_ff_sync) {
       c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
       i++;
    }
@@ -571,6 +571,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
 {
    struct brw_instruction *neg_rhw;
    struct brw_compile *p = &c->func;
+   struct brw_context *brw = p->brw;
    brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
    brw_clip_tri_init_vertices(c);
    brw_clip_init_clipmask(c);
@@ -578,7 +579,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
 
    /* if -ve rhw workaround bit is set, 
       do cliptest */
-   if (BRW_IS_965(p->brw)) {
+   if (brw->has_negative_rhw_bug) {
       brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
       brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), 
               brw_imm_ud(1<<20));
diff --git a/i965/brw_clip_unfilled.c b/i965/brw_clip_unfilled.c
index ad1bfa4..f36d22f 100644
--- a/i965/brw_clip_unfilled.c
+++ b/i965/brw_clip_unfilled.c
@@ -39,7 +39,6 @@
 #include "brw_defines.h"
 #include "brw_context.h"
 #include "brw_eu.h"
-#include "brw_util.h"
 #include "brw_clip.h"
 
 
diff --git a/i965/brw_clip_util.c b/i965/brw_clip_util.c
index 5a73abd..14bc889 100644
--- a/i965/brw_clip_util.c
+++ b/i965/brw_clip_util.c
@@ -40,7 +40,6 @@
 #include "brw_defines.h"
 #include "brw_context.h"
 #include "brw_eu.h"
-#include "brw_util.h"
 #include "brw_clip.h"
 
 
@@ -135,6 +134,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
 			     GLboolean force_edgeflag)
 {
    struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
    struct brw_reg tmp = get_tmp(c);
    GLuint i;
 
@@ -142,7 +142,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
     */
    /*
     * After CLIP stage, only first 256 bits of the VUE are read
-    * back on IGDNG, so needn't change it
+    * back on Ironlake, so needn't change it
     */
    brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);
       
@@ -151,7 +151,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
    for (i = 0; i < c->nr_attrs; i++) {
       GLuint delta = i*16 + 32;
 
-      if (BRW_IS_IGDNG(p->brw))
+      if (intel->is_ironlake)
           delta = i * 16 + 32 * 3;
 
       if (delta == c->offset[VERT_RESULT_EDGE]) {
@@ -185,7 +185,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
    if (i & 1) {
       GLuint delta = i*16 + 32;
 
-      if (BRW_IS_IGDNG(p->brw))
+      if (intel->is_ironlake)
           delta = i * 16 + 32 * 3;
 
       brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
@@ -359,7 +359,9 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c )
 
 void brw_clip_ff_sync(struct brw_clip_compile *c)
 {
-    if (c->need_ff_sync) {
+    struct intel_context *intel = &c->func.brw->intel;
+
+    if (intel->needs_ff_sync) {
         struct brw_compile *p = &c->func;
         struct brw_instruction *need_ff_sync;
 
@@ -388,7 +390,9 @@ void brw_clip_ff_sync(struct brw_clip_compile *c)
 
 void brw_clip_init_ff_sync(struct brw_clip_compile *c)
 {
-    if (c->need_ff_sync) {
+    struct intel_context *intel = &c->func.brw->intel;
+
+    if (intel->needs_ff_sync) {
 	struct brw_compile *p = &c->func;
         
         brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0));
diff --git a/i965/brw_context.c b/i965/brw_context.c
index aaa2d80..a512896 100644
--- a/i965/brw_context.c
+++ b/i965/brw_context.c
@@ -33,7 +33,6 @@
 #include "main/imports.h"
 #include "main/api_noop.h"
 #include "main/macros.h"
-/* #include "main/vtxfmt.h" */
 #include "main/simple_list.h"
 #include "shader/shader_api.h"
 
@@ -41,16 +40,9 @@
 #include "brw_defines.h"
 #include "brw_draw.h"
 #include "brw_state.h"
-#include "brw_vs.h"
-#include "intel_tex.h"
-#include "intel_blit.h"
-#include "intel_batchbuffer.h"
-#include "intel_pixel.h"
 #include "intel_span.h"
 #include "tnl/t_pipeline.h"
 
-#include "utils.h"
-
 
 /***************************************
  * Mesa's Driver Functions
@@ -77,7 +69,7 @@ static void brwInitDriverFunctions( struct dd_function_table *functions )
 }
 
 GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
-			    __DRIcontextPrivate *driContextPriv,
+			    __DRIcontext *driContextPriv,
 			    void *sharedContextPrivate)
 {
    struct dd_function_table functions;
@@ -86,7 +78,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
    GLcontext *ctx = &intel->ctx;
 
    if (!brw) {
-      _mesa_printf("%s: failed to alloc context\n", __FUNCTION__);
+      printf("%s: failed to alloc context\n", __FUNCTION__);
       return GL_FALSE;
    }
 
@@ -95,7 +87,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
 
    if (!intelInitContext( intel, mesaVis, driContextPriv,
 			  sharedContextPrivate, &functions )) {
-      _mesa_printf("%s: failed to init intel context\n", __FUNCTION__);
+      printf("%s: failed to init intel context\n", __FUNCTION__);
       FREE(brw);
       return GL_FALSE;
    }
@@ -111,6 +103,9 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
    ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits,
                                      ctx->Const.MaxTextureImageUnits);
    ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */
+   ctx->Const.MaxCombinedTextureImageUnits =
+      ctx->Const.MaxVertexTextureImageUnits +
+      ctx->Const.MaxTextureImageUnits;
 
    /* Mesa limits textures to 4kx4k; it would be nice to fix that someday
     */
@@ -155,6 +150,38 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
       MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
 	   ctx->Const.FragmentProgram.MaxEnvParams);
 
+   if (intel->is_ironlake || intel->is_g4x || intel->gen >= 6) {
+      brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_GM45;
+      brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
+      brw->has_surface_tile_offset = GL_TRUE;
+      brw->has_compr4 = GL_TRUE;
+      brw->has_aa_line_parameters = GL_TRUE;
+  } else {
+      brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_965;
+      brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
+   }
+
+   /* WM maximum threads is number of EUs times number of threads per EU. */
+   if (intel->is_ironlake) {
+      brw->urb.size = 1024;
+      brw->vs_max_threads = 72;
+      brw->wm_max_threads = 12 * 6;
+   } else if (intel->is_g4x) {
+      brw->urb.size = 384;
+      brw->vs_max_threads = 32;
+      brw->wm_max_threads = 10 * 5;
+   } else if (intel->gen < 6) {
+      brw->urb.size = 256;
+      brw->vs_max_threads = 16;
+      brw->wm_max_threads = 8 * 4;
+      brw->has_negative_rhw_bug = GL_TRUE;
+   }
+
+   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) {
+      brw->vs_max_threads = 1;
+      brw->wm_max_threads = 1;
+   }
+
    brw_init_state( brw );
 
    brw->state.dirty.mesa = ~0;
diff --git a/i965/brw_context.h b/i965/brw_context.h
index fded47a..d6fc37e 100644
--- a/i965/brw_context.h
+++ b/i965/brw_context.h
@@ -131,7 +131,6 @@ struct brw_context;
 #define BRW_NEW_WM_INPUT_DIMENSIONS     0x100
 #define BRW_NEW_PSP                     0x800
 #define BRW_NEW_WM_SURFACES		0x1000
-#define BRW_NEW_FENCE                   0x2000
 #define BRW_NEW_INDICES			0x4000
 #define BRW_NEW_VERTICES		0x8000
 /**
@@ -172,8 +171,8 @@ struct brw_fragment_program {
    GLuint id;  /**< serial no. to identify frag progs, never re-used */
    GLboolean isGLSL;  /**< really, any IF/LOOP/CONT/BREAK instructions */
 
-   dri_bo *const_buffer;    /** Program constant buffer/surface */
    GLboolean use_const_buffer;
+   dri_bo *const_buffer;    /** Program constant buffer/surface */
 
    /** for debugging, which texture units are referenced */
    GLbitfield tex_units_used;
@@ -283,6 +282,9 @@ struct brw_vs_ouput_sizes {
 
 
 enum brw_cache_id {
+   BRW_BLEND_STATE,
+   BRW_DEPTH_STENCIL_STATE,
+   BRW_COLOR_CALC_STATE,
    BRW_CC_VP,
    BRW_CC_UNIT,
    BRW_WM_PROG,
@@ -291,7 +293,7 @@ enum brw_cache_id {
    BRW_WM_UNIT,
    BRW_SF_PROG,
    BRW_SF_VP,
-   BRW_SF_UNIT,
+   BRW_SF_UNIT, /* scissor state on gen6 */
    BRW_VS_UNIT,
    BRW_VS_PROG,
    BRW_GS_UNIT,
@@ -332,7 +334,6 @@ struct brw_cache {
    struct brw_cache_item **items;
    GLuint size, n_items;
 
-   GLuint aux_size[BRW_MAX_CACHE];
    char *name[BRW_MAX_CACHE];
 
    /* Record of the last BOs chosen for each cache_id.  Used to set
@@ -356,6 +357,9 @@ struct brw_tracked_state {
 
 /* Flags for brw->state.cache.
  */
+#define CACHE_NEW_BLEND_STATE            (1<<BRW_BLEND_STATE)
+#define CACHE_NEW_DEPTH_STENCIL_STATE    (1<<BRW_DEPTH_STENCIL_STATE)
+#define CACHE_NEW_COLOR_CALC_STATE       (1<<BRW_COLOR_CALC_STATE)
 #define CACHE_NEW_CC_VP                  (1<<BRW_CC_VP)
 #define CACHE_NEW_CC_UNIT                (1<<BRW_CC_UNIT)
 #define CACHE_NEW_WM_PROG                (1<<BRW_WM_PROG)
@@ -438,8 +442,11 @@ struct brw_context
    GLuint primitive;
 
    GLboolean emit_state_always;
-   GLboolean no_batch_wrap;
-
+   GLboolean has_surface_tile_offset;
+   GLboolean has_compr4;
+   GLboolean has_negative_rhw_bug;
+   GLboolean has_aa_line_parameters;
+;
    struct {
       struct brw_state_flags dirty;
 
@@ -515,6 +522,12 @@ struct brw_context
     */
    GLuint next_free_page;
 
+   /* hw-dependent 3DSTATE_VF_STATISTICS opcode */
+   uint32_t CMD_VF_STATISTICS;
+   /* hw-dependent 3DSTATE_PIPELINE_SELECT opcode */
+   uint32_t CMD_PIPELINE_SELECT;
+   int vs_max_threads;
+   int wm_max_threads;
 
    /* BRW_NEW_URB_ALLOCATIONS:
     */
@@ -531,7 +544,8 @@ struct brw_context
       GLuint nr_sf_entries;
       GLuint nr_cs_entries;
 
-/*       GLuint vs_size; */
+      /* gen6 */
+      GLuint vs_size;
 /*       GLuint gs_size; */
 /*       GLuint clip_size; */
 /*       GLuint sf_size; */
@@ -542,6 +556,7 @@ struct brw_context
       GLuint clip_start;
       GLuint sf_start;
       GLuint cs_start;
+      GLuint size; /* Hardware URB size, in KB. */
    } urb;
 
    
@@ -564,15 +579,11 @@ struct brw_context
 
       GLfloat *last_buf;
       GLuint last_bufsz;
-      /**
-       *  Whether we should create a new bo instead of reusing the old one
-       * (if we just dispatch the batch pointing at the old one.
-       */
-      GLboolean need_new_bo;
    } curbe;
 
    struct {
       struct brw_vs_prog_data *prog_data;
+      int8_t *constant_map; /* variable array following prog_data */
 
       dri_bo *prog_bo;
       dri_bo *state_bo;
@@ -639,9 +650,16 @@ struct brw_context
 
 
    struct {
+      /* gen4 */
       dri_bo *prog_bo;
-      dri_bo *state_bo;
       dri_bo *vp_bo;
+
+      /* gen6 */
+      dri_bo *blend_state_bo;
+      dri_bo *depth_stencil_state_bo;
+      dri_bo *color_calc_state_bo;
+
+      dri_bo *state_bo;
    } cc;
 
    struct {
@@ -669,7 +687,7 @@ void brwInitVtbl( struct brw_context *brw );
  * brw_context.c
  */
 GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
-			    __DRIcontextPrivate *driContextPriv,
+			    __DRIcontext *driContextPriv,
 			    void *sharedContextPrivate);
 
 /*======================================================================
diff --git a/i965/brw_curbe.c b/i965/brw_curbe.c
index aadcfbe..4e78b08 100644
--- a/i965/brw_curbe.c
+++ b/i965/brw_curbe.c
@@ -114,13 +114,13 @@ static void calculate_curbe_offsets( struct brw_context *brw )
       brw->curbe.total_size = reg;
 
       if (0)
-	 _mesa_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n",
-		      brw->curbe.wm_start,
-		      brw->curbe.wm_size,
-		      brw->curbe.clip_start,
-		      brw->curbe.clip_size,
-		      brw->curbe.vs_start,
-		      brw->curbe.vs_size );
+	 printf("curbe wm %d+%d clip %d+%d vs %d+%d\n",
+		brw->curbe.wm_start,
+		brw->curbe.wm_size,
+		brw->curbe.clip_start,
+		brw->curbe.clip_size,
+		brw->curbe.vs_start,
+		brw->curbe.vs_size );
 
       brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;
    }
@@ -198,7 +198,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
       return;
    }
 
-   buf = (GLfloat *) _mesa_calloc(bufsz);
+   buf = (GLfloat *) calloc(1, bufsz);
 
    /* fragment shader constants */
    if (brw->curbe.wm_size) {
@@ -256,25 +256,36 @@ static void prepare_constant_buffer(struct brw_context *brw)
        */
       _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); 
 
-      /* XXX just use a memcpy here */
-      for (i = 0; i < nr; i++) {
-         const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i];
-	 buf[offset + i * 4 + 0] = value[0];
-	 buf[offset + i * 4 + 1] = value[1];
-	 buf[offset + i * 4 + 2] = value[2];
-	 buf[offset + i * 4 + 3] = value[3];
+      if (vp->use_const_buffer) {
+	 /* Load the subset of push constants that will get used when
+	  * we also have a pull constant buffer.
+	  */
+	 for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+	    if (brw->vs.constant_map[i] != -1) {
+	       assert(brw->vs.constant_map[i] <= nr);
+	       memcpy(buf + offset + brw->vs.constant_map[i] * 4,
+		      vp->program.Base.Parameters->ParameterValues[i],
+		      4 * sizeof(float));
+	    }
+	 }
+      } else {
+	 for (i = 0; i < nr; i++) {
+	    memcpy(buf + offset + i * 4,
+		   vp->program.Base.Parameters->ParameterValues[i],
+		   4 * sizeof(float));
+	 }
       }
    }
 
    if (0) {
       for (i = 0; i < sz*16; i+=4) 
-	 _mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
-		      buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
+	 printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
+		buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
 
-      _mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
-		   brw->curbe.last_buf, buf,
-		   bufsz, brw->curbe.last_bufsz,
-		   brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
+      printf("last_buf %p buf %p sz %d/%d cmp %d\n",
+	     brw->curbe.last_buf, buf,
+	     bufsz, brw->curbe.last_bufsz,
+	     brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
    }
 
    if (brw->curbe.curbe_bo != NULL &&
@@ -282,20 +293,20 @@ static void prepare_constant_buffer(struct brw_context *brw)
        bufsz == brw->curbe.last_bufsz &&
        memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
       /* constants have not changed */
-      _mesa_free(buf);
+      free(buf);
    } 
    else {
       /* constants have changed */
       if (brw->curbe.last_buf)
-	 _mesa_free(brw->curbe.last_buf);
+	 free(brw->curbe.last_buf);
 
       brw->curbe.last_buf = buf;
       brw->curbe.last_bufsz = bufsz;
 
       if (brw->curbe.curbe_bo != NULL &&
-	  (brw->curbe.need_new_bo ||
-	   brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size))
+	  brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size)
       {
+	 drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo);
 	 dri_bo_unreference(brw->curbe.curbe_bo);
 	 brw->curbe.curbe_bo = NULL;
       }
@@ -307,6 +318,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
 	 brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE",
 					    4096, 1 << 6);
 	 brw->curbe.curbe_next_offset = 0;
+	 drm_intel_gem_bo_map_gtt(brw->curbe.curbe_bo);
       }
 
       brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
@@ -315,7 +327,9 @@ static void prepare_constant_buffer(struct brw_context *brw)
 
       /* Copy data to the buffer:
        */
-      dri_bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, bufsz, buf);
+      memcpy(brw->curbe.curbe_bo->virtual + brw->curbe.curbe_offset,
+	     buf,
+	     bufsz);
    }
 
    brw_add_validated_bo(brw, brw->curbe.curbe_bo);
@@ -340,7 +354,7 @@ static void emit_constant_buffer(struct brw_context *brw)
    struct intel_context *intel = &brw->intel;
    GLuint sz = brw->curbe.total_size;
 
-   BEGIN_BATCH(2, IGNORE_CLIPRECTS);
+   BEGIN_BATCH(2);
    if (sz == 0) {
       OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
       OUT_BATCH(0);
diff --git a/i965/brw_defines.h b/i965/brw_defines.h
index c19510b..bb1b5f5 100644
--- a/i965/brw_defines.h
+++ b/i965/brw_defines.h
@@ -530,6 +530,7 @@
 #define BRW_OPCODE_POP        47
 #define BRW_OPCODE_WAIT       48
 #define BRW_OPCODE_SEND       49
+#define BRW_OPCODE_MATH       56
 #define BRW_OPCODE_ADD        64
 #define BRW_OPCODE_MUL        65
 #define BRW_OPCODE_AVG        66
@@ -727,7 +728,8 @@
 #define BRW_MATH_FUNCTION_SIN                              6 /* was 7 */
 #define BRW_MATH_FUNCTION_COS                              7 /* was 8 */
 #define BRW_MATH_FUNCTION_SINCOS                           8 /* was 6 */
-#define BRW_MATH_FUNCTION_TAN                              9
+#define BRW_MATH_FUNCTION_TAN                              9 /* gen4 */
+#define BRW_MATH_FUNCTION_FDIV                             9 /* gen6+ */
 #define BRW_MATH_FUNCTION_POW                              10
 #define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER   11
 #define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT                 12
@@ -778,17 +780,33 @@
 
 #define CMD_PIPELINED_STATE_POINTERS  0x7800
 #define CMD_BINDING_TABLE_PTRS        0x7801
+# define GEN6_BINDING_TABLE_MODIFY_VS	(1 << 8)
+# define GEN6_BINDING_TABLE_MODIFY_GS	(1 << 9)
+# define GEN6_BINDING_TABLE_MODIFY_PS	(1 << 10)
+
+#define CMD_3D_SAMPLER_STATE_POINTERS			0x7802 /* SNB+ */
+# define PS_SAMPLER_STATE_CHANGE				(1 << 12)
+# define GS_SAMPLER_STATE_CHANGE				(1 << 9)
+# define VS_SAMPLER_STATE_CHANGE				(1 << 8)
+/* DW1: VS */
+/* DW2: GS */
+/* DW3: PS */
 
 #define CMD_VERTEX_BUFFER             0x7808
 # define BRW_VB0_INDEX_SHIFT		27
+# define GEN6_VB0_INDEX_SHIFT		26
 # define BRW_VB0_ACCESS_VERTEXDATA	(0 << 26)
 # define BRW_VB0_ACCESS_INSTANCEDATA	(1 << 26)
+# define GEN6_VB0_ACCESS_VERTEXDATA	(0 << 20)
+# define GEN6_VB0_ACCESS_INSTANCEDATA	(1 << 20)
 # define BRW_VB0_PITCH_SHIFT		0
 
 #define CMD_VERTEX_ELEMENT            0x7809
 # define BRW_VE0_INDEX_SHIFT		27
+# define GEN6_VE0_INDEX_SHIFT		26
 # define BRW_VE0_FORMAT_SHIFT		16
 # define BRW_VE0_VALID			(1 << 26)
+# define GEN6_VE0_VALID			(1 << 25)
 # define BRW_VE0_SRC_OFFSET_SHIFT	0
 # define BRW_VE1_COMPONENT_NOSTORE	0
 # define BRW_VE1_COMPONENT_STORE_SRC	1
@@ -805,8 +823,219 @@
 # define BRW_VE1_DST_OFFSET_SHIFT	0
 
 #define CMD_INDEX_BUFFER              0x780a
-#define CMD_VF_STATISTICS_965         0x780b
+#define CMD_VF_STATISTICS_965          0x780b
 #define CMD_VF_STATISTICS_GM45        0x680b
+#define CMD_3D_CC_STATE_POINTERS      0x780e /* GEN6+ */
+
+#define CMD_URB					0x7805 /* GEN6+ */
+# define GEN6_URB_VS_SIZE_SHIFT				16
+# define GEN6_URB_VS_ENTRIES_SHIFT			0
+# define GEN6_URB_GS_SIZE_SHIFT				8
+# define GEN6_URB_GS_ENTRIES_SHIFT			0
+
+#define CMD_VIEWPORT_STATE_POINTERS			0x780d /* GEN6+ */
+# define GEN6_CC_VIEWPORT_MODIFY			(1 << 12)
+# define GEN6_SF_VIEWPORT_MODIFY			(1 << 11)
+# define GEN6_CLIP_VIEWPORT_MODIFY			(1 << 10)
+
+#define CMD_3D_SCISSOR_STATE_POINTERS		0x780f /* GEN6+ */
+
+#define CMD_3D_VS_STATE		      0x7810 /* GEN6+ */
+/* DW2 */
+# define GEN6_VS_SPF_MODE				(1 << 31)
+# define GEN6_VS_VECTOR_MASK_ENABLE			(1 << 30)
+# define GEN6_VS_SAMPLER_COUNT_SHIFT			27
+# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT	18
+/* DW4 */
+# define GEN6_VS_DISPATCH_START_GRF_SHIFT		20
+# define GEN6_VS_URB_READ_LENGTH_SHIFT			11
+# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT		4
+/* DW5 */
+# define GEN6_VS_MAX_THREADS_SHIFT			25
+# define GEN6_VS_STATISTICS_ENABLE			(1 << 10)
+# define GEN6_VS_CACHE_DISABLE				(1 << 1)
+# define GEN6_VS_ENABLE					(1 << 0)
+
+#define CMD_3D_GS_STATE		      0x7811 /* GEN6+ */
+/* DW2 */
+# define GEN6_GS_SPF_MODE				(1 << 31)
+# define GEN6_GS_VECTOR_MASK_ENABLE			(1 << 30)
+# define GEN6_GS_SAMPLER_COUNT_SHIFT			27
+# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT	18
+/* DW4 */
+# define GEN6_GS_URB_READ_LENGTH_SHIFT			11
+# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT		4
+# define GEN6_GS_DISPATCH_START_GRF_SHIFT		0
+/* DW5 */
+# define GEN6_GS_MAX_THREADS_SHIFT			25
+# define GEN6_GS_STATISTICS_ENABLE			(1 << 10)
+# define GEN6_GS_SO_STATISTICS_ENABLE			(1 << 9)
+# define GEN6_GS_RENDERING_ENABLE			(1 << 8)
+/* DW6 */
+# define GEN6_GS_ENABLE					(1 << 15)
+
+#define CMD_3D_CLIP_STATE		      0x7812 /* GEN6+ */
+/* DW1 */
+# define GEN6_CLIP_STATISTICS_ENABLE			(1 << 10)
+/* DW2 */
+# define GEN6_CLIP_ENABLE				(1 << 31)
+# define GEN6_CLIP_API_OGL				(0 << 30)
+# define GEN6_CLIP_API_D3D				(1 << 30)
+# define GEN6_CLIP_XY_TEST				(1 << 28)
+# define GEN6_CLIP_Z_TEST				(1 << 27)
+# define GEN6_CLIP_GB_TEST				(1 << 26)
+# define GEN6_CLIP_MODE_NORMAL				(0 << 13)
+# define GEN6_CLIP_MODE_REJECT_ALL			(3 << 13)
+# define GEN6_CLIP_MODE_ACCEPT_ALL			(4 << 13)
+# define GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE		(1 << 9)
+# define GEN6_CLIP_BARYCENTRIC_ENABLE			(1 << 8)
+# define GEN6_CLIP_TRI_PROVOKE_SHIFT			4
+# define GEN6_CLIP_LINE_PROVOKE_SHIFT			2
+# define GEN6_CLIP_TRIFAN_PROVOKE_SHIFT			0
+/* DW3 */
+# define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT		17
+# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT		6
+
+#define CMD_3D_SF_STATE				0x7813 /* GEN6+ */
+/* DW1 */
+# define GEN6_SF_NUM_OUTPUTS_SHIFT			22
+# define GEN6_SF_SWIZZLE_ENABLE				(1 << 21)
+# define GEN6_SF_POINT_SPRITE_LOWERLEFT			(1 << 20)
+# define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT		11
+# define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT		4
+/* DW2 */
+# define GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS		(1 << 11)
+# define GEN6_SF_STATISTICS_ENABLE			(1 << 10)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID		(1 << 9)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME		(1 << 8)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT		(1 << 7)
+# define GEN6_SF_FRONT_SOLID				(0 << 5)
+# define GEN6_SF_FRONT_WIREFRAME			(1 << 5)
+# define GEN6_SF_FRONT_POINT				(2 << 5)
+# define GEN6_SF_BACK_SOLID				(0 << 3)
+# define GEN6_SF_BACK_WIREFRAME				(1 << 3)
+# define GEN6_SF_BACK_POINT				(2 << 3)
+# define GEN6_SF_VIEWPORT_TRANSFORM_ENABLE		(1 << 1)
+# define GEN6_SF_WINDING_CCW				(1 << 0)
+/* DW3 */
+# define GEN6_SF_LINE_AA_ENABLE				(1 << 31)
+# define GEN6_SF_CULL_BOTH				(0 << 29)
+# define GEN6_SF_CULL_NONE				(1 << 29)
+# define GEN6_SF_CULL_FRONT				(2 << 29)
+# define GEN6_SF_CULL_BACK				(3 << 29)
+# define GEN6_SF_LINE_WIDTH_SHIFT			18 /* U3.7 */
+# define GEN6_SF_LINE_END_CAP_WIDTH_0_5			(0 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_1_0			(1 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_2_0			(2 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_4_0			(3 << 16)
+# define GEN6_SF_SCISSOR_ENABLE				(1 << 11)
+# define GEN6_SF_MSRAST_OFF_PIXEL			(0 << 8)
+# define GEN6_SF_MSRAST_OFF_PATTERN			(1 << 8)
+# define GEN6_SF_MSRAST_ON_PIXEL			(2 << 8)
+# define GEN6_SF_MSRAST_ON_PATTERN			(3 << 8)
+/* DW4 */
+# define GEN6_SF_TRI_PROVOKE_SHIFT			29
+# define GEN6_SF_LINE_PROVOKE_SHIFT			27
+# define GEN6_SF_TRIFAN_PROVOKE_SHIFT			25
+# define GEN6_SF_LINE_AA_MODE_MANHATTAN			(0 << 14)
+# define GEN6_SF_LINE_AA_MODE_TRUE			(1 << 14)
+# define GEN6_SF_VERTEX_SUBPIXEL_8BITS			(0 << 12)
+# define GEN6_SF_VERTEX_SUBPIXEL_4BITS			(1 << 12)
+# define GEN6_SF_USE_STATE_POINT_WIDTH			(1 << 11)
+# define GEN6_SF_POINT_WIDTH_SHIFT			0 /* U8.3 */
+/* DW5: depth offset constant */
+/* DW6: depth offset scale */
+/* DW7: depth offset clamp */
+/* DW8 */
+# define ATTRIBUTE_1_OVERRIDE_W				(1 << 31)
+# define ATTRIBUTE_1_OVERRIDE_Z				(1 << 30)
+# define ATTRIBUTE_1_OVERRIDE_Y				(1 << 29)
+# define ATTRIBUTE_1_OVERRIDE_X				(1 << 28)
+# define ATTRIBUTE_1_CONST_SOURCE_SHIFT			25
+# define ATTRIBUTE_1_SWIZZLE_SHIFT			22
+# define ATTRIBUTE_1_SOURCE_SHIFT			16
+# define ATTRIBUTE_0_OVERRIDE_W				(1 << 15)
+# define ATTRIBUTE_0_OVERRIDE_Z				(1 << 14)
+# define ATTRIBUTE_0_OVERRIDE_Y				(1 << 13)
+# define ATTRIBUTE_0_OVERRIDE_X				(1 << 12)
+# define ATTRIBUTE_0_CONST_SOURCE_SHIFT			9
+# define ATTRIBUTE_0_SWIZZLE_SHIFT			6
+# define ATTRIBUTE_0_SOURCE_SHIFT			0
+/* DW16: Point sprite texture coordinate enables */
+/* DW17: Constant interpolation enables */
+/* DW18: attr 0-7 wrap shortest enables */
+/* DW19: attr 8-16 wrap shortest enables */
+
+#define CMD_3D_WM_STATE		      0x7814 /* GEN6+ */
+/* DW1: kernel pointer */
+/* DW2 */
+# define GEN6_WM_SPF_MODE				(1 << 31)
+# define GEN6_WM_VECTOR_MASK_ENABLE			(1 << 30)
+# define GEN6_WM_SAMPLER_COUNT_SHIFT			27
+# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT	18
+/* DW3: scratch space */
+/* DW4 */
+# define GEN6_WM_STATISTICS_ENABLE			(1 << 31)
+# define GEN6_WM_DEPTH_CLEAR				(1 << 30)
+# define GEN6_WM_DEPTH_RESOLVE				(1 << 28)
+# define GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE		(1 << 27)
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_0		16
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_1		8
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_2		0
+/* DW5 */
+# define GEN6_WM_MAX_THREADS_SHIFT			25
+# define GEN6_WM_KILL_ENABLE				(1 << 22)
+# define GEN6_WM_COMPUTED_DEPTH				(1 << 21)
+# define GEN6_WM_USES_SOURCE_DEPTH			(1 << 20)
+# define GEN6_WM_DISPATCH_ENABLE			(1 << 19)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5		(0 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0		(1 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_2_0		(2 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_4_0		(3 << 16)
+# define GEN6_WM_LINE_AA_WIDTH_0_5			(0 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_1_0			(1 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_2_0			(2 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_4_0			(3 << 14)
+# define GEN6_WM_POLYGON_STIPPLE_ENABLE			(1 << 13)
+# define GEN6_WM_LINE_STIPPLE_ENABLE			(1 << 12)
+# define GEN6_WM_OMASK_TO_RENDER_TARGET			(1 << 9)
+# define GEN6_WM_USES_SOURCE_W				(1 << 8)
+# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE		(1 << 7)
+# define GEN6_WM_32_DISPATCH_ENABLE			(1 << 2)
+# define GEN6_WM_16_DISPATCH_ENABLE			(1 << 1)
+# define GEN6_WM_8_DISPATCH_ENABLE			(1 << 0)
+/* DW6 */
+# define GEN6_WM_NUM_SF_OUTPUTS_SHIFT			20
+# define GEN6_WM_POSOFFSET_NONE				(0 << 18)
+# define GEN6_WM_POSOFFSET_CENTROID			(2 << 18)
+# define GEN6_WM_POSOFFSET_SAMPLE			(3 << 18)
+# define GEN6_WM_POSITION_ZW_PIXEL			(0 << 16)
+# define GEN6_WM_POSITION_ZW_CENTROID			(2 << 16)
+# define GEN6_WM_POSITION_ZW_SAMPLE			(3 << 16)
+# define GEN6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC	(1 << 15)
+# define GEN6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 14)
+# define GEN6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC	(1 << 13)
+# define GEN6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC		(1 << 12)
+# define GEN6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 11)
+# define GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC		(1 << 10)
+# define GEN6_WM_POINT_RASTRULE_UPPER_RIGHT		(1 << 9)
+# define GEN6_WM_MSRAST_OFF_PIXEL			(0 << 1)
+# define GEN6_WM_MSRAST_OFF_PATTERN			(1 << 1)
+# define GEN6_WM_MSRAST_ON_PIXEL			(2 << 1)
+# define GEN6_WM_MSRAST_ON_PATTERN			(3 << 1)
+# define GEN6_WM_MSDISPMODE_PERPIXEL			(1 << 0)
+/* DW7: kernel 1 pointer */
+/* DW8: kernel 2 pointer */
+
+#define CMD_3D_CONSTANT_VS_STATE	      0x7815 /* GEN6+ */
+#define CMD_3D_CONSTANT_GS_STATE	      0x7816 /* GEN6+ */
+#define CMD_3D_CONSTANT_PS_STATE	      0x7817 /* GEN6+ */
+# define GEN6_CONSTANT_BUFFER_3_ENABLE			(1 << 15)
+# define GEN6_CONSTANT_BUFFER_2_ENABLE			(1 << 14)
+# define GEN6_CONSTANT_BUFFER_1_ENABLE			(1 << 13)
+# define GEN6_CONSTANT_BUFFER_0_ENABLE			(1 << 12)
+
+#define CMD_3D_SAMPLE_MASK			0x7818 /* GEN6+ */
 
 #define CMD_DRAW_RECT                 0x7900
 #define CMD_BLEND_CONSTANT_COLOR      0x7901
@@ -818,6 +1047,25 @@
 #define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
 #define CMD_AA_LINE_PARAMETERS        0x790a
 
+#define CMD_GS_SVB_INDEX			0x790b /* CTG+ */
+/* DW1 */
+# define SVB_INDEX_SHIFT				29
+# define SVB_LOAD_INTERNAL_VERTEX_COUNT			(1 << 0) /* SNB+ */
+/* DW2: SVB index */
+/* DW3: SVB maximum index */
+
+#define CMD_3D_MULTISAMPLE			0x790d /* SNB+ */
+/* DW1 */
+# define MS_PIXEL_LOCATION_CENTER			(0 << 4)
+# define MS_PIXEL_LOCATION_UPPER_LEFT			(1 << 4)
+# define MS_NUMSAMPLES_1				(0 << 1)
+# define MS_NUMSAMPLES_4				(2 << 1)
+# define MS_NUMSAMPLES_8				(3 << 1)
+
+#define CMD_3D_CLEAR_PARAMS			0x7910 /* ILK+ */
+# define DEPTH_CLEAR_VALID				(1 << 15)
+/* DW1: depth clear value */
+
 #define CMD_PIPE_CONTROL              0x7a00
 
 #define CMD_3D_PRIM                   0x7b00
@@ -832,12 +1080,4 @@
 
 #include "intel_chipset.h"
 
-#define BRW_IS_G4X(brw)         (IS_G4X((brw)->intel.intelScreen->deviceID))
-#define BRW_IS_IGDNG(brw)         (IS_IGDNG((brw)->intel.intelScreen->deviceID))
-#define BRW_IS_965(brw)         (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)))
-#define CMD_PIPELINE_SELECT(brw)        ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965)
-#define CMD_VF_STATISTICS(brw)          ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965)
-#define URB_SIZES(brw)                  (BRW_IS_IGDNG(brw) ? 1024 : \
-                                         (BRW_IS_G4X(brw) ? 384 : 256))  /* 512 bit units */
-
 #endif
diff --git a/i965/brw_disasm.c b/i965/brw_disasm.c
index 9fef230..a8f6b99 100644
--- a/i965/brw_disasm.c
+++ b/i965/brw_disasm.c
@@ -239,7 +239,7 @@ char *imm_encoding[8] = {
     [2] = "UW",
     [3] = "W",
     [5] = "VF",
-    [5] = "V",
+    [6] = "V",
     [7] = "F"
 };
 
@@ -365,6 +365,7 @@ static int format (FILE *f, char *format, ...)
     va_start (args, format);
 
     vsnprintf (buf, sizeof (buf) - 1, format, args);
+    va_end (args);
     string (f, buf);
     return 0;
 }
diff --git a/i965/brw_draw.c b/i965/brw_draw.c
index 8bcb608..e348d46 100644
--- a/i965/brw_draw.c
+++ b/i965/brw_draw.c
@@ -39,10 +39,8 @@
 #include "brw_defines.h"
 #include "brw_context.h"
 #include "brw_state.h"
-#include "brw_fallback.h"
 
 #include "intel_batchbuffer.h"
-#include "intel_buffer_objects.h"
 
 #define FILE_DEBUG_FLAG DEBUG_BATCH
 
@@ -84,7 +82,7 @@ static GLuint brw_set_prim(struct brw_context *brw, GLenum prim)
    GLcontext *ctx = &brw->intel.ctx;
 
    if (INTEL_DEBUG & DEBUG_PRIMS)
-      _mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim));
+      printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim));
    
    /* Slight optimization to avoid the GS program when not needed:
     */
@@ -127,7 +125,7 @@ static void brw_emit_prim(struct brw_context *brw,
    struct intel_context *intel = &brw->intel;
 
    if (INTEL_DEBUG & DEBUG_PRIMS)
-      _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), 
+      printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), 
 		   prim->start, prim->count);
 
    prim_packet.header.opcode = CMD_3D_PRIM;
@@ -145,7 +143,7 @@ static void brw_emit_prim(struct brw_context *brw,
    prim_packet.base_vert_location = prim->basevertex;
 
    /* Can't wrap here, since we rely on the validated state. */
-   brw->no_batch_wrap = GL_TRUE;
+   intel->no_batch_wrap = GL_TRUE;
 
    /* If we're set to always flush, do it before and after the primitive emit.
     * We want to catch both missed flushes that hurt instruction/state cache
@@ -157,13 +155,13 @@ static void brw_emit_prim(struct brw_context *brw,
    }
    if (prim_packet.verts_per_instance) {
       intel_batchbuffer_data( brw->intel.batch, &prim_packet,
-			      sizeof(prim_packet), LOOP_CLIPRECTS);
+			      sizeof(prim_packet));
    }
    if (intel->always_flush_cache) {
       intel_batchbuffer_emit_mi_flush(intel->batch);
    }
 
-   brw->no_batch_wrap = GL_FALSE;
+   intel->no_batch_wrap = GL_FALSE;
 }
 
 static void brw_merge_inputs( struct brw_context *brw,
@@ -339,12 +337,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
     * so can't access it earlier.
     */
 
-   LOCK_HARDWARE(intel);
-
-   if (!intel->constant_cliprect && intel->driDrawable->numClipRects == 0) {
-      UNLOCK_HARDWARE(intel);
-      return GL_TRUE;
-   }
+   intel_prepare_render(intel);
 
    for (i = 0; i < nr_prims; i++) {
       uint32_t hw_prim;
@@ -356,8 +349,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
        * an upper bound of how much we might emit in a single
        * brw_try_draw_prims().
        */
-      intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4,
-				      LOOP_CLIPRECTS);
+      intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4);
 
       hw_prim = brw_set_prim(brw, prim[i].mode);
 
@@ -404,7 +396,6 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
    if (intel->always_flush_batch)
       intel_batchbuffer_flush(intel->batch);
  out:
-   UNLOCK_HARDWARE(intel);
 
    brw_state_cache_check_size(brw);
 
diff --git a/i965/brw_draw_upload.c b/i965/brw_draw_upload.c
index ee684f6..71a4357 100644
--- a/i965/brw_draw_upload.c
+++ b/i965/brw_draw_upload.c
@@ -29,19 +29,15 @@
 #include "main/glheader.h"
 #include "main/bufferobj.h"
 #include "main/context.h"
-#include "main/state.h"
-/* #include "main/api_validate.h" */
 #include "main/enums.h"
 
 #include "brw_draw.h"
 #include "brw_defines.h"
 #include "brw_context.h"
 #include "brw_state.h"
-#include "brw_fallback.h"
 
 #include "intel_batchbuffer.h"
 #include "intel_buffer_objects.h"
-#include "intel_tex.h"
 
 static GLuint double_types[5] = {
    0,
@@ -59,6 +55,14 @@ static GLuint float_types[5] = {
    BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
 };
 
+static GLuint half_float_types[5] = {
+   0,
+   BRW_SURFACEFORMAT_R16_FLOAT,
+   BRW_SURFACEFORMAT_R16G16_FLOAT,
+   0, /* can't seem to render this one */
+   BRW_SURFACEFORMAT_R16G16B16A16_FLOAT
+};
+
 static GLuint uint_types_norm[5] = {
    0,
    BRW_SURFACEFORMAT_R32_UNORM,
@@ -165,13 +169,14 @@ static GLuint get_surface_type( GLenum type, GLuint size,
                                 GLenum format, GLboolean normalized )
 {
    if (INTEL_DEBUG & DEBUG_VERTS)
-      _mesa_printf("type %s size %d normalized %d\n", 
+      printf("type %s size %d normalized %d\n", 
 		   _mesa_lookup_enum_by_nr(type), size, normalized);
 
    if (normalized) {
       switch (type) {
       case GL_DOUBLE: return double_types[size];
       case GL_FLOAT: return float_types[size];
+      case GL_HALF_FLOAT: return half_float_types[size];
       case GL_INT: return int_types_norm[size];
       case GL_SHORT: return short_types_norm[size];
       case GL_BYTE: return byte_types_norm[size];
@@ -194,6 +199,7 @@ static GLuint get_surface_type( GLenum type, GLuint size,
       switch (type) {
       case GL_DOUBLE: return double_types[size];
       case GL_FLOAT: return float_types[size];
+      case GL_HALF_FLOAT: return half_float_types[size];
       case GL_INT: return int_types_scale[size];
       case GL_SHORT: return short_types_scale[size];
       case GL_BYTE: return byte_types_scale[size];
@@ -211,6 +217,7 @@ static GLuint get_size( GLenum type )
    switch (type) {
    case GL_DOUBLE: return sizeof(GLdouble);
    case GL_FLOAT: return sizeof(GLfloat);
+   case GL_HALF_FLOAT: return sizeof(GLhalfARB);
    case GL_INT: return sizeof(GLint);
    case GL_SHORT: return sizeof(GLshort);
    case GL_BYTE: return sizeof(GLbyte);
@@ -243,14 +250,6 @@ static void wrap_buffers( struct brw_context *brw,
       dri_bo_unreference(brw->vb.upload.bo);
    brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO",
 				    size, 1);
-
-   /* Set the internal VBO\ to no-backing-store.  We only use them as a
-    * temporary within a brw_try_draw_prims while the lock is held.
-    */
-   /* DON'T DO THIS AS IF WE HAVE TO RE-ORG MEMORY WE NEED SOMEWHERE WITH
-      FAKE TO PUSH THIS STUFF */
-//   if (!brw->intel.ttm)
-//      dri_bo_fake_disable_backing_store(brw->vb.upload.bo, NULL, NULL);
 }
 
 static void get_space( struct brw_context *brw,
@@ -277,7 +276,6 @@ copy_array_to_vbo_array( struct brw_context *brw,
 			 struct brw_vertex_element *element,
 			 GLuint dst_stride)
 {
-   struct intel_context *intel = &brw->intel;
    GLuint size = element->count * dst_stride;
 
    get_space(brw, size, &element->bo, &element->offset);
@@ -290,52 +288,26 @@ copy_array_to_vbo_array( struct brw_context *brw,
    }
 
    if (dst_stride == element->glarray->StrideB) {
-      if (intel->intelScreen->kernel_exec_fencing) {
-	 drm_intel_gem_bo_map_gtt(element->bo);
-	 memcpy((char *)element->bo->virtual + element->offset,
-		element->glarray->Ptr, size);
-	 drm_intel_gem_bo_unmap_gtt(element->bo);
-      } else {
-	 dri_bo_subdata(element->bo,
-			element->offset,
-			size,
-			element->glarray->Ptr);
-      }
+      drm_intel_gem_bo_map_gtt(element->bo);
+      memcpy((char *)element->bo->virtual + element->offset,
+	     element->glarray->Ptr, size);
+      drm_intel_gem_bo_unmap_gtt(element->bo);
    } else {
       char *dest;
       const unsigned char *src = element->glarray->Ptr;
       int i;
 
-      if (intel->intelScreen->kernel_exec_fencing) {
-	 drm_intel_gem_bo_map_gtt(element->bo);
-	 dest = element->bo->virtual;
-	 dest += element->offset;
-
-	 for (i = 0; i < element->count; i++) {
-	    memcpy(dest, src, dst_stride);
-	    src += element->glarray->StrideB;
-	    dest += dst_stride;
-	 }
-
-	 drm_intel_gem_bo_unmap_gtt(element->bo);
-      } else {
-	 void *data;
-
-	 data = _mesa_malloc(dst_stride * element->count);
-	 dest = data;
-	 for (i = 0; i < element->count; i++) {
-	    memcpy(dest, src, dst_stride);
-	    src += element->glarray->StrideB;
-	    dest += dst_stride;
-	 }
-
-	 dri_bo_subdata(element->bo,
-			element->offset,
-			size,
-			data);
+      drm_intel_gem_bo_map_gtt(element->bo);
+      dest = element->bo->virtual;
+      dest += element->offset;
 
-	 _mesa_free(data);
+      for (i = 0; i < element->count; i++) {
+	 memcpy(dest, src, dst_stride);
+	 src += element->glarray->StrideB;
+	 dest += dst_stride;
       }
+
+      drm_intel_gem_bo_unmap_gtt(element->bo);
    }
 }
 
@@ -356,7 +328,7 @@ static void brw_prepare_vertices(struct brw_context *brw)
    /* First build an array of pointers to ve's in vb.inputs_read
     */
    if (0)
-      _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
+      printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
 
    /* Accumulate the list of enabled arrays. */
    brw->vb.nr_enabled = 0;
@@ -502,12 +474,19 @@ static void brw_emit_vertices(struct brw_context *brw)
     * a VE loads from them.
     */
    if (brw->vb.nr_enabled == 0) {
-      BEGIN_BATCH(3, IGNORE_CLIPRECTS);
+      BEGIN_BATCH(3);
       OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1);
-      OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
-		BRW_VE0_VALID |
-		(BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
-		(0 << BRW_VE0_SRC_OFFSET_SHIFT));
+      if (IS_GEN6(intel->intelScreen->deviceID)) {
+	 OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) |
+		   GEN6_VE0_VALID |
+		   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
+		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+      } else {
+	 OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
+		   BRW_VE0_VALID |
+		   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
+		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+      }
       OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
 		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
 		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
@@ -522,20 +501,28 @@ static void brw_emit_vertices(struct brw_context *brw)
     * are interleaved or from the same VBO.  TBD if this makes a
     * performance difference.
     */
-   BEGIN_BATCH(1 + brw->vb.nr_enabled * 4, IGNORE_CLIPRECTS);
+   BEGIN_BATCH(1 + brw->vb.nr_enabled * 4);
    OUT_BATCH((CMD_VERTEX_BUFFER << 16) |
 	     ((1 + brw->vb.nr_enabled * 4) - 2));
 
    for (i = 0; i < brw->vb.nr_enabled; i++) {
       struct brw_vertex_element *input = brw->vb.enabled[i];
+      uint32_t dw0;
+
+      if (intel->gen >= 6) {
+	 dw0 = GEN6_VB0_ACCESS_VERTEXDATA |
+	    (i << GEN6_VB0_INDEX_SHIFT);
+      } else {
+	 dw0 = BRW_VB0_ACCESS_VERTEXDATA |
+	    (i << BRW_VB0_INDEX_SHIFT);
+      }
 
-      OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) |
-		BRW_VB0_ACCESS_VERTEXDATA |
+      OUT_BATCH(dw0 |
 		(input->stride << BRW_VB0_PITCH_SHIFT));
       OUT_RELOC(input->bo,
 		I915_GEM_DOMAIN_VERTEX, 0,
 		input->offset);
-      if (BRW_IS_IGDNG(brw)) {
+      if (intel->is_ironlake || intel->gen >= 6) {
 	 OUT_RELOC(input->bo,
 		   I915_GEM_DOMAIN_VERTEX, 0,
 		   input->bo->size - 1);
@@ -545,7 +532,7 @@ static void brw_emit_vertices(struct brw_context *brw)
    }
    ADVANCE_BATCH();
 
-   BEGIN_BATCH(1 + brw->vb.nr_enabled * 2, IGNORE_CLIPRECTS);
+   BEGIN_BATCH(1 + brw->vb.nr_enabled * 2);
    OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2));
    for (i = 0; i < brw->vb.nr_enabled; i++) {
       struct brw_vertex_element *input = brw->vb.enabled[i];
@@ -566,12 +553,19 @@ static void brw_emit_vertices(struct brw_context *brw)
 	 break;
       }
 
-      OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) |
-		BRW_VE0_VALID |
-		(format << BRW_VE0_FORMAT_SHIFT) |
-		(0 << BRW_VE0_SRC_OFFSET_SHIFT));
+      if (IS_GEN6(intel->intelScreen->deviceID)) {
+	 OUT_BATCH((i << GEN6_VE0_INDEX_SHIFT) |
+		   GEN6_VE0_VALID |
+		   (format << BRW_VE0_FORMAT_SHIFT) |
+		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+      } else {
+	 OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) |
+		   BRW_VE0_VALID |
+		   (format << BRW_VE0_FORMAT_SHIFT) |
+		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+      }
 
-      if (BRW_IS_IGDNG(brw))
+      if (intel->is_ironlake || intel->gen >= 6)
           OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
                     (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
                     (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
@@ -625,13 +619,9 @@ static void brw_prepare_indices(struct brw_context *brw)
 
       /* Straight upload
        */
-      if (intel->intelScreen->kernel_exec_fencing) {
-	 drm_intel_gem_bo_map_gtt(bo);
-	 memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size);
-	 drm_intel_gem_bo_unmap_gtt(bo);
-      } else {
-	 dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
-      }
+      drm_intel_gem_bo_map_gtt(bo);
+      memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size);
+      drm_intel_gem_bo_unmap_gtt(bo);
    } else {
       offset = (GLuint) (unsigned long) index_buffer->ptr;
       brw->ib.start_vertex_offset = 0;
@@ -712,7 +702,7 @@ static void brw_emit_index_buffer(struct brw_context *brw)
       ib.header.bits.index_format = get_index_type(index_buffer->type);
       ib.header.bits.cut_index_enable = 0;
 
-      BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+      BEGIN_BATCH(4);
       OUT_BATCH( ib.header.dword );
       OUT_RELOC(brw->ib.bo,
 		I915_GEM_DOMAIN_VERTEX, 0,
diff --git a/i965/brw_eu.c b/i965/brw_eu.c
index 1df5613..4e7c122 100644
--- a/i965/brw_eu.c
+++ b/i965/brw_eu.c
@@ -237,7 +237,7 @@ brw_resolve_cals(struct brw_compile *c)
         struct brw_glsl_call *call, *next;
         for (call = c->first_call; call; call = next) {
 	    next = call->next;
-	    _mesa_free(call);
+	    free(call);
 	}
 	c->first_call = NULL;
     }
@@ -247,7 +247,7 @@ brw_resolve_cals(struct brw_compile *c)
         struct brw_glsl_label *label, *next;
 	for (label = c->first_label; label; label = next) {
 	    next = label->next;
-	    _mesa_free(label);
+	    free(label);
 	}
 	c->first_label = NULL;
     }
diff --git a/i965/brw_eu_debug.c b/i965/brw_eu_debug.c
index 29f3f6d..99453af 100644
--- a/i965/brw_eu_debug.c
+++ b/i965/brw_eu_debug.c
@@ -54,9 +54,9 @@ void brw_print_reg( struct brw_reg hwreg )
       "f"
    };
 
-   _mesa_printf("%s%s", 
-		hwreg.abs ? "abs/" : "",
-		hwreg.negate ? "-" : "");
+   printf("%s%s", 
+	  hwreg.abs ? "abs/" : "",
+	  hwreg.negate ? "-" : "");
      
    if (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
        hwreg.nr % 2 == 0 &&
@@ -66,7 +66,7 @@ void brw_print_reg( struct brw_reg hwreg )
        hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
        hwreg.type == BRW_REGISTER_TYPE_F) {
       /* vector register */
-      _mesa_printf("vec%d", hwreg.nr);
+      printf("vec%d", hwreg.nr);
    }
    else if (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
 	    hwreg.vstride == BRW_VERTICAL_STRIDE_0 &&
@@ -74,13 +74,13 @@ void brw_print_reg( struct brw_reg hwreg )
 	    hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 &&
 	    hwreg.type == BRW_REGISTER_TYPE_F) {      
       /* "scalar" register */
-      _mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
+      printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
    }
    else if (hwreg.file == BRW_IMMEDIATE_VALUE) {
-      _mesa_printf("imm %f", hwreg.dw1.f);
+      printf("imm %f", hwreg.dw1.f);
    }
    else {
-      _mesa_printf("%s%d.%d<%d;%d,%d>:%s", 
+      printf("%s%d.%d<%d;%d,%d>:%s", 
 		   file[hwreg.file],
 		   hwreg.nr,
 		   hwreg.subnr / type_sz(hwreg.type),
diff --git a/i965/brw_eu_emit.c b/i965/brw_eu_emit.c
index 7ceabba..f69d529 100644
--- a/i965/brw_eu_emit.c
+++ b/i965/brw_eu_emit.c
@@ -102,8 +102,6 @@ static void brw_set_dest( struct brw_instruction *insn,
 static void brw_set_src0( struct brw_instruction *insn,
                           struct brw_reg reg )
 {
-   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
-
    if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(reg.nr < 128);
 
@@ -199,7 +197,7 @@ void brw_set_src1( struct brw_instruction *insn,
        * in the future:
        */
       assert (reg.address_mode == BRW_ADDRESS_DIRECT);
-      //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
+      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
 
       if (insn->header.access_mode == BRW_ALIGN_1) {
 	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
@@ -252,9 +250,10 @@ static void brw_set_math_message( struct brw_context *brw,
 				  GLboolean saturate,
 				  GLuint dataType )
 {
+   struct intel_context *intel = &brw->intel;
    brw_set_src1(insn, brw_imm_d(0));
 
-   if (BRW_IS_IGDNG(brw)) {
+   if (intel->is_ironlake) {
        insn->bits3.math_igdng.function = function;
        insn->bits3.math_igdng.int_type = integer_type;
        insn->bits3.math_igdng.precision = low_precision;
@@ -319,9 +318,10 @@ static void brw_set_urb_message( struct brw_context *brw,
 				 GLuint offset,
 				 GLuint swizzle_control )
 {
+    struct intel_context *intel = &brw->intel;
     brw_set_src1(insn, brw_imm_d(0));
 
-    if (BRW_IS_IGDNG(brw)) {
+    if (intel->is_ironlake || intel->gen >= 6) {
         insn->bits3.urb_igdng.opcode = 0;	/* ? */
         insn->bits3.urb_igdng.offset = offset;
         insn->bits3.urb_igdng.swizzle_control = swizzle_control;
@@ -332,8 +332,16 @@ static void brw_set_urb_message( struct brw_context *brw,
         insn->bits3.urb_igdng.response_length = response_length;
         insn->bits3.urb_igdng.msg_length = msg_length;
         insn->bits3.urb_igdng.end_of_thread = end_of_thread;
-        insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
-        insn->bits2.send_igdng.end_of_thread = end_of_thread;
+	if (intel->gen >= 6) {
+	   /* For SNB, the SFID bits moved to the condmod bits, and
+	    * EOT stayed in bits3 above.  Does the EOT bit setting
+	    * below on Ironlake even do anything?
+	    */
+	   insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
+	} else {
+	   insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
+	   insn->bits2.send_igdng.end_of_thread = end_of_thread;
+	}
     } else {
         insn->bits3.urb.opcode = 0;	/* ? */
         insn->bits3.urb.offset = offset;
@@ -358,9 +366,10 @@ static void brw_set_dp_write_message( struct brw_context *brw,
 				      GLuint response_length,
 				      GLuint end_of_thread )
 {
+   struct intel_context *intel = &brw->intel;
    brw_set_src1(insn, brw_imm_d(0));
 
-   if (BRW_IS_IGDNG(brw)) {
+   if (intel->is_ironlake) {
        insn->bits3.dp_write_igdng.binding_table_index = binding_table_index;
        insn->bits3.dp_write_igdng.msg_control = msg_control;
        insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear;
@@ -395,9 +404,10 @@ static void brw_set_dp_read_message( struct brw_context *brw,
 				      GLuint response_length,
 				      GLuint end_of_thread )
 {
+   struct intel_context *intel = &brw->intel;
    brw_set_src1(insn, brw_imm_d(0));
 
-   if (BRW_IS_IGDNG(brw)) {
+   if (intel->is_ironlake) {
        insn->bits3.dp_read_igdng.binding_table_index = binding_table_index;
        insn->bits3.dp_read_igdng.msg_control = msg_control;
        insn->bits3.dp_read_igdng.msg_type = msg_type;
@@ -433,10 +443,11 @@ static void brw_set_sampler_message(struct brw_context *brw,
                                     GLuint header_present,
                                     GLuint simd_mode)
 {
+   struct intel_context *intel = &brw->intel;
    assert(eot == 0);
    brw_set_src1(insn, brw_imm_d(0));
 
-   if (BRW_IS_IGDNG(brw)) {
+   if (intel->is_ironlake) {
       insn->bits3.sampler_igdng.binding_table_index = binding_table_index;
       insn->bits3.sampler_igdng.sampler = sampler;
       insn->bits3.sampler_igdng.msg_type = msg_type;
@@ -447,7 +458,7 @@ static void brw_set_sampler_message(struct brw_context *brw,
       insn->bits3.sampler_igdng.end_of_thread = eot;
       insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER;
       insn->bits2.send_igdng.end_of_thread = eot;
-   } else if (BRW_IS_G4X(brw)) {
+   } else if (intel->is_g4x) {
       insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
       insn->bits3.sampler_g4x.sampler = sampler;
       insn->bits3.sampler_g4x.msg_type = msg_type;
@@ -648,10 +659,11 @@ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
 struct brw_instruction *brw_ELSE(struct brw_compile *p, 
 				 struct brw_instruction *if_insn)
 {
+   struct intel_context *intel = &p->brw->intel;
    struct brw_instruction *insn;
    GLuint br = 1;
 
-   if (BRW_IS_IGDNG(p->brw))
+   if (intel->is_ironlake)
       br = 2;
 
    if (p->single_program_flow) {
@@ -690,9 +702,10 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p,
 void brw_ENDIF(struct brw_compile *p, 
 	       struct brw_instruction *patch_insn)
 {
+   struct intel_context *intel = &p->brw->intel;
    GLuint br = 1;
 
-   if (BRW_IS_IGDNG(p->brw))
+   if (intel->is_ironlake)
       br = 2; 
  
    if (p->single_program_flow) {
@@ -803,10 +816,11 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
 struct brw_instruction *brw_WHILE(struct brw_compile *p, 
                                   struct brw_instruction *do_insn)
 {
+   struct intel_context *intel = &p->brw->intel;
    struct brw_instruction *insn;
    GLuint br = 1;
 
-   if (BRW_IS_IGDNG(p->brw))
+   if (intel->is_ironlake)
       br = 2;
 
    if (p->single_program_flow)
@@ -846,14 +860,15 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
 void brw_land_fwd_jump(struct brw_compile *p, 
 		       struct brw_instruction *jmp_insn)
 {
+   struct intel_context *intel = &p->brw->intel;
    struct brw_instruction *landing = &p->store[p->nr_insn];
    GLuint jmpi = 1;
 
-   if (BRW_IS_IGDNG(p->brw))
+   if (intel->is_ironlake)
        jmpi = 2;
 
    assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
-   assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
+   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
 
    jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
 }
@@ -908,26 +923,40 @@ void brw_math( struct brw_compile *p,
 	       GLuint data_type,
 	       GLuint precision )
 {
-   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
-   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; 
-   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; 
+   struct intel_context *intel = &p->brw->intel;
 
-   /* Example code doesn't set predicate_control for send
-    * instructions.
-    */
-   insn->header.predicate_control = 0; 
-   insn->header.destreg__conditionalmod = msg_reg_nr;
+   if (intel->gen >= 6) {
+      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
 
-   brw_set_dest(insn, dest);
-   brw_set_src0(insn, src);
-   brw_set_math_message(p->brw,
-			insn, 
-			msg_length, response_length, 
-			function,
-			BRW_MATH_INTEGER_UNSIGNED,
-			precision,
-			saturate,
-			data_type);
+      /* Math is the same ISA format as other opcodes, except that CondModifier
+       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
+       */
+      insn->header.destreg__conditionalmod = function;
+
+      brw_set_dest(insn, dest);
+      brw_set_src0(insn, src);
+      brw_set_src1(insn, brw_null_reg());
+   } else {
+      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+      GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
+      GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
+      /* Example code doesn't set predicate_control for send
+       * instructions.
+       */
+      insn->header.predicate_control = 0;
+      insn->header.destreg__conditionalmod = msg_reg_nr;
+
+      brw_set_dest(insn, dest);
+      brw_set_src0(insn, src);
+      brw_set_math_message(p->brw,
+			   insn,
+			   msg_length, response_length,
+			   function,
+			   BRW_MATH_INTEGER_UNSIGNED,
+			   precision,
+			   saturate,
+			   data_type);
+   }
 }
 
 /**
@@ -1263,7 +1292,7 @@ void brw_SAMPLE(struct brw_compile *p,
    GLboolean need_stall = 0;
    
    if (writemask == 0) {
-      /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
+      /*printf("%s: zero writemask??\n", __FUNCTION__); */
       return;
    }
    
@@ -1295,7 +1324,7 @@ void brw_SAMPLE(struct brw_compile *p,
 
       if (newmask != writemask) {
 	 need_stall = 1;
-         /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
+         /* printf("need stall %x %x\n", newmask , writemask); */
       }
       else {
 	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
@@ -1368,7 +1397,18 @@ void brw_urb_WRITE(struct brw_compile *p,
 		   GLuint offset,
 		   GLuint swizzle)
 {
-   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+   struct intel_context *intel = &p->brw->intel;
+   struct brw_instruction *insn;
+
+   /* Sandybridge doesn't have the implied move for SENDs,
+    * and the first message register index comes from src0.
+    */
+   if (intel->gen >= 6) {
+      brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
+      src0 = brw_message_reg(msg_reg_nr);
+   }
+
+   insn = next_insn(p, BRW_OPCODE_SEND);
 
    assert(msg_length < BRW_MAX_MRF);
 
@@ -1376,7 +1416,8 @@ void brw_urb_WRITE(struct brw_compile *p,
    brw_set_src0(insn, src0);
    brw_set_src1(insn, brw_imm_d(0));
 
-   insn->header.destreg__conditionalmod = msg_reg_nr;
+   if (intel->gen < 6)
+      insn->header.destreg__conditionalmod = msg_reg_nr;
 
    brw_set_urb_message(p->brw,
 		       insn,
diff --git a/i965/brw_fallback.c b/i965/brw_fallback.c
index 562a178..ba401c2 100644
--- a/i965/brw_fallback.c
+++ b/i965/brw_fallback.c
@@ -36,18 +36,13 @@
 #include "swrast/swrast.h"
 #include "tnl/tnl.h"
 #include "brw_context.h"
-#include "brw_fallback.h"
-#include "intel_chipset.h"
 #include "intel_fbo.h"
 #include "intel_regions.h"
 
-#include "glapi/glapi.h"
-
 #define FILE_DEBUG_FLAG DEBUG_FALLBACKS
 
 static GLboolean do_check_fallback(struct brw_context *brw)
 {
-   struct intel_context *intel = &brw->intel;
    GLcontext *ctx = &brw->intel.ctx;
    GLuint i;
 
@@ -86,8 +81,7 @@ static GLboolean do_check_fallback(struct brw_context *brw)
    }
 
    /* _NEW_BUFFERS */
-   if (IS_965(intel->intelScreen->deviceID) &&
-       !IS_G4X(intel->intelScreen->deviceID)) {
+   if (!brw->has_surface_tile_offset) {
       for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
 	 struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
 	 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
diff --git a/i965/brw_gs.c b/i965/brw_gs.c
index 610b6c3..7261b31 100644
--- a/i965/brw_gs.c
+++ b/i965/brw_gs.c
@@ -47,6 +47,7 @@
 static void compile_gs_prog( struct brw_context *brw,
 			     struct brw_gs_prog_key *key )
 {
+   struct intel_context *intel = &brw->intel;
    struct brw_gs_compile c;
    const GLuint *program;
    GLuint program_size;
@@ -54,13 +55,12 @@ static void compile_gs_prog( struct brw_context *brw,
    memset(&c, 0, sizeof(c));
    
    c.key = *key;
-   c.need_ff_sync = BRW_IS_IGDNG(brw);
    /* Need to locate the two positions present in vertex + header.
     * These are currently hardcoded:
     */
    c.nr_attrs = brw_count_bits(c.key.attrs);
 
-   if (BRW_IS_IGDNG(brw))
+   if (intel->is_ironlake)
        c.nr_regs = (c.nr_attrs + 1) / 2 + 3;  /* are vertices packed, or reg-aligned? */
    else
        c.nr_regs = (c.nr_attrs + 1) / 2 + 1;  /* are vertices packed, or reg-aligned? */
@@ -125,12 +125,13 @@ static void compile_gs_prog( struct brw_context *brw,
    /* Upload
     */
    dri_bo_unreference(brw->gs.prog_bo);
-   brw->gs.prog_bo = brw_upload_cache( &brw->cache, BRW_GS_PROG,
-				       &c.key, sizeof(c.key),
-				       NULL, 0,
-				       program, program_size,
-				       &c.prog_data,
-				       &brw->gs.prog_data );
+   brw->gs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_GS_PROG,
+						   &c.key, sizeof(c.key),
+						   NULL, 0,
+						   program, program_size,
+						   &c.prog_data,
+						   sizeof(c.prog_data),
+						   &brw->gs.prog_data);
 }
 
 static const GLenum gs_prim[GL_POLYGON+1] = {  
diff --git a/i965/brw_gs.h b/i965/brw_gs.h
index 010c1c2..813b8d4 100644
--- a/i965/brw_gs.h
+++ b/i965/brw_gs.h
@@ -63,7 +63,6 @@ struct brw_gs_compile {
    GLuint nr_attrs;
    GLuint nr_regs;
    GLuint nr_bytes;
-   GLboolean need_ff_sync;
 };
 
 #define ATTR_SIZE  (4*4)
diff --git a/i965/brw_gs_emit.c b/i965/brw_gs_emit.c
index 0fc5b02..dd7b057 100644
--- a/i965/brw_gs_emit.c
+++ b/i965/brw_gs_emit.c
@@ -40,7 +40,6 @@
 #include "brw_defines.h"
 #include "brw_context.h"
 #include "brw_eu.h"
-#include "brw_util.h"
 #include "brw_gs.h"
 
 static void brw_gs_alloc_regs( struct brw_gs_compile *c,
@@ -122,12 +121,14 @@ static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
 
 void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key )
 {
+   struct intel_context *intel = &c->func.brw->intel;
+
    brw_gs_alloc_regs(c, 4);
    
    /* Use polygons for correct edgeflag behaviour. Note that vertex 3
     * is the PV for quads, but vertex 0 for polygons:
     */
-   if (c->need_ff_sync)
+   if (intel->needs_ff_sync)
 	   brw_gs_ff_sync(c, 1);
    if (key->pv_first) {
       brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
@@ -145,9 +146,11 @@ void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key )
 
 void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key )
 {
+   struct intel_context *intel = &c->func.brw->intel;
+
    brw_gs_alloc_regs(c, 4);
    
-   if (c->need_ff_sync)
+   if (intel->needs_ff_sync)
 	   brw_gs_ff_sync(c, 1);      
    if (key->pv_first) {
       brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
@@ -165,9 +168,11 @@ void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key )
 
 void brw_gs_tris( struct brw_gs_compile *c )
 {
+   struct intel_context *intel = &c->func.brw->intel;
+
    brw_gs_alloc_regs(c, 3);
 
-   if (c->need_ff_sync)
+   if (intel->needs_ff_sync)
 	   brw_gs_ff_sync(c, 1);      
    brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START));
    brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2));
@@ -176,9 +181,11 @@ void brw_gs_tris( struct brw_gs_compile *c )
 
 void brw_gs_lines( struct brw_gs_compile *c )
 {
+   struct intel_context *intel = &c->func.brw->intel;
+
    brw_gs_alloc_regs(c, 2);
 
-   if (c->need_ff_sync)
+   if (intel->needs_ff_sync)
 	   brw_gs_ff_sync(c, 1);      
    brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START));
    brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END));
@@ -186,9 +193,11 @@ void brw_gs_lines( struct brw_gs_compile *c )
 
 void brw_gs_points( struct brw_gs_compile *c )
 {
+   struct intel_context *intel = &c->func.brw->intel;
+
    brw_gs_alloc_regs(c, 1);
 
-   if (c->need_ff_sync)
+   if (intel->needs_ff_sync)
 	   brw_gs_ff_sync(c, 1);      
    brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END));
 }
diff --git a/i965/brw_gs_state.c b/i965/brw_gs_state.c
index ed9d2ff..d8ad5ce 100644
--- a/i965/brw_gs_state.c
+++ b/i965/brw_gs_state.c
@@ -34,7 +34,6 @@
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
-#include "main/macros.h"
 
 struct brw_gs_unit_key {
    unsigned int total_grf;
@@ -72,6 +71,7 @@ gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key)
 static dri_bo *
 gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
 {
+   struct intel_context *intel = &brw->intel;
    struct brw_gs_unit_state gs;
    dri_bo *bo;
 
@@ -98,7 +98,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
    else
       gs.thread4.max_threads = 0;
 
-   if (BRW_IS_IGDNG(brw))
+   if (intel->is_ironlake)
       gs.thread4.rendering_enable = 1;
 
    if (INTEL_DEBUG & DEBUG_STATS)
@@ -107,8 +107,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
    bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT,
 			 key, sizeof(*key),
 			 &brw->gs.prog_bo, 1,
-			 &gs, sizeof(gs),
-			 NULL, NULL);
+			 &gs, sizeof(gs));
 
    if (key->prog_active) {
       /* Emit GS program relocation */
diff --git a/i965/brw_misc_state.c b/i965/brw_misc_state.c
index 4b0d598..d030ed4 100644
--- a/i965/brw_misc_state.c
+++ b/i965/brw_misc_state.c
@@ -78,10 +78,7 @@ static void upload_drawing_rect(struct brw_context *brw)
    struct intel_context *intel = &brw->intel;
    GLcontext *ctx = &intel->ctx;
 
-   if (!intel->constant_cliprect)
-      return;
-
-   BEGIN_BATCH(4, NO_LOOP_CLIPRECTS);
+   BEGIN_BATCH(4);
    OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965);
    OUT_BATCH(0); /* xmin, ymin */
    OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
@@ -116,7 +113,7 @@ static void upload_binding_table_pointers(struct brw_context *brw)
 {
    struct intel_context *intel = &brw->intel;
 
-   BEGIN_BATCH(6, IGNORE_CLIPRECTS);
+   BEGIN_BATCH(6);
    OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
    if (brw->vs.bind_bo != NULL)
       OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
@@ -139,6 +136,41 @@ const struct brw_tracked_state brw_binding_table_pointers = {
    .emit = upload_binding_table_pointers,
 };
 
+/**
+ * Upload the binding table pointers, which point each stage's array of surface
+ * state pointers.
+ *
+ * The binding table pointers are relative to the surface state base address,
+ * which is 0.
+ */
+static void upload_gen6_binding_table_pointers(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 |
+	     GEN6_BINDING_TABLE_MODIFY_VS |
+	     GEN6_BINDING_TABLE_MODIFY_GS |
+	     GEN6_BINDING_TABLE_MODIFY_PS |
+	     (4 - 2));
+   if (brw->vs.bind_bo != NULL)
+      OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
+   else
+      OUT_BATCH(0);
+   OUT_BATCH(0); /* gs */
+   OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */
+   ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state gen6_binding_table_pointers = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH,
+      .cache = CACHE_NEW_SURF_BIND,
+   },
+   .prepare = prepare_binding_table_pointers,
+   .emit = upload_gen6_binding_table_pointers,
+};
 
 /**
  * Upload pointers to the per-stage state.
@@ -150,7 +182,7 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
 {
    struct intel_context *intel = &brw->intel;
 
-   BEGIN_BATCH(7, IGNORE_CLIPRECTS);
+   BEGIN_BATCH(7);
    OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2));
    OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    if (brw->gs.prog_active)
@@ -212,10 +244,17 @@ static void emit_depthbuffer(struct brw_context *brw)
 {
    struct intel_context *intel = &brw->intel;
    struct intel_region *region = brw->state.depth_region;
-   unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5;
+   unsigned int len;
+
+   if (intel->gen >= 6)
+      len = 7;
+   else if (intel->is_g4x || intel->is_ironlake)
+      len = 6;
+   else
+      len = 5;
 
    if (region == NULL) {
-      BEGIN_BATCH(len, IGNORE_CLIPRECTS);
+      BEGIN_BATCH(len);
       OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
       OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
 		(BRW_SURFACE_NULL << 29));
@@ -223,9 +262,12 @@ static void emit_depthbuffer(struct brw_context *brw)
       OUT_BATCH(0);
       OUT_BATCH(0);
 
-      if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+      if (intel->is_g4x || intel->is_ironlake || intel->gen >= 6)
          OUT_BATCH(0);
 
+      if (intel->gen >= 6)
+	 OUT_BATCH(0);
+
       ADVANCE_BATCH();
    } else {
       unsigned int format;
@@ -246,8 +288,10 @@ static void emit_depthbuffer(struct brw_context *brw)
       }
 
       assert(region->tiling != I915_TILING_X);
+      if (IS_GEN6(intel->intelScreen->deviceID))
+	 assert(region->tiling != I915_TILING_NONE);
 
-      BEGIN_BATCH(len, IGNORE_CLIPRECTS);
+      BEGIN_BATCH(len);
       OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
       OUT_BATCH(((region->pitch * region->cpp) - 1) |
 		(format << 18) |
@@ -262,9 +306,20 @@ static void emit_depthbuffer(struct brw_context *brw)
 		((region->height - 1) << 19));
       OUT_BATCH(0);
 
-      if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+      if (intel->is_g4x || intel->is_ironlake || intel->gen >= 6)
          OUT_BATCH(0);
 
+      if (intel->gen >= 6)
+	 OUT_BATCH(0);
+
+      ADVANCE_BATCH();
+   }
+
+   /* Initialize it for safety. */
+   if (intel->gen >= 6) {
+      BEGIN_BATCH(2);
+      OUT_BATCH(CMD_3D_CLEAR_PARAMS << 16 | (2 - 2));
+      OUT_BATCH(0);
       ADVANCE_BATCH();
    }
 }
@@ -330,7 +385,7 @@ const struct brw_tracked_state brw_polygon_stipple = {
 
 static void upload_polygon_stipple_offset(struct brw_context *brw)
 {
-   __DRIdrawablePrivate *dPriv = brw->intel.driDrawable;
+   GLcontext *ctx = &brw->intel.ctx;
    struct brw_polygon_stipple_offset bpso;
 
    memset(&bpso, 0, sizeof(bpso));
@@ -346,8 +401,8 @@ static void upload_polygon_stipple_offset(struct brw_context *brw)
     * worry about.
     */
    if (brw->intel.ctx.DrawBuffer->Name == 0) {
-      bpso.bits0.x_offset = (32 - (dPriv->x & 31)) & 31;
-      bpso.bits0.y_offset = (32 - ((dPriv->y + dPriv->h) & 31)) & 31;
+      bpso.bits0.x_offset = 0;
+      bpso.bits0.y_offset = (32 - (ctx->DrawBuffer->Height & 31)) & 31;
    }
    else {
       bpso.bits0.y_offset = 0;
@@ -374,8 +429,8 @@ const struct brw_tracked_state brw_polygon_stipple_offset = {
 static void upload_aa_line_parameters(struct brw_context *brw)
 {
    struct brw_aa_line_parameters balp;
-   
-   if (BRW_IS_965(brw))
+
+   if (!brw->has_aa_line_parameters)
       return;
 
    /* use legacy aa line coverage computation */
@@ -438,18 +493,20 @@ const struct brw_tracked_state brw_line_stipple = {
 
 static void upload_invarient_state( struct brw_context *brw )
 {
+   struct intel_context *intel = &brw->intel;
+
    {
       /* 0x61040000  Pipeline Select */
       /*     PipelineSelect            : 0 */
       struct brw_pipeline_select ps;
 
       memset(&ps, 0, sizeof(ps));
-      ps.header.opcode = CMD_PIPELINE_SELECT(brw);
+      ps.header.opcode = brw->CMD_PIPELINE_SELECT;
       ps.header.pipeline_select = 0;
       BRW_BATCH_STRUCT(brw, &ps);
    }
 
-   {
+   if (intel->gen < 6) {
       struct brw_global_depth_offset_clamp gdo;
       memset(&gdo, 0, sizeof(gdo));
 
@@ -462,6 +519,32 @@ static void upload_invarient_state( struct brw_context *brw )
       BRW_BATCH_STRUCT(brw, &gdo);
    }
 
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+
+   if (intel->gen >= 6) {
+      int i;
+
+      BEGIN_BATCH(3);
+      OUT_BATCH(CMD_3D_MULTISAMPLE << 16 | (3 - 2));
+      OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
+		MS_NUMSAMPLES_1);
+      OUT_BATCH(0); /* positions for 4/8-sample */
+      ADVANCE_BATCH();
+
+      BEGIN_BATCH(2);
+      OUT_BATCH(CMD_3D_SAMPLE_MASK << 16 | (2 - 2));
+      OUT_BATCH(1);
+      ADVANCE_BATCH();
+
+      for (i = 0; i < 4; i++) {
+	 BEGIN_BATCH(4);
+	 OUT_BATCH(CMD_GS_SVB_INDEX << 16 | (4 - 2));
+	 OUT_BATCH(i << SVB_INDEX_SHIFT);
+	 OUT_BATCH(0);
+	 OUT_BATCH(0xffffffff);
+	 ADVANCE_BATCH();
+      }
+   }
 
    /* 0x61020000  State Instruction Pointer */
    {
@@ -480,7 +563,7 @@ static void upload_invarient_state( struct brw_context *brw )
       struct brw_vf_statistics vfs;
       memset(&vfs, 0, sizeof(vfs));
 
-      vfs.opcode = CMD_VF_STATISTICS(brw);
+      vfs.opcode = brw->CMD_VF_STATISTICS;
       if (INTEL_DEBUG & DEBUG_STATS)
 	 vfs.statistics_enable = 1; 
 
@@ -512,8 +595,21 @@ static void upload_state_base_address( struct brw_context *brw )
    /* Output the structure (brw_state_base_address) directly to the
     * batchbuffer, so we can emit relocations inline.
     */
-   if (BRW_IS_IGDNG(brw)) {
-       BEGIN_BATCH(8, IGNORE_CLIPRECTS);
+   if (intel->gen >= 6) {
+       BEGIN_BATCH(10);
+       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
+       OUT_BATCH(1); /* General state base address */
+       OUT_BATCH(1); /* Surface state base address */
+       OUT_BATCH(1); /* Dynamic state base address */
+       OUT_BATCH(1); /* Indirect object base address */
+       OUT_BATCH(1); /* Instruction base address */
+       OUT_BATCH(1); /* General state upper bound */
+       OUT_BATCH(1); /* Dynamic state upper bound */
+       OUT_BATCH(1); /* Indirect object upper bound */
+       OUT_BATCH(1); /* Instruction access upper bound */
+       ADVANCE_BATCH();
+   } else if (intel->is_ironlake) {
+       BEGIN_BATCH(8);
        OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
        OUT_BATCH(1); /* General state base address */
        OUT_BATCH(1); /* Surface state base address */
@@ -524,7 +620,7 @@ static void upload_state_base_address( struct brw_context *brw )
        OUT_BATCH(1); /* Instruction access upper bound */
        ADVANCE_BATCH();
    } else {
-       BEGIN_BATCH(6, IGNORE_CLIPRECTS);
+       BEGIN_BATCH(6);
        OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
        OUT_BATCH(1); /* General state base address */
        OUT_BATCH(1); /* Surface state base address */
diff --git a/i965/brw_program.c b/i965/brw_program.c
index bac6918..c78f7b3 100644
--- a/i965/brw_program.c
+++ b/i965/brw_program.c
@@ -37,7 +37,6 @@
 #include "tnl/tnl.h"
 
 #include "brw_context.h"
-#include "brw_util.h"
 #include "brw_wm.h"
 
 static void brwBindProgram( GLcontext *ctx,
@@ -112,9 +111,10 @@ static GLboolean brwIsProgramNative( GLcontext *ctx,
    return GL_TRUE;
 }
 
-static void brwProgramStringNotify( GLcontext *ctx,
-				    GLenum target,
-				    struct gl_program *prog )
+
+static GLboolean brwProgramStringNotify( GLcontext *ctx,
+                                         GLenum target,
+                                         struct gl_program *prog )
 {
    struct brw_context *brw = brw_context(ctx);
 
@@ -151,6 +151,9 @@ static void brwProgramStringNotify( GLcontext *ctx,
        */
       _tnl_program_string(ctx, target, prog);
    }
+
+   /* XXX check if program is legal, within limits */
+   return GL_TRUE;
 }
 
 void brwInitFragProgFuncs( struct dd_function_table *functions )
diff --git a/i965/brw_queryobj.c b/i965/brw_queryobj.c
index a195bc3..6cce7e5 100644
--- a/i965/brw_queryobj.c
+++ b/i965/brw_queryobj.c
@@ -73,7 +73,7 @@ brw_new_query_object(GLcontext *ctx, GLuint id)
 {
    struct brw_query_object *query;
 
-   query = _mesa_calloc(sizeof(struct brw_query_object));
+   query = calloc(1, sizeof(struct brw_query_object));
 
    query->Base.Id = id;
    query->Base.Result = 0;
@@ -89,7 +89,7 @@ brw_delete_query(GLcontext *ctx, struct gl_query_object *q)
    struct brw_query_object *query = (struct brw_query_object *)q;
 
    dri_bo_unreference(query->bo);
-   _mesa_free(query);
+   free(query);
 }
 
 static void
@@ -188,7 +188,7 @@ brw_emit_query_begin(struct brw_context *brw)
    if (brw->query.active || is_empty_list(&brw->query.active_head))
       return;
 
-   BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+   BEGIN_BATCH(4);
    OUT_BATCH(_3DSTATE_PIPE_CONTROL |
 	     PIPE_CONTROL_DEPTH_STALL |
 	     PIPE_CONTROL_WRITE_DEPTH_COUNT);
@@ -227,7 +227,7 @@ brw_emit_query_end(struct brw_context *brw)
    if (!brw->query.active)
       return;
 
-   BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+   BEGIN_BATCH(4);
    OUT_BATCH(_3DSTATE_PIPE_CONTROL |
 	     PIPE_CONTROL_DEPTH_STALL |
 	     PIPE_CONTROL_WRITE_DEPTH_COUNT);
diff --git a/i965/brw_sf.c b/i965/brw_sf.c
index 968890f..8e6839b 100644
--- a/i965/brw_sf.c
+++ b/i965/brw_sf.c
@@ -117,12 +117,13 @@ static void compile_sf_prog( struct brw_context *brw,
    /* Upload
     */
    dri_bo_unreference(brw->sf.prog_bo);
-   brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG,
-				       &c.key, sizeof(c.key),
-				       NULL, 0,
-				       program, program_size,
-				       &c.prog_data,
-				       &brw->sf.prog_data );
+   brw->sf.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_SF_PROG,
+						   &c.key, sizeof(c.key),
+						   NULL, 0,
+						   program, program_size,
+						   &c.prog_data,
+						   sizeof(c.prog_data),
+						   &brw->sf.prog_data);
 }
 
 /* Calculate interpolants for triangle and line rasterization.
diff --git a/i965/brw_sf_emit.c b/i965/brw_sf_emit.c
index 3eae41e..bb08055 100644
--- a/i965/brw_sf_emit.c
+++ b/i965/brw_sf_emit.c
@@ -149,6 +149,7 @@ static void copy_colors( struct brw_sf_compile *c,
 static void do_flatshade_triangle( struct brw_sf_compile *c )
 {
    struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
    struct brw_reg ip = brw_ip_reg();
    GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
    GLuint jmpi = 1;
@@ -161,7 +162,7 @@ static void do_flatshade_triangle( struct brw_sf_compile *c )
    if (c->key.primitive == SF_UNFILLED_TRIS)
       return;
 
-   if (BRW_IS_IGDNG(p->brw))
+   if (intel->is_ironlake)
        jmpi = 2;
 
    brw_push_insn_state(p);
@@ -187,6 +188,7 @@ static void do_flatshade_triangle( struct brw_sf_compile *c )
 static void do_flatshade_line( struct brw_sf_compile *c )
 {
    struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
    struct brw_reg ip = brw_ip_reg();
    GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
    GLuint jmpi = 1;
@@ -199,7 +201,7 @@ static void do_flatshade_line( struct brw_sf_compile *c )
    if (c->key.primitive == SF_UNFILLED_TRIS)
       return;
 
-   if (BRW_IS_IGDNG(p->brw))
+   if (intel->is_ironlake)
        jmpi = 2;
 
    brw_push_insn_state(p);
diff --git a/i965/brw_sf_state.c b/i965/brw_sf_state.c
index bb69435..847c886 100644
--- a/i965/brw_sf_state.c
+++ b/i965/brw_sf_state.c
@@ -35,7 +35,6 @@
 #include "brw_state.h"
 #include "brw_defines.h"
 #include "main/macros.h"
-#include "intel_fbo.h"
 
 static void upload_sf_vp(struct brw_context *brw)
 {
@@ -70,9 +69,9 @@ static void upload_sf_vp(struct brw_context *brw)
     * for DrawBuffer->_[XY]{min,max}
     */
 
-   /* The scissor only needs to handle the intersection of drawable and
-    * scissor rect.  Clipping to the boundaries of static shared buffers
-    * for front/back/depth is covered by looping over cliprects in brw_draw.c.
+   /* The scissor only needs to handle the intersection of drawable
+    * and scissor rect, since there are no longer cliprects for shared
+    * buffers with DRI2.
     *
     * Note that the hardware's coordinates are inclusive, while Mesa's min is
     * inclusive but max is exclusive.
@@ -165,6 +164,7 @@ static dri_bo *
 sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
 			dri_bo **reloc_bufs)
 {
+   struct intel_context *intel = &brw->intel;
    struct brw_sf_unit_state sf;
    dri_bo *bo;
    int chipset_max_threads;
@@ -177,7 +177,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
 
    sf.thread3.dispatch_grf_start_reg = 3;
 
-   if (BRW_IS_IGDNG(brw))
+   if (intel->is_ironlake)
        sf.thread3.urb_entry_read_offset = 3;
    else
        sf.thread3.urb_entry_read_offset = 1;
@@ -187,10 +187,10 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
    sf.thread4.nr_urb_entries = key->nr_urb_entries;
    sf.thread4.urb_entry_allocation_size = key->sfsize - 1;
 
-   /* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or 
-    * 48(IGDNG) threads 
+   /* Each SF thread produces 1 PUE, and there can be up to 24 (Pre-Ironlake) or
+    * 48 (Ironlake) threads.
     */
-   if (BRW_IS_IGDNG(brw))
+   if (intel->is_ironlake)
       chipset_max_threads = 48;
    else
       chipset_max_threads = 24;
@@ -308,8 +308,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
    bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT,
 			 key, sizeof(*key),
 			 reloc_bufs, 2,
-			 &sf, sizeof(sf),
-			 NULL, NULL);
+			 &sf, sizeof(sf));
 
    /* STATE_PREFETCH command description describes this state as being
     * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain.
diff --git a/i965/brw_state.h b/i965/brw_state.h
index b129b1f..f790cfa 100644
--- a/i965/brw_state.h
+++ b/i965/brw_state.h
@@ -35,7 +35,7 @@
 
 #include "brw_context.h"
 
-static inline void
+static INLINE void
 brw_add_validated_bo(struct brw_context *brw, dri_bo *bo)
 {
    assert(brw->state.validated_bo_count < ARRAY_SIZE(brw->state.validated_bos));
@@ -90,6 +90,23 @@ const struct brw_tracked_state brw_drawing_rect;
 const struct brw_tracked_state brw_indices;
 const struct brw_tracked_state brw_vertices;
 const struct brw_tracked_state brw_index_buffer;
+const struct brw_tracked_state gen6_binding_table_pointers;
+const struct brw_tracked_state gen6_blend_state;
+const struct brw_tracked_state gen6_cc_state_pointers;
+const struct brw_tracked_state gen6_cc_vp;
+const struct brw_tracked_state gen6_clip_state;
+const struct brw_tracked_state gen6_clip_vp;
+const struct brw_tracked_state gen6_color_calc_state;
+const struct brw_tracked_state gen6_depth_stencil_state;
+const struct brw_tracked_state gen6_gs_state;
+const struct brw_tracked_state gen6_sampler_state;
+const struct brw_tracked_state gen6_scissor_state;
+const struct brw_tracked_state gen6_sf_state;
+const struct brw_tracked_state gen6_sf_vp;
+const struct brw_tracked_state gen6_urb;
+const struct brw_tracked_state gen6_viewport_state;
+const struct brw_tracked_state gen6_vs_state;
+const struct brw_tracked_state gen6_wm_state;
 
 /**
  * Use same key for WM and VS surfaces.
@@ -124,16 +141,26 @@ dri_bo *brw_cache_data(struct brw_cache *cache,
 		       dri_bo **reloc_bufs,
 		       GLuint nr_reloc_bufs);
 
-dri_bo *brw_upload_cache( struct brw_cache *cache,
-			  enum brw_cache_id cache_id,
-			  const void *key,
-			  GLuint key_sz,
-			  dri_bo **reloc_bufs,
-			  GLuint nr_reloc_bufs,
-			  const void *data,
-			  GLuint data_sz,
-			  const void *aux,
-			  void *aux_return );
+drm_intel_bo *brw_upload_cache(struct brw_cache *cache,
+			       enum brw_cache_id cache_id,
+			       const void *key,
+			       GLuint key_sz,
+			       dri_bo **reloc_bufs,
+			       GLuint nr_reloc_bufs,
+			       const void *data,
+			       GLuint data_sz);
+
+drm_intel_bo *brw_upload_cache_with_auxdata(struct brw_cache *cache,
+					    enum brw_cache_id cache_id,
+					    const void *key,
+					    GLuint key_sz,
+					    dri_bo **reloc_bufs,
+					    GLuint nr_reloc_bufs,
+					    const void *data,
+					    GLuint data_sz,
+					    const void *aux,
+					    GLuint aux_sz,
+					    void *aux_return);
 
 dri_bo *brw_search_cache( struct brw_cache *cache,
 			  enum brw_cache_id cache_id,
@@ -151,7 +178,7 @@ void brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo);
 /***********************************************************************
  * brw_state_batch.c
  */
-#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS)
+#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)))
 #define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) )
 
 GLboolean brw_cached_batch_struct( struct brw_context *brw,
diff --git a/i965/brw_state_batch.c b/i965/brw_state_batch.c
index 7821898..3901941 100644
--- a/i965/brw_state_batch.c
+++ b/i965/brw_state_batch.c
@@ -48,7 +48,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
    struct header *newheader = (struct header *)data;
 
    if (brw->emit_state_always) {
-      intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
+      intel_batchbuffer_data(brw->intel.batch, data, sz);
       return GL_TRUE;
    }
 
@@ -57,8 +57,8 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
 	 if (item->sz == sz && memcmp(item->header, newheader, sz) == 0)
 	    return GL_FALSE;
 	 if (item->sz != sz) {
-	    _mesa_free(item->header);
-	    item->header = _mesa_malloc(sz);
+	    free(item->header);
+	    item->header = malloc(sz);
 	    item->sz = sz;
 	 }
 	 goto emit;
@@ -68,14 +68,14 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
 
    assert(!item);
    item = CALLOC_STRUCT(brw_cached_batch_item);
-   item->header = _mesa_malloc(sz);
+   item->header = malloc(sz);
    item->sz = sz;
    item->next = brw->cached_batch_items;
    brw->cached_batch_items = item;
 
  emit:
    memcpy(item->header, newheader, sz);
-   intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
+   intel_batchbuffer_data(brw->intel.batch, data, sz);
    return GL_TRUE;
 }
 
diff --git a/i965/brw_state_cache.c b/i965/brw_state_cache.c
index e4c9ba7..c08cb45 100644
--- a/i965/brw_state_cache.c
+++ b/i965/brw_state_cache.c
@@ -59,37 +59,27 @@
 #include "main/imports.h"
 #include "brw_state.h"
 #include "intel_batchbuffer.h"
-
-/* XXX: Fixme - have to include these to get the sizes of the prog_key
- * structs:
- */
 #include "brw_wm.h"
-#include "brw_vs.h"
-#include "brw_clip.h"
-#include "brw_sf.h"
-#include "brw_gs.h"
 
 
 static GLuint
-hash_key(const void *key, GLuint key_size,
-         dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
+hash_key(struct brw_cache_item *item)
 {
-   GLuint *ikey = (GLuint *)key;
-   GLuint hash = 0, i;
+   GLuint *ikey = (GLuint *)item->key;
+   GLuint hash = item->cache_id, i;
 
-   assert(key_size % 4 == 0);
+   assert(item->key_size % 4 == 0);
 
    /* I'm sure this can be improved on:
     */
-   for (i = 0; i < key_size/4; i++) {
+   for (i = 0; i < item->key_size/4; i++) {
       hash ^= ikey[i];
       hash = (hash << 5) | (hash >> 27);
    }
 
    /* Include the BO pointers as key data as well */
-   ikey = (GLuint *)reloc_bufs;
-   key_size = nr_reloc_bufs * sizeof(dri_bo *);
-   for (i = 0; i < key_size/4; i++) {
+   ikey = (GLuint *)item->reloc_bufs;
+   for (i = 0; i < item->nr_reloc_bufs * sizeof(drm_intel_bo *) / 4; i++) {
       hash ^= ikey[i];
       hash = (hash << 5) | (hash >> 27);
    }
@@ -114,11 +104,22 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
    cache->brw->state.dirty.cache |= 1 << cache_id;
 }
 
+static int
+brw_cache_item_equals(const struct brw_cache_item *a,
+		      const struct brw_cache_item *b)
+{
+   return a->cache_id == b->cache_id &&
+      a->hash == b->hash &&
+      a->key_size == b->key_size &&
+      (memcmp(a->key, b->key, a->key_size) == 0) &&
+      a->nr_reloc_bufs == b->nr_reloc_bufs &&
+      (memcmp(a->reloc_bufs, b->reloc_bufs,
+	      a->nr_reloc_bufs * sizeof(dri_bo *)) == 0);
+}
 
 static struct brw_cache_item *
-search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
-	     GLuint hash, const void *key, GLuint key_size,
-	     dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
+search_cache(struct brw_cache *cache, GLuint hash,
+	     struct brw_cache_item *lookup)
 {
    struct brw_cache_item *c;
 
@@ -133,13 +134,7 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
 #endif
 
    for (c = cache->items[hash % cache->size]; c; c = c->next) {
-      if (c->cache_id == cache_id &&
-	  c->hash == hash &&
-	  c->key_size == key_size &&
-	  memcmp(c->key, key, key_size) == 0 &&
-	  c->nr_reloc_bufs == nr_reloc_bufs &&
-	  memcmp(c->reloc_bufs, reloc_bufs,
-		 nr_reloc_bufs * sizeof(dri_bo *)) == 0)
+      if (brw_cache_item_equals(lookup, c))
 	 return c;
    }
 
@@ -155,7 +150,7 @@ rehash(struct brw_cache *cache)
    GLuint size, i;
 
    size = cache->size * 3;
-   items = (struct brw_cache_item**) _mesa_calloc(size * sizeof(*items));
+   items = (struct brw_cache_item**) calloc(1, size * sizeof(*items));
 
    for (i = 0; i < cache->size; i++)
       for (c = cache->items[i]; c; c = next) {
@@ -182,10 +177,18 @@ brw_search_cache(struct brw_cache *cache,
                  void *aux_return)
 {
    struct brw_cache_item *item;
-   GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
+   struct brw_cache_item lookup;
+   GLuint hash;
 
-   item = search_cache(cache, cache_id, hash, key, key_size,
-		       reloc_bufs, nr_reloc_bufs);
+   lookup.cache_id = cache_id;
+   lookup.key = key;
+   lookup.key_size = key_size;
+   lookup.reloc_bufs = reloc_bufs;
+   lookup.nr_reloc_bufs = nr_reloc_bufs;
+   hash = hash_key(&lookup);
+   lookup.hash = hash;
+
+   item = search_cache(cache, hash, &lookup);
 
    if (item == NULL)
       return NULL;
@@ -200,48 +203,52 @@ brw_search_cache(struct brw_cache *cache,
 }
 
 
-dri_bo *
-brw_upload_cache( struct brw_cache *cache,
-		  enum brw_cache_id cache_id,
-		  const void *key,
-		  GLuint key_size,
-		  dri_bo **reloc_bufs,
-		  GLuint nr_reloc_bufs,
-		  const void *data,
-		  GLuint data_size,
-		  const void *aux,
-		  void *aux_return )
+drm_intel_bo *
+brw_upload_cache_with_auxdata(struct brw_cache *cache,
+			      enum brw_cache_id cache_id,
+			      const void *key,
+			      GLuint key_size,
+			      dri_bo **reloc_bufs,
+			      GLuint nr_reloc_bufs,
+			      const void *data,
+			      GLuint data_size,
+			      const void *aux,
+			      GLuint aux_size,
+			      void *aux_return)
 {
    struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
-   GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
+   GLuint hash;
    GLuint relocs_size = nr_reloc_bufs * sizeof(dri_bo *);
-   GLuint aux_size = cache->aux_size[cache_id];
    void *tmp;
    dri_bo *bo;
    int i;
 
+   item->cache_id = cache_id;
+   item->key = key;
+   item->key_size = key_size;
+   item->reloc_bufs = reloc_bufs;
+   item->nr_reloc_bufs = nr_reloc_bufs;
+   hash = hash_key(item);
+   item->hash = hash;
+
    /* Create the buffer object to contain the data */
    bo = dri_bo_alloc(cache->brw->intel.bufmgr,
 		     cache->name[cache_id], data_size, 1 << 6);
 
 
    /* Set up the memory containing the key, aux_data, and reloc_bufs */
-   tmp = _mesa_malloc(key_size + aux_size + relocs_size);
+   tmp = malloc(key_size + aux_size + relocs_size);
 
    memcpy(tmp, key, key_size);
-   memcpy(tmp + key_size, aux, cache->aux_size[cache_id]);
+   memcpy(tmp + key_size, aux, aux_size);
    memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size);
    for (i = 0; i < nr_reloc_bufs; i++) {
       if (reloc_bufs[i] != NULL)
 	 dri_bo_reference(reloc_bufs[i]);
    }
 
-   item->cache_id = cache_id;
    item->key = tmp;
-   item->hash = hash;
-   item->key_size = key_size;
    item->reloc_bufs = tmp + key_size + aux_size;
-   item->nr_reloc_bufs = nr_reloc_bufs;
 
    item->bo = bo;
    dri_bo_reference(bo);
@@ -255,12 +262,11 @@ brw_upload_cache( struct brw_cache *cache,
    cache->n_items++;
 
    if (aux_return) {
-      assert(cache->aux_size[cache_id]);
       *(void **)aux_return = (void *)((char *)item->key + item->key_size);
    }
 
    if (INTEL_DEBUG & DEBUG_STATE)
-      _mesa_printf("upload %s: %d bytes to cache id %d\n",
+      printf("upload %s: %d bytes to cache id %d\n",
 		   cache->name[cache_id],
 		   data_size, cache_id);
 
@@ -272,6 +278,23 @@ brw_upload_cache( struct brw_cache *cache,
    return bo;
 }
 
+drm_intel_bo *
+brw_upload_cache(struct brw_cache *cache,
+		 enum brw_cache_id cache_id,
+		 const void *key,
+		 GLuint key_size,
+		 dri_bo **reloc_bufs,
+		 GLuint nr_reloc_bufs,
+		 const void *data,
+		 GLuint data_size)
+{
+   return brw_upload_cache_with_auxdata(cache, cache_id,
+					key, key_size,
+					reloc_bufs, nr_reloc_bufs,
+					data, data_size,
+					NULL, 0,
+					NULL);
+}
 
 /**
  * Wrapper around brw_cache_data_sz using the cache_id's canonical key size.
@@ -292,11 +315,18 @@ brw_cache_data(struct brw_cache *cache,
 	       GLuint nr_reloc_bufs)
 {
    dri_bo *bo;
-   struct brw_cache_item *item;
-   GLuint hash = hash_key(data, data_size, reloc_bufs, nr_reloc_bufs);
-
-   item = search_cache(cache, cache_id, hash, data, data_size,
-		       reloc_bufs, nr_reloc_bufs);
+   struct brw_cache_item *item, lookup;
+   GLuint hash;
+
+   lookup.cache_id = cache_id;
+   lookup.key = data;
+   lookup.key_size = data_size;
+   lookup.reloc_bufs = reloc_bufs;
+   lookup.nr_reloc_bufs = nr_reloc_bufs;
+   hash = hash_key(&lookup);
+   lookup.hash = hash;
+
+   item = search_cache(cache, hash, &lookup);
    if (item) {
       update_cache_last(cache, cache_id, item->bo);
       dri_bo_reference(item->bo);
@@ -306,8 +336,7 @@ brw_cache_data(struct brw_cache *cache,
    bo = brw_upload_cache(cache, cache_id,
 			 data, data_size,
 			 reloc_bufs, nr_reloc_bufs,
-			 data, data_size,
-			 NULL, NULL);
+			 data, data_size);
 
    return bo;
 }
@@ -321,11 +350,9 @@ enum pool_type {
 static void
 brw_init_cache_id(struct brw_cache *cache,
                   const char *name,
-                  enum brw_cache_id id,
-                  GLuint aux_size)
+                  enum brw_cache_id id)
 {
    cache->name[id] = strdup(name);
-   cache->aux_size[id] = aux_size;
 }
 
 
@@ -339,82 +366,31 @@ brw_init_non_surface_cache(struct brw_context *brw)
    cache->size = 7;
    cache->n_items = 0;
    cache->items = (struct brw_cache_item **)
-      _mesa_calloc(cache->size * sizeof(struct brw_cache_item));
-
-   brw_init_cache_id(cache,
-		     "CC_VP",
-		     BRW_CC_VP,
-		     0);
-
-   brw_init_cache_id(cache,
-		     "CC_UNIT",
-		     BRW_CC_UNIT,
-		     0);
-
-   brw_init_cache_id(cache,
-		     "WM_PROG",
-		     BRW_WM_PROG,
-		     sizeof(struct brw_wm_prog_data));
-
-   brw_init_cache_id(cache,
-		     "SAMPLER_DEFAULT_COLOR",
-		     BRW_SAMPLER_DEFAULT_COLOR,
-		     0);
-
-   brw_init_cache_id(cache,
-		     "SAMPLER",
-		     BRW_SAMPLER,
-		     0);
-
-   brw_init_cache_id(cache,
-		     "WM_UNIT",
-		     BRW_WM_UNIT,
-		     0);
-
-   brw_init_cache_id(cache,
-		     "SF_PROG",
-		     BRW_SF_PROG,
-		     sizeof(struct brw_sf_prog_data));
-
-   brw_init_cache_id(cache,
-		     "SF_VP",
-		     BRW_SF_VP,
-		     0);
-
-   brw_init_cache_id(cache,
-		     "SF_UNIT",
-		     BRW_SF_UNIT,
-		     0);
-
-   brw_init_cache_id(cache,
-		     "VS_UNIT",
-		     BRW_VS_UNIT,
-		     0);
-
-   brw_init_cache_id(cache,
-		     "VS_PROG",
-		     BRW_VS_PROG,
-		     sizeof(struct brw_vs_prog_data));
-
-   brw_init_cache_id(cache,
-		     "CLIP_UNIT",
-		     BRW_CLIP_UNIT,
-		     0);
-
-   brw_init_cache_id(cache,
-		     "CLIP_PROG",
-		     BRW_CLIP_PROG,
-		     sizeof(struct brw_clip_prog_data));
-
-   brw_init_cache_id(cache,
-		     "GS_UNIT",
-		     BRW_GS_UNIT,
-		     0);
-
-   brw_init_cache_id(cache,
-		     "GS_PROG",
-		     BRW_GS_PROG,
-		     sizeof(struct brw_gs_prog_data));
+      calloc(1, cache->size * sizeof(struct brw_cache_item));
+
+   brw_init_cache_id(cache, "CC_VP", BRW_CC_VP);
+   brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT);
+   brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG);
+   brw_init_cache_id(cache, "SAMPLER_DEFAULT_COLOR", BRW_SAMPLER_DEFAULT_COLOR);
+   brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER);
+   brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT);
+   brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG);
+   brw_init_cache_id(cache, "SF_VP", BRW_SF_VP);
+
+   brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT);
+
+   brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT);
+
+   brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG);
+
+   brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT);
+
+   brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG);
+
+   brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT);
+
+   brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG);
+   brw_init_cache_id(cache, "BLEND_STATE", BRW_BLEND_STATE);
 }
 
 
@@ -428,17 +404,10 @@ brw_init_surface_cache(struct brw_context *brw)
    cache->size = 7;
    cache->n_items = 0;
    cache->items = (struct brw_cache_item **)
-      _mesa_calloc(cache->size * sizeof(struct brw_cache_item));
-
-   brw_init_cache_id(cache,
-		     "SS_SURFACE",
-		     BRW_SS_SURFACE,
-		     0);
+      calloc(1, cache->size * sizeof(struct brw_cache_item));
 
-   brw_init_cache_id(cache,
-		     "SS_SURF_BIND",
-		     BRW_SS_SURF_BIND,
-		     0);
+   brw_init_cache_id(cache, "SS_SURFACE", BRW_SS_SURFACE);
+   brw_init_cache_id(cache, "SS_SURF_BIND", BRW_SS_SURF_BIND);
 }
 
 
@@ -457,7 +426,7 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
    GLuint i;
 
    if (INTEL_DEBUG & DEBUG_STATE)
-      _mesa_printf("%s\n", __FUNCTION__);
+      printf("%s\n", __FUNCTION__);
 
    for (i = 0; i < cache->size; i++) {
       for (c = cache->items[i]; c; c = next) {
@@ -476,7 +445,7 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
    cache->n_items = 0;
 
    if (brw->curbe.last_buf) {
-      _mesa_free(brw->curbe.last_buf);
+      free(brw->curbe.last_buf);
       brw->curbe.last_buf = NULL;
    }
 
@@ -497,7 +466,7 @@ brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo)
    GLuint i;
 
    if (INTEL_DEBUG & DEBUG_STATE)
-      _mesa_printf("%s\n", __FUNCTION__);
+      printf("%s\n", __FUNCTION__);
 
    for (i = 0; i < cache->size; i++) {
       for (prev = &cache->items[i]; *prev;) {
@@ -525,7 +494,7 @@ void
 brw_state_cache_check_size(struct brw_context *brw)
 {
    if (INTEL_DEBUG & DEBUG_STATE)
-      _mesa_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items);
+      printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items);
 
    /* un-tuned guess.  We've got around 20 state objects for a total of around
     * 32k, so 1000 of them is around 1.5MB.
@@ -544,7 +513,7 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
    GLuint i;
 
    if (INTEL_DEBUG & DEBUG_STATE)
-      _mesa_printf("%s\n", __FUNCTION__);
+      printf("%s\n", __FUNCTION__);
 
    brw_clear_cache(brw, cache);
    for (i = 0; i < BRW_MAX_CACHE; i++) {
diff --git a/i965/brw_state_dump.c b/i965/brw_state_dump.c
index e94fa7d..020ac52 100644
--- a/i965/brw_state_dump.c
+++ b/i965/brw_state_dump.c
@@ -28,7 +28,6 @@
 #include "main/mtypes.h"
 
 #include "brw_context.h"
-#include "brw_state.h"
 #include "brw_defines.h"
 
 /**
diff --git a/i965/brw_state_upload.c b/i965/brw_state_upload.c
index af8dfb4..9e54f29 100644
--- a/i965/brw_state_upload.c
+++ b/i965/brw_state_upload.c
@@ -35,6 +35,7 @@
 #include "brw_state.h"
 #include "intel_batchbuffer.h"
 #include "intel_buffers.h"
+#include "intel_chipset.h"
 
 /* This is used to initialize brw->state.atoms[].  We could use this
  * list directly except for a single atom, brw_constant_buffer, which
@@ -42,7 +43,7 @@
  * current fragment and vertex programs, and so cannot be a static
  * value.
  */
-const struct brw_tracked_state *atoms[] =
+static const struct brw_tracked_state *gen4_atoms[] =
 {
    &brw_check_fallback,
 
@@ -101,6 +102,63 @@ const struct brw_tracked_state *atoms[] =
    &brw_constant_buffer
 };
 
+const struct brw_tracked_state *gen6_atoms[] =
+{
+   &brw_check_fallback,
+
+   &brw_wm_input_sizes,
+   &brw_vs_prog,
+   &brw_gs_prog,
+   &brw_wm_prog,
+
+   &gen6_clip_vp,
+   &gen6_sf_vp,
+   &gen6_cc_vp,
+
+   /* Command packets: */
+   &brw_invarient_state,
+
+   &gen6_viewport_state,	/* must do after *_vp stages */
+
+   &gen6_urb,
+   &gen6_blend_state,		/* must do before cc unit */
+   &gen6_color_calc_state,	/* must do before cc unit */
+   &gen6_depth_stencil_state,	/* must do before cc unit */
+   &gen6_cc_state_pointers,
+
+   &brw_vs_surfaces,		/* must do before unit */
+   &brw_wm_constant_surface,	/* must do before wm surfaces/bind bo */
+   &brw_wm_surfaces,		/* must do before samplers and unit */
+
+   &brw_wm_samplers,
+   &gen6_sampler_state,
+
+   &gen6_vs_state,
+   &gen6_gs_state,
+   &gen6_clip_state,
+   &gen6_sf_state,
+   &gen6_wm_state,
+
+   &gen6_scissor_state,
+
+   &brw_state_base_address,
+
+   &gen6_binding_table_pointers,
+
+   &brw_depthbuffer,
+
+   &brw_polygon_stipple,
+   &brw_polygon_stipple_offset,
+
+   &brw_line_stipple,
+   &brw_aa_line_parameters,
+
+   &brw_drawing_rect,
+
+   &brw_indices,
+   &brw_index_buffer,
+   &brw_vertices,
+};
 
 void brw_init_state( struct brw_context *brw )
 {
@@ -208,7 +266,6 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_CONTEXT),
    DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS),
    DEFINE_BIT(BRW_NEW_PSP),
-   DEFINE_BIT(BRW_NEW_FENCE),
    DEFINE_BIT(BRW_NEW_INDICES),
    DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
    DEFINE_BIT(BRW_NEW_VERTICES),
@@ -218,6 +275,7 @@ static struct dirty_bit_map brw_bits[] = {
 };
 
 static struct dirty_bit_map cache_bits[] = {
+   DEFINE_BIT(CACHE_NEW_BLEND_STATE),
    DEFINE_BIT(CACHE_NEW_CC_VP),
    DEFINE_BIT(CACHE_NEW_CC_UNIT),
    DEFINE_BIT(CACHE_NEW_WM_PROG),
@@ -277,6 +335,8 @@ void brw_validate_state( struct brw_context *brw )
    struct intel_context *intel = &brw->intel;
    struct brw_state_flags *state = &brw->state.dirty;
    GLuint i;
+   const struct brw_tracked_state **atoms;
+   int num_atoms;
 
    brw_clear_validated_bos(brw);
 
@@ -285,6 +345,14 @@ void brw_validate_state( struct brw_context *brw )
 
    brw_add_validated_bo(brw, intel->batch->buf);
 
+   if (IS_GEN6(intel->intelScreen->deviceID)) {
+      atoms = gen6_atoms;
+      num_atoms = ARRAY_SIZE(gen6_atoms);
+   } else {
+      atoms = gen4_atoms;
+      num_atoms = ARRAY_SIZE(gen4_atoms);
+   }
+
    if (brw->emit_state_always) {
       state->mesa |= ~0;
       state->brw |= ~0;
@@ -312,7 +380,7 @@ void brw_validate_state( struct brw_context *brw )
    brw->intel.Fallback = GL_FALSE; /* boolean, not bitfield */
 
    /* do prepare stage for all atoms */
-   for (i = 0; i < Elements(atoms); i++) {
+   for (i = 0; i < num_atoms; i++) {
       const struct brw_tracked_state *atom = atoms[i];
 
       if (brw->intel.Fallback)
@@ -344,9 +412,20 @@ void brw_validate_state( struct brw_context *brw )
 
 void brw_upload_state(struct brw_context *brw)
 {
+   struct intel_context *intel = &brw->intel;
    struct brw_state_flags *state = &brw->state.dirty;
    int i;
    static int dirty_count = 0;
+   const struct brw_tracked_state **atoms;
+   int num_atoms;
+
+   if (IS_GEN6(intel->intelScreen->deviceID)) {
+      atoms = gen6_atoms;
+      num_atoms = ARRAY_SIZE(gen6_atoms);
+   } else {
+      atoms = gen4_atoms;
+      num_atoms = ARRAY_SIZE(gen4_atoms);
+   }
 
    brw_clear_validated_bos(brw);
 
@@ -356,10 +435,10 @@ void brw_upload_state(struct brw_context *brw)
        * state atoms are ordered correctly in the list.
        */
       struct brw_state_flags examined, prev;      
-      _mesa_memset(&examined, 0, sizeof(examined));
+      memset(&examined, 0, sizeof(examined));
       prev = *state;
 
-      for (i = 0; i < Elements(atoms); i++) {	 
+      for (i = 0; i < num_atoms; i++) {
 	 const struct brw_tracked_state *atom = atoms[i];
 	 struct brw_state_flags generated;
 
@@ -388,7 +467,7 @@ void brw_upload_state(struct brw_context *brw)
       }
    }
    else {
-      for (i = 0; i < Elements(atoms); i++) {	 
+      for (i = 0; i < num_atoms; i++) {
 	 const struct brw_tracked_state *atom = atoms[i];
 
 	 if (brw->intel.Fallback)
diff --git a/i965/brw_structs.h b/i965/brw_structs.h
index 66d4127..3c2adfc 100644
--- a/i965/brw_structs.h
+++ b/i965/brw_structs.h
@@ -658,7 +658,105 @@ struct brw_clip_unit_state
    GLfloat viewport_ymax;  
 };
 
+struct gen6_blend_state
+{
+   struct {
+      GLuint dest_blend_factor:5;
+      GLuint source_blend_factor:5;
+      GLuint pad3:1;
+      GLuint blend_func:3;
+      GLuint pad2:1;
+      GLuint ia_dest_blend_factor:5;
+      GLuint ia_source_blend_factor:5;
+      GLuint pad1:1;
+      GLuint ia_blend_func:3;
+      GLuint pad0:1;
+      GLuint ia_blend_enable:1;
+      GLuint blend_enable:1;
+   } blend0;
+
+   struct {
+      GLuint post_blend_clamp_enable:1;
+      GLuint pre_blend_clamp_enable:1;
+      GLuint clamp_range:2;
+      GLuint pad0:4;
+      GLuint x_dither_offset:2;
+      GLuint y_dither_offset:2;
+      GLuint dither_enable:1;
+      GLuint alpha_test_func:3;
+      GLuint alpha_test_enable:1;
+      GLuint pad1:1;
+      GLuint logic_op_func:4;
+      GLuint logic_op_enable:1;
+      GLuint pad2:1;
+      GLuint write_disable_b:1;
+      GLuint write_disable_g:1;
+      GLuint write_disable_r:1;
+      GLuint write_disable_a:1;
+      GLuint pad3:1;
+      GLuint alpha_to_coverage_dither:1;
+      GLuint alpha_to_one:1;
+      GLuint alpha_to_coverage:1;
+   } blend1;
+};
+
+struct gen6_color_calc_state
+{
+   struct {
+      GLuint alpha_test_format:1;
+      GLuint pad0:14;
+      GLuint round_disable:1;
+      GLuint bf_stencil_ref:8;
+      GLuint stencil_ref:8;
+   } cc0;
 
+   union {
+      GLfloat alpha_ref_f;
+      struct {
+	 GLuint ui:8;
+	 GLuint pad0:24;
+      } alpha_ref_fi;
+   } cc1;
+
+   GLfloat constant_r;
+   GLfloat constant_g;
+   GLfloat constant_b;
+   GLfloat constant_a;
+};
+
+struct gen6_depth_stencil_state
+{
+   struct {
+      GLuint pad0:3;
+      GLuint bf_stencil_pass_depth_pass_op:3;
+      GLuint bf_stencil_pass_depth_fail_op:3;
+      GLuint bf_stencil_fail_op:3;
+      GLuint bf_stencil_func:3;
+      GLuint bf_stencil_enable:1;
+      GLuint pad1:2;
+      GLuint stencil_write_enable:1;
+      GLuint stencil_pass_depth_pass_op:3;
+      GLuint stencil_pass_depth_fail_op:3;
+      GLuint stencil_fail_op:3;
+      GLuint stencil_func:3;
+      GLuint stencil_enable:1;
+   } ds0;
+
+   struct {
+      GLuint bf_stencil_write_mask:8;
+      GLuint bf_stencil_test_mask:8;
+      GLuint stencil_write_mask:8;
+      GLuint stencil_test_mask:8;
+   } ds1;
+
+   struct {
+      GLuint pad0:25;
+      GLuint depth_write_enable:1;
+      GLuint depth_test_func:3;
+      GLuint pad1:1;
+      GLuint depth_test_enable:1;
+   } ds2;
+};
 
 struct brw_cc_unit_state
 {
@@ -752,8 +850,6 @@ struct brw_cc_unit_state
    } cc7;
 };
 
-
-
 struct brw_sf_unit_state
 {
    struct thread0 thread0;
@@ -813,6 +909,11 @@ struct brw_sf_unit_state
 
 };
 
+struct gen6_scissor_state
+{
+   GLuint ymin, xmin;
+   GLuint ymax, xmax;
+};
 
 struct brw_gs_unit_state
 {
@@ -1043,6 +1144,15 @@ struct brw_sf_viewport
    } scissor;
 };
 
+struct gen6_sf_viewport {
+   GLfloat m00;
+   GLfloat m11;
+   GLfloat m22;
+   GLfloat m30;
+   GLfloat m31;
+   GLfloat m32;
+};
+
 /* Documented in the subsystem/shared-functions/sampler chapter...
  */
 struct brw_surface_state
diff --git a/i965/brw_tex_layout.c b/i965/brw_tex_layout.c
index e59e52e..09edfd8 100644
--- a/i965/brw_tex_layout.c
+++ b/i965/brw_tex_layout.c
@@ -36,7 +36,6 @@
 #include "intel_tex_layout.h"
 #include "intel_context.h"
 #include "main/macros.h"
-#include "intel_chipset.h"
 
 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
 
@@ -49,7 +48,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel,
 
    switch (mt->target) {
    case GL_TEXTURE_CUBE_MAP:
-      if (IS_IGDNG(intel->intelScreen->deviceID)) {
+      if (intel->is_ironlake) {
           GLuint align_h = 2, align_w = 4;
           GLuint level;
           GLuint x = 0;
diff --git a/i965/brw_urb.c b/i965/brw_urb.c
index 8c6f435..4f6b900 100644
--- a/i965/brw_urb.c
+++ b/i965/brw_urb.c
@@ -105,7 +105,8 @@ static GLboolean check_urb_layout( struct brw_context *brw )
    brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize;
    brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize;
 
-   return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= URB_SIZES(brw);
+   return brw->urb.cs_start + brw->urb.nr_cs_entries *
+      brw->urb.csize <= brw->urb.size;
 }
 
 /* Most minimal update, forces re-emit of URB fence packet after GS
@@ -113,6 +114,7 @@ static GLboolean check_urb_layout( struct brw_context *brw )
  */
 static void recalculate_urb_fence( struct brw_context *brw )
 {
+   struct intel_context *intel = &brw->intel;
    GLuint csize = brw->curbe.total_size;
    GLuint vsize = brw->vs.prog_data->urb_entry_size;
    GLuint sfsize = brw->sf.prog_data->urb_entry_size;
@@ -146,7 +148,7 @@ static void recalculate_urb_fence( struct brw_context *brw )
 
       brw->urb.constrained = 0;
 
-      if (BRW_IS_IGDNG(brw)) {
+      if (intel->is_ironlake) {
          brw->urb.nr_vs_entries = 128;
          brw->urb.nr_sf_entries = 48;
          if (check_urb_layout(brw)) {
@@ -156,7 +158,7 @@ static void recalculate_urb_fence( struct brw_context *brw )
             brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
             brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
          }
-      } else if (BRW_IS_G4X(brw)) {
+      } else if (intel->is_g4x) {
 	 brw->urb.nr_vs_entries = 64;
 	 if (check_urb_layout(brw)) {
 	    goto done;
@@ -184,23 +186,23 @@ static void recalculate_urb_fence( struct brw_context *brw )
 	     * entries and the values for minimum nr of entries
 	     * provided above.
 	     */
-	    _mesa_printf("couldn't calculate URB layout!\n");
+	    printf("couldn't calculate URB layout!\n");
 	    exit(1);
 	 }
 	 
 	 if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS))
-	    _mesa_printf("URB CONSTRAINED\n");
+	    printf("URB CONSTRAINED\n");
       }
 
 done:
       if (INTEL_DEBUG & DEBUG_URB)
-	 _mesa_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
+	 printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
 		      brw->urb.vs_start,
 		      brw->urb.gs_start,
 		      brw->urb.clip_start,
 		      brw->urb.sf_start,
 		      brw->urb.cs_start, 
-		      URB_SIZES(brw));
+		      brw->urb.size);
       
       brw->state.dirty.brw |= BRW_NEW_URB_FENCE;
    }
@@ -244,7 +246,7 @@ void brw_upload_urb_fence(struct brw_context *brw)
    uf.bits0.gs_fence  = brw->urb.clip_start; 
    uf.bits0.clp_fence = brw->urb.sf_start; 
    uf.bits1.sf_fence  = brw->urb.cs_start; 
-   uf.bits1.cs_fence  = URB_SIZES(brw);
+   uf.bits1.cs_fence  = brw->urb.size;
 
    BRW_BATCH_STRUCT(brw, &uf);
 }
diff --git a/i965/brw_vs.c b/i965/brw_vs.c
index fd055e2..44b085e 100644
--- a/i965/brw_vs.c
+++ b/i965/brw_vs.c
@@ -35,6 +35,7 @@
 #include "brw_util.h"
 #include "brw_state.h"
 #include "shader/prog_print.h"
+#include "shader/prog_parameter.h"
 
 
 
@@ -42,9 +43,11 @@ static void do_vs_prog( struct brw_context *brw,
 			struct brw_vertex_program *vp,
 			struct brw_vs_prog_key *key )
 {
+   GLcontext *ctx = &brw->intel.ctx;
    GLuint program_size;
    const GLuint *program;
    struct brw_vs_compile c;
+   int aux_size;
 
    memset(&c, 0, sizeof(c));
    memcpy(&c.key, key, sizeof(*key));
@@ -73,13 +76,27 @@ static void do_vs_prog( struct brw_context *brw,
     */
    program = brw_get_program(&c.func, &program_size);
 
+   /* We upload from &c.prog_data including the constant_map assuming
+    * they're packed together.  It would be nice to have a
+    * compile-time assert macro here.
+    */
+   assert(c.constant_map == (int8_t *)&c.prog_data +
+	  sizeof(c.prog_data));
+   assert(ctx->Const.VertexProgram.MaxNativeParameters ==
+	  ARRAY_SIZE(c.constant_map));
+
+   aux_size = sizeof(c.prog_data);
+   if (c.vp->use_const_buffer)
+      aux_size += c.vp->program.Base.Parameters->NumParameters;
+
    dri_bo_unreference(brw->vs.prog_bo);
-   brw->vs.prog_bo = brw_upload_cache( &brw->cache, BRW_VS_PROG,
-				       &c.key, sizeof(c.key),
-				       NULL, 0,
-				       program, program_size,
-				       &c.prog_data,
-				       &brw->vs.prog_data );
+   brw->vs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_VS_PROG,
+						   &c.key, sizeof(c.key),
+						   NULL, 0,
+						   program, program_size,
+						   &c.prog_data,
+						   aux_size,
+						   &brw->vs.prog_data);
 }
 
 
@@ -109,6 +126,8 @@ static void brw_upload_vs_prog(struct brw_context *brw)
 				      &brw->vs.prog_data);
    if (brw->vs.prog_bo == NULL)
       do_vs_prog(brw, vp, &key);
+   brw->vs.constant_map = ((int8_t *)brw->vs.prog_data +
+			   sizeof(*brw->vs.prog_data));
 }
 
 
diff --git a/i965/brw_vs.h b/i965/brw_vs.h
index 4a59136..95e0501 100644
--- a/i965/brw_vs.h
+++ b/i965/brw_vs.h
@@ -51,6 +51,7 @@ struct brw_vs_compile {
    struct brw_compile func;
    struct brw_vs_prog_key key;
    struct brw_vs_prog_data prog_data;
+   int8_t constant_map[1024];
 
    struct brw_vertex_program *vp;
 
@@ -81,6 +82,8 @@ struct brw_vs_compile {
       GLint index;
       struct brw_reg reg;
    } current_const[3];
+
+   GLboolean needs_stack;
 };
 
 void brw_vs_emit( struct brw_vs_compile *c );
diff --git a/i965/brw_vs_emit.c b/i965/brw_vs_emit.c
index 27aac8b..a7c4b58 100644
--- a/i965/brw_vs_emit.c
+++ b/i965/brw_vs_emit.c
@@ -67,6 +67,7 @@ static void release_tmps( struct brw_vs_compile *c )
  */
 static void brw_vs_alloc_regs( struct brw_vs_compile *c )
 {
+   struct intel_context *intel = &c->func.brw->intel;
    GLuint i, reg = 0, mrf;
    int attributes_in_vue;
 
@@ -103,9 +104,47 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
    /* Vertex program parameters from curbe:
     */
    if (c->vp->use_const_buffer) {
-      /* get constants from a real constant buffer */
-      c->prog_data.curb_read_length = 0;
-      c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */
+      int max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries;
+      int constant = 0;
+
+      /* We've got more constants than we can load with the push
+       * mechanism.  This is often correlated with reladdr loads where
+       * we should probably be using a pull mechanism anyway to avoid
+       * excessive reading.  However, the pull mechanism is slow in
+       * general.  So, we try to allocate as many non-reladdr-loaded
+       * constants through the push buffer as we can before giving up.
+       */
+      memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters);
+      for (i = 0;
+	   i < c->vp->program.Base.NumInstructions && constant < max_constant;
+	   i++) {
+	 struct prog_instruction *inst = &c->vp->program.Base.Instructions[i];
+	 int arg;
+
+	 for (arg = 0; arg < 3 && constant < max_constant; arg++) {
+	    if ((inst->SrcReg[arg].File != PROGRAM_STATE_VAR &&
+		 inst->SrcReg[arg].File != PROGRAM_CONSTANT &&
+		 inst->SrcReg[arg].File != PROGRAM_UNIFORM &&
+		 inst->SrcReg[arg].File != PROGRAM_ENV_PARAM &&
+		 inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) ||
+		inst->SrcReg[arg].RelAddr)
+	       continue;
+
+	    if (c->constant_map[inst->SrcReg[arg].Index] == -1) {
+	       c->constant_map[inst->SrcReg[arg].Index] = constant++;
+	    }
+	 }
+      }
+
+      for (i = 0; i < constant; i++) {
+         c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2,
+							      (i%2) * 4),
+						 0, 4, 1);
+      }
+      reg += (constant + 1) / 2;
+      c->prog_data.curb_read_length = reg - 1;
+      /* XXX 0 causes a bug elsewhere... */
+      c->prog_data.nr_params = MAX2(constant * 4, 4);
    }
    else {
       /* use a section of the GRF for constants */
@@ -141,10 +180,12 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
    c->first_output = reg;
    c->first_overflow_output = 0;
 
-   if (BRW_IS_IGDNG(c->func.brw))
-       mrf = 8;
+   if (intel->gen >= 6)
+      mrf = 6;
+   else if (intel->is_ironlake)
+      mrf = 8;
    else
-       mrf = 4;
+      mrf = 4;
 
    for (i = 0; i < VERT_RESULT_MAX; i++) {
       if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) {
@@ -213,8 +254,10 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
       }
    }
 
-   c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
-   reg += 2;
+   if (c->needs_stack) {
+      c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
+      reg += 2;
+   }
 
    /* Some opcodes need an internal temporary:
     */
@@ -238,17 +281,19 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
     */
    attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs);
 
-   if (BRW_IS_IGDNG(c->func.brw))
-       c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
+   if (intel->gen >= 6)
+      c->prog_data.urb_entry_size = (attributes_in_vue + 4 + 7) / 8;
+   else if (intel->is_ironlake)
+      c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
    else
-       c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
+      c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
 
    c->prog_data.total_grf = reg;
 
    if (INTEL_DEBUG & DEBUG_VS) {
-      _mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs);
-      _mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries);
-      _mesa_printf("%s reg = %d\n", __FUNCTION__, reg);
+      printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs);
+      printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries);
+      printf("%s reg = %d\n", __FUNCTION__, reg);
    }
 }
 
@@ -438,9 +483,11 @@ static void emit_math1( struct brw_vs_compile *c,
     * whether that turns out to be a simulator bug or not:
     */
    struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
    struct brw_reg tmp = dst;
-   GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
-			 dst.file != BRW_GENERAL_REGISTER_FILE);
+   GLboolean need_tmp = (intel->gen < 6 &&
+			 (dst.dw1.bits.writemask != 0xf ||
+			  dst.file != BRW_GENERAL_REGISTER_FILE));
 
    if (need_tmp) 
       tmp = get_tmp(c);
@@ -469,9 +516,11 @@ static void emit_math2( struct brw_vs_compile *c,
 			GLuint precision)
 {
    struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
    struct brw_reg tmp = dst;
-   GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
-			 dst.file != BRW_GENERAL_REGISTER_FILE);
+   GLboolean need_tmp = (intel->gen < 6 &&
+			 (dst.dw1.bits.writemask != 0xf ||
+			  dst.file != BRW_GENERAL_REGISTER_FILE));
 
    if (need_tmp) 
       tmp = get_tmp(c);
@@ -761,15 +810,14 @@ get_constant(struct brw_vs_compile *c,
 {
    const struct prog_src_register *src = &inst->SrcReg[argIndex];
    struct brw_compile *p = &c->func;
-   struct brw_reg const_reg;
-   struct brw_reg const2_reg;
-   const GLboolean relAddr = src->RelAddr;
+   struct brw_reg const_reg = c->current_const[argIndex].reg;
 
    assert(argIndex < 3);
 
-   if (c->current_const[argIndex].index != src->Index || relAddr) {
+   if (c->current_const[argIndex].index != src->Index) {
       struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
 
+      /* Keep track of the last constant loaded in this slot, for reuse. */
       c->current_const[argIndex].index = src->Index;
 
 #if 0
@@ -778,48 +826,74 @@ get_constant(struct brw_vs_compile *c,
 #endif
       /* need to fetch the constant now */
       brw_dp_READ_4_vs(p,
-                       c->current_const[argIndex].reg,/* writeback dest */
+                       const_reg,                     /* writeback dest */
                        0,                             /* oword */
-                       relAddr,                       /* relative indexing? */
+                       0,                             /* relative indexing? */
                        addrReg,                       /* address register */
                        16 * src->Index,               /* byte offset */
                        SURF_INDEX_VERT_CONST_BUFFER   /* binding table index */
                        );
-
-      if (relAddr) {
-         /* second read */
-         const2_reg = get_tmp(c);
-
-         /* use upper half of address reg for second read */
-         addrReg = stride(addrReg, 0, 4, 0);
-         addrReg.subnr = 16;
-
-         brw_dp_READ_4_vs(p,
-                          const2_reg,              /* writeback dest */
-                          1,                       /* oword */
-                          relAddr,                 /* relative indexing? */
-                          addrReg,                 /* address register */
-                          16 * src->Index,         /* byte offset */
-                          SURF_INDEX_VERT_CONST_BUFFER
-                          );
-      }
    }
 
-   const_reg = c->current_const[argIndex].reg;
+   /* replicate lower four floats into upper half (to get XYZWXYZW) */
+   const_reg = stride(const_reg, 0, 4, 0);
+   const_reg.subnr = 0;
 
-   if (relAddr) {
-      /* merge the two Owords into the constant register */
-      /* const_reg[7..4] = const2_reg[7..4] */
-      brw_MOV(p,
-              suboffset(stride(const_reg, 0, 4, 1), 4),
-              suboffset(stride(const2_reg, 0, 4, 1), 4));
-      release_tmp(c, const2_reg);
-   }
-   else {
-      /* replicate lower four floats into upper half (to get XYZWXYZW) */
-      const_reg = stride(const_reg, 0, 4, 0);
-      const_reg.subnr = 0;
-   }
+   return const_reg;
+}
+
+static struct brw_reg
+get_reladdr_constant(struct brw_vs_compile *c,
+		     const struct prog_instruction *inst,
+		     GLuint argIndex)
+{
+   const struct prog_src_register *src = &inst->SrcReg[argIndex];
+   struct brw_compile *p = &c->func;
+   struct brw_reg const_reg = c->current_const[argIndex].reg;
+   struct brw_reg const2_reg;
+   struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
+
+   assert(argIndex < 3);
+
+   /* Can't reuse a reladdr constant load. */
+   c->current_const[argIndex].index = -1;
+
+ #if 0
+   printf("  fetch const[a0.x+%d] for arg %d into reg %d\n",
+	  src->Index, argIndex, c->current_const[argIndex].reg.nr);
+#endif
+
+   /* fetch the first vec4 */
+   brw_dp_READ_4_vs(p,
+		    const_reg,                     /* writeback dest */
+		    0,                             /* oword */
+		    1,                             /* relative indexing? */
+		    addrReg,                       /* address register */
+		    16 * src->Index,               /* byte offset */
+		    SURF_INDEX_VERT_CONST_BUFFER   /* binding table index */
+		    );
+   /* second vec4 */
+   const2_reg = get_tmp(c);
+
+   /* use upper half of address reg for second read */
+   addrReg = stride(addrReg, 0, 4, 0);
+   addrReg.subnr = 16;
+
+   brw_dp_READ_4_vs(p,
+		    const2_reg,              /* writeback dest */
+		    1,                       /* oword */
+		    1,                       /* relative indexing? */
+		    addrReg,                 /* address register */
+		    16 * src->Index,         /* byte offset */
+		    SURF_INDEX_VERT_CONST_BUFFER
+		    );
+
+   /* merge the two Owords into the constant register */
+   /* const_reg[7..4] = const2_reg[7..4] */
+   brw_MOV(p,
+	   suboffset(stride(const_reg, 0, 4, 1), 4),
+	   suboffset(stride(const2_reg, 0, 4, 1), 4));
+   release_tmp(c, const2_reg);
 
    return const_reg;
 }
@@ -927,7 +1001,13 @@ get_src_reg( struct brw_vs_compile *c,
    case PROGRAM_ENV_PARAM:
    case PROGRAM_LOCAL_PARAM:
       if (c->vp->use_const_buffer) {
-         return get_constant(c, inst, argIndex);
+	 if (!relAddr && c->constant_map[index] != -1) {
+	    assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0);
+	    return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]];
+	 } else if (relAddr)
+	    return get_reladdr_constant(c, inst, argIndex);
+	 else
+	    return get_constant(c, inst, argIndex);
       }
       else if (relAddr) {
          return deref(c, c->regs[PROGRAM_STATE_VAR][0], index);
@@ -1113,11 +1193,13 @@ static void emit_swz( struct brw_vs_compile *c,
 static void emit_vertex_write( struct brw_vs_compile *c)
 {
    struct brw_compile *p = &c->func;
+   struct brw_context *brw = p->brw;
+   struct intel_context *intel = &brw->intel;
    struct brw_reg m0 = brw_message_reg(0);
    struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS];
    struct brw_reg ndc;
    int eot;
-   GLuint len_vertext_header = 2;
+   GLuint len_vertex_header = 2;
 
    if (c->key.copy_edgeflag) {
       brw_MOV(p, 
@@ -1125,18 +1207,20 @@ static void emit_vertex_write( struct brw_vs_compile *c)
 	      get_reg(c, PROGRAM_INPUT, VERT_ATTRIB_EDGEFLAG));
    }
 
-   /* Build ndc coords */
-   ndc = get_tmp(c);
-   /* ndc = 1.0 / pos.w */
-   emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
-   /* ndc.xyz = pos * ndc */
-   brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc);
+   if (intel->gen < 6) {
+      /* Build ndc coords */
+      ndc = get_tmp(c);
+      /* ndc = 1.0 / pos.w */
+      emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
+      /* ndc.xyz = pos * ndc */
+      brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc);
+   }
 
    /* Update the header for point size, user clipping flags, and -ve rhw
     * workaround.
     */
    if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
-       c->key.nr_userclip || BRW_IS_965(p->brw))
+       c->key.nr_userclip || brw->has_negative_rhw_bug)
    {
       struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
       GLuint i;
@@ -1167,7 +1251,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
        * Later, clipping will detect ucp[6] and ensure the primitive is
        * clipped against all fixed planes.
        */
-      if (BRW_IS_965(p->brw)) {
+      if (brw->has_negative_rhw_bug) {
 	 brw_CMP(p,
 		 vec8(brw_null_reg()),
 		 BRW_CONDITIONAL_L,
@@ -1193,21 +1277,41 @@ static void emit_vertex_write( struct brw_vs_compile *c)
     * of zeros followed by two sets of NDC coordinates:
     */
    brw_set_access_mode(p, BRW_ALIGN_1);
-   brw_MOV(p, offset(m0, 2), ndc);
-
-   if (BRW_IS_IGDNG(p->brw)) {
-       /* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */
-       brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */
-       /* m4, m5 contain the distances from vertex to the user clip planeXXX. 
-        * Seems it is useless for us.
-        * m6 is used for aligning, so that the remainder of vertex element is 
-        * reg-aligned.
-        */
-       brw_MOV(p, offset(m0, 7), pos); /* the remainder of vertex element */
-       len_vertext_header = 6;
+
+   if (intel->gen >= 6) {
+      /* There are 16 DWs (D0-D15) in VUE header on Sandybridge:
+       * dword 0-3 (m1) of the header is indices, point width, clip flags.
+       * dword 4-7 (m2) is the 4D space position
+       * dword 8-15 (m3,m4) of the vertex header is the user clip distance.
+       * m5 is the first vertex data we fill, which is the vertex position.
+       */
+      brw_MOV(p, offset(m0, 2), pos);
+      brw_MOV(p, offset(m0, 5), pos);
+      len_vertex_header = 4;
+   } else if (intel->is_ironlake) {
+      /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
+       * dword 0-3 (m1) of the header is indices, point width, clip flags.
+       * dword 4-7 (m2) is the ndc position (set above)
+       * dword 8-11 (m3) of the vertex header is the 4D space position
+       * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
+       * m6 is a pad so that the vertex element data is aligned
+       * m7 is the first vertex data we fill, which is the vertex position.
+       */
+      brw_MOV(p, offset(m0, 2), ndc);
+      brw_MOV(p, offset(m0, 3), pos);
+      brw_MOV(p, offset(m0, 7), pos);
+      len_vertex_header = 6;
    } else {
-       brw_MOV(p, offset(m0, 3), pos);
-       len_vertext_header = 2;
+      /* There are 8 dwords in VUE header pre-Ironlake:
+       * dword 0-3 (m1) is indices, point width, clip flags.
+       * dword 4-7 (m2) is ndc position (set above)
+       *
+       * dword 8-11 (m3) is the first vertex data, which we always have be the
+       * vertex position.
+       */
+      brw_MOV(p, offset(m0, 2), ndc);
+      brw_MOV(p, offset(m0, 3), pos);
+      len_vertex_header = 2;
    }
 
    eot = (c->first_overflow_output == 0);
@@ -1218,7 +1322,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
 		 c->r0,		/* src */
 		 0,		/* allocate */
 		 1,		/* used */
-		 MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */
+		 MIN2(c->nr_outputs + 1 + len_vertex_header, (BRW_MAX_MRF-1)), /* msg len */
 		 0,		/* response len */
 		 eot, 		/* eot */
 		 eot, 		/* writes complete */
@@ -1359,29 +1463,32 @@ void brw_vs_emit(struct brw_vs_compile *c )
 #define MAX_LOOP_DEPTH 32
    struct brw_compile *p = &c->func;
    struct brw_context *brw = p->brw;
+   struct intel_context *intel = &brw->intel;
    const GLuint nr_insns = c->vp->program.Base.NumInstructions;
    GLuint insn, if_depth = 0, loop_depth = 0;
    GLuint end_offset = 0;
    struct brw_instruction *end_inst, *last_inst;
-   struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
+   struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH] = { 0 };
    const struct brw_indirect stack_index = brw_indirect(0, 0);   
    GLuint index;
    GLuint file;
 
    if (INTEL_DEBUG & DEBUG_VS) {
-      _mesa_printf("vs-mesa:\n");
+      printf("vs-mesa:\n");
       _mesa_print_program(&c->vp->program.Base); 
-      _mesa_printf("\n");
+      printf("\n");
    }
 
    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
    brw_set_access_mode(p, BRW_ALIGN_16);
-   
-   /* Message registers can't be read, so copy the output into GRF register
-      if they are used in source registers */
+
    for (insn = 0; insn < nr_insns; insn++) {
        GLuint i;
        struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
+
+       /* Message registers can't be read, so copy the output into GRF
+	* register if they are used in source registers
+	*/
        for (i = 0; i < 3; i++) {
 	   struct prog_src_register *src = &inst->SrcReg[i];
 	   GLuint index = src->Index;
@@ -1389,12 +1496,23 @@ void brw_vs_emit(struct brw_vs_compile *c )
 	   if (file == PROGRAM_OUTPUT && index != VERT_RESULT_HPOS)
 	       c->output_regs[index].used_in_src = GL_TRUE;
        }
+
+       switch (inst->Opcode) {
+       case OPCODE_CAL:
+       case OPCODE_RET:
+	  c->needs_stack = GL_TRUE;
+	  break;
+       default:
+	  break;
+       }
    }
 
    /* Static register allocation
     */
    brw_vs_alloc_regs(c);
-   brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
+
+   if (c->needs_stack)
+      brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
 
    for (insn = 0; insn < nr_insns; insn++) {
 
@@ -1592,7 +1710,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
 
             loop_depth--;
 
-	    if (BRW_IS_IGDNG(brw))
+	    if (intel->is_ironlake)
 	       br = 2;
 
             inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
@@ -1708,9 +1826,9 @@ void brw_vs_emit(struct brw_vs_compile *c )
    if (INTEL_DEBUG & DEBUG_VS) {
       int i;
 
-      _mesa_printf("vs-native:\n");
+      printf("vs-native:\n");
       for (i = 0; i < p->nr_insn; i++)
 	 brw_disasm(stderr, &p->store[i]);
-      _mesa_printf("\n");
+      printf("\n");
    }
 }
diff --git a/i965/brw_vs_state.c b/i965/brw_vs_state.c
index 7285466..fd9f2fe 100644
--- a/i965/brw_vs_state.c
+++ b/i965/brw_vs_state.c
@@ -82,9 +82,9 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key)
 static dri_bo *
 vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
 {
+   struct intel_context *intel = &brw->intel;
    struct brw_vs_unit_state vs;
    dri_bo *bo;
-   int chipset_max_threads;
 
    memset(&vs, 0, sizeof(vs));
 
@@ -98,7 +98,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
     */
    vs.thread1.single_program_flow = 0;
 
-   if (BRW_IS_IGDNG(brw))
+   if (intel->is_ironlake)
       vs.thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
       vs.thread1.binding_table_entry_count = key->nr_surfaces;
@@ -109,7 +109,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
    vs.thread3.urb_entry_read_offset = 0;
    vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
 
-   if (BRW_IS_IGDNG(brw)) {
+   if (intel->is_ironlake) {
       switch (key->nr_urb_entries) {
       case 8:
       case 12:
@@ -135,7 +135,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
       case 32:
 	 break;
       case 64:
-	 assert(BRW_IS_G4X(brw));
+	 assert(intel->is_g4x);
 	 break;
       default:
 	 assert(0);
@@ -145,17 +145,8 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
 
    vs.thread4.urb_entry_allocation_size = key->urb_size - 1;
 
-   if (BRW_IS_IGDNG(brw))
-      chipset_max_threads = 72;
-   else if (BRW_IS_G4X(brw))
-      chipset_max_threads = 32;
-   else
-      chipset_max_threads = 16;
    vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2,
-				  1, chipset_max_threads) - 1;
-
-   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
-      vs.thread4.max_threads = 0;
+				  1, brw->vs_max_threads) - 1;
 
    /* No samplers for ARB_vp programs:
     */
@@ -173,8 +164,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
    bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT,
 			 key, sizeof(*key),
 			 &brw->vs.prog_bo, 1,
-			 &vs, sizeof(vs),
-			 NULL, NULL);
+			 &vs, sizeof(vs));
 
    /* Emit VS program relocation */
    dri_bo_emit_reloc(bo,
diff --git a/i965/brw_vs_surface_state.c b/i965/brw_vs_surface_state.c
index 3bc9840..4007b5a 100644
--- a/i965/brw_vs_surface_state.c
+++ b/i965/brw_vs_surface_state.c
@@ -35,7 +35,6 @@
 
 #include "brw_context.h"
 #include "brw_state.h"
-#include "brw_defines.h"
 
 /* Creates a new VS constant buffer reflecting the current VS program's
  * constants, if needed by the VS program.
@@ -68,13 +67,13 @@ brw_vs_update_constant_buffer(struct brw_context *brw)
     */
    _mesa_load_state_parameters(&brw->intel.ctx, vp->program.Base.Parameters);
 
-   intel_bo_map_gtt_preferred(intel, const_buffer, GL_TRUE);
+   drm_intel_gem_bo_map_gtt(const_buffer);
    for (i = 0; i < params->NumParameters; i++) {
       memcpy(const_buffer->virtual + i * 4 * sizeof(float),
 	     params->ParameterValues[i],
 	     4 * sizeof(float));
    }
-   intel_bo_unmap_gtt_preferred(intel, const_buffer);
+   drm_intel_gem_bo_unmap_gtt(const_buffer);
 
    return const_buffer;
 }
@@ -105,7 +104,7 @@ brw_update_vs_constant_surface( GLcontext *ctx,
    /* If there's no constant buffer, then no surface BO is needed to point at
     * it.
     */
-   if (vp->const_buffer == 0) {
+   if (vp->const_buffer == NULL) {
       drm_intel_bo_unreference(brw->vs.surf_bo[surf]);
       brw->vs.surf_bo[surf] = NULL;
       return;
@@ -133,7 +132,7 @@ brw_update_vs_constant_surface( GLcontext *ctx,
    brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
                                             BRW_SS_SURFACE,
                                             &key, sizeof(key),
-                                            &key.bo, key.bo ? 1 : 0,
+                                            &key.bo, 1,
                                             NULL);
    if (brw->vs.surf_bo[surf] == NULL) {
       brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key);
@@ -156,7 +155,7 @@ brw_vs_get_binding_table(struct brw_context *brw)
 
    if (bind_bo == NULL) {
       GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint);
-      uint32_t *data = malloc(data_size);
+      uint32_t data[BRW_VS_MAX_SURF];
       int i;
 
       for (i = 0; i < BRW_VS_MAX_SURF; i++)
@@ -168,8 +167,7 @@ brw_vs_get_binding_table(struct brw_context *brw)
       bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
 				  NULL, 0,
 				  brw->vs.surf_bo, BRW_VS_MAX_SURF,
-				  data, data_size,
-				  NULL, NULL);
+				  data, data_size);
 
       /* Emit binding table relocations to surface state */
       for (i = 0; i < BRW_VS_MAX_SURF; i++) {
@@ -182,8 +180,6 @@ brw_vs_get_binding_table(struct brw_context *brw)
 				    I915_GEM_DOMAIN_INSTRUCTION, 0);
 	 }
       }
-
-      free(data);
    }
 
    return bind_bo;
diff --git a/i965/brw_vtbl.c b/i965/brw_vtbl.c
index 34aaea3..96a44bf 100644
--- a/i965/brw_vtbl.c
+++ b/i965/brw_vtbl.c
@@ -44,7 +44,6 @@
 #include "brw_state.h"
 #include "brw_draw.h"
 #include "brw_state.h"
-#include "brw_fallback.h"
 #include "brw_vs.h"
 #include "brw_wm.h"
 
@@ -68,11 +67,11 @@ static void brw_destroy_context( struct intel_context *intel )
    brw_draw_destroy( brw );
    brw_clear_validated_bos(brw);
    if (brw->wm.compile_data) {
-      _mesa_free(brw->wm.compile_data->instruction);
-      _mesa_free(brw->wm.compile_data->vreg);
-      _mesa_free(brw->wm.compile_data->refs);
-      _mesa_free(brw->wm.compile_data->prog_instructions);
-      _mesa_free(brw->wm.compile_data);
+      free(brw->wm.compile_data->instruction);
+      free(brw->wm.compile_data->vreg);
+      free(brw->wm.compile_data->refs);
+      free(brw->wm.compile_data->prog_instructions);
+      free(brw->wm.compile_data);
    }
 
    for (i = 0; i < brw->state.nr_color_regions; i++)
@@ -103,6 +102,9 @@ static void brw_destroy_context( struct intel_context *intel )
    dri_bo_release(&brw->cc.prog_bo);
    dri_bo_release(&brw->cc.state_bo);
    dri_bo_release(&brw->cc.vp_bo);
+   dri_bo_release(&brw->cc.blend_state_bo);
+   dri_bo_release(&brw->cc.depth_stencil_state_bo);
+   dri_bo_release(&brw->cc.color_calc_state_bo);
 }
 
 
@@ -140,6 +142,12 @@ static void brw_finish_batch(struct intel_context *intel)
 {
    struct brw_context *brw = brw_context(&intel->ctx);
    brw_emit_query_end(brw);
+
+   if (brw->curbe.curbe_bo) {
+      drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo);
+      drm_intel_bo_unreference(brw->curbe.curbe_bo);
+      brw->curbe.curbe_bo = NULL;
+   }
 }
 
 
@@ -150,11 +158,6 @@ static void brw_new_batch( struct intel_context *intel )
 {
    struct brw_context *brw = brw_context(&intel->ctx);
 
-   /* Check that we didn't just wrap our batchbuffer at a bad time. */
-   assert(!brw->no_batch_wrap);
-
-   brw->curbe.need_new_bo = GL_TRUE;
-
    /* Mark all context state as needing to be re-emitted.
     * This is probably not as severe as on 915, since almost all of our state
     * is just in referenced buffers.
@@ -175,12 +178,6 @@ static void brw_new_batch( struct intel_context *intel )
    }
 }
 
-
-static void brw_note_fence( struct intel_context *intel, GLuint fence )
-{
-   brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE;
-}
-
 static void brw_invalidate_state( struct intel_context *intel, GLuint new_state )
 {
    /* nothing */
@@ -196,7 +193,6 @@ void brwInitVtbl( struct brw_context *brw )
    brw->intel.vtbl.update_texture_state = 0;
 
    brw->intel.vtbl.invalidate_state = brw_invalidate_state;
-   brw->intel.vtbl.note_fence = brw_note_fence;
    brw->intel.vtbl.new_batch = brw_new_batch;
    brw->intel.vtbl.finish_batch = brw_finish_batch;
    brw->intel.vtbl.destroy = brw_destroy_context;
diff --git a/i965/brw_wm.c b/i965/brw_wm.c
index 6895f64..991e1b9 100644
--- a/i965/brw_wm.c
+++ b/i965/brw_wm.c
@@ -30,7 +30,6 @@
   */
              
 #include "brw_context.h"
-#include "brw_util.h"
 #include "brw_wm.h"
 #include "brw_state.h"
 
@@ -152,11 +151,11 @@ static void do_wm_prog( struct brw_context *brw,
           */
          return;
       }
-      c->instruction = _mesa_calloc(BRW_WM_MAX_INSN * sizeof(*c->instruction));
-      c->prog_instructions = _mesa_calloc(BRW_WM_MAX_INSN *
+      c->instruction = calloc(1, BRW_WM_MAX_INSN * sizeof(*c->instruction));
+      c->prog_instructions = calloc(1, BRW_WM_MAX_INSN *
 					  sizeof(*c->prog_instructions));
-      c->vreg = _mesa_calloc(BRW_WM_MAX_VREG * sizeof(*c->vreg));
-      c->refs = _mesa_calloc(BRW_WM_MAX_REF * sizeof(*c->refs));
+      c->vreg = calloc(1, BRW_WM_MAX_VREG * sizeof(*c->vreg));
+      c->refs = calloc(1, BRW_WM_MAX_REF * sizeof(*c->refs));
    } else {
       void *instruction = c->instruction;
       void *prog_instructions = c->prog_instructions;
@@ -199,12 +198,13 @@ static void do_wm_prog( struct brw_context *brw,
    program = brw_get_program(&c->func, &program_size);
 
    dri_bo_unreference(brw->wm.prog_bo);
-   brw->wm.prog_bo = brw_upload_cache( &brw->cache, BRW_WM_PROG,
-				       &c->key, sizeof(c->key),
-				       NULL, 0,
-				       program, program_size,
-				       &c->prog_data,
-				       &brw->wm.prog_data );
+   brw->wm.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_WM_PROG,
+						   &c->key, sizeof(c->key),
+						   NULL, 0,
+						   program, program_size,
+						   &c->prog_data,
+						   sizeof(c->prog_data),
+						   &brw->wm.prog_data);
 }
 
 
@@ -336,11 +336,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
     * drawable height in order to invert the Y axis.
     */
    if (fp->program.Base.InputsRead & FRAG_BIT_WPOS) {
-      if (brw->intel.driDrawable != NULL) {
-         key->origin_x = brw->intel.driDrawable->x;
-         key->origin_y = brw->intel.driDrawable->y;
-         key->drawable_height = brw->intel.driDrawable->h;
-      }
+      key->drawable_height = ctx->DrawBuffer->Height;
    }
 
    key->nr_color_regions = brw->state.nr_color_regions;
diff --git a/i965/brw_wm.h b/i965/brw_wm.h
index 9dcb6e1..88d84ee 100644
--- a/i965/brw_wm.h
+++ b/i965/brw_wm.h
@@ -76,10 +76,9 @@ struct brw_wm_prog_key {
 
    GLushort tex_swizzles[BRW_MAX_TEX_UNIT];
 
-   GLuint program_string_id:32;
-   GLushort origin_x, origin_y;
    GLushort drawable_height;
    GLbitfield64 vp_outputs_written;
+   GLuint program_string_id:32;
 };
 
 
diff --git a/i965/brw_wm_debug.c b/i965/brw_wm_debug.c
index 2208210..a78cc8b 100644
--- a/i965/brw_wm_debug.c
+++ b/i965/brw_wm_debug.c
@@ -41,21 +41,21 @@ void brw_wm_print_value( struct brw_wm_compile *c,
    if (c->state >= PASS2_DONE) 
       brw_print_reg(value->hw_reg);
    else if( value == &c->undef_value )
-      _mesa_printf("undef");
+      printf("undef");
    else if( value - c->vreg >= 0 &&
 	    value - c->vreg < BRW_WM_MAX_VREG)
-      _mesa_printf("r%d", value - c->vreg);
+      printf("r%d", value - c->vreg);
    else if (value - c->creg >= 0 &&
 	    value - c->creg < BRW_WM_MAX_PARAM)
-      _mesa_printf("c%d", value - c->creg);
+      printf("c%d", value - c->creg);
    else if (value - c->payload.input_interp >= 0 &&
 	    value - c->payload.input_interp < FRAG_ATTRIB_MAX)
-      _mesa_printf("i%d", value - c->payload.input_interp);
+      printf("i%d", value - c->payload.input_interp);
    else if (value - c->payload.depth >= 0 &&
 	    value - c->payload.depth < FRAG_ATTRIB_MAX)
-      _mesa_printf("d%d", value - c->payload.depth);
+      printf("d%d", value - c->payload.depth);
    else 
-      _mesa_printf("?");
+      printf("?");
 }
 
 void brw_wm_print_ref( struct brw_wm_compile *c,
@@ -64,16 +64,16 @@ void brw_wm_print_ref( struct brw_wm_compile *c,
    struct brw_reg hw_reg = ref->hw_reg;
 
    if (ref->unspill_reg)
-      _mesa_printf("UNSPILL(%x)/", ref->value->spill_slot);
+      printf("UNSPILL(%x)/", ref->value->spill_slot);
 
    if (c->state >= PASS2_DONE)
       brw_print_reg(ref->hw_reg);
    else {
-      _mesa_printf("%s", hw_reg.negate ? "-" : "");
-      _mesa_printf("%s", hw_reg.abs ? "abs/" : "");
+      printf("%s", hw_reg.negate ? "-" : "");
+      printf("%s", hw_reg.abs ? "abs/" : "");
       brw_wm_print_value(c, ref->value);
       if ((hw_reg.nr&1) || hw_reg.subnr) {
-	 _mesa_printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr);
+	 printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr);
       }
    }
 }
@@ -84,22 +84,22 @@ void brw_wm_print_insn( struct brw_wm_compile *c,
    GLuint i, arg;
    GLuint nr_args = brw_wm_nr_args(inst->opcode);
 
-   _mesa_printf("[");
+   printf("[");
    for (i = 0; i < 4; i++) {
       if (inst->dst[i]) {
 	 brw_wm_print_value(c, inst->dst[i]);
 	 if (inst->dst[i]->spill_slot)
-	    _mesa_printf("/SPILL(%x)",inst->dst[i]->spill_slot);
+	    printf("/SPILL(%x)",inst->dst[i]->spill_slot);
       }
       else
-	 _mesa_printf("#");
+	 printf("#");
       if (i < 3)      
-	 _mesa_printf(",");
+	 printf(",");
    }
-   _mesa_printf("]");
+   printf("]");
 
    if (inst->writemask != WRITEMASK_XYZW)
-      _mesa_printf(".%s%s%s%s", 
+      printf(".%s%s%s%s", 
 		   GET_BIT(inst->writemask, 0) ? "x" : "",
 		   GET_BIT(inst->writemask, 1) ? "y" : "",
 		   GET_BIT(inst->writemask, 2) ? "z" : "",
@@ -107,58 +107,58 @@ void brw_wm_print_insn( struct brw_wm_compile *c,
 
    switch (inst->opcode) {
    case WM_PIXELXY:
-      _mesa_printf(" = PIXELXY");
+      printf(" = PIXELXY");
       break;
    case WM_DELTAXY:
-      _mesa_printf(" = DELTAXY");
+      printf(" = DELTAXY");
       break;
    case WM_PIXELW:
-      _mesa_printf(" = PIXELW");
+      printf(" = PIXELW");
       break;
    case WM_WPOSXY:
-      _mesa_printf(" = WPOSXY");
+      printf(" = WPOSXY");
       break;
    case WM_PINTERP:
-      _mesa_printf(" = PINTERP");
+      printf(" = PINTERP");
       break;
    case WM_LINTERP:
-      _mesa_printf(" = LINTERP");
+      printf(" = LINTERP");
       break;
    case WM_CINTERP:
-      _mesa_printf(" = CINTERP");
+      printf(" = CINTERP");
       break;
    case WM_FB_WRITE:
-      _mesa_printf(" = FB_WRITE");
+      printf(" = FB_WRITE");
       break;
    case WM_FRONTFACING:
-      _mesa_printf(" = FRONTFACING");
+      printf(" = FRONTFACING");
       break;
    default:
-      _mesa_printf(" = %s", _mesa_opcode_string(inst->opcode));
+      printf(" = %s", _mesa_opcode_string(inst->opcode));
       break;
    }
 
    if (inst->saturate)
-      _mesa_printf("_SAT");
+      printf("_SAT");
 
    for (arg = 0; arg < nr_args; arg++) {
 
-      _mesa_printf(" [");
+      printf(" [");
 
       for (i = 0; i < 4; i++) {
 	 if (inst->src[arg][i]) {
 	    brw_wm_print_ref(c, inst->src[arg][i]);
 	 }
 	 else
-	    _mesa_printf("%%");
+	    printf("%%");
 
 	 if (i < 3) 
-	    _mesa_printf(",");
+	    printf(",");
 	 else
-	    _mesa_printf("]");
+	    printf("]");
       }
    }
-   _mesa_printf("\n");
+   printf("\n");
 }
 
 void brw_wm_print_program( struct brw_wm_compile *c,
@@ -166,9 +166,9 @@ void brw_wm_print_program( struct brw_wm_compile *c,
 {
    GLuint insn;
 
-   _mesa_printf("%s:\n", stage);
+   printf("%s:\n", stage);
    for (insn = 0; insn < c->nr_insns; insn++)
       brw_wm_print_insn(c, &c->instruction[insn]);
-   _mesa_printf("\n");
+   printf("\n");
 }
 
diff --git a/i965/brw_wm_emit.c b/i965/brw_wm_emit.c
index 5390fd2..9315bca 100644
--- a/i965/brw_wm_emit.c
+++ b/i965/brw_wm_emit.c
@@ -138,19 +138,43 @@ void emit_wpos_xy(struct brw_wm_compile *c,
     * X and Y channels.
     */
    if (mask & WRITEMASK_X) {
-      /* X' = X - origin */
-      brw_ADD(p,
-	      dst[0],
-	      retype(arg0[0], BRW_REGISTER_TYPE_W),
-	      brw_imm_d(0 - c->key.origin_x));
+      if (c->fp->program.PixelCenterInteger) {
+	 /* X' = X */
+	 brw_MOV(p,
+		 dst[0],
+		 retype(arg0[0], BRW_REGISTER_TYPE_W));
+      } else {
+	 /* X' = X + 0.5 */
+	 brw_ADD(p,
+		 dst[0],
+		 retype(arg0[0], BRW_REGISTER_TYPE_W),
+		 brw_imm_f(0.5));
+      }
    }
 
    if (mask & WRITEMASK_Y) {
-      /* Y' = height - (Y - origin_y) = height + origin_y - Y */
-      brw_ADD(p,
-	      dst[1],
-	      negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
-	      brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
+      if (c->fp->program.OriginUpperLeft) {
+	 if (c->fp->program.PixelCenterInteger) {
+	    /* Y' = Y */
+	    brw_MOV(p,
+		    dst[1],
+		    retype(arg0[1], BRW_REGISTER_TYPE_W));
+	 } else {
+	    /* Y' = Y + 0.5 */
+	    brw_ADD(p,
+		    dst[1],
+		    retype(arg0[1], BRW_REGISTER_TYPE_W),
+		    brw_imm_f(0.5));
+	 }
+      } else {
+	 float center_offset = c->fp->program.PixelCenterInteger ? 0.0 : 0.5;
+
+	 /* Y' = (height - 1) - Y + center */
+	 brw_ADD(p,
+		 dst[1],
+		 negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
+		 brw_imm_f(c->key.drawable_height - 1 + center_offset));
+      }
    }
 }
 
@@ -692,7 +716,7 @@ void emit_xpd(struct brw_compile *p,
 {
    GLuint i;
 
-   assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
+   assert((mask & WRITEMASK_W) != WRITEMASK_W);
    
    for (i = 0 ; i < 3; i++) {
       if (mask & (1<<i)) {
@@ -830,6 +854,7 @@ void emit_tex(struct brw_wm_compile *c,
 	      GLboolean shadow)
 {
    struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
    struct brw_reg dst_retyped;
    GLuint cur_mrf = 2, response_length;
    GLuint i, nr_texcoords;
@@ -873,7 +898,7 @@ void emit_tex(struct brw_wm_compile *c,
    }
 
    /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
-   if (!BRW_IS_IGDNG(p->brw) && c->dispatch_width == 8)
+   if (!intel->is_ironlake && c->dispatch_width == 8)
       nr_texcoords = 3;
 
    /* For shadow comparisons, we have to supply u,v,r. */
@@ -891,7 +916,7 @@ void emit_tex(struct brw_wm_compile *c,
 
    /* Fill in the shadow comparison reference value. */
    if (shadow) {
-      if (BRW_IS_IGDNG(p->brw)) {
+      if (intel->is_ironlake) {
 	 /* Fill in the cube map array index value. */
 	 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
 	 cur_mrf += mrf_per_channel;
@@ -904,7 +929,7 @@ void emit_tex(struct brw_wm_compile *c,
       cur_mrf += mrf_per_channel;
    }
 
-   if (BRW_IS_IGDNG(p->brw)) {
+   if (intel->is_ironlake) {
       if (shadow)
 	 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG;
       else
@@ -944,6 +969,7 @@ void emit_txb(struct brw_wm_compile *c,
 	      GLuint sampler)
 {
    struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
    GLuint msgLength;
    GLuint msg_type;
    GLuint mrf_per_channel;
@@ -955,8 +981,8 @@ void emit_txb(struct brw_wm_compile *c,
     * undefined, and trust the execution mask to keep the undefined pixels
     * from mattering.
     */
-   if (c->dispatch_width == 16 || !BRW_IS_IGDNG(p->brw)) {
-      if (BRW_IS_IGDNG(p->brw))
+   if (c->dispatch_width == 16 || !intel->is_ironlake) {
+      if (intel->is_ironlake)
 	 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG;
       else
 	 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
@@ -1084,7 +1110,7 @@ static void emit_kil_nv( struct brw_wm_compile *c )
 
    brw_push_insn_state(p);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
-   brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
+   brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
    brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
    brw_pop_insn_state(p);
 }
@@ -1174,7 +1200,7 @@ void emit_fb_write(struct brw_wm_compile *c,
    brw_push_insn_state(p);
 
    for (channel = 0; channel < 4; channel++) {
-      if (c->dispatch_width == 16 && (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) {
+      if (c->dispatch_width == 16 && brw->has_compr4) {
 	 /* By setting the high bit of the MRF register number, we indicate
 	  * that we want COMPR4 mode - instead of doing the usual destination
 	  * + 1 for the second half we get destination + 4.
@@ -1596,10 +1622,10 @@ void brw_wm_emit( struct brw_wm_compile *c )
 	 break;
 
       default:
-	 _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
-		      inst->opcode, inst->opcode < MAX_OPCODE ?
-				    _mesa_opcode_string(inst->opcode) :
-				    "unknown");
+	 printf("Unsupported opcode %i (%s) in fragment shader\n",
+		inst->opcode, inst->opcode < MAX_OPCODE ?
+		_mesa_opcode_string(inst->opcode) :
+		"unknown");
       }
       
       for (i = 0; i < 4; i++)
@@ -1612,9 +1638,9 @@ void brw_wm_emit( struct brw_wm_compile *c )
    if (INTEL_DEBUG & DEBUG_WM) {
       int i;
 
-      _mesa_printf("wm-native:\n");
+      printf("wm-native:\n");
       for (i = 0; i < p->nr_insn; i++)
 	 brw_disasm(stderr, &p->store[i]);
-      _mesa_printf("\n");
+      printf("\n");
    }
 }
diff --git a/i965/brw_wm_fp.c b/i965/brw_wm_fp.c
index 7d03179..d73c391 100644
--- a/i965/brw_wm_fp.c
+++ b/i965/brw_wm_fp.c
@@ -138,7 +138,6 @@ static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
    reg.CondMask = COND_TR;
    reg.CondSwizzle = 0;
    reg.CondSrc = 0;
-   reg.pad = 0;
    return reg;
 }
 
@@ -160,7 +159,7 @@ static struct prog_dst_register get_temp( struct brw_wm_compile *c )
    int bit = _mesa_ffs( ~c->fp_temp );
 
    if (!bit) {
-      _mesa_printf("%s: out of temporaries\n", __FILE__);
+      printf("%s: out of temporaries\n", __FILE__);
       exit(1);
    }
 
@@ -1035,7 +1034,7 @@ static void print_insns( const struct prog_instruction *insn,
 {
    GLuint i;
    for (i = 0; i < nr; i++, insn++) {
-      _mesa_printf("%3d: ", i);
+      printf("%3d: ", i);
       if (insn->Opcode < MAX_OPCODE)
 	 _mesa_print_instruction(insn);
       else if (insn->Opcode < MAX_WM_OPCODE) {
@@ -1046,7 +1045,7 @@ static void print_insns( const struct prog_instruction *insn,
 				     3);
       }
       else 
-	 _mesa_printf("965 Opcode %d\n", insn->Opcode);
+	 printf("965 Opcode %d\n", insn->Opcode);
    }
 }
 
@@ -1061,9 +1060,9 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
    GLuint insn;
 
    if (INTEL_DEBUG & DEBUG_WM) {
-      _mesa_printf("pre-fp:\n");
+      printf("pre-fp:\n");
       _mesa_print_program(&fp->program.Base); 
-      _mesa_printf("\n");
+      printf("\n");
    }
 
    c->pixel_xy = src_undef();
@@ -1169,9 +1168,9 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
    }
 
    if (INTEL_DEBUG & DEBUG_WM) {
-      _mesa_printf("pass_fp:\n");
+      printf("pass_fp:\n");
       print_insns( c->prog_instructions, c->nr_fp_insns );
-      _mesa_printf("\n");
+      printf("\n");
    }
 }
 
diff --git a/i965/brw_wm_glsl.c b/i965/brw_wm_glsl.c
index e8c2cb6..562608e 100644
--- a/i965/brw_wm_glsl.c
+++ b/i965/brw_wm_glsl.c
@@ -743,7 +743,7 @@ static void emit_kil(struct brw_wm_compile *c)
     struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
     brw_push_insn_state(p);
     brw_set_mask_control(p, BRW_MASK_DISABLE);
-    brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
+    brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
     brw_AND(p, depth, c->emit_mask_reg, depth);
     brw_pop_insn_state(p);
 }
@@ -1826,6 +1826,7 @@ get_argument_regs(struct brw_wm_compile *c,
 
 static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 {
+   struct intel_context *intel = &brw->intel;
 #define MAX_IF_DEPTH 32
 #define MAX_LOOP_DEPTH 32
     struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
@@ -1848,7 +1849,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
         c->cur_inst = i;
 
 #if 0
-        _mesa_printf("Inst %d: ", i);
+        printf("Inst %d: ", i);
         _mesa_print_instruction(inst);
 #endif
 
@@ -1876,10 +1877,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 	else
 	    brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
 
-	dst_flags = inst->DstReg.WriteMask;
-	if (inst->SaturateMode == SATURATE_ZERO_ONE)
-	    dst_flags |= SATURATE;
-
 	switch (inst->Opcode) {
 	    case WM_PIXELXY:
 		emit_pixel_xy(c, dst, dst_flags);
@@ -2043,6 +2040,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 		if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8);
 		break;
 	    case OPCODE_ELSE:
+		assert(if_depth > 0);
 		if_inst[if_depth-1]  = brw_ELSE(p, if_inst[if_depth-1]);
 		break;
 	    case OPCODE_ENDIF:
@@ -2096,9 +2094,10 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
                   struct brw_instruction *inst0, *inst1;
                   GLuint br = 1;
 
-                  if (BRW_IS_IGDNG(brw))
+                  if (intel->is_ironlake)
                      br = 2;
- 
+
+		  assert(loop_depth > 0);
                   loop_depth--;
                   inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
                   /* patch all the BREAK/CONT instructions from last BGNLOOP */
@@ -2116,7 +2115,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
                }
                break;
 	    default:
-		_mesa_printf("unsupported IR in fragment shader %d\n",
+		printf("unsupported IR in fragment shader %d\n",
 			inst->Opcode);
 	}
 
@@ -2128,10 +2127,10 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
     post_wm_emit(c);
 
     if (INTEL_DEBUG & DEBUG_WM) {
-      _mesa_printf("wm-native:\n");
+      printf("wm-native:\n");
       for (i = 0; i < p->nr_insn; i++)
 	 brw_disasm(stderr, &p->store[i]);
-      _mesa_printf("\n");
+      printf("\n");
     }
 }
 
@@ -2142,7 +2141,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
 {
     if (INTEL_DEBUG & DEBUG_WM) {
-        _mesa_printf("brw_wm_glsl_emit:\n");
+        printf("brw_wm_glsl_emit:\n");
     }
 
     /* initial instruction translation/simplification */
diff --git a/i965/brw_wm_pass0.c b/i965/brw_wm_pass0.c
index ff4c082..60bd92e 100644
--- a/i965/brw_wm_pass0.c
+++ b/i965/brw_wm_pass0.c
@@ -105,7 +105,7 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c,
    GLuint i = c->prog_data.nr_params++;
    
    if (i >= BRW_WM_MAX_PARAM) {
-      _mesa_printf("%s: out of params\n", __FUNCTION__);
+      printf("%s: out of params\n", __FUNCTION__);
       c->prog_data.error = 1;
       return NULL;
    }
@@ -154,7 +154,7 @@ static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c,
       return c->constref[i].ref;
    }
    else {
-      _mesa_printf("%s: out of constrefs\n", __FUNCTION__);
+      printf("%s: out of constrefs\n", __FUNCTION__);
       c->prog_data.error = 1;
       return NULL;
    }
diff --git a/i965/brw_wm_sampler_state.c b/i965/brw_wm_sampler_state.c
index aa2e519..d7650af 100644
--- a/i965/brw_wm_sampler_state.c
+++ b/i965/brw_wm_sampler_state.c
@@ -89,7 +89,6 @@ struct wm_sampler_key {
       float max_aniso;
       GLenum minfilter, magfilter;
       GLenum comparemode, comparefunc;
-      dri_bo *sdc_bo;
 
       /** If target is cubemap, take context setting.
        */
@@ -105,7 +104,7 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key,
 				     dri_bo *sdc_bo,
 				     struct brw_sampler_state *sampler)
 {
-   _mesa_memset(sampler, 0, sizeof(*sampler));
+   memset(sampler, 0, sizeof(*sampler));
 
    switch (key->minfilter) {
    case GL_NEAREST:
@@ -230,7 +229,7 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
    GLcontext *ctx = &brw->intel.ctx;
    int unit;
 
-   memset(key, 0, sizeof(*key));
+   key->sampler_count = 0;
 
    for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
       if (ctx->Texture.Unit[unit]._ReallyEnabled) {
@@ -241,6 +240,8 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
 	 struct gl_texture_image *firstImage =
 	    texObj->Image[0][intelObj->firstLevel];
 
+	 memset(entry, 0, sizeof(*entry));
+
          entry->tex_target = texObj->Target;
 
 	 entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP)
@@ -262,10 +263,10 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
 	 dri_bo_unreference(brw->wm.sdc_bo[unit]);
 	 if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
 	    float bordercolor[4] = {
-	       texObj->BorderColor[0],
-	       texObj->BorderColor[0],
-	       texObj->BorderColor[0],
-	       texObj->BorderColor[0]
+	       texObj->BorderColor.f[0],
+	       texObj->BorderColor.f[0],
+	       texObj->BorderColor.f[0],
+	       texObj->BorderColor.f[0]
 	    };
 	    /* GL specs that border color for depth textures is taken from the
 	     * R channel, while the hardware uses A.  Spam R into all the
@@ -274,7 +275,7 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
 	    brw->wm.sdc_bo[unit] = upload_default_color(brw, bordercolor);
 	 } else {
 	    brw->wm.sdc_bo[unit] = upload_default_color(brw,
-							texObj->BorderColor);
+							texObj->BorderColor.f);
 	 }
 	 key->sampler_count = unit + 1;
       }
@@ -289,7 +290,7 @@ static void upload_wm_samplers( struct brw_context *brw )
 {
    GLcontext *ctx = &brw->intel.ctx;
    struct wm_sampler_key key;
-   int i;
+   int i, sampler_key_size;
 
    brw_wm_sampler_populate_key(brw, &key);
 
@@ -303,8 +304,11 @@ static void upload_wm_samplers( struct brw_context *brw )
    if (brw->wm.sampler_count == 0)
       return;
 
+   /* Only include the populated portion of the key in the search. */
+   sampler_key_size = offsetof(struct wm_sampler_key,
+			       sampler[key.sampler_count]);
    brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER,
-					 &key, sizeof(key),
+					 &key, sampler_key_size,
 					 brw->wm.sdc_bo, key.sampler_count,
 					 NULL);
 
@@ -324,10 +328,9 @@ static void upload_wm_samplers( struct brw_context *brw )
       }
 
       brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER,
-					    &key, sizeof(key),
+					    &key, sampler_key_size,
 					    brw->wm.sdc_bo, key.sampler_count,
-					    &sampler, sizeof(sampler),
-					    NULL, NULL);
+					    &sampler, sizeof(sampler));
 
       /* Emit SDC relocations */
       for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
diff --git a/i965/brw_wm_state.c b/i965/brw_wm_state.c
index f89ed9b..a7f80db 100644
--- a/i965/brw_wm_state.c
+++ b/i965/brw_wm_state.c
@@ -49,8 +49,6 @@ struct brw_wm_unit_key {
    unsigned int curbe_offset;
    unsigned int urb_size;
 
-   unsigned int max_threads;
-
    unsigned int nr_surfaces, sampler_count;
    GLboolean uses_depth, computes_depth, uses_kill, is_glsl;
    GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable;
@@ -67,18 +65,6 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
 
    memset(key, 0, sizeof(*key));
 
-   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
-      key->max_threads = 1;
-   else {
-      /* WM maximum threads is number of EUs times number of threads per EU. */
-      if (BRW_IS_IGDNG(brw))
-         key->max_threads = 12 * 6;
-      else if (BRW_IS_G4X(brw))
-	 key->max_threads = 10 * 5;
-      else
-	 key->max_threads = 8 * 4;
-   }
-
    /* CACHE_NEW_WM_PROG */
    key->total_grf = brw->wm.prog_data->total_grf;
    key->urb_entry_read_length = brw->wm.prog_data->urb_read_length;
@@ -140,6 +126,7 @@ static dri_bo *
 wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
 			dri_bo **reloc_bufs)
 {
+   struct intel_context *intel = &brw->intel;
    struct brw_wm_unit_state wm;
    dri_bo *bo;
 
@@ -150,7 +137,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
    wm.thread1.depth_coef_urb_read_offset = 1;
    wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
 
-   if (BRW_IS_IGDNG(brw))
+   if (intel->is_ironlake)
       wm.thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
       wm.thread1.binding_table_entry_count = key->nr_surfaces;
@@ -170,7 +157,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
    wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
    wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
 
-   if (BRW_IS_IGDNG(brw)) 
+   if (intel->is_ironlake)
       wm.wm4.sampler_count = 0; /* hardware requirement */
    else
       wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
@@ -191,7 +178,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
    else
       wm.wm5.enable_16_pix = 1;
 
-   wm.wm5.max_threads = key->max_threads - 1;
+   wm.wm5.max_threads = brw->wm_max_threads - 1;
    wm.wm5.thread_dispatch_enable = 1;	/* AKA: color_write */
    wm.wm5.legacy_line_rast = 0;
    wm.wm5.legacy_global_depth_bias = 0;
@@ -223,8 +210,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
    bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
 			 key, sizeof(*key),
 			 reloc_bufs, 3,
-			 &wm, sizeof(wm),
-			 NULL, NULL);
+			 &wm, sizeof(wm));
 
    /* Emit WM program relocation */
    dri_bo_emit_reloc(bo,
@@ -268,7 +254,7 @@ static void upload_wm_unit( struct brw_context *brw )
     */
    assert(key.total_scratch <= 12 * 1024);
    if (key.total_scratch) {
-      GLuint total = key.total_scratch * key.max_threads;
+      GLuint total = key.total_scratch * brw->wm_max_threads;
 
       if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) {
 	 dri_bo_unreference(brw->wm.scratch_bo);
diff --git a/i965/brw_wm_surface_state.c b/i965/brw_wm_surface_state.c
index 8335e5a..ce0bf0b 100644
--- a/i965/brw_wm_surface_state.c
+++ b/i965/brw_wm_surface_state.c
@@ -207,33 +207,14 @@ brw_create_texture_surface( struct brw_context *brw,
 
    surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
    surf.ss0.surface_type = translate_tex_target(key->target);
-   if (key->bo) {
-      surf.ss0.surface_format = translate_tex_format(key->format,
-						     key->internal_format,
-						     key->depthmode);
-   }
-   else {
-      switch (key->depth) {
-      case 32:
-         surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
-         break;
-      default:
-      case 24:
-         surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
-         break;
-      case 16:
-         surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
-         break;
-      }
-   }
+   surf.ss0.surface_format = translate_tex_format(key->format,
+						  key->internal_format,
+						  key->depthmode);
 
    /* This is ok for all textures with channel width 8bit or less:
     */
 /*    surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
-   if (key->bo)
-      surf.ss1.base_addr = key->bo->offset; /* reloc */
-   else
-      surf.ss1.base_addr = key->offset;
+   surf.ss1.base_addr = key->bo->offset; /* reloc */
 
    surf.ss2.mip_count = key->last_level - key->first_level;
    surf.ss2.width = key->width - 1;
@@ -255,18 +236,14 @@ brw_create_texture_surface( struct brw_context *brw,
 
    bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
 			 key, sizeof(*key),
-			 &key->bo, key->bo ? 1 : 0,
-			 &surf, sizeof(surf),
-			 NULL, NULL);
-
-   if (key->bo) {
-      /* Emit relocation to surface contents */
-      dri_bo_emit_reloc(bo,
-			I915_GEM_DOMAIN_SAMPLER, 0,
-			0,
-			offsetof(struct brw_surface_state, ss1),
-			key->bo);
-   }
+			 &key->bo, 1,
+			 &surf, sizeof(surf));
+
+   /* Emit relocation to surface contents */
+   drm_intel_bo_emit_reloc(bo, offsetof(struct brw_surface_state, ss1),
+			   key->bo, 0,
+			   I915_GEM_DOMAIN_SAMPLER, 0);
+
    return bo;
 }
 
@@ -282,19 +259,12 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
 
    memset(&key, 0, sizeof(key));
 
-   if (intelObj->imageOverride) {
-      key.pitch = intelObj->pitchOverride / intelObj->mt->cpp;
-      key.depth = intelObj->depthOverride;
-      key.bo = NULL;
-      key.offset = intelObj->textureOffset;
-   } else {
-      key.format = firstImage->TexFormat;
-      key.internal_format = firstImage->InternalFormat;
-      key.pitch = intelObj->mt->pitch;
-      key.depth = firstImage->Depth;
-      key.bo = intelObj->mt->region->buffer;
-      key.offset = 0;
-   }
+   key.format = firstImage->TexFormat;
+   key.internal_format = firstImage->InternalFormat;
+   key.pitch = intelObj->mt->pitch;
+   key.depth = firstImage->Depth;
+   key.bo = intelObj->mt->region->buffer;
+   key.offset = 0;
 
    key.target = tObj->Target;
    key.depthmode = tObj->DepthMode;
@@ -309,7 +279,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
    brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
                                             BRW_SS_SURFACE,
                                             &key, sizeof(key),
-                                            &key.bo, key.bo ? 1 : 0,
+                                            &key.bo, 1,
                                             NULL);
    if (brw->wm.surf_bo[surf] == NULL) {
       brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key);
@@ -337,10 +307,7 @@ brw_create_constant_surface( struct brw_context *brw,
    surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
 
    assert(key->bo);
-   if (key->bo)
-      surf.ss1.base_addr = key->bo->offset; /* reloc */
-   else
-      surf.ss1.base_addr = key->offset;
+   surf.ss1.base_addr = key->bo->offset; /* reloc */
 
    surf.ss2.width = w & 0x7f;            /* bits 6:0 of size or width */
    surf.ss2.height = (w >> 7) & 0x1fff;  /* bits 19:7 of size or width */
@@ -350,21 +317,16 @@ brw_create_constant_surface( struct brw_context *brw,
  
    bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
 			 key, sizeof(*key),
-			 &key->bo, key->bo ? 1 : 0,
-			 &surf, sizeof(surf),
-			 NULL, NULL);
-
-   if (key->bo) {
-      /* Emit relocation to surface contents.  Section 5.1.1 of the gen4
-       * bspec ("Data Cache") says that the data cache does not exist as
-       * a separate cache and is just the sampler cache.
-       */
-      dri_bo_emit_reloc(bo,
-			I915_GEM_DOMAIN_SAMPLER, 0,
-			0,
-			offsetof(struct brw_surface_state, ss1),
-			key->bo);
-   }
+			 &key->bo, 1,
+			 &surf, sizeof(surf));
+
+   /* Emit relocation to surface contents.  Section 5.1.1 of the gen4
+    * bspec ("Data Cache") says that the data cache does not exist as
+    * a separate cache and is just the sampler cache.
+    */
+   drm_intel_bo_emit_reloc(bo, offsetof(struct brw_surface_state, ss1),
+			   key->bo, 0,
+			   I915_GEM_DOMAIN_SAMPLER, 0);
 
    return bo;
 }
@@ -422,7 +384,7 @@ brw_update_wm_constant_surface( GLcontext *ctx,
    /* If there's no constant buffer, then no surface BO is needed to point at
     * it.
     */
-   if (fp->const_buffer == 0) {
+   if (fp->const_buffer == NULL) {
       drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
       brw->wm.surf_bo[surf] = NULL;
       return;
@@ -450,7 +412,7 @@ brw_update_wm_constant_surface( GLcontext *ctx,
    brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
                                             BRW_SS_SURFACE,
                                             &key, sizeof(key),
-                                            &key.bo, key.bo ? 1 : 0,
+                                            &key.bo, 1,
                                             NULL);
    if (brw->wm.surf_bo[surf] == NULL) {
       brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key);
@@ -511,7 +473,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
 				struct gl_renderbuffer *rb,
 				unsigned int unit)
 {
-   GLcontext *ctx = &brw->intel.ctx;
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
    dri_bo *region_bo = NULL;
    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
    struct intel_region *region = irb ? irb->region : NULL;
@@ -522,7 +485,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
       GLubyte color_mask[4];
       GLboolean color_blend;
       uint32_t tiling;
-      uint32_t draw_offset;
+      uint32_t draw_x;
+      uint32_t draw_y;
    } key;
 
    memset(&key, 0, sizeof(key));
@@ -564,7 +528,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
       }
       key.pitch = region->pitch;
       key.cpp = region->cpp;
-      key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */
+      key.draw_x = region->draw_x;
+      key.draw_y = region->draw_y;
    } else {
       key.surface_type = BRW_SURFACE_NULL;
       key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
@@ -572,20 +537,24 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
       key.width = 1;
       key.height = 1;
       key.cpp = 4;
-      key.draw_offset = 0;
+      key.draw_x = 0;
+      key.draw_y = 0;
    }
-   /* _NEW_COLOR */
-   memcpy(key.color_mask, ctx->Color.ColorMask,
-	  sizeof(key.color_mask));
 
-   /* As mentioned above, disable writes to the alpha component when the
-    * renderbuffer is XRGB.
-    */
-   if (ctx->DrawBuffer->Visual.alphaBits == 0)
-     key.color_mask[3] = GL_FALSE;
+   if (intel->gen < 6) {
+      /* _NEW_COLOR */
+      memcpy(key.color_mask, ctx->Color.ColorMask[unit],
+	     sizeof(key.color_mask));
 
-   key.color_blend = (!ctx->Color._LogicOpEnabled &&
-		      ctx->Color.BlendEnabled);
+      /* As mentioned above, disable writes to the alpha component when the
+       * renderbuffer is XRGB.
+       */
+      if (ctx->DrawBuffer->Visual.alphaBits == 0)
+	 key.color_mask[3] = GL_FALSE;
+
+      key.color_blend = (!ctx->Color._LogicOpEnabled &&
+			 (ctx->Color.BlendEnabled & (1 << unit)));
+   }
 
    dri_bo_unreference(brw->wm.surf_bo[unit]);
    brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache,
@@ -602,25 +571,32 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
       surf.ss0.surface_format = key.surface_format;
       surf.ss0.surface_type = key.surface_type;
       if (key.tiling == I915_TILING_NONE) {
-	 surf.ss1.base_addr = key.draw_offset;
+	 surf.ss1.base_addr = (key.draw_x + key.draw_y * key.pitch) * key.cpp;
       } else {
-	 uint32_t tile_offset = key.draw_offset % 4096;
-
-	 surf.ss1.base_addr = key.draw_offset - tile_offset;
-
-	 assert(BRW_IS_G4X(brw) || tile_offset == 0);
-	 if (BRW_IS_G4X(brw)) {
-	    if (key.tiling == I915_TILING_X) {
-	       /* Note that the low bits of these fields are missing, so
-		* there's the possibility of getting in trouble.
-		*/
-	       surf.ss5.x_offset = (tile_offset % 512) / key.cpp / 4;
-	       surf.ss5.y_offset = tile_offset / 512 / 2;
-	    } else {
-	       surf.ss5.x_offset = (tile_offset % 128) / key.cpp / 4;
-	       surf.ss5.y_offset = tile_offset / 128 / 2;
-	    }
+	 uint32_t tile_base, tile_x, tile_y;
+	 uint32_t pitch = key.pitch * key.cpp;
+
+	 if (key.tiling == I915_TILING_X) {
+	    tile_x = key.draw_x % (512 / key.cpp);
+	    tile_y = key.draw_y % 8;
+	    tile_base = ((key.draw_y / 8) * (8 * pitch));
+	    tile_base += (key.draw_x - tile_x) / (512 / key.cpp) * 4096;
+	 } else {
+	    /* Y */
+	    tile_x = key.draw_x % (128 / key.cpp);
+	    tile_y = key.draw_y % 32;
+	    tile_base = ((key.draw_y / 32) * (32 * pitch));
+	    tile_base += (key.draw_x - tile_x) / (128 / key.cpp) * 4096;
 	 }
+	 assert(intel->is_g4x || (tile_x == 0 && tile_y == 0));
+	 assert(tile_x % 4 == 0);
+	 assert(tile_y % 2 == 0);
+	 /* Note that the low bits of these fields are missing, so
+	  * there's the possibility of getting in trouble.
+	  */
+	 surf.ss1.base_addr = tile_base;
+	 surf.ss5.x_offset = tile_x / 4;
+	 surf.ss5.y_offset = tile_y / 2;
       }
       if (region_bo != NULL)
 	 surf.ss1.base_addr += region_bo->offset; /* reloc */
@@ -630,20 +606,21 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
       brw_set_surface_tiling(&surf, key.tiling);
       surf.ss3.pitch = (key.pitch * key.cpp) - 1;
 
-      /* _NEW_COLOR */
-      surf.ss0.color_blend = key.color_blend;
-      surf.ss0.writedisable_red =   !key.color_mask[0];
-      surf.ss0.writedisable_green = !key.color_mask[1];
-      surf.ss0.writedisable_blue =  !key.color_mask[2];
-      surf.ss0.writedisable_alpha = !key.color_mask[3];
+      if (intel->gen < 6) {
+	 /* _NEW_COLOR */
+	 surf.ss0.color_blend = key.color_blend;
+	 surf.ss0.writedisable_red =   !key.color_mask[0];
+	 surf.ss0.writedisable_green = !key.color_mask[1];
+	 surf.ss0.writedisable_blue =  !key.color_mask[2];
+	 surf.ss0.writedisable_alpha = !key.color_mask[3];
+      }
 
       /* Key size will never match key size for textures, so we're safe. */
       brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache,
                                                BRW_SS_SURFACE,
                                                &key, sizeof(key),
 					       &region_bo, 1,
-					       &surf, sizeof(surf),
-					       NULL, NULL);
+					       &surf, sizeof(surf));
       if (region_bo != NULL) {
 	 /* We might sample from it, and we might render to it, so flag
 	  * them both.  We might be able to figure out from other state
@@ -690,8 +667,7 @@ brw_wm_get_binding_table(struct brw_context *brw)
       bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
 				  NULL, 0,
 				  brw->wm.surf_bo, brw->wm.nr_surfaces,
-				  data, data_size,
-				  NULL, NULL);
+				  data, data_size);
 
       /* Emit binding table relocations to surface state */
       for (i = 0; i < BRW_WM_MAX_SURF; i++) {
diff --git a/i965/gen6_cc.c b/i965/gen6_cc.c
new file mode 100644
index 0000000..f7acad6
--- /dev/null
+++ b/i965/gen6_cc.c
@@ -0,0 +1,296 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "intel_batchbuffer.h"
+#include "main/macros.h"
+
+struct gen6_blend_state_key {
+   GLboolean color_blend, alpha_enabled;
+   GLboolean dither;
+
+   GLenum logic_op;
+
+   GLenum blend_eq_rgb, blend_eq_a;
+   GLenum blend_src_rgb, blend_src_a;
+   GLenum blend_dst_rgb, blend_dst_a;
+
+   GLenum alpha_func;
+};
+
+static void
+blend_state_populate_key(struct brw_context *brw,
+			 struct gen6_blend_state_key *key)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+
+   memset(key, 0, sizeof(*key));
+
+   /* _NEW_COLOR */
+   if (ctx->Color._LogicOpEnabled)
+      key->logic_op = ctx->Color.LogicOp;
+   else
+      key->logic_op = GL_COPY;
+
+   /* _NEW_COLOR */
+   key->color_blend = ctx->Color.BlendEnabled;
+   if (key->color_blend) {
+      key->blend_eq_rgb = ctx->Color.BlendEquationRGB;
+      key->blend_eq_a = ctx->Color.BlendEquationA;
+      key->blend_src_rgb = ctx->Color.BlendSrcRGB;
+      key->blend_dst_rgb = ctx->Color.BlendDstRGB;
+      key->blend_src_a = ctx->Color.BlendSrcA;
+      key->blend_dst_a = ctx->Color.BlendDstA;
+   }
+
+   /* _NEW_COLOR */
+   key->alpha_enabled = ctx->Color.AlphaEnabled;
+   if (key->alpha_enabled) {
+      key->alpha_func = ctx->Color.AlphaFunc;
+   }
+
+   /* _NEW_COLOR */
+   key->dither = ctx->Color.DitherFlag;
+}
+
+/**
+ * Creates the state cache entry for the given CC unit key.
+ */
+static drm_intel_bo *
+blend_state_create_from_key(struct brw_context *brw,
+			    struct gen6_blend_state_key *key)
+{
+   struct gen6_blend_state blend;
+   drm_intel_bo *bo;
+
+   memset(&blend, 0, sizeof(blend));
+
+   if (key->logic_op != GL_COPY) {
+      blend.blend1.logic_op_enable = 1;
+      blend.blend1.logic_op_func = intel_translate_logic_op(key->logic_op);
+   } else if (key->color_blend) {
+      GLenum eqRGB = key->blend_eq_rgb;
+      GLenum eqA = key->blend_eq_a;
+      GLenum srcRGB = key->blend_src_rgb;
+      GLenum dstRGB = key->blend_dst_rgb;
+      GLenum srcA = key->blend_src_a;
+      GLenum dstA = key->blend_dst_a;
+
+      if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
+	 srcRGB = dstRGB = GL_ONE;
+      }
+
+      if (eqA == GL_MIN || eqA == GL_MAX) {
+	 srcA = dstA = GL_ONE;
+      }
+
+      blend.blend0.dest_blend_factor = brw_translate_blend_factor(dstRGB);
+      blend.blend0.source_blend_factor = brw_translate_blend_factor(srcRGB);
+      blend.blend0.blend_func = brw_translate_blend_equation(eqRGB);
+
+      blend.blend0.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
+      blend.blend0.ia_source_blend_factor = brw_translate_blend_factor(srcA);
+      blend.blend0.ia_blend_func = brw_translate_blend_equation(eqA);
+
+      blend.blend0.blend_enable = 1;
+      blend.blend0.ia_blend_enable = (srcA != srcRGB ||
+				      dstA != dstRGB ||
+				      eqA != eqRGB);
+   }
+
+   if (key->alpha_enabled) {
+      blend.blend1.alpha_test_enable = 1;
+      blend.blend1.alpha_test_func = intel_translate_compare_func(key->alpha_func);
+
+   }
+
+   if (key->dither) {
+      blend.blend1.dither_enable = 1;
+      blend.blend1.y_dither_offset = 0;
+      blend.blend1.x_dither_offset = 0;
+   }
+
+   bo = brw_upload_cache(&brw->cache, BRW_BLEND_STATE,
+			 key, sizeof(*key),
+			 NULL, 0,
+			 &blend, sizeof(blend));
+
+   return bo;
+}
+
+static void
+prepare_blend_state(struct brw_context *brw)
+{
+   struct gen6_blend_state_key key;
+
+   blend_state_populate_key(brw, &key);
+
+   drm_intel_bo_unreference(brw->cc.blend_state_bo);
+   brw->cc.blend_state_bo = brw_search_cache(&brw->cache, BRW_BLEND_STATE,
+					     &key, sizeof(key),
+					     NULL, 0,
+					     NULL);
+
+   if (brw->cc.blend_state_bo == NULL)
+      brw->cc.blend_state_bo = blend_state_create_from_key(brw, &key);
+}
+
+const struct brw_tracked_state gen6_blend_state = {
+   .dirty = {
+      .mesa = _NEW_COLOR,
+      .brw = 0,
+      .cache = 0,
+   },
+   .prepare = prepare_blend_state,
+};
+
+struct gen6_color_calc_state_key {
+   GLubyte blend_constant_color[4];
+   GLclampf alpha_ref;
+   GLubyte stencil_ref[2];
+};
+
+static void
+color_calc_state_populate_key(struct brw_context *brw,
+			      struct gen6_color_calc_state_key *key)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+
+   memset(key, 0, sizeof(*key));
+
+   /* _NEW_STENCIL */
+   if (ctx->Stencil._Enabled) {
+      const unsigned back = ctx->Stencil._BackFace;
+
+      key->stencil_ref[0] = ctx->Stencil.Ref[0];
+      if (ctx->Stencil._TestTwoSide)
+	 key->stencil_ref[1] = ctx->Stencil.Ref[back];
+   }
+
+   /* _NEW_COLOR */
+   if (ctx->Color.AlphaEnabled)
+      key->alpha_ref = ctx->Color.AlphaRef;
+
+   key->blend_constant_color[0] = ctx->Color.BlendColor[0];
+   key->blend_constant_color[1] = ctx->Color.BlendColor[1];
+   key->blend_constant_color[2] = ctx->Color.BlendColor[2];
+   key->blend_constant_color[3] = ctx->Color.BlendColor[3];
+}
+
+/**
+ * Creates the state cache entry for the given CC state key.
+ */
+static drm_intel_bo *
+color_calc_state_create_from_key(struct brw_context *brw,
+				 struct gen6_color_calc_state_key *key)
+{
+   struct gen6_color_calc_state cc;
+   drm_intel_bo *bo;
+
+   memset(&cc, 0, sizeof(cc));
+
+   cc.cc0.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
+   UNCLAMPED_FLOAT_TO_UBYTE(cc.cc1.alpha_ref_fi.ui, key->alpha_ref);
+
+   cc.cc0.stencil_ref = key->stencil_ref[0];
+   cc.cc0.bf_stencil_ref = key->stencil_ref[1];
+
+   cc.constant_r = key->blend_constant_color[0];
+   cc.constant_g = key->blend_constant_color[1];
+   cc.constant_b = key->blend_constant_color[2];
+   cc.constant_a = key->blend_constant_color[3];
+
+   bo = brw_upload_cache(&brw->cache, BRW_COLOR_CALC_STATE,
+			 key, sizeof(*key),
+			 NULL, 0,
+			 &cc, sizeof(cc));
+
+   return bo;
+}
+
+static void
+prepare_color_calc_state(struct brw_context *brw)
+{
+   struct gen6_color_calc_state_key key;
+
+   color_calc_state_populate_key(brw, &key);
+
+   drm_intel_bo_unreference(brw->cc.state_bo);
+   brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_COLOR_CALC_STATE,
+				       &key, sizeof(key),
+				       NULL, 0,
+				       NULL);
+
+   if (brw->cc.state_bo == NULL)
+      brw->cc.state_bo = color_calc_state_create_from_key(brw, &key);
+}
+
+const struct brw_tracked_state gen6_color_calc_state = {
+   .dirty = {
+      .mesa = _NEW_COLOR,
+      .brw = 0,
+      .cache = 0,
+   },
+   .prepare = prepare_color_calc_state,
+};
+
+static void upload_cc_state_pointers(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(CMD_3D_CC_STATE_POINTERS << 16 | (4 - 2));
+   OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+   OUT_RELOC(brw->cc.blend_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+   OUT_RELOC(brw->cc.depth_stencil_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+
+static void prepare_cc_state_pointers(struct brw_context *brw)
+{
+   brw_add_validated_bo(brw, brw->cc.state_bo);
+   brw_add_validated_bo(brw, brw->cc.blend_state_bo);
+   brw_add_validated_bo(brw, brw->cc.depth_stencil_state_bo);
+}
+
+const struct brw_tracked_state gen6_cc_state_pointers = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH,
+      .cache = (CACHE_NEW_BLEND_STATE |
+		CACHE_NEW_COLOR_CALC_STATE |
+		CACHE_NEW_DEPTH_STENCIL_STATE)
+   },
+   .prepare = prepare_cc_state_pointers,
+   .emit = upload_cc_state_pointers,
+};
diff --git a/i965/gen6_clip_state.c b/i965/gen6_clip_state.c
new file mode 100644
index 0000000..06f8145
--- /dev/null
+++ b/i965/gen6_clip_state.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+static void
+upload_clip_state(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+   uint32_t depth_clamp = 0;
+   uint32_t provoking;
+
+   if (!ctx->Transform.DepthClamp)
+      depth_clamp = GEN6_CLIP_Z_TEST;
+
+   if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) {
+      provoking = 0;
+   } else {
+      provoking =
+	 (2 << GEN6_CLIP_TRI_PROVOKE_SHIFT) |
+	 (2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) |
+	 (1 << GEN6_CLIP_LINE_PROVOKE_SHIFT);
+   }
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(CMD_3D_CLIP_STATE << 16 | (4 - 2));
+   OUT_BATCH(GEN6_CLIP_STATISTICS_ENABLE);
+   OUT_BATCH(GEN6_CLIP_ENABLE |
+	     GEN6_CLIP_API_OGL |
+	     GEN6_CLIP_MODE_REJECT_ALL | /* XXX: debug: get VS working */
+	     GEN6_CLIP_XY_TEST |
+	     depth_clamp |
+	     provoking);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_clip_state = {
+   .dirty = {
+      .mesa  = _NEW_TRANSFORM,
+      .brw   = BRW_NEW_CONTEXT,
+      .cache = 0
+   },
+   .emit = upload_clip_state,
+};
diff --git a/i965/gen6_depthstencil.c b/i965/gen6_depthstencil.c
new file mode 100644
index 0000000..4924f0f
--- /dev/null
+++ b/i965/gen6_depthstencil.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+
+struct brw_depth_stencil_state_key {
+   GLenum depth_func;
+   GLboolean depth_test, depth_write;
+   GLboolean stencil, stencil_two_side;
+   GLenum stencil_func[2], stencil_fail_op[2];
+   GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2];
+   GLubyte stencil_write_mask[2], stencil_test_mask[2];
+};
+
+static void
+depth_stencil_state_populate_key(struct brw_context *brw,
+				 struct brw_depth_stencil_state_key *key)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   const unsigned back = ctx->Stencil._BackFace;
+
+   memset(key, 0, sizeof(*key));
+
+   /* _NEW_STENCIL */
+   key->stencil = ctx->Stencil._Enabled;
+   key->stencil_two_side = ctx->Stencil._TestTwoSide;
+
+   if (key->stencil) {
+      key->stencil_func[0] = ctx->Stencil.Function[0];
+      key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0];
+      key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0];
+      key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0];
+      key->stencil_write_mask[0] = ctx->Stencil.WriteMask[0];
+      key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0];
+   }
+   if (key->stencil_two_side) {
+      key->stencil_func[1] = ctx->Stencil.Function[back];
+      key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back];
+      key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back];
+      key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back];
+      key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back];
+      key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back];
+   }
+
+   key->depth_test = ctx->Depth.Test;
+   if (key->depth_test) {
+      key->depth_func = ctx->Depth.Func;
+      key->depth_write = ctx->Depth.Mask;
+   }
+}
+
+/**
+ * Creates the state cache entry for the given DEPTH_STENCIL_STATE state key.
+ */
+static dri_bo *
+depth_stencil_state_create_from_key(struct brw_context *brw,
+				    struct brw_depth_stencil_state_key *key)
+{
+   struct gen6_depth_stencil_state ds;
+   dri_bo *bo;
+
+   memset(&ds, 0, sizeof(ds));
+
+   /* _NEW_STENCIL */
+   if (key->stencil) {
+      ds.ds0.stencil_enable = 1;
+      ds.ds0.stencil_func =
+	 intel_translate_compare_func(key->stencil_func[0]);
+      ds.ds0.stencil_fail_op =
+	 intel_translate_stencil_op(key->stencil_fail_op[0]);
+      ds.ds0.stencil_pass_depth_fail_op =
+	 intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]);
+      ds.ds0.stencil_pass_depth_pass_op =
+	 intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]);
+      ds.ds1.stencil_write_mask = key->stencil_write_mask[0];
+      ds.ds1.stencil_test_mask = key->stencil_test_mask[0];
+
+      if (key->stencil_two_side) {
+	 ds.ds0.bf_stencil_enable = 1;
+	 ds.ds0.bf_stencil_func =
+	    intel_translate_compare_func(key->stencil_func[1]);
+	 ds.ds0.bf_stencil_fail_op =
+	    intel_translate_stencil_op(key->stencil_fail_op[1]);
+	 ds.ds0.bf_stencil_pass_depth_fail_op =
+	    intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]);
+	 ds.ds0.bf_stencil_pass_depth_pass_op =
+	    intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]);
+	 ds.ds1.bf_stencil_write_mask = key->stencil_write_mask[1];
+	 ds.ds1.bf_stencil_test_mask = key->stencil_test_mask[1];
+      }
+
+      /* Not really sure about this:
+       */
+      if (key->stencil_write_mask[0] ||
+	  (key->stencil_two_side && key->stencil_write_mask[1]))
+	 ds.ds0.stencil_write_enable = 1;
+   }
+
+   /* _NEW_DEPTH */
+   if (key->depth_test) {
+      ds.ds2.depth_test_enable = 1;
+      ds.ds2.depth_test_func = intel_translate_compare_func(key->depth_func);
+      ds.ds2.depth_write_enable = key->depth_write;
+   }
+
+   bo = brw_upload_cache(&brw->cache, BRW_DEPTH_STENCIL_STATE,
+			 key, sizeof(*key),
+			 NULL, 0,
+			 &ds, sizeof(ds));
+
+   return bo;
+}
+
+static void
+prepare_depth_stencil_state(struct brw_context *brw)
+{
+   struct brw_depth_stencil_state_key key;
+
+   depth_stencil_state_populate_key(brw, &key);
+
+   dri_bo_unreference(brw->cc.depth_stencil_state_bo);
+   brw->cc.depth_stencil_state_bo = brw_search_cache(&brw->cache,
+						     BRW_DEPTH_STENCIL_STATE,
+						     &key, sizeof(key),
+						     NULL, 0,
+						     NULL);
+
+   if (brw->cc.depth_stencil_state_bo == NULL)
+      brw->cc.depth_stencil_state_bo =
+	 depth_stencil_state_create_from_key(brw, &key);
+}
+
+const struct brw_tracked_state gen6_depth_stencil_state = {
+   .dirty = {
+      .mesa = _NEW_DEPTH | _NEW_STENCIL,
+      .brw = 0,
+      .cache = 0,
+   },
+   .prepare = prepare_depth_stencil_state,
+};
diff --git a/i965/gen6_gs_state.c b/i965/gen6_gs_state.c
new file mode 100644
index 0000000..161e7b8
--- /dev/null
+++ b/i965/gen6_gs_state.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+static void
+upload_gs_state(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   /* Disable all the constant buffers. */
+   BEGIN_BATCH(5);
+   OUT_BATCH(CMD_3D_CONSTANT_GS_STATE << 16 | (5 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+
+   if (brw->gs.prog_bo) {
+      BEGIN_BATCH(7);
+      OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2));
+      OUT_RELOC(brw->gs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+      OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
+		(0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+      OUT_BATCH(0); /* scratch space base offset */
+      OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
+		(brw->gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT) |
+		(0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
+      OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
+		GEN6_GS_STATISTICS_ENABLE |
+		GEN6_GS_RENDERING_ENABLE);
+      OUT_BATCH(GEN6_GS_ENABLE);
+      ADVANCE_BATCH();
+   } else {
+      BEGIN_BATCH(7);
+      OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2));
+      OUT_BATCH(0); /* prog_bo */
+      OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
+		(0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+      OUT_BATCH(0); /* scratch space base offset */
+      OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
+		(0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
+		(0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
+      OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
+		GEN6_GS_STATISTICS_ENABLE |
+		GEN6_GS_RENDERING_ENABLE);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+}
+
+const struct brw_tracked_state gen6_gs_state = {
+   .dirty = {
+      .mesa  = _NEW_TRANSFORM,
+      .brw   = (BRW_NEW_CURBE_OFFSETS |
+		BRW_NEW_URB_FENCE |
+		BRW_NEW_CONTEXT),
+      .cache = CACHE_NEW_GS_PROG
+   },
+   .emit = upload_gs_state,
+};
diff --git a/i965/gen6_sampler_state.c b/i965/gen6_sampler_state.c
new file mode 100644
index 0000000..ab8e751
--- /dev/null
+++ b/i965/gen6_sampler_state.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+static void
+upload_sampler_state_pointers(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(CMD_3D_SAMPLER_STATE_POINTERS << 16 |
+	     VS_SAMPLER_STATE_CHANGE |
+	     GS_SAMPLER_STATE_CHANGE |
+	     PS_SAMPLER_STATE_CHANGE |
+	     (4 - 2));
+   OUT_BATCH(0); /* VS */
+   OUT_BATCH(0); /* GS */
+   if (brw->wm.sampler_bo)
+      OUT_RELOC(brw->wm.sampler_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   else
+      OUT_BATCH(0);
+
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+
+static void
+prepare_sampler_state_pointers(struct brw_context *brw)
+{
+   brw_add_validated_bo(brw, brw->wm.sampler_bo);
+}
+
+const struct brw_tracked_state gen6_sampler_state = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH,
+      .cache = CACHE_NEW_SAMPLER
+   },
+   .prepare = prepare_sampler_state_pointers,
+   .emit = upload_sampler_state_pointers,
+};
diff --git a/i965/gen6_scissor_state.c b/i965/gen6_scissor_state.c
new file mode 100644
index 0000000..2e21e5f
--- /dev/null
+++ b/i965/gen6_scissor_state.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+static void
+prepare_scissor_state(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
+   struct gen6_scissor_state scissor;
+
+   /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT */
+
+   /* The scissor only needs to handle the intersection of drawable and
+    * scissor rect.  Clipping to the boundaries of static shared buffers
+    * for front/back/depth is covered by looping over cliprects in brw_draw.c.
+    *
+    * Note that the hardware's coordinates are inclusive, while Mesa's min is
+    * inclusive but max is exclusive.
+    */
+   if (render_to_fbo) {
+      /* texmemory: Y=0=bottom */
+      scissor.xmin = ctx->DrawBuffer->_Xmin;
+      scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
+      scissor.ymin = ctx->DrawBuffer->_Ymin;
+      scissor.ymax = ctx->DrawBuffer->_Ymax - 1;
+   }
+   else {
+      /* memory: Y=0=top */
+      scissor.xmin = ctx->DrawBuffer->_Xmin;
+      scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
+      scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax;
+      scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
+   }
+
+   drm_intel_bo_unreference(brw->sf.state_bo);
+   brw->sf.state_bo = brw_cache_data(&brw->cache, BRW_SF_UNIT,
+				     &scissor, sizeof(scissor),
+				     NULL, 0);
+}
+
+const struct brw_tracked_state gen6_scissor_state = {
+   .dirty = {
+      .mesa = _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT,
+      .brw = 0,
+      .cache = 0,
+   },
+   .prepare = prepare_scissor_state,
+};
+
+static void upload_scissor_state_pointers(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(2);
+   OUT_BATCH(CMD_3D_SCISSOR_STATE_POINTERS << 16 | (2 - 2));
+   OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+
+static void prepare_scissor_state_pointers(struct brw_context *brw)
+{
+   brw_add_validated_bo(brw, brw->sf.state_bo);
+}
+
+const struct brw_tracked_state gen6_scissor_state_pointers = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH,
+      .cache = CACHE_NEW_SF_UNIT
+   },
+   .prepare = prepare_scissor_state_pointers,
+   .emit = upload_scissor_state_pointers,
+};
diff --git a/i965/gen6_sf_state.c b/i965/gen6_sf_state.c
new file mode 100644
index 0000000..8d96b44
--- /dev/null
+++ b/i965/gen6_sf_state.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "main/macros.h"
+#include "intel_batchbuffer.h"
+
+static uint32_t
+get_attr_override(struct brw_context *brw, int attr)
+{
+   uint32_t attr_override;
+   int attr_index = 0, i;
+
+   /* Find the source index (0 = first attribute after the 4D position)
+    * for this output attribute.  attr is currently a VERT_RESULT_* but should
+    * be FRAG_ATTRIB_*.
+    */
+   for (i = 0; i < attr; i++) {
+      if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(i))
+	 attr_index++;
+   }
+   attr_override = attr_index;
+
+   return attr_index;
+}
+
+static void
+upload_sf_state(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+   /* CACHE_NEW_VS_PROG */
+   uint32_t num_inputs = brw_count_bits(brw->vs.prog_data->outputs_written);
+   /* This should probably be FS inputs read */
+   uint32_t num_outputs = brw_count_bits(brw->vs.prog_data->outputs_written);
+   uint32_t dw1, dw2, dw3, dw4;
+   int i;
+   /* _NEW_BUFFER */
+   GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
+   int attr = 0;
+
+   dw1 =
+      num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT |
+      (num_inputs + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
+      3 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
+   dw2 = GEN6_SF_VIEWPORT_TRANSFORM_ENABLE |
+      GEN6_SF_STATISTICS_ENABLE;
+   dw3 = 0;
+   dw4 = 0;
+
+   /* _NEW_POLYGON */
+   if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo)
+      dw2 |= GEN6_SF_WINDING_CCW;
+
+   /* _NEW_SCISSOR */
+   if (ctx->Scissor.Enabled)
+      dw3 |= GEN6_SF_SCISSOR_ENABLE;
+
+   /* _NEW_POLYGON */
+   if (ctx->Polygon.CullFlag) {
+      switch (ctx->Polygon.CullFaceMode) {
+      case GL_FRONT:
+	 dw3 |= GEN6_SF_CULL_BOTH;
+	 break;
+      case GL_BACK:
+	 dw3 |= GEN6_SF_CULL_BACK;
+	 break;
+      case GL_FRONT_AND_BACK:
+	 dw3 |= GEN6_SF_CULL_BOTH;
+	 break;
+      default:
+	 assert(0);
+	 break;
+      }
+   } else {
+      dw3 |= GEN6_SF_CULL_NONE;
+   }
+
+   /* _NEW_LINE */
+   dw3 |= U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7) <<
+      GEN6_SF_LINE_WIDTH_SHIFT;
+   if (ctx->Line.SmoothFlag) {
+      dw3 |= GEN6_SF_LINE_AA_ENABLE;
+      dw3 |= GEN6_SF_LINE_AA_MODE_TRUE;
+      dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
+   }
+
+   /* _NEW_POINT */
+   if (ctx->Point._Attenuated)
+      dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH;
+
+   dw4 |= U_FIXED(CLAMP(ctx->Point.Size, 0.125, 225.875), 3) <<
+      GEN6_SF_POINT_WIDTH_SHIFT;
+   if (render_to_fbo)
+      dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
+
+   /* _NEW_LIGHT */
+   if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
+      dw4 |=
+	 (2 << GEN6_SF_TRI_PROVOKE_SHIFT) |
+	 (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) |
+	 (1 << GEN6_SF_LINE_PROVOKE_SHIFT);
+   } else {
+      dw4 |=
+	 (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT);
+   }
+
+   BEGIN_BATCH(20);
+   OUT_BATCH(CMD_3D_SF_STATE << 16 | (20 - 2));
+   OUT_BATCH(dw1);
+   OUT_BATCH(dw2);
+   OUT_BATCH(dw3);
+   OUT_BATCH(dw4);
+   OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant.  copied from gen4 */
+   OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */
+   OUT_BATCH_F(0.0); /* XXX: global depth offset clamp */
+   for (i = 0; i < 8; i++) {
+      uint32_t attr_overrides = 0;
+
+      /* These should be generating FS inputs read instead of VS
+       * outputs written
+       */
+      for (; attr < 64; attr++) {
+	 if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(attr)) {
+	    attr_overrides |= get_attr_override(brw, attr);
+	    attr++;
+	    break;
+	 }
+      }
+
+      for (; attr < 64; attr++) {
+	 if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(attr)) {
+	    attr_overrides |= get_attr_override(brw, attr) << 16;
+	    attr++;
+	    break;
+	 }
+      }
+      OUT_BATCH(attr_overrides);
+   }
+   OUT_BATCH(0); /* point sprite texcoord bitmask */
+   OUT_BATCH(0); /* constant interp bitmask */
+   OUT_BATCH(0); /* wrapshortest enables 0-7 */
+   OUT_BATCH(0); /* wrapshortest enables 8-15 */
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_sf_state = {
+   .dirty = {
+      .mesa  = (_NEW_LIGHT |
+		_NEW_POLYGON |
+		_NEW_LINE |
+		_NEW_SCISSOR |
+		_NEW_BUFFERS),
+      .brw   = BRW_NEW_CONTEXT,
+      .cache = CACHE_NEW_VS_PROG
+   },
+   .emit = upload_sf_state,
+};
diff --git a/i965/gen6_urb.c b/i965/gen6_urb.c
new file mode 100644
index 0000000..5445e40
--- /dev/null
+++ b/i965/gen6_urb.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "main/macros.h"
+#include "intel_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+static void
+prepare_urb( struct brw_context *brw )
+{
+   brw->urb.nr_vs_entries = 24;
+   if (brw->gs.prog_bo)
+      brw->urb.nr_gs_entries = 4;
+   else
+      brw->urb.nr_gs_entries = 0;
+   /* CACHE_NEW_VS_PROG */
+   brw->urb.vs_size = MIN2(brw->vs.prog_data->urb_entry_size, 1);
+
+   /* Check that the number of URB rows (8 floats each) allocated is less
+    * than the URB space.
+    */
+   assert((brw->urb.nr_vs_entries +
+	   brw->urb.nr_gs_entries) * brw->urb.vs_size * 8 < 64 * 1024);
+}
+
+static void
+upload_urb(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   assert(brw->urb.nr_vs_entries % 4 == 0);
+   assert(brw->urb.nr_gs_entries % 4 == 0);
+   /* GS requirement */
+   assert(!brw->gs.prog_bo || brw->urb.vs_size < 5);
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+
+   BEGIN_BATCH(3);
+   OUT_BATCH(CMD_URB << 16 | (3 - 2));
+   OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) |
+	     ((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT));
+   OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) |
+	     ((brw->urb.nr_gs_entries) << GEN6_URB_GS_ENTRIES_SHIFT));
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_urb = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CONTEXT,
+      .cache = CACHE_NEW_VS_PROG,
+   },
+   .prepare = prepare_urb,
+   .emit = upload_urb,
+};
diff --git a/i965/gen6_viewport_state.c b/i965/gen6_viewport_state.c
new file mode 100644
index 0000000..0c2aa42
--- /dev/null
+++ b/i965/gen6_viewport_state.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+#include "main/macros.h"
+
+/* The clip VP defines the guardband region where expensive clipping is skipped
+ * and fragments are allowed to be generated and clipped out cheaply by the SF.
+ *
+ * By setting it to NDC bounds of [-1,1], we don't do GB clipping.  It's
+ * supposed to cause seams to become visible in apps due to shared edges taking
+ * different clip/no clip paths depending on whether the rest of the prim ends
+ * up in the guardband or not.
+ */
+static void
+prepare_clip_vp(struct brw_context *brw)
+{
+   struct brw_clipper_viewport vp;
+
+   vp.xmin = -1.0;
+   vp.xmax = 1.0;
+   vp.ymin = -1.0;
+   vp.ymax = 1.0;
+
+   drm_intel_bo_unreference(brw->clip.vp_bo);
+   brw->clip.vp_bo = brw_cache_data(&brw->cache, BRW_CLIP_VP,
+				    &vp, sizeof(vp),
+				    NULL, 0);
+}
+
+const struct brw_tracked_state gen6_clip_vp = {
+   .dirty = {
+      .mesa = _NEW_VIEWPORT, /* XXX: not really, but we need nonzero */
+      .brw = 0,
+      .cache = 0,
+   },
+   .prepare = prepare_clip_vp,
+};
+
+static void
+prepare_sf_vp(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+   struct brw_sf_viewport sfv;
+   GLfloat y_scale, y_bias;
+   const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
+   const GLfloat *v = ctx->Viewport._WindowMap.m;
+
+   memset(&sfv, 0, sizeof(sfv));
+
+   /* _NEW_BUFFERS */
+   if (render_to_fbo) {
+      y_scale = 1.0;
+      y_bias = 0;
+   } else {
+      y_scale = -1.0;
+      y_bias = ctx->DrawBuffer->Height;
+   }
+
+   /* _NEW_VIEWPORT */
+   sfv.viewport.m00 = v[MAT_SX];
+   sfv.viewport.m11 = v[MAT_SY] * y_scale;
+   sfv.viewport.m22 = v[MAT_SZ] * depth_scale;
+   sfv.viewport.m30 = v[MAT_TX];
+   sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias;
+   sfv.viewport.m32 = v[MAT_TZ] * depth_scale;
+
+   drm_intel_bo_unreference(brw->sf.vp_bo);
+   brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP,
+				  &sfv, sizeof(sfv),
+				  NULL, 0);
+}
+
+const struct brw_tracked_state gen6_sf_vp = {
+   .dirty = {
+      .mesa = _NEW_VIEWPORT | _NEW_BUFFERS,
+      .brw = 0,
+      .cache = 0,
+   },
+   .prepare = prepare_sf_vp,
+};
+
+static void
+prepare_cc_vp(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_cc_viewport ccv;
+
+   /* _NEW_TRANSOFORM */
+   if (ctx->Transform.DepthClamp) {
+      /* _NEW_VIEWPORT */
+      ccv.min_depth = MIN2(ctx->Viewport.Near, ctx->Viewport.Far);
+      ccv.max_depth = MAX2(ctx->Viewport.Near, ctx->Viewport.Far);
+   } else {
+      ccv.min_depth = 0.0;
+      ccv.max_depth = 1.0;
+   }
+
+   drm_intel_bo_unreference(brw->cc.vp_bo);
+   brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv),
+				  NULL, 0);
+}
+
+const struct brw_tracked_state gen6_cc_vp = {
+   .dirty = {
+      .mesa = _NEW_VIEWPORT | _NEW_TRANSFORM,
+      .brw = 0,
+      .cache = 0,
+   },
+   .prepare = prepare_cc_vp,
+};
+
+static void prepare_viewport_state_pointers(struct brw_context *brw)
+{
+   brw_add_validated_bo(brw, brw->sf.state_bo);
+}
+
+static void upload_viewport_state_pointers(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(CMD_VIEWPORT_STATE_POINTERS << 16 | (4 - 2) |
+	     GEN6_CC_VIEWPORT_MODIFY |
+	     GEN6_SF_VIEWPORT_MODIFY |
+	     GEN6_CLIP_VIEWPORT_MODIFY);
+   OUT_RELOC(brw->clip.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_RELOC(brw->sf.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_RELOC(brw->cc.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_viewport_state = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH,
+      .cache = (CACHE_NEW_CLIP_VP |
+		CACHE_NEW_SF_VP |
+		CACHE_NEW_CC_VP)
+   },
+   .prepare = prepare_viewport_state_pointers,
+   .emit = upload_viewport_state_pointers,
+};
diff --git a/i965/gen6_vs_state.c b/i965/gen6_vs_state.c
new file mode 100644
index 0000000..fe597df
--- /dev/null
+++ b/i965/gen6_vs_state.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+#include "intel_batchbuffer.h"
+
+static void
+upload_vs_state(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+   const struct brw_vertex_program *vp =
+      brw_vertex_program_const(brw->vertex_program);
+   unsigned int nr_params = vp->program.Base.Parameters->NumParameters;
+   drm_intel_bo *constant_bo;
+   int i;
+
+   if (vp->use_const_buffer || nr_params == 0) {
+      /* Disable the push constant buffers. */
+      BEGIN_BATCH(5);
+      OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | (5 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   } else {
+      if (brw->vertex_program->IsNVProgram)
+	 _mesa_load_tracked_matrices(ctx);
+
+      /* Updates the ParamaterValues[i] pointers for all parameters of the
+       * basic type of PROGRAM_STATE_VAR.
+       */
+      _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
+
+      constant_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo",
+				       nr_params * 4 * sizeof(float),
+				       4096);
+      drm_intel_gem_bo_map_gtt(constant_bo);
+      for (i = 0; i < nr_params; i++) {
+	 memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float),
+		vp->program.Base.Parameters->ParameterValues[i],
+		4 * sizeof(float));
+      }
+      drm_intel_gem_bo_unmap_gtt(constant_bo);
+
+      BEGIN_BATCH(5);
+      OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 |
+		GEN6_CONSTANT_BUFFER_0_ENABLE |
+		(5 - 2));
+      OUT_RELOC(constant_bo,
+		I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
+		ALIGN(nr_params, 2) / 2 - 1);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+
+      drm_intel_bo_unreference(constant_bo);
+   }
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+
+   BEGIN_BATCH(6);
+   OUT_BATCH(CMD_3D_VS_STATE << 16 | (6 - 2));
+   OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
+	     (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+   OUT_BATCH(0); /* scratch space base offset */
+   OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
+	     (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
+	     (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
+   OUT_BATCH((0 << GEN6_VS_MAX_THREADS_SHIFT) |
+	     GEN6_VS_STATISTICS_ENABLE);
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_vs_state = {
+   .dirty = {
+      .mesa  = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
+      .brw   = (BRW_NEW_CURBE_OFFSETS |
+                BRW_NEW_NR_VS_SURFACES |
+		BRW_NEW_URB_FENCE |
+		BRW_NEW_CONTEXT),
+      .cache = CACHE_NEW_VS_PROG
+   },
+   .emit = upload_vs_state,
+};
diff --git a/i965/gen6_wm_state.c b/i965/gen6_wm_state.c
new file mode 100644
index 0000000..1eb17ca
--- /dev/null
+++ b/i965/gen6_wm_state.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+#include "intel_batchbuffer.h"
+
+static void
+upload_wm_state(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+   const struct brw_fragment_program *fp =
+      brw_fragment_program_const(brw->fragment_program);
+   unsigned int nr_params = fp->program.Base.Parameters->NumParameters;
+   drm_intel_bo *constant_bo;
+   int i;
+   uint32_t dw2, dw4, dw5, dw6;
+
+   if (fp->use_const_buffer || nr_params == 0) {
+      /* Disable the push constant buffers. */
+      BEGIN_BATCH(5);
+      OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 | (5 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   } else {
+      /* Updates the ParamaterValues[i] pointers for all parameters of the
+       * basic type of PROGRAM_STATE_VAR.
+       */
+      _mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
+
+      constant_bo = drm_intel_bo_alloc(intel->bufmgr, "WM constant_bo",
+				       nr_params * 4 * sizeof(float),
+				       4096);
+      drm_intel_gem_bo_map_gtt(constant_bo);
+      for (i = 0; i < nr_params; i++) {
+	 memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float),
+		fp->program.Base.Parameters->ParameterValues[i],
+		4 * sizeof(float));
+      }
+      drm_intel_gem_bo_unmap_gtt(constant_bo);
+
+      BEGIN_BATCH(5);
+      OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 |
+		GEN6_CONSTANT_BUFFER_0_ENABLE |
+		(5 - 2));
+      OUT_RELOC(constant_bo,
+		I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
+		ALIGN(nr_params, 2) / 2 - 1);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+
+      drm_intel_bo_unreference(constant_bo);
+   }
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+
+   dw2 = dw4 = dw5 = dw6 = 0;
+   dw4 |= GEN6_WM_STATISTICS_ENABLE;
+   dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
+   dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
+
+   /* BRW_NEW_NR_SURFACES */
+   dw2 |= brw->wm.nr_surfaces << GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT;
+
+   /* CACHE_NEW_SAMPLER */
+   dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT;
+   dw4 |= (1 << GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
+
+   dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT;
+   dw5 |= GEN6_WM_DISPATCH_ENABLE;
+
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   if (fp->isGLSL)
+      dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
+   else
+      dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
+
+   /* _NEW_LINE */
+   if (ctx->Line.StippleFlag)
+      dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE;
+
+   /* _NEW_POLYGONSTIPPLE */
+   if (ctx->Polygon.StippleFlag)
+      dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE;
+
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   if (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS))
+      dw5 |= GEN6_WM_USES_SOURCE_DEPTH | GEN6_WM_USES_SOURCE_W;
+   if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
+      dw5 |= GEN6_WM_COMPUTED_DEPTH;
+
+   /* _NEW_COLOR */
+   if (fp->program.UsesKill || ctx->Color.AlphaEnabled)
+      dw5 |= GEN6_WM_KILL_ENABLE;
+
+   /* This should probably be FS inputs read */
+   dw6 |= brw_count_bits(brw->vs.prog_data->outputs_written) <<
+      GEN6_WM_NUM_SF_OUTPUTS_SHIFT;
+
+   BEGIN_BATCH(9);
+   OUT_BATCH(CMD_3D_WM_STATE << 16 | (9 - 2));
+   OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_BATCH(dw2);
+   OUT_BATCH(0); /* scratch space base offset */
+   OUT_BATCH(dw4);
+   OUT_BATCH(dw5);
+   OUT_BATCH(dw6);
+   OUT_BATCH(0); /* kernel 1 pointer */
+   OUT_BATCH(0); /* kernel 2 pointer */
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_wm_state = {
+   .dirty = {
+      .mesa  = _NEW_LINE | _NEW_POLYGONSTIPPLE | _NEW_COLOR,
+      .brw   = (BRW_NEW_CURBE_OFFSETS |
+		BRW_NEW_FRAGMENT_PROGRAM |
+                BRW_NEW_NR_WM_SURFACES |
+		BRW_NEW_URB_FENCE |
+		BRW_NEW_BATCH),
+      .cache = CACHE_NEW_SAMPLER
+   },
+   .emit = upload_wm_state,
+};
diff --git a/shared/intel_batchbuffer.c b/shared/intel_batchbuffer.c
index ca6e2fa..9768b0d 100644
--- a/shared/intel_batchbuffer.c
+++ b/shared/intel_batchbuffer.c
@@ -32,44 +32,6 @@
 #include "intel_bufmgr.h"
 #include "intel_buffers.h"
 
-/* Relocations in kernel space:
- *    - pass dma buffer seperately
- *    - memory manager knows how to patch
- *    - pass list of dependent buffers
- *    - pass relocation list
- *
- * Either:
- *    - get back an offset for buffer to fire
- *    - memory manager knows how to fire buffer
- *
- * Really want the buffer to be AGP and pinned.
- *
- */
-
-/* Cliprect fence: The highest fence protecting a dma buffer
- * containing explicit cliprect information.  Like the old drawable
- * lock but irq-driven.  X server must wait for this fence to expire
- * before changing cliprects [and then doing sw rendering?].  For
- * other dma buffers, the scheduler will grab current cliprect info
- * and mix into buffer.  X server must hold the lock while changing
- * cliprects???  Make per-drawable.  Need cliprects in shared memory
- * -- beats storing them with every cmd buffer in the queue.
- *
- * ==> X server must wait for this fence to expire before touching the
- * framebuffer with new cliprects.
- *
- * ==> Cliprect-dependent buffers associated with a
- * cliprect-timestamp.  All of the buffers associated with a timestamp
- * must go to hardware before any buffer with a newer timestamp.
- *
- * ==> Dma should be queued per-drawable for correct X/GL
- * synchronization.  Or can fences be used for this?
- *
- * Applies to: Blit operations, metaops, X server operations -- X
- * server automatically waits on its own dma to complete before
- * modifying cliprects ???
- */
-
 void
 intel_batchbuffer_reset(struct intel_batchbuffer *batch)
 {
@@ -80,7 +42,7 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch)
       batch->buf = NULL;
    }
 
-   if (!batch->buffer && intel->ttm == GL_TRUE)
+   if (!batch->buffer)
       batch->buffer = malloc (intel->maxBatchSize);
 
    batch->buf = dri_bo_alloc(intel->bufmgr, "batchbuffer",
@@ -94,7 +56,6 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch)
    batch->size = intel->maxBatchSize;
    batch->ptr = batch->map;
    batch->dirty_state = ~0;
-   batch->cliprect_mode = IGNORE_CLIPRECTS;
 }
 
 struct intel_batchbuffer *
@@ -129,13 +90,10 @@ intel_batchbuffer_free(struct intel_batchbuffer *batch)
 /* TODO: Push this whole function into bufmgr.
  */
 static void
-do_flush_locked(struct intel_batchbuffer *batch,
-		GLuint used, GLboolean allow_unlock)
+do_flush_locked(struct intel_batchbuffer *batch, GLuint used)
 {
    struct intel_context *intel = batch->intel;
    int ret = 0;
-   unsigned int num_cliprects = 0;
-   struct drm_clip_rect *cliprects = NULL;
    int x_off = 0, y_off = 0;
 
    if (batch->buffer)
@@ -146,31 +104,8 @@ do_flush_locked(struct intel_batchbuffer *batch,
    batch->map = NULL;
    batch->ptr = NULL;
 
-
-   if (batch->cliprect_mode == LOOP_CLIPRECTS) {
-      intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
-   }
-   /* Dispatch the batchbuffer, if it has some effect (nonzero cliprects).
-    * Can't short-circuit like this once we have hardware contexts, but we
-    * should always be in DRI2 mode by then anyway.
-    */
-   if ((batch->cliprect_mode != LOOP_CLIPRECTS ||
-	num_cliprects != 0) && !intel->no_hw) {
-      dri_bo_exec(batch->buf, used, cliprects, num_cliprects,
-		  (x_off & 0xffff) | (y_off << 16));
-   }
-
-   if (batch->cliprect_mode == LOOP_CLIPRECTS && num_cliprects == 0) {
-      if (allow_unlock) {
-	 /* If we are not doing any actual user-visible rendering,
-	  * do a sched_yield to keep the app from pegging the cpu while
-	  * achieving nothing.
-	  */
-         UNLOCK_HARDWARE(intel);
-         sched_yield();
-         LOCK_HARDWARE(intel);
-      }
-   }
+   if (!intel->no_hw)
+      dri_bo_exec(batch->buf, used, NULL, 0, (x_off & 0xffff) | (y_off << 16));
 
    if (INTEL_DEBUG & DEBUG_BATCH) {
       dri_bo_map(batch->buf, GL_FALSE);
@@ -183,7 +118,6 @@ do_flush_locked(struct intel_batchbuffer *batch,
    }
 
    if (ret != 0) {
-      UNLOCK_HARDWARE(intel);
       exit(1);
    }
    intel->vtbl.new_batch(intel);
@@ -196,15 +130,14 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
    struct intel_context *intel = batch->intel;
    GLuint used = batch->ptr - batch->map;
 
-   if (intel->first_post_swapbuffers_batch == NULL) {
+   if (!intel->using_dri2_swapbuffers &&
+       intel->first_post_swapbuffers_batch == NULL) {
       intel->first_post_swapbuffers_batch = intel->batch->buf;
       drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
    }
 
-   if (used == 0) {
-      batch->cliprect_mode = IGNORE_CLIPRECTS;
+   if (used == 0)
       return;
-   }
 
    if (INTEL_DEBUG & DEBUG_BATCH)
       fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
@@ -212,7 +145,7 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
 
    batch->reserved_space = 0;
    /* Emit a flush if the bufmgr doesn't do it for us. */
-   if (intel->always_flush_cache || !intel->ttm) {
+   if (intel->always_flush_cache) {
       intel_batchbuffer_emit_mi_flush(batch);
       used = batch->ptr - batch->map;
    }
@@ -226,9 +159,10 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
    }
 
    /* Mark the end of the buffer. */
-   *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; /* noop */
+   *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END;
    batch->ptr += 4;
    used = batch->ptr - batch->map;
+   assert (used <= batch->buf->size);
 
    /* Workaround for recursive batchbuffer flushing: If the window is
     * moved, we can get into a case where we try to flush during a
@@ -244,14 +178,15 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
    if (intel->vtbl.finish_batch)
       intel->vtbl.finish_batch(intel);
 
+   /* Check that we didn't just wrap our batchbuffer at a bad time. */
+   assert(!intel->no_batch_wrap);
+
    batch->reserved_space = BATCH_RESERVED;
 
    /* TODO: Just pass the relocation list and dma buffer up to the
     * kernel.
     */
-   LOCK_HARDWARE(intel);
-   do_flush_locked(batch, used, GL_FALSE);
-   UNLOCK_HARDWARE(intel);
+   do_flush_locked(batch, used);
 
    if (INTEL_DEBUG & DEBUG_SYNC) {
       fprintf(stderr, "waiting for idle\n");
@@ -275,9 +210,11 @@ intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
 {
    int ret;
 
+   assert(delta < buffer->size);
+
    if (batch->ptr - batch->map > batch->buf->size)
-    _mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n",
-		  batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size);
+    printf ("bad relocation ptr %p map %p offset %d size %lu\n",
+	    batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size);
    ret = dri_bo_emit_reloc(batch->buf, read_domains, write_domain,
 			   delta, batch->ptr - batch->map, buffer);
 
@@ -291,13 +228,39 @@ intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
    return GL_TRUE;
 }
 
+GLboolean
+intel_batchbuffer_emit_reloc_fenced(struct intel_batchbuffer *batch,
+				    drm_intel_bo *buffer,
+				    uint32_t read_domains, uint32_t write_domain,
+				    uint32_t delta)
+{
+   int ret;
+
+   assert(delta < buffer->size);
+
+   if (batch->ptr - batch->map > batch->buf->size)
+    printf ("bad relocation ptr %p map %p offset %d size %lu\n",
+	    batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size);
+   ret = drm_intel_bo_emit_reloc_fence(batch->buf, batch->ptr - batch->map,
+				       buffer, delta,
+				       read_domains, write_domain);
+
+   /*
+    * Using the old buffer offset, write in what the right data would
+    * be, in case the buffer doesn't move and we can short-circuit the
+    * relocation processing in the kernel
+    */
+   intel_batchbuffer_emit_dword (batch, buffer->offset + delta);
+
+   return GL_TRUE;
+}
+
 void
 intel_batchbuffer_data(struct intel_batchbuffer *batch,
-                       const void *data, GLuint bytes,
-		       enum cliprect_mode cliprect_mode)
+                       const void *data, GLuint bytes)
 {
    assert((bytes & 3) == 0);
-   intel_batchbuffer_require_space(batch, bytes, cliprect_mode);
+   intel_batchbuffer_require_space(batch, bytes);
    __memcpy(batch->ptr, data, bytes);
    batch->ptr += bytes;
 }
@@ -314,7 +277,7 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
    struct intel_context *intel = batch->intel;
 
    if (intel->gen >= 4) {
-      BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+      BEGIN_BATCH(4);
       OUT_BATCH(_3DSTATE_PIPE_CONTROL |
 		PIPE_CONTROL_INSTRUCTION_FLUSH |
 		PIPE_CONTROL_WRITE_FLUSH |
@@ -324,7 +287,7 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
       OUT_BATCH(0); /* write data */
       ADVANCE_BATCH();
    } else {
-      BEGIN_BATCH(1, IGNORE_CLIPRECTS);
+      BEGIN_BATCH(1);
       OUT_BATCH(MI_FLUSH);
       ADVANCE_BATCH();
    }
diff --git a/shared/intel_batchbuffer.h b/shared/intel_batchbuffer.h
index d4a9445..e5ad261 100644
--- a/shared/intel_batchbuffer.h
+++ b/shared/intel_batchbuffer.h
@@ -10,35 +10,6 @@
 #define BATCH_SZ 16384
 #define BATCH_RESERVED 16
 
-enum cliprect_mode {
-   /**
-    * Batchbuffer contents may be looped over per cliprect, but do not
-    * require it.
-    */
-   IGNORE_CLIPRECTS,
-   /**
-    * Batchbuffer contents require looping over per cliprect at batch submit
-    * time.
-    *
-    * This will be upgraded to NO_LOOP_CLIPRECTS when there's a single
-    * constant cliprect, as in DRI2 or FBO rendering.
-    */
-   LOOP_CLIPRECTS,
-   /**
-    * Batchbuffer contents contain drawing that should not be executed multiple
-    * times.
-    */
-   NO_LOOP_CLIPRECTS,
-   /**
-    * Batchbuffer contents contain drawing that already handles cliprects, such
-    * as 2D drawing to front/back/depth that doesn't respect DRAWING_RECTANGLE.
-    *
-    * Equivalent behavior to NO_LOOP_CLIPRECTS, but may not persist in batch
-    * outside of LOCK/UNLOCK.  This is upgraded to just NO_LOOP_CLIPRECTS when
-    * there's a constant cliprect, as in DRI2 or FBO rendering.
-    */
-   REFERENCES_CLIPRECTS
-};
 
 struct intel_batchbuffer
 {
@@ -51,15 +22,15 @@ struct intel_batchbuffer
    GLubyte *map;
    GLubyte *ptr;
 
-   enum cliprect_mode cliprect_mode;
-
    GLuint size;
 
+#ifdef DEBUG
    /** Tracking of BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() debugging */
    struct {
       GLuint total;
       GLubyte *start_ptr;
    } emit;
+#endif
 
    GLuint dirty_state;
    GLuint reserved_space;
@@ -85,8 +56,7 @@ void intel_batchbuffer_reset(struct intel_batchbuffer *batch);
  * intel_buffer_dword() calls.
  */
 void intel_batchbuffer_data(struct intel_batchbuffer *batch,
-                            const void *data, GLuint bytes,
-			    enum cliprect_mode cliprect_mode);
+                            const void *data, GLuint bytes);
 
 void intel_batchbuffer_release_space(struct intel_batchbuffer *batch,
                                      GLuint bytes);
@@ -96,8 +66,24 @@ GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
 				       uint32_t read_domains,
 				       uint32_t write_domain,
 				       uint32_t offset);
+GLboolean intel_batchbuffer_emit_reloc_fenced(struct intel_batchbuffer *batch,
+					      drm_intel_bo *buffer,
+					      uint32_t read_domains,
+					      uint32_t write_domain,
+					      uint32_t offset);
 void intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch);
 
+static INLINE uint32_t float_as_int(float f)
+{
+   union {
+      float f;
+      uint32_t d;
+   } fi;
+
+   fi.f = f;
+   return fi.d;
+}
+
 /* Inline functions - might actually be better off with these
  * non-inlined.  Certainly better off switching all command packets to
  * be passed as structs rather than dwords, but that's a little bit of
@@ -113,66 +99,73 @@ intel_batchbuffer_space(struct intel_batchbuffer *batch)
 static INLINE void
 intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword)
 {
-   assert(batch->map);
+#ifdef DEBUG
    assert(intel_batchbuffer_space(batch) >= 4);
+#endif
    *(GLuint *) (batch->ptr) = dword;
    batch->ptr += 4;
 }
 
 static INLINE void
+intel_batchbuffer_emit_float(struct intel_batchbuffer *batch, float f)
+{
+   intel_batchbuffer_emit_dword(batch, float_as_int(f));
+}
+
+static INLINE void
 intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
-                                GLuint sz,
-				enum cliprect_mode cliprect_mode)
+                                GLuint sz)
 {
+#ifdef DEBUG
    assert(sz < batch->size - 8);
+#endif
    if (intel_batchbuffer_space(batch) < sz)
       intel_batchbuffer_flush(batch);
+}
+
+static INLINE void
+intel_batchbuffer_begin(struct intel_batchbuffer *batch, int n)
+{
+   intel_batchbuffer_require_space(batch, n * 4);
+#ifdef DEBUG
+   assert(batch->map);
+   assert(batch->emit.start_ptr == NULL);
+   batch->emit.total = n * 4;
+   batch->emit.start_ptr = batch->ptr;
+#endif
+}
 
-   if ((cliprect_mode == LOOP_CLIPRECTS ||
-	cliprect_mode == REFERENCES_CLIPRECTS) &&
-       batch->intel->constant_cliprect)
-      cliprect_mode = NO_LOOP_CLIPRECTS;
-
-   if (cliprect_mode != IGNORE_CLIPRECTS) {
-      if (batch->cliprect_mode == IGNORE_CLIPRECTS) {
-	 batch->cliprect_mode = cliprect_mode;
-      } else {
-	 if (batch->cliprect_mode != cliprect_mode) {
-	    intel_batchbuffer_flush(batch);
-	    batch->cliprect_mode = cliprect_mode;
-	 }
-      }
+static INLINE void
+intel_batchbuffer_advance(struct intel_batchbuffer *batch)
+{
+#ifdef DEBUG
+   unsigned int _n = batch->ptr - batch->emit.start_ptr;
+   assert(batch->emit.start_ptr != NULL);
+   if (_n != batch->emit.total) {
+      fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n",
+	      _n, batch->emit.total);
+      abort();
    }
+   batch->emit.start_ptr = NULL;
+#endif
 }
 
 /* Here are the crusty old macros, to be removed:
  */
 #define BATCH_LOCALS
 
-#define BEGIN_BATCH(n, cliprect_mode) do {				\
-   intel_batchbuffer_require_space(intel->batch, (n)*4, cliprect_mode); \
-   assert(intel->batch->emit.start_ptr == NULL);			\
-   intel->batch->emit.total = (n) * 4;					\
-   intel->batch->emit.start_ptr = intel->batch->ptr;			\
-} while (0)
-
+#define BEGIN_BATCH(n) intel_batchbuffer_begin(intel->batch, n)
 #define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d)
-
+#define OUT_BATCH_F(f) intel_batchbuffer_emit_float(intel->batch,f)
 #define OUT_RELOC(buf, read_domains, write_domain, delta) do {		\
-   assert((unsigned) (delta) < buf->size);				\
    intel_batchbuffer_emit_reloc(intel->batch, buf,			\
 				read_domains, write_domain, delta);	\
 } while (0)
+#define OUT_RELOC_FENCED(buf, read_domains, write_domain, delta) do {	\
+   intel_batchbuffer_emit_reloc_fenced(intel->batch, buf,		\
+				       read_domains, write_domain, delta); \
+} while (0)
 
-#define ADVANCE_BATCH() do {						\
-   unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr;	\
-   assert(intel->batch->emit.start_ptr != NULL);			\
-   if (_n != intel->batch->emit.total) {				\
-      fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n",	\
-	      _n, intel->batch->emit.total);				\
-      abort();								\
-   }									\
-   intel->batch->emit.start_ptr = NULL;					\
-} while(0)
+#define ADVANCE_BATCH() intel_batchbuffer_advance(intel->batch);
 
 #endif
diff --git a/shared/intel_blit.c b/shared/intel_blit.c
index 9f638b0..f2769aa 100644
--- a/shared/intel_blit.c
+++ b/shared/intel_blit.c
@@ -38,141 +38,9 @@
 #include "intel_reg.h"
 #include "intel_regions.h"
 #include "intel_batchbuffer.h"
-#include "intel_chipset.h"
 
 #define FILE_DEBUG_FLAG DEBUG_BLIT
 
-/**
- * Copy the back color buffer to the front color buffer. 
- * Used for SwapBuffers().
- */
-void
-intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
-                const drm_clip_rect_t * rect)
-{
-
-   struct intel_context *intel;
-   const intelScreenPrivate *intelScreen;
-
-   DBG("%s\n", __FUNCTION__);
-
-   assert(dPriv);
-
-   intel = intelScreenContext(dPriv->driScreenPriv->private);
-   if (!intel)
-      return;
-
-   intelScreen = intel->intelScreen;
-
-   /* The LOCK_HARDWARE is required for the cliprects.  Buffer offsets
-    * should work regardless.
-    */
-   LOCK_HARDWARE(intel);
-
-   if (dPriv && dPriv->numClipRects) {
-      struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
-      struct intel_region *src, *dst;
-      int nbox = dPriv->numClipRects;
-      drm_clip_rect_t *pbox = dPriv->pClipRects;
-      int cpp;
-      int src_pitch, dst_pitch;
-      unsigned short src_x, src_y;
-      int BR13, CMD;
-      int i;
-      dri_bo *aper_array[3];
-
-      src = intel_get_rb_region(&intel_fb->Base, BUFFER_BACK_LEFT);
-      dst = intel_get_rb_region(&intel_fb->Base, BUFFER_FRONT_LEFT);
-
-      src_pitch = src->pitch * src->cpp;
-      dst_pitch = dst->pitch * dst->cpp;
-
-      cpp = src->cpp;
-
-      ASSERT(intel_fb);
-      ASSERT(intel_fb->Base.Name == 0);    /* Not a user-created FBO */
-      ASSERT(src);
-      ASSERT(dst);
-      ASSERT(src->cpp == dst->cpp);
-
-      if (cpp == 2) {
-	 BR13 = (0xCC << 16) | BR13_565;
-	 CMD = XY_SRC_COPY_BLT_CMD;
-      }
-      else {
-	 BR13 = (0xCC << 16) | BR13_8888;
-	 CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
-      }
-
-      assert(src->tiling != I915_TILING_Y);
-      assert(dst->tiling != I915_TILING_Y);
-#ifndef I915
-      if (src->tiling != I915_TILING_NONE) {
-	 CMD |= XY_SRC_TILED;
-	 src_pitch /= 4;
-      }
-      if (dst->tiling != I915_TILING_NONE) {
-	 CMD |= XY_DST_TILED;
-	 dst_pitch /= 4;
-      }
-#endif
-      /* do space/cliprects check before going any further */
-      intel_batchbuffer_require_space(intel->batch, 8 * 4,
-				      REFERENCES_CLIPRECTS);
-   again:
-      aper_array[0] = intel->batch->buf;
-      aper_array[1] = dst->buffer;
-      aper_array[2] = src->buffer;
-
-      if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
-	intel_batchbuffer_flush(intel->batch);
-	goto again;
-      }
-
-      for (i = 0; i < nbox; i++, pbox++) {
-	 drm_clip_rect_t box = *pbox;
-
-	 if (rect) {
-	    if (!intel_intersect_cliprects(&box, &box, rect))
-	       continue;
-	 }
-
-	 if (box.x1 >= box.x2 ||
-	     box.y1 >= box.y2)
-	    continue;
-
-	 assert(box.x1 < box.x2);
-	 assert(box.y1 < box.y2);
-	 src_x = box.x1 - dPriv->x + dPriv->backX;
-	 src_y = box.y1 - dPriv->y + dPriv->backY;
-
-	 BEGIN_BATCH(8, REFERENCES_CLIPRECTS);
-	 OUT_BATCH(CMD);
-	 OUT_BATCH(BR13 | dst_pitch);
-	 OUT_BATCH((box.y1 << 16) | box.x1);
-	 OUT_BATCH((box.y2 << 16) | box.x2);
-
-	 OUT_RELOC(dst->buffer,
-		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-		   0);
-	 OUT_BATCH((src_y << 16) | src_x);
-	 OUT_BATCH(src_pitch);
-	 OUT_RELOC(src->buffer,
-		   I915_GEM_DOMAIN_RENDER, 0,
-		   0);
-	 ADVANCE_BATCH();
-      }
-
-      /* Flush the rendering and the batch so that the results all land on the
-       * screen in a timely fashion.
-       */
-      intel_batchbuffer_emit_mi_flush(intel->batch);
-      intel_batchbuffer_flush(intel->batch);
-   }
-
-   UNLOCK_HARDWARE(intel);
-}
-
 static GLuint translate_raster_op(GLenum logicop)
 {
    switch(logicop) {
@@ -221,6 +89,10 @@ intelEmitCopyBlit(struct intel_context *intel,
    dri_bo *aper_array[3];
    BATCH_LOCALS;
 
+   /* Blits are in a different ringbuffer so we don't use them. */
+   if (intel->gen >= 6)
+      return GL_FALSE;
+
    if (dst_tiling != I915_TILING_NONE) {
       if (dst_offset & 4095)
 	 return GL_FALSE;
@@ -234,7 +106,7 @@ intelEmitCopyBlit(struct intel_context *intel,
 	 return GL_FALSE;
    }
 
-   /* do space/cliprects check before going any further */
+   /* do space check before going any further */
    do {
        aper_array[0] = intel->batch->buf;
        aper_array[1] = dst_buffer;
@@ -247,27 +119,26 @@ intelEmitCopyBlit(struct intel_context *intel,
            break;
    } while (pass < 2);
 
+   intel_prepare_render(intel);
+
    if (pass >= 2) {
-       LOCK_HARDWARE(intel);
-       dri_bo_map(dst_buffer, GL_TRUE);
-       dri_bo_map(src_buffer, GL_FALSE);
-       _mesa_copy_rect((GLubyte *)dst_buffer->virtual + dst_offset,
-                       cpp,
-                       dst_pitch,
-                       dst_x, dst_y, 
-                       w, h, 
-                       (GLubyte *)src_buffer->virtual + src_offset, 
-                       src_pitch,
-                       src_x, src_y);
-       
-       dri_bo_unmap(src_buffer);
-       dri_bo_unmap(dst_buffer);
-       UNLOCK_HARDWARE(intel);
-
-       return GL_TRUE;
+      drm_intel_gem_bo_map_gtt(dst_buffer);
+      drm_intel_gem_bo_map_gtt(src_buffer);
+      _mesa_copy_rect((GLubyte *)dst_buffer->virtual + dst_offset,
+		      cpp,
+		      dst_pitch,
+		      dst_x, dst_y,
+		      w, h,
+		      (GLubyte *)src_buffer->virtual + src_offset,
+		      src_pitch,
+		      src_x, src_y);
+      drm_intel_gem_bo_unmap_gtt(src_buffer);
+      drm_intel_gem_bo_unmap_gtt(dst_buffer);
+
+      return GL_TRUE;
    }
 
-   intel_batchbuffer_require_space(intel->batch, 8 * 4, NO_LOOP_CLIPRECTS);
+   intel_batchbuffer_require_space(intel->batch, 8 * 4);
    DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
        __FUNCTION__,
        src_buffer, src_pitch, src_offset, src_x, src_y,
@@ -312,19 +183,19 @@ intelEmitCopyBlit(struct intel_context *intel,
    assert(dst_x < dst_x2);
    assert(dst_y < dst_y2);
 
-   BEGIN_BATCH(8, NO_LOOP_CLIPRECTS);
+   BEGIN_BATCH(8);
    OUT_BATCH(CMD);
    OUT_BATCH(BR13 | (uint16_t)dst_pitch);
    OUT_BATCH((dst_y << 16) | dst_x);
    OUT_BATCH((dst_y2 << 16) | dst_x2);
-   OUT_RELOC(dst_buffer,
-	     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-	     dst_offset);
+   OUT_RELOC_FENCED(dst_buffer,
+		    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		    dst_offset);
    OUT_BATCH((src_y << 16) | src_x);
    OUT_BATCH((uint16_t)src_pitch);
-   OUT_RELOC(src_buffer,
-	     I915_GEM_DOMAIN_RENDER, 0,
-	     src_offset);
+   OUT_RELOC_FENCED(src_buffer,
+		    I915_GEM_DOMAIN_RENDER, 0,
+		    src_offset);
    ADVANCE_BATCH();
 
    intel_batchbuffer_emit_mi_flush(intel->batch);
@@ -346,12 +217,13 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
    struct intel_context *intel = intel_context(ctx);
    struct gl_framebuffer *fb = ctx->DrawBuffer;
    GLuint clear_depth;
-   GLbitfield skipBuffers = 0;
-   unsigned int num_cliprects;
-   struct drm_clip_rect *cliprects;
-   int x_off, y_off;
+   GLboolean all;
+   GLint cx, cy, cw, ch;
    BATCH_LOCALS;
 
+   /* Blits are in a different ringbuffer so we don't use them. */
+   assert(intel->gen < 6);
+
    /*
     * Compute values for clearing the buffers.
     */
@@ -363,187 +235,147 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
       clear_depth |= (ctx->Stencil.Clear & 0xff) << 24;
    }
 
-   /* If clearing both depth and stencil, skip BUFFER_BIT_STENCIL in
-    * the loop below.
-    */
-   if ((mask & BUFFER_BIT_DEPTH) && (mask & BUFFER_BIT_STENCIL)) {
-      skipBuffers = BUFFER_BIT_STENCIL;
-   }
-
-   LOCK_HARDWARE(intel);
-
-   intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
-   if (num_cliprects) {
-      GLint cx, cy, cw, ch;
-      drm_clip_rect_t clear;
-      int i;
-
-      /* Get clear bounds after locking */
-      cx = fb->_Xmin;
+   cx = fb->_Xmin;
+   if (fb->Name == 0)
+      cy = ctx->DrawBuffer->Height - fb->_Ymax;
+   else
       cy = fb->_Ymin;
-      cw = fb->_Xmax - cx;
-      ch = fb->_Ymax - cy;
+   cw = fb->_Xmax - fb->_Xmin;
+   ch = fb->_Ymax - fb->_Ymin;
 
-      if (fb->Name == 0) {
-         /* clearing a window */
+   if (cw == 0 || ch == 0)
+      return;
 
-         /* flip top to bottom */
-         clear.x1 = cx + x_off;
-         clear.y1 = intel->driDrawable->y + intel->driDrawable->h - cy - ch;
-         clear.x2 = clear.x1 + cw;
-         clear.y2 = clear.y1 + ch;
-      }
-      else {
-         /* clearing FBO */
-         assert(num_cliprects == 1);
-         assert(cliprects == &intel->fboRect);
-         clear.x1 = cx;
-         clear.y1 = cy;
-         clear.x2 = clear.x1 + cw;
-         clear.y2 = clear.y1 + ch;
-         /* no change to mask */
+   GLuint buf;
+   all = (cw == fb->Width && ch == fb->Height);
+
+   intel_prepare_render(intel);
+
+   /* Loop over all renderbuffers */
+   for (buf = 0; buf < BUFFER_COUNT && mask; buf++) {
+      const GLbitfield bufBit = 1 << buf;
+      struct intel_renderbuffer *irb;
+      drm_intel_bo *write_buffer;
+      int x1, y1, x2, y2;
+      uint32_t clear_val;
+      uint32_t BR13, CMD;
+      int pitch, cpp;
+      drm_intel_bo *aper_array[2];
+
+      if (!(mask & bufBit))
+	 continue;
+
+      /* OK, clear this renderbuffer */
+      irb = intel_get_renderbuffer(fb, buf);
+      write_buffer = intel_region_buffer(intel, irb->region,
+					 all ? INTEL_WRITE_FULL :
+					 INTEL_WRITE_PART);
+      x1 = cx + irb->region->draw_x;
+      y1 = cy + irb->region->draw_y;
+      x2 = cx + cw + irb->region->draw_x;
+      y2 = cy + ch + irb->region->draw_y;
+
+      pitch = irb->region->pitch;
+      cpp = irb->region->cpp;
+
+      DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
+	  __FUNCTION__,
+	  irb->region->buffer, (pitch * cpp),
+	  x1, y1, x2 - x1, y2 - y1);
+
+      BR13 = 0xf0 << 16;
+      CMD = XY_COLOR_BLT_CMD;
+
+      /* Setup the blit command */
+      if (cpp == 4) {
+	 BR13 |= BR13_8888;
+	 if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) {
+	    if (mask & BUFFER_BIT_DEPTH)
+	       CMD |= XY_BLT_WRITE_RGB;
+	    if (mask & BUFFER_BIT_STENCIL)
+	       CMD |= XY_BLT_WRITE_ALPHA;
+	 } else {
+	    /* clearing RGBA */
+	    CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
+	 }
+      } else {
+	 ASSERT(cpp == 2);
+	 BR13 |= BR13_565;
       }
 
-      for (i = 0; i < num_cliprects; i++) {
-         const drm_clip_rect_t *box = &cliprects[i];
-         drm_clip_rect_t b;
-         GLuint buf;
-         GLuint clearMask = mask;      /* use copy, since we modify it below */
-         GLboolean all = (cw == fb->Width && ch == fb->Height);
-
-         if (!all) {
-            intel_intersect_cliprects(&b, &clear, box);
-         }
-         else {
-            b = *box;
-         }
-
-         if (b.x1 >= b.x2 || b.y1 >= b.y2)
-            continue;
-
-         if (0)
-            _mesa_printf("clear %d,%d..%d,%d, mask %x\n",
-                         b.x1, b.y1, b.x2, b.y2, mask);
-
-         /* Loop over all renderbuffers */
-         for (buf = 0; buf < BUFFER_COUNT && clearMask; buf++) {
-            const GLbitfield bufBit = 1 << buf;
-            if ((clearMask & bufBit) && !(bufBit & skipBuffers)) {
-               /* OK, clear this renderbuffer */
-	       struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, buf);
-               dri_bo *write_buffer =
-                  intel_region_buffer(intel, irb->region,
-                                      all ? INTEL_WRITE_FULL :
-                                      INTEL_WRITE_PART);
-	       int x1 = b.x1 + irb->region->draw_x;
-	       int y1 = b.y1 + irb->region->draw_y;
-	       int x2 = b.x2 + irb->region->draw_x;
-	       int y2 = b.y2 + irb->region->draw_y;
-
-               GLuint clearVal;
-               GLint pitch, cpp;
-               GLuint BR13, CMD;
-
-               pitch = irb->region->pitch;
-               cpp = irb->region->cpp;
-
-               DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
-                   __FUNCTION__,
-                   irb->region->buffer, (pitch * cpp),
-                   x1, y1, x2 - x1, y2 - y1);
-
-	       BR13 = 0xf0 << 16;
-	       CMD = XY_COLOR_BLT_CMD;
-
-               /* Setup the blit command */
-               if (cpp == 4) {
-                  BR13 |= BR13_8888;
-                  if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) {
-                     if (clearMask & BUFFER_BIT_DEPTH)
-                        CMD |= XY_BLT_WRITE_RGB;
-                     if (clearMask & BUFFER_BIT_STENCIL)
-                        CMD |= XY_BLT_WRITE_ALPHA;
-                  }
-                  else {
-                     /* clearing RGBA */
-                     CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
-                  }
-               }
-               else {
-                  ASSERT(cpp == 2);
-                  BR13 |= BR13_565;
-               }
-
-	       assert(irb->region->tiling != I915_TILING_Y);
+      assert(irb->region->tiling != I915_TILING_Y);
 
 #ifndef I915
-	       if (irb->region->tiling != I915_TILING_NONE) {
-		  CMD |= XY_DST_TILED;
-		  pitch /= 4;
-	       }
+      if (irb->region->tiling != I915_TILING_NONE) {
+	 CMD |= XY_DST_TILED;
+	 pitch /= 4;
+      }
 #endif
-	       BR13 |= (pitch * cpp);
-
-               if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) {
-                  clearVal = clear_depth;
-               }
-               else {
-		  uint8_t clear[4];
-		  GLclampf *color = ctx->Color.ClearColor;
-
-		  CLAMPED_FLOAT_TO_UBYTE(clear[0], color[0]);
-		  CLAMPED_FLOAT_TO_UBYTE(clear[1], color[1]);
-		  CLAMPED_FLOAT_TO_UBYTE(clear[2], color[2]);
-		  CLAMPED_FLOAT_TO_UBYTE(clear[3], color[3]);
-
-		  switch (irb->Base.Format) {
-		  case MESA_FORMAT_ARGB8888:
-		  case MESA_FORMAT_XRGB8888:
-		     clearVal = intel->ClearColor8888;
-		     break;
-		  case MESA_FORMAT_RGB565:
-		     clearVal = intel->ClearColor565;
-		     break;
-		  case MESA_FORMAT_ARGB4444:
-		     clearVal = PACK_COLOR_4444(clear[3], clear[0],
-						clear[1], clear[2]);
-		     break;
-		  case MESA_FORMAT_ARGB1555:
-		     clearVal = PACK_COLOR_1555(clear[3], clear[0],
-						clear[1], clear[2]);
-		     break;
-		  default:
-		     _mesa_problem(ctx, "Unexpected renderbuffer format: %d\n",
-				   irb->Base.Format);
-		     clearVal = 0;
-		  }
-	       }
-
-               /*
-                  _mesa_debug(ctx, "hardware blit clear buf %d rb id %d\n",
-                  buf, irb->Base.Name);
-                */
-
-               assert(x1 < x2);
-               assert(y1 < y2);
-
-               BEGIN_BATCH(6, REFERENCES_CLIPRECTS);
-               OUT_BATCH(CMD);
-               OUT_BATCH(BR13);
-               OUT_BATCH((y1 << 16) | x1);
-               OUT_BATCH((y2 << 16) | x2);
-               OUT_RELOC(write_buffer,
-			 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                         0);
-               OUT_BATCH(clearVal);
-               ADVANCE_BATCH();
-               clearMask &= ~bufBit;    /* turn off bit, for faster loop exit */
-            }
-         }
+      BR13 |= (pitch * cpp);
+
+      if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) {
+	 clear_val = clear_depth;
+      } else {
+	 uint8_t clear[4];
+	 GLclampf *color = ctx->Color.ClearColor;
+
+	 CLAMPED_FLOAT_TO_UBYTE(clear[0], color[0]);
+	 CLAMPED_FLOAT_TO_UBYTE(clear[1], color[1]);
+	 CLAMPED_FLOAT_TO_UBYTE(clear[2], color[2]);
+	 CLAMPED_FLOAT_TO_UBYTE(clear[3], color[3]);
+
+	 switch (irb->Base.Format) {
+	 case MESA_FORMAT_ARGB8888:
+	 case MESA_FORMAT_XRGB8888:
+	    clear_val = PACK_COLOR_8888(clear[3], clear[0],
+					clear[1], clear[2]);
+	    break;
+	 case MESA_FORMAT_RGB565:
+	    clear_val = PACK_COLOR_565(clear[0], clear[1], clear[2]);
+	    break;
+	 case MESA_FORMAT_ARGB4444:
+	    clear_val = PACK_COLOR_4444(clear[3], clear[0],
+					clear[1], clear[2]);
+	    break;
+	 case MESA_FORMAT_ARGB1555:
+	    clear_val = PACK_COLOR_1555(clear[3], clear[0],
+					clear[1], clear[2]);
+	    break;
+	 default:
+	    _mesa_problem(ctx, "Unexpected renderbuffer format: %d\n",
+			  irb->Base.Format);
+	    clear_val = 0;
+	 }
       }
-   }
 
-   UNLOCK_HARDWARE(intel);
+      assert(x1 < x2);
+      assert(y1 < y2);
+
+      /* do space check before going any further */
+      aper_array[0] = intel->batch->buf;
+      aper_array[1] = write_buffer;
+
+      if (drm_intel_bufmgr_check_aperture_space(aper_array,
+						ARRAY_SIZE(aper_array)) != 0) {
+	 intel_batchbuffer_flush(intel->batch);
+      }
+
+      BEGIN_BATCH(6);
+      OUT_BATCH(CMD);
+      OUT_BATCH(BR13);
+      OUT_BATCH((y1 << 16) | x1);
+      OUT_BATCH((y2 << 16) | x2);
+      OUT_RELOC_FENCED(write_buffer,
+		       I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		       0);
+      OUT_BATCH(clear_val);
+      ADVANCE_BATCH();
+
+      if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL)
+	 mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL);
+      else
+	 mask &= ~bufBit;    /* turn off bit, for faster loop exit */
+   }
 }
 
 GLboolean
@@ -562,6 +394,10 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
    int dwords = ALIGN(src_size, 8) / 4;
    uint32_t opcode, br13, blit_cmd;
 
+   /* Blits are in a different ringbuffer so we don't use them. */
+   if (intel->gen >= 6)
+      return GL_FALSE;
+
    if (dst_tiling != I915_TILING_NONE) {
       if (dst_offset & 4095)
 	 return GL_FALSE;
@@ -585,8 +421,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
    intel_batchbuffer_require_space( intel->batch,
 				    (8 * 4) +
 				    (3 * 4) +
-				    dwords * 4,
-				    REFERENCES_CLIPRECTS );
+				    dwords * 4 );
 
    opcode = XY_SETUP_BLT_CMD;
    if (cpp == 4)
@@ -608,14 +443,14 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
    if (dst_tiling != I915_TILING_NONE)
       blit_cmd |= XY_DST_TILED;
 
-   BEGIN_BATCH(8 + 3, REFERENCES_CLIPRECTS);
+   BEGIN_BATCH(8 + 3);
    OUT_BATCH(opcode);
    OUT_BATCH(br13);
    OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
    OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
-   OUT_RELOC(dst_buffer,
-	     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-	     dst_offset);
+   OUT_RELOC_FENCED(dst_buffer,
+		    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		    dst_offset);
    OUT_BATCH(0); /* bg */
    OUT_BATCH(fg_color); /* fg */
    OUT_BATCH(0); /* pattern base addr */
@@ -627,8 +462,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
 
    intel_batchbuffer_data( intel->batch,
 			   src_bits,
-			   dwords * 4,
-			   REFERENCES_CLIPRECTS );
+			   dwords * 4 );
 
    intel_batchbuffer_emit_mi_flush(intel->batch);
 
@@ -649,6 +483,9 @@ intel_emit_linear_blit(struct intel_context *intel,
 {
    GLuint pitch, height;
 
+   /* Blits are in a different ringbuffer so we don't use them. */
+   assert(intel->gen < 6);
+
    /* The pitch is a signed value. */
    pitch = MIN2(size, (1 << 15) - 1);
    height = size / pitch;
diff --git a/shared/intel_blit.h b/shared/intel_blit.h
index 240cb7c..eb66fe0 100644
--- a/shared/intel_blit.h
+++ b/shared/intel_blit.h
@@ -30,7 +30,7 @@
 
 #include "intel_context.h"
 
-extern void intelCopyBuffer(const __DRIdrawablePrivate * dpriv,
+extern void intelCopyBuffer(const __DRIdrawable * dpriv,
                             const drm_clip_rect_t * rect);
 
 extern void intelClearWithBlit(GLcontext * ctx, GLbitfield mask);
diff --git a/shared/intel_buffer_objects.c b/shared/intel_buffer_objects.c
index 3b7015b..103aaf2 100644
--- a/shared/intel_buffer_objects.c
+++ b/shared/intel_buffer_objects.c
@@ -31,10 +31,12 @@
 #include "main/macros.h"
 #include "main/bufferobj.h"
 
-#include "intel_context.h"
 #include "intel_blit.h"
 #include "intel_buffer_objects.h"
 #include "intel_batchbuffer.h"
+#include "intel_context.h"
+#include "intel_fbo.h"
+#include "intel_mipmap_tree.h"
 #include "intel_regions.h"
 
 static GLboolean
@@ -113,7 +115,7 @@ intel_bufferobj_free(GLcontext * ctx, struct gl_buffer_object *obj)
    if (obj->Pointer)
       intel_bufferobj_unmap(ctx, 0, obj);
 
-   _mesa_free(intel_obj->sys_buffer);
+   free(intel_obj->sys_buffer);
    if (intel_obj->region) {
       intel_bufferobj_release_region(intel, intel_obj);
    }
@@ -121,7 +123,7 @@ intel_bufferobj_free(GLcontext * ctx, struct gl_buffer_object *obj)
       dri_bo_unreference(intel_obj->buffer);
    }
 
-   _mesa_free(intel_obj);
+   free(intel_obj);
 }
 
 
@@ -155,7 +157,7 @@ intel_bufferobj_data(GLcontext * ctx,
       dri_bo_unreference(intel_obj->buffer);
       intel_obj->buffer = NULL;
    }
-   _mesa_free(intel_obj->sys_buffer);
+   free(intel_obj->sys_buffer);
    intel_obj->sys_buffer = NULL;
 
    if (size != 0) {
@@ -164,7 +166,7 @@ intel_bufferobj_data(GLcontext * ctx,
        * with their contents anyway.
        */
       if (target == GL_ARRAY_BUFFER || target == GL_ELEMENT_ARRAY_BUFFER) {
-	 intel_obj->sys_buffer = _mesa_malloc(size);
+	 intel_obj->sys_buffer = malloc(size);
 	 if (intel_obj->sys_buffer != NULL) {
 	    if (data != NULL)
 	       memcpy(intel_obj->sys_buffer, data, size);
@@ -285,7 +287,7 @@ intel_bufferobj_map(GLcontext * ctx,
       return NULL;
    }
 
-   if (write_only && intel->intelScreen->kernel_exec_fencing) {
+   if (write_only) {
       drm_intel_gem_bo_map_gtt(intel_obj->buffer);
       intel_obj->mapped_gtt = GL_TRUE;
    } else {
@@ -373,14 +375,13 @@ intel_bufferobj_map_range(GLcontext * ctx,
    if ((access & GL_MAP_INVALIDATE_RANGE_BIT) &&
        drm_intel_bo_busy(intel_obj->buffer)) {
       if (access & GL_MAP_FLUSH_EXPLICIT_BIT) {
-	 intel_obj->range_map_buffer = _mesa_malloc(length);
+	 intel_obj->range_map_buffer = malloc(length);
 	 obj->Pointer = intel_obj->range_map_buffer;
       } else {
 	 intel_obj->range_map_bo = drm_intel_bo_alloc(intel->bufmgr,
 						      "range map",
 						      length, 64);
-	 if (!(access & GL_MAP_READ_BIT) &&
-	     intel->intelScreen->kernel_exec_fencing) {
+	 if (!(access & GL_MAP_READ_BIT)) {
 	    drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo);
 	    intel_obj->mapped_gtt = GL_TRUE;
 	 } else {
@@ -393,8 +394,7 @@ intel_bufferobj_map_range(GLcontext * ctx,
       return obj->Pointer;
    }
 
-   if (!(access & GL_MAP_READ_BIT) &&
-       intel->intelScreen->kernel_exec_fencing) {
+   if (!(access & GL_MAP_READ_BIT)) {
       drm_intel_gem_bo_map_gtt(intel_obj->buffer);
       intel_obj->mapped_gtt = GL_TRUE;
    } else {
@@ -523,7 +523,7 @@ intel_bufferobj_buffer(struct intel_context *intel,
 			      intel_obj->Base.Size,
 			      sys_buffer,
 			      &intel_obj->Base);
-      _mesa_free(sys_buffer);
+      free(sys_buffer);
       intel_obj->sys_buffer = NULL;
    }
 
@@ -588,6 +588,126 @@ intel_bufferobj_copy_subdata(GLcontext *ctx,
    intel_batchbuffer_emit_mi_flush(intel->batch);
 }
 
+#if FEATURE_APPLE_object_purgeable
+static GLenum
+intel_buffer_purgeable(GLcontext * ctx,
+                       drm_intel_bo *buffer,
+                       GLenum option)
+{
+   int retained = 0;
+
+   if (buffer != NULL)
+      retained = drm_intel_bo_madvise (buffer, I915_MADV_DONTNEED);
+
+   return retained ? GL_VOLATILE_APPLE : GL_RELEASED_APPLE;
+}
+
+static GLenum
+intel_buffer_object_purgeable(GLcontext * ctx,
+                              struct gl_buffer_object *obj,
+                              GLenum option)
+{
+   struct intel_buffer_object *intel;
+
+   intel = intel_buffer_object (obj);
+   if (intel->buffer != NULL)
+      return intel_buffer_purgeable (ctx, intel->buffer, option);
+
+   if (option == GL_RELEASED_APPLE) {
+      if (intel->sys_buffer != NULL) {
+         free(intel->sys_buffer);
+         intel->sys_buffer = NULL;
+      }
+
+      return GL_RELEASED_APPLE;
+   } else {
+      /* XXX Create the buffer and madvise(MADV_DONTNEED)? */
+      return intel_buffer_purgeable (ctx,
+                                     intel_bufferobj_buffer(intel_context(ctx),
+                                                            intel, INTEL_READ),
+                                     option);
+   }
+}
+
+static GLenum
+intel_texture_object_purgeable(GLcontext * ctx,
+                               struct gl_texture_object *obj,
+                               GLenum option)
+{
+   struct intel_texture_object *intel;
+
+   intel = intel_texture_object(obj);
+   if (intel->mt == NULL || intel->mt->region == NULL)
+      return GL_RELEASED_APPLE;
+
+   return intel_buffer_purgeable (ctx, intel->mt->region->buffer, option);
+}
+
+static GLenum
+intel_render_object_purgeable(GLcontext * ctx,
+                              struct gl_renderbuffer *obj,
+                              GLenum option)
+{
+   struct intel_renderbuffer *intel;
+
+   intel = intel_renderbuffer(obj);
+   if (intel->region == NULL)
+      return GL_RELEASED_APPLE;
+
+   return intel_buffer_purgeable (ctx, intel->region->buffer, option);
+}
+
+static GLenum
+intel_buffer_unpurgeable(GLcontext * ctx,
+                         drm_intel_bo *buffer,
+                         GLenum option)
+{
+   int retained;
+
+   retained = 0;
+   if (buffer != NULL)
+      retained = drm_intel_bo_madvise (buffer, I915_MADV_WILLNEED);
+
+   return retained ? GL_RETAINED_APPLE : GL_UNDEFINED_APPLE;
+}
+
+static GLenum
+intel_buffer_object_unpurgeable(GLcontext * ctx,
+                                struct gl_buffer_object *obj,
+                                GLenum option)
+{
+   return intel_buffer_unpurgeable (ctx, intel_buffer_object (obj)->buffer, option);
+}
+
+static GLenum
+intel_texture_object_unpurgeable(GLcontext * ctx,
+                                 struct gl_texture_object *obj,
+                                 GLenum option)
+{
+   struct intel_texture_object *intel;
+
+   intel = intel_texture_object(obj);
+   if (intel->mt == NULL || intel->mt->region == NULL)
+      return GL_UNDEFINED_APPLE;
+
+   return intel_buffer_unpurgeable (ctx, intel->mt->region->buffer, option);
+}
+
+static GLenum
+intel_render_object_unpurgeable(GLcontext * ctx,
+                                struct gl_renderbuffer *obj,
+                                GLenum option)
+{
+   struct intel_renderbuffer *intel;
+
+   intel = intel_renderbuffer(obj);
+   if (intel->region == NULL)
+      return GL_UNDEFINED_APPLE;
+
+   return intel_buffer_unpurgeable (ctx, intel->region->buffer, option);
+}
+#endif
+
 void
 intelInitBufferObjectFuncs(struct dd_function_table *functions)
 {
@@ -601,4 +721,14 @@ intelInitBufferObjectFuncs(struct dd_function_table *functions)
    functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range;
    functions->UnmapBuffer = intel_bufferobj_unmap;
    functions->CopyBufferSubData = intel_bufferobj_copy_subdata;
+
+#if FEATURE_APPLE_object_purgeable
+   functions->BufferObjectPurgeable = intel_buffer_object_purgeable;
+   functions->TextureObjectPurgeable = intel_texture_object_purgeable;
+   functions->RenderObjectPurgeable = intel_render_object_purgeable;
+
+   functions->BufferObjectUnpurgeable = intel_buffer_object_unpurgeable;
+   functions->TextureObjectUnpurgeable = intel_texture_object_unpurgeable;
+   functions->RenderObjectUnpurgeable = intel_render_object_unpurgeable;
+#endif
 }
diff --git a/shared/intel_buffers.c b/shared/intel_buffers.c
index 0564318..b106930 100644
--- a/shared/intel_buffers.c
+++ b/shared/intel_buffers.c
@@ -28,45 +28,7 @@
 #include "intel_context.h"
 #include "intel_buffers.h"
 #include "intel_fbo.h"
-#include "intel_regions.h"
-#include "intel_batchbuffer.h"
 #include "main/framebuffer.h"
-#include "drirenderbuffer.h"
-
-
-/**
- * XXX move this into a new dri/common/cliprects.c file.
- */
-GLboolean
-intel_intersect_cliprects(drm_clip_rect_t * dst,
-                          const drm_clip_rect_t * a,
-                          const drm_clip_rect_t * b)
-{
-   GLint bx = b->x1;
-   GLint by = b->y1;
-   GLint bw = b->x2 - bx;
-   GLint bh = b->y2 - by;
-
-   if (bx < a->x1)
-      bw -= a->x1 - bx, bx = a->x1;
-   if (by < a->y1)
-      bh -= a->y1 - by, by = a->y1;
-   if (bx + bw > a->x2)
-      bw = a->x2 - bx;
-   if (by + bh > a->y2)
-      bh = a->y2 - by;
-   if (bw <= 0)
-      return GL_FALSE;
-   if (bh <= 0)
-      return GL_FALSE;
-
-   dst->x1 = bx;
-   dst->y1 = by;
-   dst->x2 = bx + bw;
-   dst->y2 = by + bh;
-
-   return GL_TRUE;
-}
 
 /**
  * Return pointer to current color drawing region, or NULL.
@@ -96,42 +58,6 @@ intel_readbuf_region(struct intel_context *intel)
       return NULL;
 }
 
-void
-intel_get_cliprects(struct intel_context *intel,
-		    struct drm_clip_rect **cliprects,
-		    unsigned int *num_cliprects,
-		    int *x_off, int *y_off)
-{
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;
-
-   if (intel->constant_cliprect) {
-      /* FBO or DRI2 rendering, which can just use the fb's size. */
-      intel->fboRect.x1 = 0;
-      intel->fboRect.y1 = 0;
-      intel->fboRect.x2 = intel->ctx.DrawBuffer->Width;
-      intel->fboRect.y2 = intel->ctx.DrawBuffer->Height;
-
-      *cliprects = &intel->fboRect;
-      *num_cliprects = 1;
-      *x_off = 0;
-      *y_off = 0;
-   } else if (intel->front_cliprects || dPriv->numBackClipRects == 0) {
-      /* use the front clip rects */
-      *cliprects = dPriv->pClipRects;
-      *num_cliprects = dPriv->numClipRects;
-      *x_off = dPriv->x;
-      *y_off = dPriv->y;
-   }
-   else {
-      /* use the back clip rects */
-      *num_cliprects = dPriv->numBackClipRects;
-      *cliprects = dPriv->pBackClipRects;
-      *x_off = dPriv->backX;
-      *y_off = dPriv->backY;
-   }
-}
-
-
 /**
  * Check if we're about to draw into the front color buffer.
  * If so, set the intel->front_buffer_dirty field to true.
@@ -202,7 +128,6 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
        || (fb->_NumColorDrawBuffers == 0)) {
       /* writing to 0  */
       colorRegions[0] = NULL;
-      intel->constant_cliprect = GL_TRUE;
    }
    else if (fb->_NumColorDrawBuffers > 1) {
        int i;
@@ -212,34 +137,23 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
            irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]);
            colorRegions[i] = irb ? irb->region : NULL;
        }
-       intel->constant_cliprect = GL_TRUE;
    }
    else {
       /* Get the intel_renderbuffer for the single colorbuffer we're drawing
-       * into, and set up cliprects if it's a DRI1 window front buffer.
+       * into.
        */
       if (fb->Name == 0) {
-	 intel->constant_cliprect = intel->driScreen->dri2.enabled;
 	 /* drawing to window system buffer */
-	 if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
-	    if (!intel->constant_cliprect && !intel->front_cliprects)
-	       intel_batchbuffer_flush(intel->batch);
-	    intel->front_cliprects = GL_TRUE;
+	 if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT)
 	    colorRegions[0] = intel_get_rb_region(fb, BUFFER_FRONT_LEFT);
-	 }
-	 else {
-	    if (!intel->constant_cliprect && intel->front_cliprects)
-	       intel_batchbuffer_flush(intel->batch);
-	    intel->front_cliprects = GL_FALSE;
+	 else
 	    colorRegions[0] = intel_get_rb_region(fb, BUFFER_BACK_LEFT);
-	 }
       }
       else {
 	 /* drawing to user-created FBO */
 	 struct intel_renderbuffer *irb;
 	 irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]);
 	 colorRegions[0] = (irb && irb->region) ? irb->region : NULL;
-	 intel->constant_cliprect = GL_TRUE;
       }
    }
 
@@ -291,6 +205,12 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
       FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE);
    }
 
+   /* If we have a (packed) stencil buffer attached but no depth buffer,
+    * we still need to set up the shared depth/stencil state so we can use it.
+    */
+   if (depthRegion == NULL && irbStencil && irbStencil->region)
+      depthRegion = irbStencil->region;
+
    /*
     * Update depth and stencil test state
     */
@@ -351,13 +271,12 @@ intelDrawBuffer(GLcontext * ctx, GLenum mode)
       intel->is_front_buffer_rendering = (mode == GL_FRONT_LEFT)
 	|| (mode == GL_FRONT);
 
-      /* If we weren't front-buffer rendering before but we are now, make sure
-       * that the front-buffer has actually been allocated.
+      /* If we weren't front-buffer rendering before but we are now,
+       * invalidate our DRI drawable so we'll ask for new buffers
+       * (including the fake front) before we start rendering again.
        */
-      if (!was_front_buffer_rendering && intel->is_front_buffer_rendering) {
-	 intel_update_renderbuffers(intel->driContext,
-				    intel->driContext->driDrawablePriv);
-      }
+      if (!was_front_buffer_rendering && intel->is_front_buffer_rendering)
+	 dri2InvalidateDrawable(intel->driContext->driDrawablePriv);
    }
 
    intel_draw_buffer(ctx, ctx->DrawBuffer);
@@ -375,13 +294,12 @@ intelReadBuffer(GLcontext * ctx, GLenum mode)
       intel->is_front_buffer_reading = (mode == GL_FRONT_LEFT)
 	|| (mode == GL_FRONT);
 
-      /* If we weren't front-buffer reading before but we are now, make sure
-       * that the front-buffer has actually been allocated.
+      /* If we weren't front-buffer reading before but we are now,
+       * invalidate our DRI drawable so we'll ask for new buffers
+       * (including the fake front) before we start reading again.
        */
-      if (!was_front_buffer_reading && intel->is_front_buffer_reading) {
-	 intel_update_renderbuffers(intel->driContext,
-				    intel->driContext->driDrawablePriv);
-      }
+      if (!was_front_buffer_reading && intel->is_front_buffer_reading)
+	 dri2InvalidateDrawable(intel->driContext->driReadablePriv);
    }
 
    if (ctx->ReadBuffer == ctx->DrawBuffer) {
diff --git a/shared/intel_buffers.h b/shared/intel_buffers.h
index d7800f2..abb86aa 100644
--- a/shared/intel_buffers.h
+++ b/shared/intel_buffers.h
@@ -35,12 +35,6 @@
 struct intel_context;
 struct intel_framebuffer;
 
-
-extern GLboolean
-intel_intersect_cliprects(drm_clip_rect_t * dest,
-                          const drm_clip_rect_t * a,
-                          const drm_clip_rect_t * b);
-
 extern struct intel_region *intel_readbuf_region(struct intel_context *intel);
 
 extern struct intel_region *intel_drawbuf_region(struct intel_context *intel);
diff --git a/shared/intel_chipset.h b/shared/intel_chipset.h
index 3dc8653..a0b2266 100644
--- a/shared/intel_chipset.h
+++ b/shared/intel_chipset.h
@@ -1,4 +1,4 @@
-/*
+ /*
  * Copyright © 2007 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -71,6 +71,8 @@
 #define PCI_CHIP_ILD_G                  0x0042
 #define PCI_CHIP_ILM_G                  0x0046
 
+#define PCI_CHIP_SANDYBRIDGE		0x0102
+
 #define IS_MOBILE(devid)	(devid == PCI_CHIP_I855_GM || \
 				 devid == PCI_CHIP_I915_GM || \
 				 devid == PCI_CHIP_I945_GM || \
@@ -104,14 +106,20 @@
 				 devid == PCI_CHIP_Q33_G || \
 				 devid == PCI_CHIP_Q35_G || IS_IGD(devid))
 
-#define IS_965(devid)		(devid == PCI_CHIP_I965_G || \
+#define IS_GEN4(devid)		(devid == PCI_CHIP_I965_G || \
 				 devid == PCI_CHIP_I965_Q || \
 				 devid == PCI_CHIP_I965_G_1 || \
 				 devid == PCI_CHIP_I965_GM || \
 				 devid == PCI_CHIP_I965_GME || \
 				 devid == PCI_CHIP_I946_GZ || \
+				 IS_G4X(devid))
+
+#define IS_GEN6(devid)		(devid == PCI_CHIP_SANDYBRIDGE)
+
+#define IS_965(devid)		(IS_GEN4(devid) || \
 				 IS_G4X(devid) || \
-				 IS_IGDNG(devid))
+				 IS_IGDNG(devid) || \
+				 IS_GEN6(devid))
 
 #define IS_9XX(devid)		(IS_915(devid) || \
 				 IS_945(devid) || \
diff --git a/shared/intel_clear.c b/shared/intel_clear.c
index f682ee3..03b24e2 100644
--- a/shared/intel_clear.c
+++ b/shared/intel_clear.c
@@ -33,12 +33,9 @@
 
 #include "intel_context.h"
 #include "intel_blit.h"
-#include "intel_chipset.h"
 #include "intel_clear.h"
 #include "intel_fbo.h"
-#include "intel_pixel.h"
 #include "intel_regions.h"
-#include "intel_batchbuffer.h"
 
 #define FILE_DEBUG_FLAG DEBUG_BLIT
 
@@ -68,7 +65,7 @@ static void
 intelClear(GLcontext *ctx, GLbitfield mask)
 {
    struct intel_context *intel = intel_context(ctx);
-   const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask);
+   const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask[0]);
    GLbitfield tri_mask = 0;
    GLbitfield blit_mask = 0;
    GLbitfield swrast_mask = 0;
@@ -136,6 +133,12 @@ intelClear(GLcontext *ctx, GLbitfield mask)
       }
    }
 
+   if (intel->gen >= 6) {
+      /* Blits are in a different ringbuffer so we don't use them. */
+      tri_mask |= blit_mask;
+      blit_mask = 0;
+   }
+
    /* SW fallback clearing */
    swrast_mask = mask & ~tri_mask & ~blit_mask;
 
diff --git a/shared/intel_context.c b/shared/intel_context.c
index dd24b71..d6a1ba6 100644
--- a/shared/intel_context.c
+++ b/shared/intel_context.c
@@ -28,7 +28,6 @@
 
 #include "main/glheader.h"
 #include "main/context.h"
-/* #include "main/arrayobj.h" */
 #include "main/extensions.h"
 #include "main/framebuffer.h"
 #include "main/imports.h"
@@ -52,15 +51,11 @@
 #include "intel_regions.h"
 #include "intel_buffer_objects.h"
 #include "intel_fbo.h"
-#include "intel_decode.h"
 #include "intel_bufmgr.h"
 #include "intel_screen.h"
-#include "intel_swapbuffers.h"
 
 #include "drirenderbuffer.h"
-#include "vblank.h"
 #include "utils.h"
-#include "xmlpool.h"            /* for symbolic values of enum-type options */
 
 
 #ifndef INTEL_DEBUG
@@ -68,12 +63,10 @@ int INTEL_DEBUG = (0);
 #endif
 
 
-#define DRIVER_DATE                     "20091221 2009Q4"
+#define DRIVER_DATE                     "20091221 DEVELOPMENT"
 #define DRIVER_DATE_GEM                 "GEM " DRIVER_DATE
 
 
-static void intel_flush(GLcontext *ctx, GLboolean needs_mi_flush);
-
 static const GLubyte *
 intelGetString(GLcontext * ctx, GLenum name)
 {
@@ -176,9 +169,7 @@ intelGetString(GLcontext * ctx, GLenum name)
          break;
       }
 
-      (void) driGetRendererString(buffer, chipset, 
-				  (intel->ttm) ? DRIVER_DATE_GEM : DRIVER_DATE,
-				  0);
+      (void) driGetRendererString(buffer, chipset, DRIVER_DATE_GEM, 0);
       return (GLubyte *) buffer;
 
    default:
@@ -195,17 +186,31 @@ intel_bits_per_pixel(const struct intel_renderbuffer *rb)
 void
 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
 {
-   struct intel_framebuffer *intel_fb = drawable->driverPrivate;
+   struct gl_framebuffer *fb = drawable->driverPrivate;
    struct intel_renderbuffer *rb;
    struct intel_region *region, *depth_region;
    struct intel_context *intel = context->driverPrivate;
+   struct intel_renderbuffer *front_rb, *back_rb, *depth_rb, *stencil_rb;
    __DRIbuffer *buffers = NULL;
    __DRIscreen *screen;
    int i, count;
    unsigned int attachments[10];
-   uint32_t name;
    const char *region_name;
 
+   /* If we're rendering to the fake front buffer, make sure all the
+    * pending drawing has landed on the real front buffer.  Otherwise
+    * when we eventually get to DRI2GetBuffersWithFormat the stale
+    * real front buffer contents will get copied to the new fake front
+    * buffer.
+    */
+   if (intel->is_front_buffer_rendering)
+      intel_flush(&intel->ctx, GL_FALSE);
+
+   /* Set this up front, so that in case our buffers get invalidated
+    * while we're getting new buffers, we don't clobber the stamp and
+    * thus ignore the invalidate. */
+   drawable->lastStamp = drawable->dri2.stamp;
+
    if (INTEL_DEBUG & DEBUG_DRI)
       fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
 
@@ -214,26 +219,25 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
    if (screen->dri2.loader
        && (screen->dri2.loader->base.version > 2)
        && (screen->dri2.loader->getBuffersWithFormat != NULL)) {
-      struct intel_renderbuffer *depth_rb;
-      struct intel_renderbuffer *stencil_rb;
+
+      front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
+      back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
+      depth_rb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
+      stencil_rb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
 
       i = 0;
       if ((intel->is_front_buffer_rendering ||
 	   intel->is_front_buffer_reading ||
-	   !intel_fb->color_rb[1])
-	   && intel_fb->color_rb[0]) {
+	   !back_rb) && front_rb) {
 	 attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
-	 attachments[i++] = intel_bits_per_pixel(intel_fb->color_rb[0]);
+	 attachments[i++] = intel_bits_per_pixel(front_rb);
       }
 
-      if (intel_fb->color_rb[1]) {
+      if (back_rb) {
 	 attachments[i++] = __DRI_BUFFER_BACK_LEFT;
-	 attachments[i++] = intel_bits_per_pixel(intel_fb->color_rb[1]);
+	 attachments[i++] = intel_bits_per_pixel(back_rb);
       }
 
-      depth_rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH);
-      stencil_rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL);
-
       if ((depth_rb != NULL) && (stencil_rb != NULL)) {
 	 attachments[i++] = __DRI_BUFFER_DEPTH_STENCIL;
 	 attachments[i++] = intel_bits_per_pixel(depth_rb);
@@ -254,13 +258,13 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
 						      drawable->loaderPrivate);
    } else if (screen->dri2.loader) {
       i = 0;
-      if (intel_fb->color_rb[0])
+      if (intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT))
 	 attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
-      if (intel_fb->color_rb[1])
+      if (intel_get_renderbuffer(fb, BUFFER_BACK_LEFT))
 	 attachments[i++] = __DRI_BUFFER_BACK_LEFT;
-      if (intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH))
+      if (intel_get_renderbuffer(fb, BUFFER_DEPTH))
 	 attachments[i++] = __DRI_BUFFER_DEPTH;
-      if (intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL))
+      if (intel_get_renderbuffer(fb, BUFFER_STENCIL))
 	 attachments[i++] = __DRI_BUFFER_STENCIL;
 
       buffers = (*screen->dri2.loader->getBuffers)(drawable,
@@ -293,32 +297,32 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
    for (i = 0; i < count; i++) {
        switch (buffers[i].attachment) {
        case __DRI_BUFFER_FRONT_LEFT:
-	   rb = intel_fb->color_rb[0];
+	   rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
 	   region_name = "dri2 front buffer";
 	   break;
 
        case __DRI_BUFFER_FAKE_FRONT_LEFT:
-	   rb = intel_fb->color_rb[0];
+	   rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
 	   region_name = "dri2 fake front buffer";
 	   break;
 
        case __DRI_BUFFER_BACK_LEFT:
-	   rb = intel_fb->color_rb[1];
+	   rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
 	   region_name = "dri2 back buffer";
 	   break;
 
        case __DRI_BUFFER_DEPTH:
-	   rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH);
+	   rb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
 	   region_name = "dri2 depth buffer";
 	   break;
 
        case __DRI_BUFFER_DEPTH_STENCIL:
-	   rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH);
+	   rb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
 	   region_name = "dri2 depth / stencil buffer";
 	   break;
 
        case __DRI_BUFFER_STENCIL:
-	   rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL);
+	   rb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
 	   region_name = "dri2 stencil buffer";
 	   break;
 
@@ -333,11 +337,8 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
        if (rb == NULL)
 	  continue;
 
-       if (rb->region) {
-	  dri_bo_flink(rb->region->buffer, &name);
-	  if (name == buffers[i].name)
+       if (rb->region && rb->region->name == buffers[i].name)
 	     continue;
-       }
 
        if (INTEL_DEBUG & DEBUG_DRI)
 	  fprintf(stderr,
@@ -365,15 +366,12 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
        intel_region_release(&region);
 
        if (buffers[i].attachment == __DRI_BUFFER_DEPTH_STENCIL) {
-	  rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL);
+	  rb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
 	  if (rb != NULL) {
 	     struct intel_region *stencil_region = NULL;
 
-	     if (rb->region) {
-		dri_bo_flink(rb->region->buffer, &name);
-		if (name == buffers[i].name)
+	     if (rb->region && rb->region->name == buffers[i].name)
 		   continue;
-	     }
 
 	     intel_region_reference(&stencil_region, region);
 	     intel_renderbuffer_set_region(rb, stencil_region);
@@ -386,40 +384,40 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
 }
 
 void
+intel_prepare_render(struct intel_context *intel)
+{
+   __DRIcontext *driContext = intel->driContext;
+   __DRIdrawable *drawable;
+
+   drawable = intel->driDrawable;
+   if (drawable->dri2.stamp != driContext->dri2.draw_stamp) {
+      if (drawable->lastStamp != drawable->dri2.stamp)
+	 intel_update_renderbuffers(driContext, drawable);
+      intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer);
+      driContext->dri2.draw_stamp = drawable->dri2.stamp;
+   }
+
+   drawable = intel->driReadDrawable;
+   if (drawable->dri2.stamp != driContext->dri2.read_stamp) {
+      if (drawable->lastStamp != drawable->dri2.stamp)
+	 intel_update_renderbuffers(driContext, drawable);
+      driContext->dri2.read_stamp = drawable->dri2.stamp;
+   }
+}
+
+void
 intel_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
 {
     struct intel_context *intel = intel_context(ctx);
     __DRIcontext *driContext = intel->driContext;
-    void (*old_viewport)(GLcontext *ctx, GLint x, GLint y,
-			 GLsizei w, GLsizei h);
-
-    if (!driContext->driScreenPriv->dri2.enabled)
-	return;
-
-    if (!intel->meta.internal_viewport_call && ctx->DrawBuffer->Name == 0) {
-       /* If we're rendering to the fake front buffer, make sure all the pending
-	* drawing has landed on the real front buffer.  Otherwise when we
-	* eventually get to DRI2GetBuffersWithFormat the stale real front
-	* buffer contents will get copied to the new fake front buffer.
-	*/
-       if (intel->is_front_buffer_rendering) {
-	  intel_flush(ctx, GL_FALSE);
-       }
 
-       intel_update_renderbuffers(driContext, driContext->driDrawablePriv);
-       if (driContext->driDrawablePriv != driContext->driReadablePriv)
-	  intel_update_renderbuffers(driContext, driContext->driReadablePriv);
+    if (!intel->using_dri2_swapbuffers &&
+	!intel->meta.internal_viewport_call && ctx->DrawBuffer->Name == 0) {
+       dri2InvalidateDrawable(driContext->driDrawablePriv);
+       dri2InvalidateDrawable(driContext->driReadablePriv);
     }
-
-    old_viewport = ctx->Driver.Viewport;
-    ctx->Driver.Viewport = NULL;
-    intel->driDrawable = driContext->driDrawablePriv;
-    intelWindowMoved(intel);
-    intel_draw_buffer(ctx, intel->ctx.DrawBuffer);
-    ctx->Driver.Viewport = old_viewport;
 }
 
-
 static const struct dri_debug_control debug_control[] = {
    { "tex",   DEBUG_TEXTURE},
    { "state", DEBUG_STATE},
@@ -469,7 +467,7 @@ intelInvalidateState(GLcontext * ctx, GLuint new_state)
       intel->vtbl.invalidate_state( intel, new_state );
 }
 
-static void
+void
 intel_flush(GLcontext *ctx, GLboolean needs_mi_flush)
 {
    struct intel_context *intel = intel_context(ctx);
@@ -480,13 +478,6 @@ intel_flush(GLcontext *ctx, GLboolean needs_mi_flush)
    if (intel->gen < 4)
       INTEL_FIREVERTICES(intel);
 
-   /* Emit a flush so that any frontbuffer rendering that might have occurred
-    * lands onscreen in a timely manner, even if the X Server doesn't trigger
-    * a flush for us.
-    */
-   if (!intel->driScreen->dri2.enabled && needs_mi_flush)
-      intel_batchbuffer_emit_mi_flush(intel->batch);
-
    if (intel->batch->map != intel->batch->ptr)
       intel_batchbuffer_flush(intel->batch);
 
@@ -538,7 +529,8 @@ intel_glFlush(GLcontext *ctx)
     * and getting our hands on that doesn't seem worth it, so we just us the
     * first batch we emitted after the last swap.
     */
-   if (intel->first_post_swapbuffers_batch != NULL) {
+   if (!intel->using_dri2_swapbuffers &&
+       intel->first_post_swapbuffers_batch != NULL) {
       drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch);
       drm_intel_bo_unreference(intel->first_post_swapbuffers_batch);
       intel->first_post_swapbuffers_batch = NULL;
@@ -592,39 +584,55 @@ intelInitDriverFunctions(struct dd_function_table *functions)
 GLboolean
 intelInitContext(struct intel_context *intel,
                  const __GLcontextModes * mesaVis,
-                 __DRIcontextPrivate * driContextPriv,
+                 __DRIcontext * driContextPriv,
                  void *sharedContextPrivate,
                  struct dd_function_table *functions)
 {
    GLcontext *ctx = &intel->ctx;
    GLcontext *shareCtx = (GLcontext *) sharedContextPrivate;
-   __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
-   intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private;
-   int fthrottle_mode;
+   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
+   struct intel_screen *intelScreen = sPriv->private;
+   int bo_reuse_mode;
+
+   /* we can't do anything without a connection to the device */
+   if (intelScreen->bufmgr == NULL)
+      return GL_FALSE;
 
    if (!_mesa_initialize_context(&intel->ctx, mesaVis, shareCtx,
                                  functions, (void *) intel)) {
-      _mesa_printf("%s: failed to init mesa context\n", __FUNCTION__);
+      printf("%s: failed to init mesa context\n", __FUNCTION__);
       return GL_FALSE;
    }
 
    driContextPriv->driverPrivate = intel;
    intel->intelScreen = intelScreen;
    intel->driScreen = sPriv;
-   intel->sarea = intelScreen->sarea;
    intel->driContext = driContextPriv;
+   intel->driFd = sPriv->fd;
 
-   if (IS_965(intel->intelScreen->deviceID))
+   if (IS_GEN6(intel->intelScreen->deviceID)) {
+      intel->gen = 6;
+      intel->needs_ff_sync = GL_TRUE;
+      intel->has_luminance_srgb = GL_TRUE;
+   } else if (IS_965(intel->intelScreen->deviceID)) {
       intel->gen = 4;
-   else if (IS_9XX(intel->intelScreen->deviceID))
+   } else if (IS_9XX(intel->intelScreen->deviceID)) {
       intel->gen = 3;
-   else
+      if (IS_945(intel->intelScreen->deviceID)) {
+	 intel->is_945 = GL_TRUE;
+      }
+   } else {
       intel->gen = 2;
+   }
 
-   /* Dri stuff */
-   intel->hHWContext = driContextPriv->hHWContext;
-   intel->driFd = sPriv->fd;
-   intel->driHwLock = sPriv->lock;
+   if (IS_IGDNG(intel->intelScreen->deviceID)) {
+      intel->is_ironlake = GL_TRUE;
+      intel->needs_ff_sync = GL_TRUE;
+      intel->has_luminance_srgb = GL_TRUE;
+   } else if (IS_G4X(intel->intelScreen->deviceID)) {
+      intel->has_luminance_srgb = GL_TRUE;
+      intel->is_g4x = GL_TRUE;
+   }
 
    driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache,
                        intel->driScreen->myNum,
@@ -635,18 +643,14 @@ intelInitContext(struct intel_context *intel,
       intel->maxBatchSize = BATCH_SZ;
 
    intel->bufmgr = intelScreen->bufmgr;
-   intel->ttm = intelScreen->ttm;
-   if (intel->ttm) {
-      int bo_reuse_mode;
 
-      bo_reuse_mode = driQueryOptioni(&intel->optionCache, "bo_reuse");
-      switch (bo_reuse_mode) {
-      case DRI_CONF_BO_REUSE_DISABLED:
-	 break;
-      case DRI_CONF_BO_REUSE_ALL:
-	 intel_bufmgr_gem_enable_reuse(intel->bufmgr);
-	 break;
-      }
+   bo_reuse_mode = driQueryOptioni(&intel->optionCache, "bo_reuse");
+   switch (bo_reuse_mode) {
+   case DRI_CONF_BO_REUSE_DISABLED:
+      break;
+   case DRI_CONF_BO_REUSE_ALL:
+      intel_bufmgr_gem_enable_reuse(intel->bufmgr);
+      break;
    }
 
    /* This doesn't yet catch all non-conformant rendering, but it's a
@@ -732,28 +736,12 @@ intelInitContext(struct intel_context *intel,
 
    intel->RenderIndex = ~0;
 
-   fthrottle_mode = driQueryOptioni(&intel->optionCache, "fthrottle_mode");
-   intel->irqsEmitted = 0;
-
-   intel->do_irqs = (intel->intelScreen->irq_active &&
-                     fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS);
-
-   intel->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
-
-   if (intel->gen >= 4 && !intel->intelScreen->irq_active) {
-      _mesa_printf("IRQs not active.  Exiting\n");
-      exit(1);
-   }
-
    intelInitExtensions(ctx);
 
    INTEL_DEBUG = driParseDebugString(getenv("INTEL_DEBUG"), debug_control);
    if (INTEL_DEBUG & DEBUG_BUFMGR)
       dri_bufmgr_set_debug(intel->bufmgr, GL_TRUE);
 
-   if (!sPriv->dri2.enabled)
-      intel_recreate_static_regions(intel);
-
    intel->batch = intel_batchbuffer_alloc(intel);
 
    intel_fbo_init(intel);
@@ -767,12 +755,6 @@ intelInitContext(struct intel_context *intel,
    }
    intel->use_texture_tiling = driQueryOptionb(&intel->optionCache,
 					       "texture_tiling");
-   if (intel->use_texture_tiling &&
-       !intel->intelScreen->kernel_exec_fencing) {
-      fprintf(stderr, "No kernel support for execution fencing, "
-	      "disabling texture tiling\n");
-      intel->use_texture_tiling = GL_FALSE;
-   }
    intel->use_early_z = driQueryOptionb(&intel->optionCache, "early_z");
 
    intel->prim.primitive = ~0;
@@ -802,7 +784,7 @@ intelInitContext(struct intel_context *intel,
 }
 
 void
-intelDestroyContext(__DRIcontextPrivate * driContextPriv)
+intelDestroyContext(__DRIcontext * driContextPriv)
 {
    struct intel_context *intel =
       (struct intel_context *) driContextPriv->driverPrivate;
@@ -849,57 +831,6 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)
           */
       }
 
-      /* XXX In intelMakeCurrent() below, the context's static regions are 
-       * referenced inside the frame buffer; it's listed as a hack,
-       * with a comment of "XXX FBO temporary fix-ups!", but
-       * as long as it's there, we should release the regions here.
-       * The do/while loop around the block is used to allow the
-       * "continue" statements inside the block to exit the block,
-       * to avoid many layers of "if" constructs.
-       */
-      do {
-         __DRIdrawablePrivate * driDrawPriv = intel->driDrawable;
-         struct intel_framebuffer *intel_fb;
-         struct intel_renderbuffer *irbDepth, *irbStencil;
-         if (!driDrawPriv) {
-            /* We're already detached from the drawable; exit this block. */
-            continue;
-         }
-         intel_fb = (struct intel_framebuffer *) driDrawPriv->driverPrivate;
-         if (!intel_fb) {
-            /* The frame buffer is already gone; exit this block. */
-            continue;
-         }
-         irbDepth = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH);
-         irbStencil = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL);
-
-         /* If the regions of the frame buffer still match the regions
-          * of the context, release them.  If they've changed somehow,
-          * leave them alone.
-          */
-         if (intel_fb->color_rb[0] && intel_fb->color_rb[0]->region == intel->front_region) {
-	    intel_renderbuffer_set_region(intel_fb->color_rb[0], NULL);
-         }
-         if (intel_fb->color_rb[1] && intel_fb->color_rb[1]->region == intel->back_region) {
-	    intel_renderbuffer_set_region(intel_fb->color_rb[1], NULL);
-         }
-
-         if (irbDepth && irbDepth->region == intel->depth_region) {
-	    intel_renderbuffer_set_region(irbDepth, NULL);
-         }
-         /* Usually, the stencil buffer is the same as the depth buffer;
-          * but they're handled separately in MakeCurrent, so we'll
-          * handle them separately here.
-          */
-         if (irbStencil && irbStencil->region == intel->depth_region) {
-	    intel_renderbuffer_set_region(irbStencil, NULL);
-         }
-      } while (0);
-
-      intel_region_release(&intel->front_region);
-      intel_region_release(&intel->back_region);
-      intel_region_release(&intel->depth_region);
-
       driDestroyOptionCache(&intel->optionCache);
 
       /* free the Mesa context */
@@ -911,7 +842,7 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)
 }
 
 GLboolean
-intelUnbindContext(__DRIcontextPrivate * driContextPriv)
+intelUnbindContext(__DRIcontext * driContextPriv)
 {
    struct intel_context *intel =
       (struct intel_context *) driContextPriv->driverPrivate;
@@ -925,11 +856,10 @@ intelUnbindContext(__DRIcontextPrivate * driContextPriv)
 }
 
 GLboolean
-intelMakeCurrent(__DRIcontextPrivate * driContextPriv,
-                 __DRIdrawablePrivate * driDrawPriv,
-                 __DRIdrawablePrivate * driReadPriv)
+intelMakeCurrent(__DRIcontext * driContextPriv,
+                 __DRIdrawable * driDrawPriv,
+                 __DRIdrawable * driReadPriv)
 {
-   __DRIscreenPrivate *psp = driDrawPriv->driScreenPriv;
    struct intel_context *intel;
    GET_CURRENT_CONTEXT(curCtx);
 
@@ -947,80 +877,15 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv,
    }
 
    if (driContextPriv) {
-      struct intel_framebuffer *intel_fb =
-	 (struct intel_framebuffer *) driDrawPriv->driverPrivate;
-      GLframebuffer *readFb = (GLframebuffer *) driReadPriv->driverPrivate;
- 
-      if (driContextPriv->driScreenPriv->dri2.enabled) {     
-          intel_update_renderbuffers(driContextPriv, driDrawPriv);
-          if (driDrawPriv != driReadPriv)
-              intel_update_renderbuffers(driContextPriv, driReadPriv);
-      } else {
-          /* XXX FBO temporary fix-ups!  These are released in 
-           * intelDextroyContext(), above.  Changes here should be
-           * reflected there.
-           */
-          /* if the renderbuffers don't have regions, init them from the context */
-         struct intel_renderbuffer *irbDepth
-            = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH);
-         struct intel_renderbuffer *irbStencil
-            = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL);
-
-         if (intel_fb->color_rb[0]) {
-	    intel_renderbuffer_set_region(intel_fb->color_rb[0],
-					  intel->front_region);
-         }
-         if (intel_fb->color_rb[1]) {
-	    intel_renderbuffer_set_region(intel_fb->color_rb[1],
-					  intel->back_region);
-         }
-
-         if (irbDepth) {
-	    intel_renderbuffer_set_region(irbDepth, intel->depth_region);
-         }
-         if (irbStencil) {
-	    intel_renderbuffer_set_region(irbStencil, intel->depth_region);
-         }
-      }
-
-      /* set GLframebuffer size to match window, if needed */
-      driUpdateFramebufferSize(&intel->ctx, driDrawPriv);
-
-      if (driReadPriv != driDrawPriv) {
-	 driUpdateFramebufferSize(&intel->ctx, driReadPriv);
-      }
-
-      _mesa_make_current(&intel->ctx, &intel_fb->Base, readFb);
+      struct gl_framebuffer *fb = driDrawPriv->driverPrivate;
+      struct gl_framebuffer *readFb = driReadPriv->driverPrivate;
 
+      _mesa_make_current(&intel->ctx, fb, readFb);
       intel->driReadDrawable = driReadPriv;
-
-      if (intel->driDrawable != driDrawPriv) {
-         if (driDrawPriv->swap_interval == (unsigned)-1) {
-            int i;
-
-            driDrawPriv->vblFlags = (intel->intelScreen->irq_active != 0)
-               ? driGetDefaultVBlankFlags(&intel->optionCache)
-               : VBLANK_FLAG_NO_IRQ;
-
-            /* Prevent error printf if one crtc is disabled, this will
-             * be properly calculated in intelWindowMoved() next.
-             */
-            driDrawPriv->vblFlags = intelFixupVblank(intel, driDrawPriv);
-
-            (*psp->systemTime->getUST) (&intel_fb->swap_ust);
-            driDrawableInitVBlank(driDrawPriv);
-            intel_fb->vbl_waited = driDrawPriv->vblSeq;
-
-            for (i = 0; i < 2; i++) {
-               if (intel_fb->color_rb[i])
-                  intel_fb->color_rb[i]->vbl_pending = driDrawPriv->vblSeq;
-            }
-         }
-         intel->driDrawable = driDrawPriv;
-         intelWindowMoved(intel);
-      }
-
-      intel_draw_buffer(&intel->ctx, &intel_fb->Base);
+      intel->driDrawable = driDrawPriv;
+      driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
+      driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
+      intel_prepare_render(intel);
    }
    else {
       _mesa_make_current(NULL, NULL, NULL);
@@ -1028,143 +893,3 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv,
 
    return GL_TRUE;
 }
-
-static void
-intelContendedLock(struct intel_context *intel, GLuint flags)
-{
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;
-   __DRIscreenPrivate *sPriv = intel->driScreen;
-   volatile drm_i915_sarea_t *sarea = intel->sarea;
-   int me = intel->hHWContext;
-
-   drmGetLock(intel->driFd, intel->hHWContext, flags);
-
-   if (INTEL_DEBUG & DEBUG_LOCK)
-      _mesa_printf("%s - got contended lock\n", __progname);
-
-   /* If the window moved, may need to set a new cliprect now.
-    *
-    * NOTE: This releases and regains the hw lock, so all state
-    * checking must be done *after* this call:
-    */
-   if (dPriv)
-       DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv);
-
-   if (sarea && sarea->ctxOwner != me) {
-      if (INTEL_DEBUG & DEBUG_BUFMGR) {
-	 fprintf(stderr, "Lost Context: sarea->ctxOwner %x me %x\n",
-		 sarea->ctxOwner, me);
-      }
-      sarea->ctxOwner = me;
-   }
-
-   /* If the last consumer of the texture memory wasn't us, notify the fake
-    * bufmgr and record the new owner.  We should have the memory shared
-    * between contexts of a single fake bufmgr, but this will at least make
-    * things correct for now.
-    */
-   if (!intel->ttm && sarea->texAge != intel->hHWContext) {
-      sarea->texAge = intel->hHWContext;
-      intel_bufmgr_fake_contended_lock_take(intel->bufmgr);
-      if (INTEL_DEBUG & DEBUG_BATCH)
-	 intel_decode_context_reset();
-      if (INTEL_DEBUG & DEBUG_BUFMGR)
-	 fprintf(stderr, "Lost Textures: sarea->texAge %x hw context %x\n",
-		 sarea->ctxOwner, intel->hHWContext);
-   }
-
-   /* Drawable changed?
-    */
-   if (dPriv && intel->lastStamp != dPriv->lastStamp) {
-       intelWindowMoved(intel);
-       intel->lastStamp = dPriv->lastStamp;
-   }
-}
-
-
-_glthread_DECLARE_STATIC_MUTEX(lockMutex);
-
-/* Lock the hardware and validate our state.  
- */
-void LOCK_HARDWARE( struct intel_context *intel )
-{
-    __DRIdrawable *dPriv = intel->driDrawable;
-    __DRIscreen *sPriv = intel->driScreen;
-    char __ret = 0;
-    struct intel_framebuffer *intel_fb = NULL;
-    struct intel_renderbuffer *intel_rb = NULL;
-
-    intel->locked++;
-    if (intel->locked >= 2)
-       return;
-
-    if (!sPriv->dri2.enabled)
-       _glthread_LOCK_MUTEX(lockMutex);
-
-    if (intel->driDrawable) {
-       intel_fb = intel->driDrawable->driverPrivate;
-
-       if (intel_fb)
-	  intel_rb =
-	     intel_get_renderbuffer(&intel_fb->Base,
-				    intel_fb->Base._ColorDrawBufferIndexes[0]);
-    }
-
-    if (intel_rb && dPriv->vblFlags &&
-	!(dPriv->vblFlags & VBLANK_FLAG_NO_IRQ) &&
-	(intel_fb->vbl_waited - intel_rb->vbl_pending) > (1<<23)) {
-	drmVBlank vbl;
-
-	vbl.request.type = DRM_VBLANK_ABSOLUTE;
-
-	if ( dPriv->vblFlags & VBLANK_FLAG_SECONDARY ) {
-	    vbl.request.type |= DRM_VBLANK_SECONDARY;
-	}
-
-	vbl.request.sequence = intel_rb->vbl_pending;
-	drmWaitVBlank(intel->driFd, &vbl);
-	intel_fb->vbl_waited = vbl.reply.sequence;
-    }
-
-    if (!sPriv->dri2.enabled) {
-	DRM_CAS(intel->driHwLock, intel->hHWContext,
-		(DRM_LOCK_HELD|intel->hHWContext), __ret);
-
-	if (__ret)
-	    intelContendedLock( intel, 0 );
-    }
-
-
-    if (INTEL_DEBUG & DEBUG_LOCK)
-      _mesa_printf("%s - locked\n", __progname);
-}
-
-
-/* Unlock the hardware using the global current context 
- */
-void UNLOCK_HARDWARE( struct intel_context *intel )
-{
-    __DRIscreen *sPriv = intel->driScreen;
-
-   intel->locked--;
-   if (intel->locked > 0)
-      return;
-
-   assert(intel->locked == 0);
-
-   if (!sPriv->dri2.enabled) {
-      DRM_UNLOCK(intel->driFd, intel->driHwLock, intel->hHWContext);
-      _glthread_UNLOCK_MUTEX(lockMutex);
-   }
-
-   if (INTEL_DEBUG & DEBUG_LOCK)
-      _mesa_printf("%s - unlocked\n", __progname);
-
-   /**
-    * Nothing should be left in batch outside of LOCK/UNLOCK which references
-    * cliprects.
-    */
-   if (intel->batch->cliprect_mode == REFERENCES_CLIPRECTS)
-      intel_batchbuffer_flush(intel->batch);
-}
-
diff --git a/shared/intel_context.h b/shared/intel_context.h
index eb7be7d..22736a9 100644
--- a/shared/intel_context.h
+++ b/shared/intel_context.h
@@ -107,7 +107,6 @@ struct intel_context
       void (*finish_batch) (struct intel_context * intel);
       void (*new_batch) (struct intel_context * intel);
       void (*emit_invarient_state) (struct intel_context * intel);
-      void (*note_fence) (struct intel_context *intel, GLuint fence);
       void (*update_texture_state) (struct intel_context * intel);
 
       void (*render_start) (struct intel_context * intel);
@@ -125,48 +124,6 @@ struct intel_context
       void (*invalidate_state) (struct intel_context *intel,
 				GLuint new_state);
 
-
-      /* Metaops: 
-       */
-      void (*install_meta_state) (struct intel_context * intel);
-      void (*leave_meta_state) (struct intel_context * intel);
-
-      void (*meta_draw_region) (struct intel_context * intel,
-                                struct intel_region * draw_region,
-                                struct intel_region * depth_region);
-
-      void (*meta_draw_quad)(struct intel_context *intel,
-			     GLfloat x0, GLfloat x1,
-			     GLfloat y0, GLfloat y1,
-			     GLfloat z,
-			     GLuint color, /* ARGB32 */
-			     GLfloat s0, GLfloat s1,
-			     GLfloat t0, GLfloat t1);
-
-      void (*meta_color_mask) (struct intel_context * intel, GLboolean);
-
-      void (*meta_stencil_replace) (struct intel_context * intel,
-                                    GLuint mask, GLuint clear);
-
-      void (*meta_depth_replace) (struct intel_context * intel);
-
-      void (*meta_texture_blend_replace) (struct intel_context * intel);
-
-      void (*meta_no_stencil_write) (struct intel_context * intel);
-      void (*meta_no_depth_write) (struct intel_context * intel);
-      void (*meta_no_texture) (struct intel_context * intel);
-
-      void (*meta_import_pixel_state) (struct intel_context * intel);
-      void (*meta_frame_buffer_texture) (struct intel_context *intel,
-					 GLint xoff, GLint yoff);
-
-      GLboolean(*meta_tex_rect_source) (struct intel_context * intel,
-					dri_bo * buffer,
-					GLuint offset,
-					GLuint pitch,
-					GLuint height,
-					GLenum format, GLenum type);
-
       void (*assert_not_dirty) (struct intel_context *intel);
 
       void (*debug_batch)(struct intel_context *intel);
@@ -184,20 +141,18 @@ struct intel_context
     * Generation number of the hardware: 2 is 8xx, 3 is 9xx pre-965, 4 is 965.
     */
    int gen;
+   GLboolean needs_ff_sync;
+   GLboolean is_ironlake;
+   GLboolean is_g4x;
+   GLboolean is_945;
+   GLboolean has_luminance_srgb;
 
-   struct intel_region *front_region;
-   struct intel_region *back_region;
-   struct intel_region *depth_region;
-
-   /**
-    * This value indicates that the kernel memory manager is being used
-    * instead of the fake client-side memory manager.
-    */
-   GLboolean ttm;
+   int urb_size;
 
    struct intel_batchbuffer *batch;
    drm_intel_bo *first_post_swapbuffers_batch;
    GLboolean no_batch_wrap;
+   GLboolean using_dri2_swapbuffers;
 
    struct
    {
@@ -217,10 +172,6 @@ struct intel_context
    char *prevLockFile;
    int prevLockLine;
 
-   GLuint ClearColor565;
-   GLuint ClearColor8888;
-
-
    /* Offsets of fields within the current vertex:
     */
    GLuint coloroffset;
@@ -237,6 +188,7 @@ struct intel_context
    GLboolean hw_stipple;
    GLboolean depth_buffer_is_float;
    GLboolean no_rast;
+   GLboolean no_hw;
    GLboolean always_flush_batch;
    GLboolean always_flush_cache;
 
@@ -262,19 +214,6 @@ struct intel_context
    intel_tri_func draw_tri;
 
    /**
-    * Set to true if a single constant cliprect should be used in the
-    * batchbuffer.  Otherwise, cliprects must be calculated at batchbuffer
-    * flush time while the lock is held.
-    */
-   GLboolean constant_cliprect;
-
-   /**
-    * In !constant_cliprect mode, set to true if the front cliprects should be
-    * used instead of back.
-    */
-   GLboolean front_cliprects;
-
-   /**
     * Set if rendering has occured to the drawable's front buffer.
     *
     * This is used in the DRI2 case to detect that glFlush should also copy
@@ -300,50 +239,21 @@ struct intel_context
 
    GLboolean use_texture_tiling;
    GLboolean use_early_z;
-   drm_clip_rect_t fboRect;     /**< cliprect for FBO rendering */
-
-   int perf_boxes;
 
-   GLuint do_usleeps;
-   int do_irqs;
-   GLuint irqsEmitted;
-
-   GLboolean scissor;
-   drm_clip_rect_t draw_rect;
-   drm_clip_rect_t scissor_rect;
-
-   drm_context_t hHWContext;
-   drmLock *driHwLock;
    int driFd;
 
-   __DRIcontextPrivate *driContext;
-   __DRIdrawablePrivate *driDrawable;
-   __DRIdrawablePrivate *driReadDrawable;
-   __DRIscreenPrivate *driScreen;
-   intelScreenPrivate *intelScreen;
-   volatile drm_i915_sarea_t *sarea;
-
-   GLuint lastStamp;
-
-   GLboolean no_hw;
+   __DRIcontext *driContext;
+   __DRIdrawable *driDrawable;
+   __DRIdrawable *driReadDrawable;
+   __DRIscreen *driScreen;
+   struct intel_screen *intelScreen;
 
    /**
     * Configuration cache
     */
    driOptionCache optionCache;
-
-   int64_t swap_ust;
-   int64_t swap_missed_ust;
-
-   GLuint swap_count;
-   GLuint swap_missed_count;
 };
 
-/* These are functions now:
- */
-void LOCK_HARDWARE( struct intel_context *intel );
-void UNLOCK_HARDWARE( struct intel_context *intel );
-
 extern char *__progname;
 
 
@@ -354,14 +264,14 @@ extern char *__progname;
 #define ALIGN(value, alignment)  ((value + alignment - 1) & ~(alignment - 1))
 #define IS_POWER_OF_TWO(val) (((val) & (val - 1)) == 0)
 
-static inline uint32_t
+static INLINE uint32_t
 U_FIXED(float value, uint32_t frac_bits)
 {
    value *= (1 << frac_bits);
    return value < 0 ? 0 : value;
 }
 
-static inline uint32_t
+static INLINE uint32_t
 S_FIXED(float value, uint32_t frac_bits)
 {
    return value * (1 << frac_bits);
@@ -374,29 +284,6 @@ do {						\
 } while (0)
 
 /* ================================================================
- * Color packing:
- */
-
-#define INTEL_PACKCOLOR4444(r,g,b,a) \
-  ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4))
-
-#define INTEL_PACKCOLOR1555(r,g,b,a) \
-  ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \
-    ((a) ? 0x8000 : 0))
-
-#define INTEL_PACKCOLOR565(r,g,b) \
-  ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3))
-
-#define INTEL_PACKCOLOR8888(r,g,b,a) \
-  ((a<<24) | (r<<16) | (g<<8) | b)
-
-#define INTEL_PACKCOLOR(format, r,  g,  b, a)		\
-(format == DV_PF_555 ? INTEL_PACKCOLOR1555(r,g,b,a) :	\
- (format == DV_PF_565 ? INTEL_PACKCOLOR565(r,g,b) :	\
-  (format == DV_PF_8888 ? INTEL_PACKCOLOR8888(r,g,b,a) :	\
-   0)))
-
-/* ================================================================
  * From linux kernel i386 header files, copes with odd sizes better
  * than COPY_DWORDS would:
  * XXX Put this in src/mesa/main/imports.h ???
@@ -458,7 +345,7 @@ extern int INTEL_DEBUG;
 
 #define DBG(...) do {						\
 	if (INTEL_DEBUG & FILE_DEBUG_FLAG)			\
-		_mesa_printf(__VA_ARGS__);			\
+		printf(__VA_ARGS__);			\
 } while(0)
 
 #define PCI_CHIP_845_G			0x2562
@@ -481,14 +368,13 @@ extern int INTEL_DEBUG;
 
 extern GLboolean intelInitContext(struct intel_context *intel,
                                   const __GLcontextModes * mesaVis,
-                                  __DRIcontextPrivate * driContextPriv,
+                                  __DRIcontext * driContextPriv,
                                   void *sharedContextPrivate,
                                   struct dd_function_table *functions);
 
-extern void intelGetLock(struct intel_context *intel, GLuint flags);
-
 extern void intelFinish(GLcontext * ctx);
 extern void intelFlush(GLcontext * ctx);
+extern void intel_flush(GLcontext * ctx, GLboolean needs_mi_flush);
 
 extern void intelInitDriverFunctions(struct dd_function_table *functions);
 
@@ -568,6 +454,7 @@ void intel_viewport(GLcontext * ctx, GLint x, GLint y,
 
 void intel_update_renderbuffers(__DRIcontext *context,
 				__DRIdrawable *drawable);
+void intel_prepare_render(struct intel_context *intel);
 
 void i915_set_buf_info_for_region(uint32_t *state, struct intel_region *region,
 				  uint32_t buffer_id);
@@ -588,25 +475,4 @@ is_power_of_two(uint32_t value)
    return (value & (value - 1)) == 0;
 }
 
-static inline void
-intel_bo_map_gtt_preferred(struct intel_context *intel,
-			   drm_intel_bo *bo,
-			   GLboolean write)
-{
-   if (intel->intelScreen->kernel_exec_fencing)
-      drm_intel_gem_bo_map_gtt(bo);
-   else
-      drm_intel_bo_map(bo, write);
-}
-
-static inline void
-intel_bo_unmap_gtt_preferred(struct intel_context *intel,
-			     drm_intel_bo *bo)
-{
-   if (intel->intelScreen->kernel_exec_fencing)
-      drm_intel_gem_bo_unmap_gtt(bo);
-   else
-      drm_intel_bo_unmap(bo);
-}
-
 #endif
diff --git a/shared/intel_decode.c b/shared/intel_decode.c
index a9dfe28..5293482 100644
--- a/shared/intel_decode.c
+++ b/shared/intel_decode.c
@@ -1437,6 +1437,12 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures)
 	{ 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" },
 	{ 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" },
 	{ 0x7b00, 6, 6, "3DPRIMITIVE" },
+	{ 0x780e, 4, 4, "3DSTATE_CC_STATE_POINTERS" },
+	{ 0x7810, 6, 6, "3DSTATE_VS_STATE" },
+	{ 0x7811, 6, 6, "3DSTATE_GS_STATE" },
+	{ 0x7812, 4, 4, "3DSTATE_CLIP_STATE" },
+	{ 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" },
+	{ 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" },
     };
 
     len = (data[0] & 0x0000ffff) + 2;
@@ -1592,7 +1598,7 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures)
 	return len;
 
     case 0x7905:
-	if (len != 5 && len != 6)
+	if (len < 5 || len > 7)
 	    fprintf(out, "Bad count in 3DSTATE_DEPTH_BUFFER\n");
 	if (count < len)
 	    BUFFER_FAIL(count, len, "3DSTATE_DEPTH_BUFFER");
@@ -1611,6 +1617,8 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures)
 	instr_out(data, hw_offset, 4, "volume depth\n");
 	if (len == 6)
 	    instr_out(data, hw_offset, 5, "\n");
+	if (len == 7)
+	    instr_out(data, hw_offset, 6, "render target view extent\n");
 
 	return len;
 
diff --git a/shared/intel_depthtmp.h b/shared/intel_depthtmp.h
deleted file mode 100644
index a9c75d4..0000000
--- a/shared/intel_depthtmp.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright © 2009 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
- *
- */
-
-/**
- * Wrapper around the depthtmp.h macrofest to generate spans code for
- * all the tiling styles.
- */
-
-#define VALUE_TYPE INTEL_VALUE_TYPE
-#define WRITE_DEPTH(_x, _y, d) \
-   (*(INTEL_VALUE_TYPE *)(irb->region->buffer->virtual +	\
-			  NO_TILE(_x, _y)) = d)
-#define READ_DEPTH(d, _x, _y) \
-   d = *(INTEL_VALUE_TYPE *)(irb->region->buffer->virtual +	\
-			     NO_TILE(_x, _y))
-#define TAG(x) INTEL_TAG(intel_gttmap_##x)
-#include "depthtmp.h"
-
-#define VALUE_TYPE INTEL_VALUE_TYPE
-#define WRITE_DEPTH(_x, _y, d) INTEL_WRITE_DEPTH(NO_TILE(_x, _y), d)
-#define READ_DEPTH(d, _x, _y) d = INTEL_READ_DEPTH(NO_TILE(_x, _y))
-#define TAG(x) INTEL_TAG(intel##x)
-#include "depthtmp.h"
-
-#define VALUE_TYPE INTEL_VALUE_TYPE
-#define WRITE_DEPTH(_x, _y, d) INTEL_WRITE_DEPTH(X_TILE(_x, _y), d)
-#define READ_DEPTH(d, _x, _y) d = INTEL_READ_DEPTH(X_TILE(_x, _y))
-#define TAG(x) INTEL_TAG(intel_XTile_##x)
-#include "depthtmp.h"
-
-#define VALUE_TYPE INTEL_VALUE_TYPE
-#define WRITE_DEPTH(_x, _y, d) INTEL_WRITE_DEPTH(Y_TILE(_x, _y), d)
-#define READ_DEPTH(d, _x, _y) d = INTEL_READ_DEPTH(Y_TILE(_x, _y))
-#define TAG(x) INTEL_TAG(intel_YTile_##x)
-#include "depthtmp.h"
-
-#undef INTEL_VALUE_TYPE
-#undef INTEL_WRITE_DEPTH
-#undef INTEL_READ_DEPTH
-#undef INTEL_TAG
diff --git a/shared/intel_extensions.c b/shared/intel_extensions.c
index 48cdae5..a1aac69 100644
--- a/shared/intel_extensions.c
+++ b/shared/intel_extensions.c
@@ -48,6 +48,7 @@
 #define need_GL_EXT_blend_func_separate
 #define need_GL_EXT_blend_minmax
 #define need_GL_EXT_cull_vertex
+#define need_GL_EXT_draw_buffers2
 #define need_GL_EXT_fog_coord
 #define need_GL_EXT_framebuffer_object
 #define need_GL_EXT_framebuffer_blit
@@ -57,6 +58,7 @@
 #define need_GL_EXT_secondary_color
 #define need_GL_EXT_stencil_two_side
 #define need_GL_APPLE_vertex_array_object
+#define need_GL_APPLE_object_purgeable
 #define need_GL_ATI_separate_stencil
 #define need_GL_ATI_envmap_bumpmap
 #define need_GL_NV_point_sprite
@@ -79,6 +81,7 @@ static const struct dri_extension card_extensions[] = {
    { "GL_ARB_half_float_pixel",           NULL },
    { "GL_ARB_map_buffer_range",           GL_ARB_map_buffer_range_functions },
    { "GL_ARB_multitexture",               NULL },
+   { "GL_ARB_pixel_buffer_object",      NULL },
    { "GL_ARB_point_parameters",           GL_ARB_point_parameters_functions },
    { "GL_ARB_point_sprite",               NULL },
    { "GL_ARB_shader_objects",             GL_ARB_shader_objects_functions },
@@ -104,6 +107,8 @@ static const struct dri_extension card_extensions[] = {
    { "GL_EXT_blend_logic_op",             NULL },
    { "GL_EXT_blend_subtract",             NULL },
    { "GL_EXT_cull_vertex",                GL_EXT_cull_vertex_functions },
+   { "GL_EXT_framebuffer_blit",         GL_EXT_framebuffer_blit_functions },
+   { "GL_EXT_framebuffer_object",       GL_EXT_framebuffer_object_functions },
    { "GL_EXT_fog_coord",                  GL_EXT_fog_coord_functions },
    { "GL_EXT_gpu_program_parameters",     GL_EXT_gpu_program_parameters_functions },
    { "GL_EXT_packed_depth_stencil",       NULL },
@@ -117,6 +122,7 @@ static const struct dri_extension card_extensions[] = {
    { "GL_EXT_texture_lod_bias",           NULL },
    { "GL_3DFX_texture_compression_FXT1",  NULL },
    { "GL_APPLE_client_storage",           NULL },
+   { "GL_APPLE_object_purgeable",         GL_APPLE_object_purgeable_functions },
    { "GL_APPLE_vertex_array_object",      GL_APPLE_vertex_array_object_functions},
    { "GL_MESA_pack_invert",               NULL },
    { "GL_MESA_ycbcr_texture",             NULL },
@@ -147,16 +153,19 @@ static const struct dri_extension i915_extensions[] = {
 static const struct dri_extension brw_extensions[] = {
    { "GL_ARB_depth_clamp",                NULL },
    { "GL_ARB_depth_texture",              NULL },
+   { "GL_ARB_fragment_coord_conventions", NULL },
    { "GL_ARB_fragment_program",           NULL },
    { "GL_ARB_fragment_program_shadow",    NULL },
    { "GL_ARB_fragment_shader",            NULL },
    { "GL_ARB_framebuffer_object",         GL_ARB_framebuffer_object_functions},
+   { "GL_ARB_half_float_vertex",          NULL },
    { "GL_ARB_occlusion_query",            GL_ARB_occlusion_query_functions },
    { "GL_ARB_point_sprite", 		  NULL },
    { "GL_ARB_seamless_cube_map",          NULL },
    { "GL_ARB_shadow",                     NULL },
    { "GL_MESA_texture_signed_rgba",       NULL },
    { "GL_ARB_texture_non_power_of_two",   NULL },
+   { "GL_EXT_draw_buffers2",              GL_EXT_draw_buffers2_functions },
    { "GL_EXT_shadow_funcs",               NULL },
    { "GL_EXT_stencil_two_side",           GL_EXT_stencil_two_side_functions },
    { "GL_EXT_texture_sRGB",		  NULL },
@@ -176,13 +185,6 @@ static const struct dri_extension arb_oq_extensions[] = {
 };
 
 
-static const struct dri_extension ttm_extensions[] = {
-   { "GL_ARB_pixel_buffer_object",      NULL },
-   { "GL_EXT_framebuffer_blit",         GL_EXT_framebuffer_blit_functions },
-   { "GL_EXT_framebuffer_object",       GL_EXT_framebuffer_object_functions },
-   { NULL, NULL }
-};
-
 static const struct dri_extension fragment_shader_extensions[] = {
    { "GL_ARB_fragment_shader",            NULL },
    { NULL, NULL }
@@ -201,14 +203,10 @@ intelInitExtensions(GLcontext *ctx)
     */
    driInitExtensions(ctx, card_extensions, GL_FALSE);
 
-   if (intel->ttm)
-      driInitExtensions(ctx, ttm_extensions, GL_FALSE);
-
-   if (IS_965(intel->intelScreen->deviceID))
+   if (intel->gen >= 4)
       driInitExtensions(ctx, brw_extensions, GL_FALSE);
 
-   if (IS_915(intel->intelScreen->deviceID)
-       || IS_945(intel->intelScreen->deviceID)) {
+   if (intel->gen == 3) {
       driInitExtensions(ctx, i915_extensions, GL_FALSE);
 
       if (driQueryOptionb(&intel->optionCache, "fragment_shader"))
diff --git a/shared/intel_extensions.h b/shared/intel_extensions.h
index 1d1c97a..e78e073 100644
--- a/shared/intel_extensions.h
+++ b/shared/intel_extensions.h
@@ -32,5 +32,8 @@
 extern void
 intelInitExtensions(GLcontext *ctx);
 
+extern void
+intelFlushDrawable(__DRIdrawable *drawable);
+
 
 #endif
diff --git a/shared/intel_fbo.c b/shared/intel_fbo.c
index 608f75b..a429f8d 100644
--- a/shared/intel_fbo.c
+++ b/shared/intel_fbo.c
@@ -70,14 +70,11 @@ intel_delete_renderbuffer(struct gl_renderbuffer *rb)
 
    ASSERT(irb);
 
-   if (irb->span_cache != NULL)
-      _mesa_free(irb->span_cache);
-
    if (intel && irb->region) {
       intel_region_release(&irb->region);
    }
 
-   _mesa_free(irb);
+   free(irb);
 }
 
 
@@ -200,6 +197,38 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
 }
 
 
+#if FEATURE_OES_EGL_image
+static void
+intel_image_target_renderbuffer_storage(GLcontext *ctx,
+					struct gl_renderbuffer *rb,
+					void *image_handle)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_renderbuffer *irb;
+   __DRIscreen *screen;
+   __DRIimage *image;
+
+   screen = intel->intelScreen->driScrnPriv;
+   image = screen->dri2.image->lookupEGLImage(intel->driContext, image_handle,
+					      intel->driContext->loaderPrivate);
+   if (image == NULL)
+      return;
+
+   irb = intel_renderbuffer(rb);
+   if (irb->region)
+      intel_region_release(&irb->region);
+   intel_region_reference(&irb->region, image->region);
+
+   rb->InternalFormat = image->internal_format;
+   rb->Width = image->region->width;
+   rb->Height = image->region->height;
+   rb->Format = image->format;
+   rb->DataType = image->data_type;
+   rb->_BaseFormat = _mesa_base_fbo_format(&intel->ctx,
+					   image->internal_format);
+}
+#endif
+
 /**
  * Called for each hardware renderbuffer when a _window_ is resized.
  * Just update fields.
@@ -222,7 +251,6 @@ static void
 intel_resize_buffers(GLcontext *ctx, struct gl_framebuffer *fb,
 		     GLuint width, GLuint height)
 {
-   struct intel_framebuffer *intel_fb = (struct intel_framebuffer*)fb;
    int i;
 
    _mesa_resize_framebuffer(ctx, fb, width, height);
@@ -233,9 +261,10 @@ intel_resize_buffers(GLcontext *ctx, struct gl_framebuffer *fb,
       return;
    }
 
+
    /* Make sure all window system renderbuffers are up to date */
-   for (i = 0; i < 2; i++) {
-      struct gl_renderbuffer *rb = &intel_fb->color_rb[i]->Base;
+   for (i = BUFFER_FRONT_LEFT; i <= BUFFER_BACK_RIGHT; i++) {
+      struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
 
       /* only resize if size is changing */
       if (rb && (rb->Width != width || rb->Height != height)) {
@@ -316,7 +345,7 @@ intel_create_renderbuffer(gl_format format)
    default:
       _mesa_problem(NULL,
                     "Unexpected intFormat in intel_create_renderbuffer");
-      _mesa_free(irb);
+      free(irb);
       return NULL;
    }
 
@@ -398,8 +427,6 @@ static GLboolean
 intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, 
 		     struct gl_texture_image *texImage)
 {
-   gl_format texFormat;
-
    if (texImage->TexFormat == MESA_FORMAT_ARGB8888) {
       irb->Base.DataType = GL_UNSIGNED_BYTE;
       DBG("Render to RGBA8 texture OK\n");
@@ -429,14 +456,13 @@ intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb,
       DBG("Render to DEPTH_STENCIL texture OK\n");
    }
    else {
-      DBG("Render to texture BAD FORMAT %d\n", texImage->TexFormat);
+      DBG("Render to texture BAD FORMAT %s\n",
+	  _mesa_get_format_name(texImage->TexFormat));
       return GL_FALSE;
    }
 
    irb->Base.Format = texImage->TexFormat;
 
-   texFormat = texImage->TexFormat;
-
    irb->Base.InternalFormat = texImage->InternalFormat;
    irb->Base._BaseFormat = _mesa_base_fbo_format(ctx, irb->Base.InternalFormat);
    irb->Base.Width = texImage->Width;
@@ -471,7 +497,7 @@ intel_wrap_texture(GLcontext * ctx, struct gl_texture_image *texImage)
    irb->Base.ClassID = INTEL_RB_CLASS;
 
    if (!intel_update_wrapper(ctx, irb, texImage)) {
-      _mesa_free(irb);
+      free(irb);
       return NULL;
    }
 
@@ -528,7 +554,7 @@ intel_render_texture(GLcontext * ctx,
        return;
    }
 
-   DBG("Begin render texture tid %x tex=%u w=%d h=%d refcount=%d\n",
+   DBG("Begin render texture tid %lx tex=%u w=%d h=%d refcount=%d\n",
        _glthread_GetID(),
        att->Texture->Name, newImage->Width, newImage->Height,
        irb->Base.RefCount);
@@ -594,11 +620,21 @@ intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb)
       intel_get_renderbuffer(fb, BUFFER_STENCIL);
    int i;
 
-   if (stencilRb && stencilRb != depthRb) {
-      /* we only support combined depth/stencil buffers, not separate
-       * stencil buffers.
-       */
-      fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT;
+   if (depthRb && stencilRb && stencilRb != depthRb) {
+      if (ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Type == GL_TEXTURE &&
+	  ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Type == GL_TEXTURE &&
+	  (ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Texture->Name ==
+	   ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Texture->Name)) {
+	 /* OK */
+      } else {
+	 /* we only support combined depth/stencil buffers, not separate
+	  * stencil buffers.
+	  */
+	 DBG("Only supports combined depth/stencil (found %s, %s)\n",
+	     depthRb ? _mesa_get_format_name(depthRb->Base.Format): "NULL",
+	     stencilRb ? _mesa_get_format_name(stencilRb->Base.Format): "NULL");
+	 fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT;
+      }
    }
 
    for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) {
@@ -609,6 +645,7 @@ intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb)
 	 continue;
 
       if (irb == NULL) {
+	 DBG("software rendering renderbuffer\n");
 	 fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT;
 	 continue;
       }
@@ -643,4 +680,9 @@ intel_fbo_init(struct intel_context *intel)
    intel->ctx.Driver.ResizeBuffers = intel_resize_buffers;
    intel->ctx.Driver.ValidateFramebuffer = intel_validate_framebuffer;
    intel->ctx.Driver.BlitFramebuffer = _mesa_meta_BlitFramebuffer;
+
+#if FEATURE_OES_EGL_image
+   intel->ctx.Driver.EGLImageTargetRenderbufferStorage =
+      intel_image_target_renderbuffer_storage;
+#endif   
 }
diff --git a/shared/intel_fbo.h b/shared/intel_fbo.h
index fa43077..72413f7 100644
--- a/shared/intel_fbo.h
+++ b/shared/intel_fbo.h
@@ -34,38 +34,12 @@
 struct intel_context;
 
 /**
- * Intel framebuffer, derived from gl_framebuffer.
- */
-struct intel_framebuffer
-{
-   struct gl_framebuffer Base;
-
-   struct intel_renderbuffer *color_rb[2];
-
-   /* VBI
-    */
-   GLuint vbl_waited;
-
-   int64_t swap_ust;
-   int64_t swap_missed_ust;
-
-   GLuint swap_count;
-   GLuint swap_missed_count;
-};
-
-
-/**
  * Intel renderbuffer, derived from gl_renderbuffer.
  */
 struct intel_renderbuffer
 {
    struct gl_renderbuffer Base;
    struct intel_region *region;
-
-   GLuint vbl_pending;   /**< vblank sequence number of pending flip */
-
-   uint8_t *span_cache;
-   unsigned long span_cache_offset;
 };
 
 
@@ -121,7 +95,7 @@ intel_fbo_init(struct intel_context *intel);
 
 
 extern void
-intel_flip_renderbuffers(struct intel_framebuffer *intel_fb);
+intel_flip_renderbuffers(struct gl_framebuffer *fb);
 
 
 static INLINE struct intel_region *
diff --git a/shared/intel_mipmap_tree.c b/shared/intel_mipmap_tree.c
index abb3024..4f14946 100644
--- a/shared/intel_mipmap_tree.c
+++ b/shared/intel_mipmap_tree.c
@@ -29,7 +29,6 @@
 #include "intel_mipmap_tree.h"
 #include "intel_regions.h"
 #include "intel_tex_layout.h"
-#include "intel_chipset.h"
 #ifndef I915
 #include "brw_state.h"
 #endif
@@ -87,7 +86,7 @@ intel_miptree_create_internal(struct intel_context *intel,
    mt->pitch = 0;
 
 #ifdef I915
-   if (IS_945(intel->intelScreen->deviceID))
+   if (intel->is_945)
       ok = i945_miptree_layout(intel, mt, tiling);
    else
       ok = i915_miptree_layout(intel, mt, tiling);
@@ -120,8 +119,7 @@ intel_miptree_create(struct intel_context *intel,
    struct intel_mipmap_tree *mt;
    uint32_t tiling;
 
-   if (intel->use_texture_tiling && compress_byte == 0 &&
-       intel->intelScreen->kernel_exec_fencing) {
+   if (intel->use_texture_tiling && compress_byte == 0) {
       if (intel->gen >= 4 &&
 	  (base_format == GL_DEPTH_COMPONENT ||
 	   base_format == GL_DEPTH_STENCIL_EXT))
@@ -224,16 +222,12 @@ int intel_miptree_pitch_align (struct intel_context *intel,
    if (!mt->compressed) {
       int pitch_align;
 
-      if (intel->ttm) {
-	 /* XXX: Align pitch to multiple of 64 bytes for now to allow
-	  * render-to-texture to work in all cases. This should probably be
-	  * replaced at some point by some scheme to only do this when really
-	  * necessary.
-	  */
-	 pitch_align = 64;
-      } else {
-	 pitch_align = 4;
-      }
+      /* XXX: Align pitch to multiple of 64 bytes for now to allow
+       * render-to-texture to work in all cases. This should probably be
+       * replaced at some point by some scheme to only do this when really
+       * necessary.
+       */
+      pitch_align = 64;
 
       if (tiling == I915_TILING_X)
 	 pitch_align = 512;
@@ -243,11 +237,11 @@ int intel_miptree_pitch_align (struct intel_context *intel,
       pitch = ALIGN(pitch * mt->cpp, pitch_align);
 
 #ifdef I915
-      /* XXX: At least the i915 seems very upset when the pitch is a multiple
-       * of 1024 and sometimes 512 bytes - performance can drop by several
-       * times. Go to the next multiple of the required alignment for now.
+      /* Do a little adjustment to linear allocations so that we avoid
+       * hitting the same channel of memory for 2 different pages when
+       * reading a 2x2 subspan or doing bilinear filtering.
        */
-      if (!(pitch & 511) && 
+      if (tiling == I915_TILING_NONE && !(pitch & 511) &&
 	 (pitch + pitch_align) < (1 << ctx->Const.MaxTextureLevels))
 	 pitch += pitch_align;
 #endif
diff --git a/shared/intel_pixel.c b/shared/intel_pixel.c
index 993e427..cb088e4 100644
--- a/shared/intel_pixel.c
+++ b/shared/intel_pixel.c
@@ -29,14 +29,7 @@
 #include "main/state.h"
 #include "main/bufferobj.h"
 #include "main/context.h"
-#include "main/enable.h"
-#include "main/matrix.h"
-#include "main/texstate.h"
-#include "main/varray.h"
-#include "main/viewport.h"
 #include "swrast/swrast.h"
-#include "shader/arbprogram.h"
-#include "shader/program.h"
 
 #include "intel_context.h"
 #include "intel_pixel.h"
@@ -88,10 +81,10 @@ intel_check_blit_fragment_ops(GLcontext * ctx, GLboolean src_alpha_is_one)
       return GL_FALSE;
    }
 
-   if (!(ctx->Color.ColorMask[0] &&
-	 ctx->Color.ColorMask[1] &&
-	 ctx->Color.ColorMask[2] &&
-	 ctx->Color.ColorMask[3])) {
+   if (!(ctx->Color.ColorMask[0][0] &&
+	 ctx->Color.ColorMask[0][1] &&
+	 ctx->Color.ColorMask[0][2] &&
+	 ctx->Color.ColorMask[0][3])) {
       DBG("fallback due to color masking\n");
       return GL_FALSE;
    }
diff --git a/shared/intel_pixel_bitmap.c b/shared/intel_pixel_bitmap.c
index 204a233..076fee8 100644
--- a/shared/intel_pixel_bitmap.c
+++ b/shared/intel_pixel_bitmap.c
@@ -32,11 +32,11 @@
 #include "main/mtypes.h"
 #include "main/macros.h"
 #include "main/bufferobj.h"
+#include "main/polygon.h"
 #include "main/pixelstore.h"
 #include "main/polygon.h"
 #include "main/state.h"
 #include "main/teximage.h"
-#include "main/texenv.h"
 #include "main/texobj.h"
 #include "main/texstate.h"
 #include "main/texparam.h"
@@ -45,7 +45,6 @@
 #include "main/enable.h"
 #include "main/viewport.h"
 #include "shader/arbprogram.h"
-#include "glapi/dispatch.h"
 #include "swrast/swrast.h"
 
 #include "intel_screen.h"
@@ -53,7 +52,6 @@
 #include "intel_batchbuffer.h"
 #include "intel_blit.h"
 #include "intel_regions.h"
-#include "intel_buffer_objects.h"
 #include "intel_buffers.h"
 #include "intel_pixel.h"
 #include "intel_reg.h"
@@ -105,7 +103,7 @@ static void set_bit( GLubyte *dest, GLuint bit )
 }
 
 /* Extract a rectangle's worth of data from the bitmap.  Called
- * per-cliprect.
+ * per chunk of HW-sized bitmap.
  */
 static GLuint get_bitmap_rect(GLsizei width, GLsizei height,
 			      const struct gl_pixelstore_attrib *unpack,
@@ -125,7 +123,7 @@ static GLuint get_bitmap_rect(GLsizei width, GLsizei height,
    GLuint count = 0;
 
    if (INTEL_DEBUG & DEBUG_PIXEL)
-      _mesa_printf("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n",
+      printf("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n",
 		   __FUNCTION__, x,y,w,h,width,height,unpack->SkipPixels, src_offset, mask);
 
    if (invert) {
@@ -165,7 +163,7 @@ static GLuint get_bitmap_rect(GLsizei width, GLsizei height,
  * Returns the low Y value of the vertical range given, flipped according to
  * whether the framebuffer is or not.
  */
-static inline int
+static INLINE int
 y_flip(struct gl_framebuffer *fb, int y, int height)
 {
    if (fb->Name != 0)
@@ -190,11 +188,12 @@ do_blit_bitmap( GLcontext *ctx,
    GLfloat tmpColor[4];
    GLubyte ubcolor[4];
    GLuint color;
-   unsigned int num_cliprects;
-   drm_clip_rect_t *cliprects;
-   int x_off, y_off;
    GLsizei bitmap_width = width;
    GLsizei bitmap_height = height;
+   GLint px, py;
+   GLuint stipple[32];
+   GLint orig_dstx = dstx;
+   GLint orig_dsty = dsty;
 
    /* Update draw buffer bounds */
    _mesa_update_state(ctx);
@@ -228,104 +227,72 @@ do_blit_bitmap( GLcontext *ctx,
    UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[3], tmpColor[3]);
 
    if (dst->cpp == 2)
-      color = INTEL_PACKCOLOR565(ubcolor[0], ubcolor[1], ubcolor[2]);
+      color = PACK_COLOR_565(ubcolor[0], ubcolor[1], ubcolor[2]);
    else
-      color = INTEL_PACKCOLOR8888(ubcolor[0], ubcolor[1],
-				  ubcolor[2], ubcolor[3]);
+      color = PACK_COLOR_8888(ubcolor[3], ubcolor[0], ubcolor[1], ubcolor[2]);
 
    if (!intel_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F))
       return GL_FALSE;
 
-   LOCK_HARDWARE(intel);
-
-   intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
-   if (num_cliprects != 0) {
-      GLuint i;
-      GLint orig_dstx = dstx;
-      GLint orig_dsty = dsty;
-
-      /* Clip to buffer bounds and scissor. */
-      if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin,
-				fb->_Xmax, fb->_Ymax,
-				&dstx, &dsty, &width, &height))
-            goto out;
-
-      dstx = x_off + dstx;
-      dsty = y_off + y_flip(fb, dsty, height);
-
-      for (i = 0; i < num_cliprects; i++) {
-	 int box_x, box_y, box_w, box_h;
-	 GLint px, py;
-	 GLuint stipple[32];  
-
-	 box_x = dstx;
-	 box_y = dsty;
-	 box_w = width;
-	 box_h = height;
-
-	 /* Clip to drawable cliprect */
-         if (!_mesa_clip_to_region(cliprects[i].x1,
-				   cliprects[i].y1,
-				   cliprects[i].x2,
-				   cliprects[i].y2,
-				   &box_x, &box_y, &box_w, &box_h))
-	    continue;
+   intel_prepare_render(intel);
+
+   /* Clip to buffer bounds and scissor. */
+   if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin,
+			     fb->_Xmax, fb->_Ymax,
+			     &dstx, &dsty, &width, &height))
+      goto out;
+
+   dsty = y_flip(fb, dsty, height);
 
 #define DY 32
 #define DX 32
 
-	 /* Then, finally, chop it all into chunks that can be
-	  * digested by hardware:
+   /* Chop it all into chunks that can be digested by hardware: */
+   for (py = 0; py < height; py += DY) {
+      for (px = 0; px < width; px += DX) {
+	 int h = MIN2(DY, height - py);
+	 int w = MIN2(DX, width - px);
+	 GLuint sz = ALIGN(ALIGN(w,8) * h, 64)/8;
+	 GLenum logic_op = ctx->Color.ColorLogicOpEnabled ?
+	    ctx->Color.LogicOp : GL_COPY;
+
+	 assert(sz <= sizeof(stipple));
+	 memset(stipple, 0, sz);
+
+	 /* May need to adjust this when padding has been introduced in
+	  * sz above:
+	  *
+	  * Have to translate destination coordinates back into source
+	  * coordinates.
 	  */
-	 for (py = 0; py < box_h; py += DY) { 
-	    for (px = 0; px < box_w; px += DX) { 
-	       int h = MIN2(DY, box_h - py);
-	       int w = MIN2(DX, box_w - px); 
-	       GLuint sz = ALIGN(ALIGN(w,8) * h, 64)/8;
-	       GLenum logic_op = ctx->Color.ColorLogicOpEnabled ?
-		  ctx->Color.LogicOp : GL_COPY;
-
-	       assert(sz <= sizeof(stipple));
-	       memset(stipple, 0, sz);
-
-	       /* May need to adjust this when padding has been introduced in
-		* sz above:
-		*
-		* Have to translate destination coordinates back into source
-		* coordinates.
-		*/
-	       if (get_bitmap_rect(bitmap_width, bitmap_height, unpack,
-				   bitmap,
-				   -orig_dstx + (box_x + px - x_off),
-				   -orig_dsty + y_flip(fb,
-						       box_y + py - y_off, h),
-				   w, h,
-				   (GLubyte *)stipple,
-				   8,
-				   fb->Name == 0 ? GL_TRUE : GL_FALSE) == 0)
-		  continue;
-
-	       if (!intelEmitImmediateColorExpandBlit(intel,
-						      dst->cpp,
-						      (GLubyte *)stipple,
-						      sz,
-						      color,
-						      dst->pitch,
-						      dst->buffer,
-						      0,
-						      dst->tiling,
-						      box_x + px,
-						      box_y + py,
-						      w, h,
-						      logic_op)) {
-		  return GL_FALSE;
-	       }
-	    } 
-	 } 
+	 if (get_bitmap_rect(bitmap_width, bitmap_height, unpack,
+			     bitmap,
+			     -orig_dstx + (dstx + px),
+			     -orig_dsty + y_flip(fb, dsty + py, h),
+			     w, h,
+			     (GLubyte *)stipple,
+			     8,
+			     fb->Name == 0 ? GL_TRUE : GL_FALSE) == 0)
+	    continue;
+
+	 if (!intelEmitImmediateColorExpandBlit(intel,
+						dst->cpp,
+						(GLubyte *)stipple,
+						sz,
+						color,
+						dst->pitch,
+						dst->buffer,
+						0,
+						dst->tiling,
+						dstx + px,
+						dsty + py,
+						w, h,
+						logic_op)) {
+	    return GL_FALSE;
+	 }
       }
    }
 out:
-   UNLOCK_HARDWARE(intel);
 
    if (INTEL_DEBUG & DEBUG_SYNC)
       intel_batchbuffer_flush(intel->batch);
@@ -428,7 +395,7 @@ intel_texture_bitmap(GLcontext * ctx,
    }
 
    /* Convert the A1 bitmap to an A8 format suitable for glTexImage */
-   a8_bitmap = _mesa_calloc(width * height);
+   a8_bitmap = calloc(1, width * height);
    _mesa_expand_bitmap(width, height, unpack, bitmap, a8_bitmap, width, 0xff);
 
    if (_mesa_is_bufferobj(unpack->BufferObj)) {
@@ -463,7 +430,7 @@ intel_texture_bitmap(GLcontext * ctx,
    _mesa_PixelStorei(GL_UNPACK_ALIGNMENT, 1);
    _mesa_TexImage2D(GL_TEXTURE_2D, 0, GL_ALPHA, width, height, 0,
 		    GL_ALPHA, GL_UNSIGNED_BYTE, a8_bitmap);
-   _mesa_free(a8_bitmap);
+   free(a8_bitmap);
 
    meta_set_fragment_program(&intel->meta, &intel->meta.bitmap_fp, fp);
    _mesa_ProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 0,
@@ -504,6 +471,7 @@ intel_texture_bitmap(GLcontext * ctx,
    meta_restore_fragment_program(&intel->meta);
    meta_restore_vertex_program(&intel->meta);
 
+   _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture);
    _mesa_PopClientAttrib();
    _mesa_PopAttrib();
 
@@ -548,7 +516,7 @@ intelBitmap(GLcontext * ctx,
       return;
 
    if (INTEL_DEBUG & DEBUG_PIXEL)
-      _mesa_printf("%s: fallback to swrast\n", __FUNCTION__);
+      printf("%s: fallback to swrast\n", __FUNCTION__);
 
    _swrast_Bitmap(ctx, x, y, width, height, unpack, pixels);
 }
diff --git a/shared/intel_pixel_copy.c b/shared/intel_pixel_copy.c
index 622aaa2..f4f3fd6 100644
--- a/shared/intel_pixel_copy.c
+++ b/shared/intel_pixel_copy.c
@@ -35,28 +35,33 @@
 #include "intel_buffers.h"
 #include "intel_regions.h"
 #include "intel_pixel.h"
+#include "intel_fbo.h"
 
 #define FILE_DEBUG_FLAG DEBUG_PIXEL
 
 static struct intel_region *
 copypix_src_region(struct intel_context *intel, GLenum type)
 {
+   struct intel_renderbuffer *depth;
+
+   depth = (struct intel_renderbuffer *)
+      &intel->ctx.DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer;
+
    switch (type) {
    case GL_COLOR:
       return intel_readbuf_region(intel);
    case GL_DEPTH:
-      /* Don't think this is really possible execpt at 16bpp, when we have no stencil.
-       */
-      if (intel->depth_region && intel->depth_region->cpp == 2)
-         return intel->depth_region;
+      /* Don't think this is really possible execpt at 16bpp, when we
+       * have no stencil. */
+      if (depth && depth->region->cpp == 2)
+         return depth->region;
    case GL_STENCIL:
-      /* Don't think this is really possible. 
-       */
+      /* Don't think this is really possible. */
       break;
    case GL_DEPTH_STENCIL_EXT:
       /* Does it matter whether it is stencil/depth or depth/stencil?
        */
-      return intel->depth_region;
+      return depth->region;
    default:
       break;
    }
@@ -83,10 +88,10 @@ intel_check_copypixel_blit_fragment_ops(GLcontext * ctx)
             ctx->Depth.Test ||
             ctx->Fog.Enabled ||
             ctx->Stencil._Enabled ||
-            !ctx->Color.ColorMask[0] ||
-            !ctx->Color.ColorMask[1] ||
-            !ctx->Color.ColorMask[2] ||
-            !ctx->Color.ColorMask[3] ||
+            !ctx->Color.ColorMask[0][0] ||
+            !ctx->Color.ColorMask[0][1] ||
+            !ctx->Color.ColorMask[0][2] ||
+            !ctx->Color.ColorMask[0][3] ||
             ctx->Texture._EnabledUnits ||
 	    ctx->FragmentProgram._Enabled ||
 	    ctx->Color.BlendEnabled);
@@ -107,9 +112,10 @@ do_blit_copypixels(GLcontext * ctx,
    struct intel_region *src = copypix_src_region(intel, type);
    struct gl_framebuffer *fb = ctx->DrawBuffer;
    struct gl_framebuffer *read_fb = ctx->ReadBuffer;
-   unsigned int num_cliprects;
-   drm_clip_rect_t *cliprects;
-   int x_off, y_off;
+   GLint orig_dstx;
+   GLint orig_dsty;
+   GLint orig_srcx;
+   GLint orig_srcy;
 
    if (type == GL_DEPTH || type == GL_STENCIL) {
       if (INTEL_DEBUG & DEBUG_FALLBACKS)
@@ -130,98 +136,58 @@ do_blit_copypixels(GLcontext * ctx,
    if (!src || !dst)
       return GL_FALSE;
 
-
-
    intelFlush(&intel->ctx);
 
-   LOCK_HARDWARE(intel);
-
-   intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
-   if (num_cliprects != 0) {
-      GLint delta_x;
-      GLint delta_y;
-      GLint orig_dstx;
-      GLint orig_dsty;
-      GLint orig_srcx;
-      GLint orig_srcy;
-      GLuint i;
-
-      /* XXX: We fail to handle different inversion between read and draw framebuffer. */
-
-      /* Clip to destination buffer. */
-      orig_dstx = dstx;
-      orig_dsty = dsty;
-      if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin,
-				fb->_Xmax, fb->_Ymax,
-				&dstx, &dsty, &width, &height))
-	 goto out;
-      /* Adjust src coords for our post-clipped destination origin */
-      srcx += dstx - orig_dstx;
-      srcy += dsty - orig_dsty;
-
-      /* Clip to source buffer. */
-      orig_srcx = srcx;
-      orig_srcy = srcy;
-      if (!_mesa_clip_to_region(0, 0,
-				read_fb->Width, read_fb->Height,
-				&srcx, &srcy, &width, &height))
-	 goto out;
-      /* Adjust dst coords for our post-clipped source origin */
-      dstx += srcx - orig_srcx;
-      dsty += srcy - orig_srcy;
-
-      /* Convert from GL to hardware coordinates:
-       */
-      if (fb->Name == 0) {
-	 /* copypixels to a system framebuffer */
-	 dstx = x_off + dstx;
-	 dsty = y_off + (fb->Height - dsty - height);
-      } else {
-	 /* copypixels to a user framebuffer object */
-	 dstx = x_off + dstx;
-	 dsty = y_off + dsty;
-      }
-
-      /* Flip source Y if it's a system framebuffer. */
-      if (read_fb->Name == 0) {
-	 srcx = intel->driReadDrawable->x + srcx;
-	 srcy = intel->driReadDrawable->y + (fb->Height - srcy - height);
-      }
-
-      delta_x = srcx - dstx;
-      delta_y = srcy - dsty;
-      /* Could do slightly more clipping: Eg, take the intersection of
-       * the destination cliprects and the read drawable cliprects
-       *
-       * This code will not overwrite other windows, but will
-       * introduce garbage when copying from obscured window regions.
-       */
-      for (i = 0; i < num_cliprects; i++) {
-	 GLint clip_x = dstx;
-	 GLint clip_y = dsty;
-	 GLint clip_w = width;
-	 GLint clip_h = height;
-
-         if (!_mesa_clip_to_region(cliprects[i].x1, cliprects[i].y1,
-				   cliprects[i].x2, cliprects[i].y2,
-				   &clip_x, &clip_y, &clip_w, &clip_h))
-            continue;
-
-	 if (!intel_region_copy(intel,
-				dst, 0, clip_x, clip_y,
-				src, 0, clip_x + delta_x, clip_y + delta_y,
-				clip_w, clip_h,
-				ctx->Color.ColorLogicOpEnabled ?
-				ctx->Color.LogicOp : GL_COPY)) {
-	    DBG("%s: blit failure\n", __FUNCTION__);
-	    UNLOCK_HARDWARE(intel);
-	    return GL_FALSE;
-	 }
-      }
+   intel_prepare_render(intel);
+
+   /* XXX: We fail to handle different inversion between read and draw framebuffer. */
+
+   /* Clip to destination buffer. */
+   orig_dstx = dstx;
+   orig_dsty = dsty;
+   if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin,
+			     fb->_Xmax, fb->_Ymax,
+			     &dstx, &dsty, &width, &height))
+      goto out;
+   /* Adjust src coords for our post-clipped destination origin */
+   srcx += dstx - orig_dstx;
+   srcy += dsty - orig_dsty;
+
+   /* Clip to source buffer. */
+   orig_srcx = srcx;
+   orig_srcy = srcy;
+   if (!_mesa_clip_to_region(0, 0,
+			     read_fb->Width, read_fb->Height,
+			     &srcx, &srcy, &width, &height))
+      goto out;
+   /* Adjust dst coords for our post-clipped source origin */
+   dstx += srcx - orig_srcx;
+   dsty += srcy - orig_srcy;
+
+   /* Convert from GL to hardware coordinates: */
+   if (fb->Name == 0) {
+      /* copypixels to a system framebuffer */
+      dsty = fb->Height - dsty - height;
+   } else {
+      /* copypixels to a user framebuffer object */
+      dsty = dsty;
+   }
+
+   /* Flip source Y if it's a system framebuffer. */
+   if (read_fb->Name == 0)
+      srcy = fb->Height - srcy - height;
+
+   if (!intel_region_copy(intel,
+			  dst, 0, dstx, dsty,
+			  src, 0, srcx, srcy,
+			  width, height,
+			  ctx->Color.ColorLogicOpEnabled ?
+			  ctx->Color.LogicOp : GL_COPY)) {
+      DBG("%s: blit failure\n", __FUNCTION__);
+      return GL_FALSE;
    }
-out:
-   UNLOCK_HARDWARE(intel);
 
+out:
    intel_check_front_buffer_rendering(intel);
 
    DBG("%s: success\n", __FUNCTION__);
diff --git a/shared/intel_pixel_draw.c b/shared/intel_pixel_draw.c
index 9b382e3..bd1dd13 100644
--- a/shared/intel_pixel_draw.c
+++ b/shared/intel_pixel_draw.c
@@ -46,10 +46,6 @@
 #include "drivers/common/meta.h"
 
 #include "intel_context.h"
-#include "intel_batchbuffer.h"
-#include "intel_blit.h"
-#include "intel_buffers.h"
-#include "intel_regions.h"
 #include "intel_pixel.h"
 #include "intel_fbo.h"
 
@@ -69,7 +65,6 @@ intel_stencil_drawpixels(GLcontext * ctx,
    GLfloat vertices[4][2];
    struct intel_renderbuffer *irb;
    struct intel_renderbuffer *depth_irb;
-   struct gl_renderbuffer *rb;
    struct gl_pixelstore_attrib old_unpack;
    GLstencil *stencil_pixels;
    int row, y1, y2;
@@ -152,7 +147,7 @@ intel_stencil_drawpixels(GLcontext * ctx,
 
    /* Unpack the supplied stencil values into a ubyte buffer. */
    assert(sizeof(GLstencil) == sizeof(GLubyte));
-   stencil_pixels = _mesa_malloc(width * height * sizeof(GLstencil));
+   stencil_pixels = malloc(width * height * sizeof(GLstencil));
    for (row = 0; row < height; row++) {
       GLvoid *source = _mesa_image_address2d(unpack, pixels,
 					     width, height,
@@ -170,7 +165,6 @@ intel_stencil_drawpixels(GLcontext * ctx,
     */
    depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
    irb = intel_create_renderbuffer(MESA_FORMAT_ARGB8888);
-   rb = &irb->Base;
    irb->Base.Width = depth_irb->Base.Width;
    irb->Base.Height = depth_irb->Base.Height;
    intel_renderbuffer_set_region(irb, depth_irb->region);
@@ -207,7 +201,7 @@ intel_stencil_drawpixels(GLcontext * ctx,
    _mesa_TexImage2D(GL_TEXTURE_2D, 0, GL_INTENSITY, width, height, 0,
 		    GL_RED, GL_UNSIGNED_BYTE, stencil_pixels);
    ctx->Unpack = old_unpack;
-   _mesa_free(stencil_pixels);
+   free(stencil_pixels);
 
    meta_set_passthrough_transform(&intel->meta);
 
diff --git a/shared/intel_pixel_read.c b/shared/intel_pixel_read.c
index 20424e2..2ac3da7 100644
--- a/shared/intel_pixel_read.c
+++ b/shared/intel_pixel_read.c
@@ -36,7 +36,6 @@
 
 #include "intel_screen.h"
 #include "intel_context.h"
-#include "intel_batchbuffer.h"
 #include "intel_blit.h"
 #include "intel_buffers.h"
 #include "intel_regions.h"
@@ -65,103 +64,6 @@
  * any case.
  */
 
-
-static GLboolean
-do_texture_readpixels(GLcontext * ctx,
-                      GLint x, GLint y, GLsizei width, GLsizei height,
-                      GLenum format, GLenum type,
-                      const struct gl_pixelstore_attrib *pack,
-                      struct intel_region *dest_region)
-{
-#if 0
-   struct intel_context *intel = intel_context(ctx);
-   intelScreenPrivate *screen = intel->intelScreen;
-   GLint pitch = pack->RowLength ? pack->RowLength : width;
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;
-   int textureFormat;
-   GLenum glTextureFormat;
-   int destFormat, depthFormat, destPitch;
-   drm_clip_rect_t tmp;
-
-   if (INTEL_DEBUG & DEBUG_PIXEL)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-
-
-   if (ctx->_ImageTransferState ||
-       pack->SwapBytes || pack->LsbFirst || !pack->Invert) {
-      if (INTEL_DEBUG & DEBUG_PIXEL)
-         fprintf(stderr, "%s: check_color failed\n", __FUNCTION__);
-      return GL_FALSE;
-   }
-
-   intel->vtbl.meta_texrect_source(intel, intel_readbuf_region(intel));
-
-   if (!intel->vtbl.meta_render_dest(intel, dest_region, type, format)) {
-      if (INTEL_DEBUG & DEBUG_PIXEL)
-         fprintf(stderr, "%s: couldn't set dest %s/%s\n",
-                 __FUNCTION__,
-                 _mesa_lookup_enum_by_nr(type),
-                 _mesa_lookup_enum_by_nr(format));
-      return GL_FALSE;
-   }
-
-   LOCK_HARDWARE(intel);
-
-   if (intel->driDrawable->numClipRects) {
-      intel->vtbl.install_meta_state(intel);
-      intel->vtbl.meta_no_depth_write(intel);
-      intel->vtbl.meta_no_stencil_write(intel);
-
-      if (!driClipRectToFramebuffer(ctx->ReadBuffer, &x, &y, &width, &height)) {
-         UNLOCK_HARDWARE(intel);
-         SET_STATE(i830, state);
-         if (INTEL_DEBUG & DEBUG_PIXEL)
-            fprintf(stderr, "%s: cliprect failed\n", __FUNCTION__);
-         return GL_TRUE;
-      }
-
-      y = dPriv->h - y - height;
-      x += dPriv->x;
-      y += dPriv->y;
-
-
-      /* Set the frontbuffer up as a large rectangular texture.
-       */
-      intel->vtbl.meta_tex_rect_source(intel, src_region, textureFormat);
-
-
-      intel->vtbl.meta_texture_blend_replace(i830, glTextureFormat);
-
-
-      /* Set the 3d engine to draw into the destination region:
-       */
-
-      intel->vtbl.meta_draw_region(intel, dest_region);
-      intel->vtbl.meta_draw_format(intel, destFormat, depthFormat);     /* ?? */
-
-
-      /* Draw a single quad, no cliprects:
-       */
-      intel->vtbl.meta_disable_cliprects(intel);
-
-      intel->vtbl.draw_quad(intel,
-                            0, width, 0, height,
-                            0x00ff00ff, x, x + width, y, y + height);
-
-      intel->vtbl.leave_meta_state(intel);
-   }
-   UNLOCK_HARDWARE(intel);
-
-   intel_region_wait_fence(ctx, dest_region);   /* required by GL */
-   return GL_TRUE;
-#endif
-
-   return GL_FALSE;
-}
-
-
-
-
 static GLboolean
 do_blit_readpixels(GLcontext * ctx,
                    GLint x, GLint y, GLsizei width, GLsizei height,
@@ -173,9 +75,12 @@ do_blit_readpixels(GLcontext * ctx,
    struct intel_buffer_object *dst = intel_buffer_object(pack->BufferObj);
    GLuint dst_offset;
    GLuint rowLength;
+   drm_intel_bo *dst_buffer;
+   GLboolean all;
+   GLint dst_x, dst_y;
 
    if (INTEL_DEBUG & DEBUG_PIXEL)
-      _mesa_printf("%s\n", __FUNCTION__);
+      printf("%s\n", __FUNCTION__);
 
    if (!src)
       return GL_FALSE;
@@ -184,7 +89,7 @@ do_blit_readpixels(GLcontext * ctx,
       /* PBO only for now:
        */
       if (INTEL_DEBUG & DEBUG_PIXEL)
-         _mesa_printf("%s - not PBO\n", __FUNCTION__);
+         printf("%s - not PBO\n", __FUNCTION__);
       return GL_FALSE;
    }
 
@@ -192,13 +97,13 @@ do_blit_readpixels(GLcontext * ctx,
    if (ctx->_ImageTransferState ||
        !intel_check_blit_format(src, format, type)) {
       if (INTEL_DEBUG & DEBUG_PIXEL)
-         _mesa_printf("%s - bad format for blit\n", __FUNCTION__);
+         printf("%s - bad format for blit\n", __FUNCTION__);
       return GL_FALSE;
    }
 
    if (pack->Alignment != 1 || pack->SwapBytes || pack->LsbFirst) {
       if (INTEL_DEBUG & DEBUG_PIXEL)
-         _mesa_printf("%s: bad packing params\n", __FUNCTION__);
+         printf("%s: bad packing params\n", __FUNCTION__);
       return GL_FALSE;
    }
 
@@ -209,67 +114,52 @@ do_blit_readpixels(GLcontext * ctx,
 
    if (pack->Invert) {
       if (INTEL_DEBUG & DEBUG_PIXEL)
-         _mesa_printf("%s: MESA_PACK_INVERT not done yet\n", __FUNCTION__);
+         printf("%s: MESA_PACK_INVERT not done yet\n", __FUNCTION__);
       return GL_FALSE;
    }
    else {
-      rowLength = -rowLength;
+      if (ctx->ReadBuffer->Name == 0)
+	 rowLength = -rowLength;
    }
 
    dst_offset = (GLintptr) _mesa_image_address(2, pack, pixels, width, height,
 					       format, type, 0, 0, 0);
 
+   if (!_mesa_clip_copytexsubimage(ctx,
+				   &dst_x, &dst_y,
+				   &x, &y,
+				   &width, &height)) {
+      return GL_TRUE;
+   }
 
-   /* Although the blits go on the command buffer, need to do this and
-    * fire with lock held to guarentee cliprects are correct.
-    */
-   intelFlush(&intel->ctx);
-   LOCK_HARDWARE(intel);
-
-   if (intel->driReadDrawable->numClipRects) {
-      GLboolean all = (width * height * src->cpp == dst->Base.Size &&
-                       x == 0 && dst_offset == 0);
-
-      dri_bo *dst_buffer = intel_bufferobj_buffer(intel, dst,
-						  all ? INTEL_WRITE_FULL :
-						  INTEL_WRITE_PART);
-      __DRIdrawablePrivate *dPriv = intel->driReadDrawable;
-      int nbox = dPriv->numClipRects;
-      drm_clip_rect_t *box = dPriv->pClipRects;
-      drm_clip_rect_t rect;
-      drm_clip_rect_t src_rect;
-      int i;
+   intel_prepare_render(intel);
 
-      src_rect.x1 = dPriv->x + x;
-      src_rect.y1 = dPriv->y + dPriv->h - (y + height);
-      src_rect.x2 = src_rect.x1 + width;
-      src_rect.y2 = src_rect.y1 + height;
+   all = (width * height * src->cpp == dst->Base.Size &&
+	  x == 0 && dst_offset == 0);
 
+   dst_x = 0;
+   dst_y = 0;
 
+   dst_buffer = intel_bufferobj_buffer(intel, dst,
+					       all ? INTEL_WRITE_FULL :
+					       INTEL_WRITE_PART);
 
-      for (i = 0; i < nbox; i++) {
-         if (!intel_intersect_cliprects(&rect, &src_rect, &box[i]))
-            continue;
+   if (ctx->ReadBuffer->Name == 0)
+      y = ctx->ReadBuffer->Height - (y + height);
 
-         if (!intelEmitCopyBlit(intel,
-				src->cpp,
-				src->pitch, src->buffer, 0, src->tiling,
-				rowLength, dst_buffer, dst_offset, GL_FALSE,
-				rect.x1,
-				rect.y1,
-				rect.x1 - src_rect.x1,
-				rect.y2 - src_rect.y2,
-				rect.x2 - rect.x1, rect.y2 - rect.y1,
-				GL_COPY)) {
-	    UNLOCK_HARDWARE(intel);
-	    return GL_FALSE;
-	 }
-      }
+   if (!intelEmitCopyBlit(intel,
+			  src->cpp,
+			  src->pitch, src->buffer, 0, src->tiling,
+			  rowLength, dst_buffer, dst_offset, GL_FALSE,
+			  x, y,
+			  dst_x, dst_y,
+			  width, height,
+			  GL_COPY)) {
+      return GL_FALSE;
    }
-   UNLOCK_HARDWARE(intel);
 
    if (INTEL_DEBUG & DEBUG_PIXEL)
-      _mesa_printf("%s - DONE\n", __FUNCTION__);
+      printf("%s - DONE\n", __FUNCTION__);
 
    return GL_TRUE;
 }
@@ -284,22 +174,14 @@ intelReadPixels(GLcontext * ctx,
       fprintf(stderr, "%s\n", __FUNCTION__);
 
    intelFlush(ctx);
+   intel_prepare_render(intel_context(ctx));
 
    if (do_blit_readpixels
        (ctx, x, y, width, height, format, type, pack, pixels))
       return;
 
-#ifdef I915
-   if (do_texture_readpixels
-       (ctx, x, y, width, height, format, type, pack, pixels))
-      return;
-#else
-   (void)do_blit_readpixels;
-   (void)do_texture_readpixels;
-#endif
-
    if (INTEL_DEBUG & DEBUG_PIXEL)
-      _mesa_printf("%s: fallback to swrast\n", __FUNCTION__);
+      printf("%s: fallback to swrast\n", __FUNCTION__);
 
    /* Update Mesa state before calling down into _swrast_ReadPixels, as
     * the spans code requires the computed buffer states to be up to date,
diff --git a/shared/intel_regions.c b/shared/intel_regions.c
index 8097516..f042bcb 100644
--- a/shared/intel_regions.c
+++ b/shared/intel_regions.c
@@ -42,13 +42,13 @@
 #include <sys/ioctl.h>
 #include <errno.h>
 
+#include "main/hash.h"
 #include "intel_context.h"
 #include "intel_regions.h"
 #include "intel_blit.h"
 #include "intel_buffer_objects.h"
 #include "intel_bufmgr.h"
 #include "intel_batchbuffer.h"
-#include "intel_chipset.h"
 
 #define FILE_DEBUG_FLAG DEBUG_REGION
 
@@ -118,8 +118,7 @@ intel_region_map(struct intel_context *intel, struct intel_region *region)
       if (region->pbo)
          intel_region_cow(intel, region);
 
-      if (region->tiling != I915_TILING_NONE &&
-	  intel->intelScreen->kernel_exec_fencing)
+      if (region->tiling != I915_TILING_NONE)
 	 drm_intel_gem_bo_map_gtt(region->buffer);
       else
 	 dri_bo_map(region->buffer, GL_TRUE);
@@ -134,8 +133,7 @@ intel_region_unmap(struct intel_context *intel, struct intel_region *region)
 {
    _DBG("%s %p\n", __FUNCTION__, region);
    if (!--region->map_refcount) {
-      if (region->tiling != I915_TILING_NONE &&
-	  intel->intelScreen->kernel_exec_fencing)
+      if (region->tiling != I915_TILING_NONE)
 	 drm_intel_gem_bo_unmap_gtt(region->buffer);
       else
 	 dri_bo_unmap(region->buffer);
@@ -180,36 +178,19 @@ intel_region_alloc(struct intel_context *intel,
 {
    dri_bo *buffer;
    struct intel_region *region;
+   unsigned long flags = 0;
+   unsigned long aligned_pitch;
 
-   /* If we're tiled, our allocations are in 8 or 32-row blocks, so
-    * failure to align our height means that we won't allocate enough pages.
-    *
-    * If we're untiled, we still have to align to 2 rows high because the
-    * data port accesses 2x2 blocks even if the bottom row isn't to be
-    * rendered, so failure to align means we could walk off the end of the
-    * GTT and fault.
-    */
-   if (tiling == I915_TILING_X)
-      height = ALIGN(height, 8);
-   else if (tiling == I915_TILING_Y)
-      height = ALIGN(height, 32);
-   else
-      height = ALIGN(height, 2);
-
-   /* If we're untiled, we have to align to 2 rows high because the
-    * data port accesses 2x2 blocks even if the bottom row isn't to be
-    * rendered, so failure to align means we could walk off the end of the
-    * GTT and fault.
+   if (expect_accelerated_upload)
+      flags |= BO_ALLOC_FOR_RENDER;
+
+   buffer = drm_intel_bo_alloc_tiled(intel->bufmgr, "region",
+				     width, height, cpp,
+				     &tiling, &aligned_pitch, flags);
+   /* We've already chosen a pitch as part of miptree layout.  It had
+    * better be the same.
     */
-   height = ALIGN(height, 2);
-
-   if (expect_accelerated_upload) {
-      buffer = drm_intel_bo_alloc_for_render(intel->bufmgr, "region",
-					     pitch * cpp * height, 64);
-   } else {
-      buffer = drm_intel_bo_alloc(intel->bufmgr, "region",
-				  pitch * cpp * height, 64);
-   }
+   assert(aligned_pitch == pitch * cpp);
 
    region = intel_region_alloc_internal(intel, cpp, width, height,
 					pitch, buffer);
@@ -229,10 +210,24 @@ intel_region_alloc_for_handle(struct intel_context *intel,
 			      GLuint width, GLuint height, GLuint pitch,
 			      GLuint handle, const char *name)
 {
-   struct intel_region *region;
+   struct intel_region *region, *dummy;
    dri_bo *buffer;
    int ret;
 
+   region = _mesa_HashLookup(intel->intelScreen->named_regions, handle);
+   if (region != NULL) {
+      dummy = NULL;
+      if (region->width != width || region->height != height ||
+	  region->cpp != cpp || region->pitch != pitch) {
+	 fprintf(stderr,
+		 "Region for name %d already exists but is not compatible\n",
+		 handle);
+	 return NULL;
+      }
+      intel_region_reference(&dummy, region);
+      return dummy;
+   }
+
    buffer = intel_bo_gem_create_from_name(intel->bufmgr, name, handle);
 
    region = intel_region_alloc_internal(intel, cpp,
@@ -249,6 +244,10 @@ intel_region_alloc_for_handle(struct intel_context *intel,
       return NULL;
    }
 
+   region->name = handle;
+   region->screen = intel->intelScreen;
+   _mesa_HashInsert(intel->intelScreen->named_regions, handle, region);
+
    return region;
 }
 
@@ -288,10 +287,8 @@ intel_region_release(struct intel_region **region_handle)
       region->pbo = NULL;
       dri_bo_unreference(region->buffer);
 
-      if (region->classic_map != NULL) {
-	 drmUnmap(region->classic_map,
-			region->pitch * region->cpp * region->height);
-      }
+      if (region->name > 0)
+	 _mesa_HashRemove(region->screen->named_regions, region->name);
 
       free(region);
    }
@@ -362,14 +359,14 @@ intel_region_data(struct intel_context *intel,
          intel_region_cow(intel, dst);
    }
 
-   LOCK_HARDWARE(intel);
+   intel_prepare_render(intel);
+
    _mesa_copy_rect(intel_region_map(intel, dst) + dst_offset,
                    dst->cpp,
                    dst->pitch,
                    dstx, dsty, width, height, src, src_pitch, srcx, srcy);
 
    intel_region_unmap(intel, dst);
-   UNLOCK_HARDWARE(intel);
 }
 
 /* Copy rectangular sub-regions. Need better logic about when to
@@ -445,6 +442,7 @@ intel_region_attach_pbo(struct intel_context *intel,
    region->pbo->region = region;
    dri_bo_reference(buffer);
    region->buffer = buffer;
+   region->tiling = I915_TILING_NONE;
 }
 
 
@@ -485,7 +483,7 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region)
    /* Now blit from the texture buffer to the new buffer: 
     */
 
-   LOCK_HARDWARE(intel);
+   intel_prepare_render(intel);
    ok = intelEmitCopyBlit(intel,
                           region->cpp,
                           region->pitch, pbo->buffer, 0, region->tiling,
@@ -494,7 +492,6 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region)
                           region->pitch, region->height,
                           GL_COPY);
    assert(ok);
-   UNLOCK_HARDWARE(intel);
 }
 
 dri_bo *
@@ -510,125 +507,3 @@ intel_region_buffer(struct intel_context *intel,
 
    return region->buffer;
 }
-
-static struct intel_region *
-intel_recreate_static(struct intel_context *intel,
-		      const char *name,
-		      struct intel_region *region,
-		      intelRegion *region_desc)
-{
-   intelScreenPrivate *intelScreen = intel->intelScreen;
-   int ret;
-
-   if (region == NULL) {
-      region = calloc(sizeof(*region), 1);
-      region->refcount = 1;
-      _DBG("%s creating new region %p\n", __FUNCTION__, region);
-   }
-   else {
-      _DBG("%s %p\n", __FUNCTION__, region);
-   }
-
-   if (intel->ctx.Visual.rgbBits == 24)
-      region->cpp = 4;
-   else
-      region->cpp = intel->ctx.Visual.rgbBits / 8;
-   region->pitch = intelScreen->pitch;
-   region->width = intelScreen->width;
-   region->height = intelScreen->height;
-
-   if (region->buffer != NULL) {
-      dri_bo_unreference(region->buffer);
-      region->buffer = NULL;
-   }
-
-   if (intel->ttm) {
-      assert(region_desc->bo_handle != -1);
-      region->buffer = intel_bo_gem_create_from_name(intel->bufmgr,
-						     name,
-						     region_desc->bo_handle);
-
-      ret = dri_bo_get_tiling(region->buffer, &region->tiling,
-			      &region->bit_6_swizzle);
-      if (ret != 0) {
-	 fprintf(stderr, "Couldn't get tiling of buffer %d (%s): %s\n",
-		 region_desc->bo_handle, name, strerror(-ret));
-	 intel_region_release(&region);
-	 return NULL;
-      }
-   } else {
-      if (region->classic_map != NULL) {
-	 drmUnmap(region->classic_map,
-		  region->pitch * region->cpp * region->height);
-	 region->classic_map = NULL;
-      }
-      ret = drmMap(intel->driFd, region_desc->handle,
-		   region->pitch * region->cpp * region->height,
-		   &region->classic_map);
-      if (ret != 0) {
-	 fprintf(stderr, "Failed to drmMap %s buffer\n", name);
-	 free(region);
-	 return NULL;
-      }
-
-      region->buffer = intel_bo_fake_alloc_static(intel->bufmgr,
-						  name,
-						  region_desc->offset,
-						  region->pitch * region->cpp *
-						  region->height,
-						  region->classic_map);
-
-      /* The sarea just gives us a boolean for whether it's tiled or not,
-       * instead of which tiling mode it is.  Guess.
-       */
-      if (region_desc->tiled) {
-	 if (intel->gen >= 4 && region_desc == &intelScreen->depth)
-	    region->tiling = I915_TILING_Y;
-	 else
-	    region->tiling = I915_TILING_X;
-      } else {
-	 region->tiling = I915_TILING_NONE;
-      }
-
-      region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE;
-   }
-
-   assert(region->buffer != NULL);
-
-   return region;
-}
-
-/**
- * Create intel_region structs to describe the static front, back, and depth
- * buffers created by the xserver.
- *
- * Although FBO's mean we now no longer use these as render targets in
- * all circumstances, they won't go away until the back and depth
- * buffers become private, and the front buffer will remain even then.
- *
- * Note that these don't allocate video memory, just describe
- * allocations alread made by the X server.
- */
-void
-intel_recreate_static_regions(struct intel_context *intel)
-{
-   intelScreenPrivate *intelScreen = intel->intelScreen;
-
-   intel->front_region =
-      intel_recreate_static(intel, "front",
-			    intel->front_region,
-			    &intelScreen->front);
-
-   intel->back_region =
-      intel_recreate_static(intel, "back",
-			    intel->back_region,
-			    &intelScreen->back);
-
-   /* Still assumes front.cpp == depth.cpp.  We can kill this when we move to
-    * private buffers.
-    */
-   intel->depth_region =
-      intel_recreate_static(intel, "depth",
-			    intel->depth_region,
-			    &intelScreen->depth);
-}
diff --git a/shared/intel_regions.h b/shared/intel_regions.h
index 535fcd7..7ee6a98 100644
--- a/shared/intel_regions.h
+++ b/shared/intel_regions.h
@@ -52,7 +52,7 @@ struct intel_buffer_object;
  */
 struct intel_region
 {
-   dri_bo *buffer;  /**< buffer manager's buffer */
+   drm_intel_bo *buffer;  /**< buffer manager's buffer */
    GLuint refcount; /**< Reference count for region */
    GLuint cpp;      /**< bytes per pixel */
    GLuint width;    /**< in pixels */
@@ -66,8 +66,10 @@ struct intel_region
 
    uint32_t tiling; /**< Which tiling mode the region is in */
    uint32_t bit_6_swizzle; /**< GEM flag for address swizzling requirement */
-   drmAddress classic_map; /**< drmMap of the region when not in GEM mode */
    struct intel_buffer_object *pbo;     /* zero-copy uploads */
+
+   uint32_t name; /**< Global name for the bo */
+   struct intel_screen *screen;
 };
 
 
@@ -146,4 +148,12 @@ void _mesa_copy_rect(GLubyte * dst,
                 const GLubyte * src,
                 GLuint src_pitch, GLuint src_x, GLuint src_y);
 
+struct __DRIimageRec {
+   struct intel_region *region;
+   GLenum internal_format;
+   GLuint format;
+   GLenum data_type;
+   void *data;
+};
+
 #endif
diff --git a/shared/intel_screen.c b/shared/intel_screen.c
index 789135b..6e4bb64 100644
--- a/shared/intel_screen.c
+++ b/shared/intel_screen.c
@@ -29,35 +29,28 @@
 #include "main/context.h"
 #include "main/framebuffer.h"
 #include "main/renderbuffer.h"
+#include "main/hash.h"
+#include "main/fbobject.h"
 
 #include "utils.h"
-#include "vblank.h"
 #include "xmlpool.h"
 
 #include "intel_batchbuffer.h"
 #include "intel_buffers.h"
 #include "intel_bufmgr.h"
 #include "intel_chipset.h"
-#include "intel_extensions.h"
 #include "intel_fbo.h"
-#include "intel_regions.h"
-#include "intel_swapbuffers.h"
 #include "intel_screen.h"
-#include "intel_span.h"
 #include "intel_tex.h"
+#include "intel_regions.h"
 
 #include "i915_drm.h"
-#include "i830_dri.h"
 
 #define DRI_CONF_TEXTURE_TILING(def) \
-	DRI_CONF_OPT_BEGIN(texture_tiling, bool, def)		\
-		DRI_CONF_DESC(en, "Enable texture tiling")	\
-	DRI_CONF_OPT_END					\
 
 PUBLIC const char __driConfigOptions[] =
    DRI_CONF_BEGIN
    DRI_CONF_SECTION_PERFORMANCE
-      DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS)
       DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_ALWAYS_SYNC)
       /* Options correspond to DRI_CONF_BO_REUSE_DISABLED,
        * DRI_CONF_BO_REUSE_ALL
@@ -69,11 +62,9 @@ PUBLIC const char __driConfigOptions[] =
 	 DRI_CONF_DESC_END
       DRI_CONF_OPT_END
 
-#ifdef I915
-     DRI_CONF_TEXTURE_TILING(false)
-#else
-     DRI_CONF_TEXTURE_TILING(true)
-#endif
+      DRI_CONF_OPT_BEGIN(texture_tiling, bool, true)
+	 DRI_CONF_DESC(en, "Enable texture tiling")
+      DRI_CONF_OPT_END
 
       DRI_CONF_OPT_BEGIN(early_z, bool, false)
 	 DRI_CONF_DESC(en, "Enable early Z in classic mode (unstable, 945-only).")
@@ -99,157 +90,146 @@ PUBLIC const char __driConfigOptions[] =
    DRI_CONF_SECTION_END
 DRI_CONF_END;
 
-const GLuint __driNConfigOptions = 12;
+const GLuint __driNConfigOptions = 11;
 
 #ifdef USE_NEW_INTERFACE
 static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;
 #endif /*USE_NEW_INTERFACE */
 
-/**
- * Map all the memory regions described by the screen.
- * \return GL_TRUE if success, GL_FALSE if error.
- */
-GLboolean
-intelMapScreenRegions(__DRIscreenPrivate * sPriv)
+static const __DRItexBufferExtension intelTexBufferExtension = {
+    { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
+   intelSetTexBuffer,
+   intelSetTexBuffer2,
+};
+
+static void
+intelDRI2Flush(__DRIdrawable *drawable)
 {
-   intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private;
-
-   if (0)
-      _mesa_printf("TEX 0x%08x ", intelScreen->tex.handle);
-   if (intelScreen->tex.size != 0) {
-      if (drmMap(sPriv->fd,
-		 intelScreen->tex.handle,
-		 intelScreen->tex.size,
-		 (drmAddress *) & intelScreen->tex.map) != 0) {
-	 intelUnmapScreenRegions(intelScreen);
-	 return GL_FALSE;
-      }
-   }
+   struct intel_context *intel = drawable->driContextPriv->driverPrivate;
 
-   return GL_TRUE;
-}
+   if (intel->gen < 4)
+      INTEL_FIREVERTICES(intel);
 
-void
-intelUnmapScreenRegions(intelScreenPrivate * intelScreen)
-{
-   if (intelScreen->tex.map) {
-      drmUnmap(intelScreen->tex.map, intelScreen->tex.size);
-      intelScreen->tex.map = NULL;
-   }
+   if (intel->batch->map != intel->batch->ptr)
+      intel_batchbuffer_flush(intel->batch);
 }
 
-
 static void
-intelPrintDRIInfo(intelScreenPrivate * intelScreen,
-                  __DRIscreenPrivate * sPriv, I830DRIPtr gDRIPriv)
+intelDRI2Invalidate(__DRIdrawable *drawable)
 {
-   fprintf(stderr, "*** Front size:   0x%x  offset: 0x%x  pitch: %d\n",
-           intelScreen->front.size, intelScreen->front.offset,
-           intelScreen->pitch);
-   fprintf(stderr, "*** Back size:    0x%x  offset: 0x%x  pitch: %d\n",
-           intelScreen->back.size, intelScreen->back.offset,
-           intelScreen->pitch);
-   fprintf(stderr, "*** Depth size:   0x%x  offset: 0x%x  pitch: %d\n",
-           intelScreen->depth.size, intelScreen->depth.offset,
-           intelScreen->pitch);
-   fprintf(stderr, "*** Texture size: 0x%x  offset: 0x%x\n",
-           intelScreen->tex.size, intelScreen->tex.offset);
-   fprintf(stderr, "*** Memory : 0x%x\n", gDRIPriv->mem);
+   struct intel_context *intel = drawable->driContextPriv->driverPrivate;
+
+   intel->using_dri2_swapbuffers = GL_TRUE;
+   dri2InvalidateDrawable(drawable);
 }
 
+static const struct __DRI2flushExtensionRec intelFlushExtension = {
+    { __DRI2_FLUSH, __DRI2_FLUSH_VERSION },
+    intelDRI2Flush,
+    intelDRI2Invalidate,
+};
 
-static void
-intelPrintSAREA(const drm_i915_sarea_t * sarea)
+static __DRIimage *
+intel_create_image_from_name(__DRIcontext *context,
+			     int width, int height, int format,
+			     int name, int pitch, void *loaderPrivate)
 {
-   fprintf(stderr, "SAREA: sarea width %d  height %d\n", sarea->width,
-           sarea->height);
-   fprintf(stderr, "SAREA: pitch: %d\n", sarea->pitch);
-   fprintf(stderr,
-           "SAREA: front offset: 0x%08x  size: 0x%x  handle: 0x%x tiled: %d\n",
-           sarea->front_offset, sarea->front_size,
-           (unsigned) sarea->front_handle, sarea->front_tiled);
-   fprintf(stderr,
-           "SAREA: back  offset: 0x%08x  size: 0x%x  handle: 0x%x tiled: %d\n",
-           sarea->back_offset, sarea->back_size,
-           (unsigned) sarea->back_handle, sarea->back_tiled);
-   fprintf(stderr, "SAREA: depth offset: 0x%08x  size: 0x%x  handle: 0x%x tiled: %d\n",
-           sarea->depth_offset, sarea->depth_size,
-           (unsigned) sarea->depth_handle, sarea->depth_tiled);
-   fprintf(stderr, "SAREA: tex   offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
-           sarea->tex_offset, sarea->tex_size, (unsigned) sarea->tex_handle);
-}
+    __DRIimage *image;
+    struct intel_context *intel = context->driverPrivate;
+    int cpp;
+
+    image = CALLOC(sizeof *image);
+    if (image == NULL)
+	return NULL;
+
+    switch (format) {
+    case __DRI_IMAGE_FORMAT_RGB565:
+       image->format = MESA_FORMAT_RGB565;
+       image->internal_format = GL_RGB;
+       image->data_type = GL_UNSIGNED_BYTE;
+       break;
+    case __DRI_IMAGE_FORMAT_XRGB8888:
+       image->format = MESA_FORMAT_XRGB8888;
+       image->internal_format = GL_RGB;
+       image->data_type = GL_UNSIGNED_BYTE;
+       break;
+    case __DRI_IMAGE_FORMAT_ARGB8888:
+       image->format = MESA_FORMAT_ARGB8888;
+       image->internal_format = GL_RGBA;
+       image->data_type = GL_UNSIGNED_BYTE;
+       break;
+    default:
+       free(image);
+       return NULL;
+    }
 
+    image->data = loaderPrivate;
+    cpp = _mesa_get_format_bytes(image->format);
 
-/**
- * A number of the screen parameters are obtained/computed from
- * information in the SAREA.  This function updates those parameters.
- */
-static void
-intelUpdateScreenFromSAREA(intelScreenPrivate * intelScreen,
-                           drm_i915_sarea_t * sarea)
+    image->region = intel_region_alloc_for_handle(intel, cpp, width, height,
+						  pitch, name, "image");
+    if (image->region == NULL) {
+       FREE(image);
+       return NULL;
+    }
+
+    return image;	
+}
+
+static __DRIimage *
+intel_create_image_from_renderbuffer(__DRIcontext *context,
+				     int renderbuffer, void *loaderPrivate)
 {
-   intelScreen->width = sarea->width;
-   intelScreen->height = sarea->height;
-   intelScreen->pitch = sarea->pitch;
-
-   intelScreen->front.offset = sarea->front_offset;
-   intelScreen->front.handle = sarea->front_handle;
-   intelScreen->front.size = sarea->front_size;
-   intelScreen->front.tiled = sarea->front_tiled;
-
-   intelScreen->back.offset = sarea->back_offset;
-   intelScreen->back.handle = sarea->back_handle;
-   intelScreen->back.size = sarea->back_size;
-   intelScreen->back.tiled = sarea->back_tiled;
-
-   intelScreen->depth.offset = sarea->depth_offset;
-   intelScreen->depth.handle = sarea->depth_handle;
-   intelScreen->depth.size = sarea->depth_size;
-   intelScreen->depth.tiled = sarea->depth_tiled;
-
-   if (intelScreen->driScrnPriv->ddx_version.minor >= 9) {
-      intelScreen->front.bo_handle = sarea->front_bo_handle;
-      intelScreen->back.bo_handle = sarea->back_bo_handle;
-      intelScreen->depth.bo_handle = sarea->depth_bo_handle;
-   } else {
-      intelScreen->front.bo_handle = -1;
-      intelScreen->back.bo_handle = -1;
-      intelScreen->depth.bo_handle = -1;
+   __DRIimage *image;
+   struct intel_context *intel = context->driverPrivate;
+   struct gl_renderbuffer *rb;
+   struct intel_renderbuffer *irb;
+
+   rb = _mesa_lookup_renderbuffer(&intel->ctx, renderbuffer);
+   if (!rb) {
+      _mesa_error(&intel->ctx,
+		  GL_INVALID_OPERATION, "glRenderbufferExternalMESA");
+      return NULL;
    }
 
-   intelScreen->tex.offset = sarea->tex_offset;
-   intelScreen->logTextureGranularity = sarea->log_tex_granularity;
-   intelScreen->tex.handle = sarea->tex_handle;
-   intelScreen->tex.size = sarea->tex_size;
+   irb = intel_renderbuffer(rb);
+   image = CALLOC(sizeof *image);
+   if (image == NULL)
+      return NULL;
 
-   if (0)
-      intelPrintSAREA(sarea);
+   image->internal_format = rb->InternalFormat;
+   image->format = rb->Format;
+   image->data_type = rb->DataType;
+   image->data = loaderPrivate;
+   intel_region_reference(&image->region, irb->region);
+
+   return image;
 }
 
-static const __DRItexOffsetExtension intelTexOffsetExtension = {
-   { __DRI_TEX_OFFSET },
-   intelSetTexOffset,
-};
+static void
+intel_destroy_image(__DRIimage *image)
+{
+    intel_region_release(&image->region);
+    FREE(image);
+}
 
-static const __DRItexBufferExtension intelTexBufferExtension = {
-    { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
-   intelSetTexBuffer,
-   intelSetTexBuffer2,
+static struct __DRIimageExtensionRec intelImageExtension = {
+    { __DRI_IMAGE, __DRI_IMAGE_VERSION },
+    intel_create_image_from_name,
+    intel_create_image_from_renderbuffer,
+    intel_destroy_image,
 };
 
 static const __DRIextension *intelScreenExtensions[] = {
     &driReadDrawableExtension,
-    &driCopySubBufferExtension.base,
-    &driSwapControlExtension.base,
-    &driFrameTrackingExtension.base,
-    &driMediaStreamCounterExtension.base,
-    &intelTexOffsetExtension.base,
     &intelTexBufferExtension.base,
+    &intelFlushExtension.base,
+    &intelImageExtension.base,
     NULL
 };
 
 static GLboolean
-intel_get_param(__DRIscreenPrivate *psp, int param, int *value)
+intel_get_param(__DRIscreen *psp, int param, int *value)
 {
    int ret;
    struct drm_i915_getparam gp;
@@ -266,70 +246,25 @@ intel_get_param(__DRIscreenPrivate *psp, int param, int *value)
    return GL_TRUE;
 }
 
-static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv)
+static void
+nop_callback(GLuint key, void *data, void *userData)
 {
-   intelScreenPrivate *intelScreen;
-   I830DRIPtr gDRIPriv = (I830DRIPtr) sPriv->pDevPriv;
-   drm_i915_sarea_t *sarea;
-
-   if (sPriv->devPrivSize != sizeof(I830DRIRec)) {
-      fprintf(stderr,
-              "\nERROR!  sizeof(I830DRIRec) does not match passed size from device driver\n");
-      return GL_FALSE;
-   }
-
-   /* Allocate the private area */
-   intelScreen = (intelScreenPrivate *) CALLOC(sizeof(intelScreenPrivate));
-   if (!intelScreen) {
-      fprintf(stderr, "\nERROR!  Allocating private area failed\n");
-      return GL_FALSE;
-   }
-   /* parse information in __driConfigOptions */
-   driParseOptionInfo(&intelScreen->optionCache,
-                      __driConfigOptions, __driNConfigOptions);
-
-   intelScreen->driScrnPriv = sPriv;
-   sPriv->private = (void *) intelScreen;
-   sarea = (drm_i915_sarea_t *)
-      (((GLubyte *) sPriv->pSAREA) + gDRIPriv->sarea_priv_offset);
-   intelScreen->sarea = sarea;
-
-   intelScreen->deviceID = gDRIPriv->deviceID;
-
-   intelUpdateScreenFromSAREA(intelScreen, sarea);
-
-   if (!intelMapScreenRegions(sPriv)) {
-      fprintf(stderr, "\nERROR!  mapping regions\n");
-      _mesa_free(intelScreen);
-      sPriv->private = NULL;
-      return GL_FALSE;
-   }
-
-   if (0)
-      intelPrintDRIInfo(intelScreen, sPriv, gDRIPriv);
-
-   intelScreen->drmMinor = sPriv->drm_version.minor;
-
-   /* Determine if IRQs are active? */
-   if (!intel_get_param(sPriv, I915_PARAM_IRQ_ACTIVE,
-			&intelScreen->irq_active))
-      return GL_FALSE;
-
-   sPriv->extensions = intelScreenExtensions;
-
-   return GL_TRUE;
 }
 
-
 static void
-intelDestroyScreen(__DRIscreenPrivate * sPriv)
+intelDestroyScreen(__DRIscreen * sPriv)
 {
-   intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private;
+   struct intel_screen *intelScreen = sPriv->private;
 
    dri_bufmgr_destroy(intelScreen->bufmgr);
-   intelUnmapScreenRegions(intelScreen);
    driDestroyOptionInfo(&intelScreen->optionCache);
 
+   /* Some regions may still have references to them at this point, so
+    * flush the hash table to prevent _mesa_DeleteHashTable() from
+    * complaining about the hash not being empty; */
+   _mesa_HashDeleteAll(intelScreen->named_regions, nop_callback, NULL);
+   _mesa_DeleteHashTable(intelScreen->named_regions);
+
    FREE(intelScreen);
    sPriv->private = NULL;
 }
@@ -339,10 +274,12 @@ intelDestroyScreen(__DRIscreenPrivate * sPriv)
  * This is called when we need to set up GL rendering to a new X window.
  */
 static GLboolean
-intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
-                  __DRIdrawablePrivate * driDrawPriv,
+intelCreateBuffer(__DRIscreen * driScrnPriv,
+                  __DRIdrawable * driDrawPriv,
                   const __GLcontextModes * mesaVis, GLboolean isPixmap)
 {
+   struct intel_renderbuffer *rb;
+
    if (isPixmap) {
       return GL_FALSE;          /* not implemented */
    }
@@ -351,12 +288,12 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
                              mesaVis->depthBits != 24);
       gl_format rgbFormat;
 
-      struct intel_framebuffer *intel_fb = CALLOC_STRUCT(intel_framebuffer);
+      struct gl_framebuffer *fb = CALLOC_STRUCT(gl_framebuffer);
 
-      if (!intel_fb)
+      if (!fb)
 	 return GL_FALSE;
 
-      _mesa_initialize_framebuffer(&intel_fb->Base, mesaVis);
+      _mesa_initialize_window_framebuffer(fb, mesaVis);
 
       if (mesaVis->redBits == 5)
 	 rgbFormat = MESA_FORMAT_RGB565;
@@ -366,16 +303,12 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
 	 rgbFormat = MESA_FORMAT_ARGB8888;
 
       /* setup the hardware-based renderbuffers */
-      intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat);
-      _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT,
-			     &intel_fb->color_rb[0]->Base);
+      rb = intel_create_renderbuffer(rgbFormat);
+      _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &rb->Base);
 
       if (mesaVis->doubleBufferMode) {
-	 intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat);
-
-         _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT,
-				&intel_fb->color_rb[1]->Base);
-
+	 rb = intel_create_renderbuffer(rgbFormat);
+         _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &rb->Base);
       }
 
       if (mesaVis->depthBits == 24) {
@@ -384,116 +317,64 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
 	    struct intel_renderbuffer *depthStencilRb
 	       = intel_create_renderbuffer(MESA_FORMAT_S8_Z24);
 	    /* note: bind RB to two attachment points */
-	    _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH,
-				   &depthStencilRb->Base);
-	    _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_STENCIL,
-				   &depthStencilRb->Base);
+	    _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthStencilRb->Base);
+	    _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &depthStencilRb->Base);
 	 } else {
 	    struct intel_renderbuffer *depthRb
 	       = intel_create_renderbuffer(MESA_FORMAT_X8_Z24);
-	    _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH,
-				   &depthRb->Base);
+	    _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
 	 }
       }
       else if (mesaVis->depthBits == 16) {
          /* just 16-bit depth buffer, no hw stencil */
          struct intel_renderbuffer *depthRb
 	    = intel_create_renderbuffer(MESA_FORMAT_Z16);
-         _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthRb->Base);
+         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
       }
 
       /* now add any/all software-based renderbuffers we may need */
-      _mesa_add_soft_renderbuffers(&intel_fb->Base,
+      _mesa_add_soft_renderbuffers(fb,
                                    GL_FALSE, /* never sw color */
                                    GL_FALSE, /* never sw depth */
                                    swStencil, mesaVis->accumRedBits > 0,
                                    GL_FALSE, /* never sw alpha */
                                    GL_FALSE  /* never sw aux */ );
-      driDrawPriv->driverPrivate = (void *) intel_fb;
+      driDrawPriv->driverPrivate = fb;
 
       return GL_TRUE;
    }
 }
 
 static void
-intelDestroyBuffer(__DRIdrawablePrivate * driDrawPriv)
-{
-   struct intel_framebuffer *intel_fb = driDrawPriv->driverPrivate;
-   struct intel_renderbuffer *depth_rb;
-   struct intel_renderbuffer *stencil_rb;
-
-   if (intel_fb) {
-      if (intel_fb->color_rb[0]) {
-         intel_renderbuffer_set_region(intel_fb->color_rb[0], NULL);
-      }
-
-      if (intel_fb->color_rb[1]) {
-         intel_renderbuffer_set_region(intel_fb->color_rb[1], NULL);
-      }
-
-      depth_rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH);
-      if (depth_rb) {
-         intel_renderbuffer_set_region(depth_rb, NULL);
-      }
-
-      stencil_rb = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL);
-      if (stencil_rb) {
-         intel_renderbuffer_set_region(stencil_rb, NULL);
-      }
-   }
-
-   _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL);
-}
-
-
-/**
- * Get information about previous buffer swaps.
- */
-static int
-intelGetSwapInfo(__DRIdrawablePrivate * dPriv, __DRIswapInfo * sInfo)
+intelDestroyBuffer(__DRIdrawable * driDrawPriv)
 {
-   struct intel_framebuffer *intel_fb;
-
-   if ((dPriv == NULL) || (dPriv->driverPrivate == NULL)
-       || (sInfo == NULL)) {
-      return -1;
-   }
-
-   intel_fb = dPriv->driverPrivate;
-   sInfo->swap_count = intel_fb->swap_count;
-   sInfo->swap_ust = intel_fb->swap_ust;
-   sInfo->swap_missed_count = intel_fb->swap_missed_count;
-
-   sInfo->swap_missed_usage = (sInfo->swap_missed_count != 0)
-      ? driCalculateSwapUsage(dPriv, 0, intel_fb->swap_missed_ust)
-      : 0.0;
-
-   return 0;
+    struct gl_framebuffer *fb = driDrawPriv->driverPrivate;
+  
+    _mesa_reference_framebuffer(&fb, NULL);
 }
 
-
 /* There are probably better ways to do this, such as an
  * init-designated function to register chipids and createcontext
  * functions.
  */
 extern GLboolean i830CreateContext(const __GLcontextModes * mesaVis,
-                                   __DRIcontextPrivate * driContextPriv,
+                                   __DRIcontext * driContextPriv,
                                    void *sharedContextPrivate);
 
 extern GLboolean i915CreateContext(const __GLcontextModes * mesaVis,
-                                   __DRIcontextPrivate * driContextPriv,
+                                   __DRIcontext * driContextPriv,
                                    void *sharedContextPrivate);
 extern GLboolean brwCreateContext(const __GLcontextModes * mesaVis,
-				  __DRIcontextPrivate * driContextPriv,
+				  __DRIcontext * driContextPriv,
 				  void *sharedContextPrivate);
 
 static GLboolean
 intelCreateContext(const __GLcontextModes * mesaVis,
-                   __DRIcontextPrivate * driContextPriv,
+                   __DRIcontext * driContextPriv,
                    void *sharedContextPrivate)
 {
-   __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
-   intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private;
+   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
+   struct intel_screen *intelScreen = sPriv->private;
 
 #ifdef I915
    if (IS_9XX(intelScreen->deviceID)) {
@@ -513,217 +394,33 @@ intelCreateContext(const __GLcontextModes * mesaVis,
    return GL_FALSE;
 }
 
-
-static __DRIconfig **
-intelFillInModes(__DRIscreenPrivate *psp,
-		 unsigned pixel_bits, unsigned depth_bits,
-                 unsigned stencil_bits, GLboolean have_back_buffer)
-{
-   __DRIconfig **configs;
-   __GLcontextModes *m;
-   unsigned depth_buffer_factor;
-   unsigned back_buffer_factor;
-   int i;
-
-   /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't
-    * support pageflipping at all.
-    */
-   static const GLenum back_buffer_modes[] = {
-      GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML
-   };
-
-   uint8_t depth_bits_array[3];
-   uint8_t stencil_bits_array[3];
-   uint8_t msaa_samples_array[1];
-
-   depth_bits_array[0] = 0;
-   depth_bits_array[1] = depth_bits;
-   depth_bits_array[2] = depth_bits;
-
-   /* Just like with the accumulation buffer, always provide some modes
-    * with a stencil buffer.  It will be a sw fallback, but some apps won't
-    * care about that.
-    */
-   stencil_bits_array[0] = 0;
-   stencil_bits_array[1] = 0;
-   if (depth_bits == 24)
-      stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits;
-
-   stencil_bits_array[2] = (stencil_bits == 0) ? 8 : stencil_bits;
-
-   msaa_samples_array[0] = 0;
-
-   depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 3 : 1;
-   back_buffer_factor = (have_back_buffer) ? 3 : 1;
-
-   if (pixel_bits == 16) {
-      configs = driCreateConfigs(GL_RGB, GL_UNSIGNED_SHORT_5_6_5,
-				 depth_bits_array, stencil_bits_array,
-				 depth_buffer_factor, back_buffer_modes,
-				 back_buffer_factor,
-				 msaa_samples_array, 1);
-   }
-   else {
-      __DRIconfig **configs_a8r8g8b8;
-      __DRIconfig **configs_x8r8g8b8;
-
-      configs_a8r8g8b8 = driCreateConfigs(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,
-					  depth_bits_array,
-					  stencil_bits_array,
-					  depth_buffer_factor,
-					  back_buffer_modes,
-					  back_buffer_factor,
-					  msaa_samples_array, 1);
-      configs_x8r8g8b8 = driCreateConfigs(GL_BGR, GL_UNSIGNED_INT_8_8_8_8_REV,
-					  depth_bits_array,
-					  stencil_bits_array,
-					  depth_buffer_factor,
-					  back_buffer_modes,
-					  back_buffer_factor,
-					  msaa_samples_array, 1);
-      configs = driConcatConfigs(configs_a8r8g8b8, configs_x8r8g8b8);
-   }
-
-   if (configs == NULL) {
-    fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
-              __LINE__);
-      return NULL;
-   }
-
-   /* Mark the visual as slow if there are "fake" stencil bits.
-    */
-   for (i = 0; configs[i]; i++) {
-      m = &configs[i]->modes;
-      if ((m->stencilBits != 0) && (m->stencilBits != stencil_bits)) {
-         m->visualRating = GLX_SLOW_CONFIG;
-      }
-   }
-
-   return configs;
-}
-
 static GLboolean
-intel_init_bufmgr(intelScreenPrivate *intelScreen)
+intel_init_bufmgr(struct intel_screen *intelScreen)
 {
-   GLboolean gem_disable = getenv("INTEL_NO_GEM") != NULL;
-   int gem_kernel = 0;
-   GLboolean gem_supported;
-   struct drm_i915_getparam gp;
-   __DRIscreenPrivate *spriv = intelScreen->driScrnPriv;
+   __DRIscreen *spriv = intelScreen->driScrnPriv;
    int num_fences = 0;
 
    intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL;
 
-   gp.param = I915_PARAM_HAS_GEM;
-   gp.value = &gem_kernel;
-
-   (void) drmCommandWriteRead(spriv->fd, DRM_I915_GETPARAM, &gp, sizeof(gp));
-
-   /* If we've got a new enough DDX that's initializing GEM and giving us
-    * object handles for the shared buffers, use that.
-    */
-   intelScreen->ttm = GL_FALSE;
-   if (intelScreen->driScrnPriv->dri2.enabled)
-       gem_supported = GL_TRUE;
-   else if (intelScreen->driScrnPriv->ddx_version.minor >= 9 &&
-	    gem_kernel &&
-	    intelScreen->front.bo_handle != -1)
-       gem_supported = GL_TRUE;
-   else
-       gem_supported = GL_FALSE;
-
-   if (!gem_disable && gem_supported) {
-      intelScreen->bufmgr = intel_bufmgr_gem_init(spriv->fd, BATCH_SZ);
-      if (intelScreen->bufmgr != NULL)
-	 intelScreen->ttm = GL_TRUE;
-   }
+   intelScreen->bufmgr = intel_bufmgr_gem_init(spriv->fd, BATCH_SZ);
    /* Otherwise, use the classic buffer manager. */
    if (intelScreen->bufmgr == NULL) {
-      if (gem_disable) {
-	 _mesa_warning(NULL, "GEM disabled.  Using classic.");
-      } else {
-	 _mesa_warning(NULL,
-                       "Failed to initialize GEM.  Falling back to classic.");
-      }
-
-      if (intelScreen->tex.size == 0) {
-	 fprintf(stderr, "[%s:%u] Error initializing buffer manager.\n",
-		 __func__, __LINE__);
-	 return GL_FALSE;
-      }
-
-      intelScreen->bufmgr =
-	 intel_bufmgr_fake_init(spriv->fd,
-				intelScreen->tex.offset,
-				intelScreen->tex.map,
-				intelScreen->tex.size,
-				(unsigned int * volatile)
-				&intelScreen->sarea->last_dispatch);
+      fprintf(stderr, "[%s:%u] Error initializing buffer manager.\n",
+	      __func__, __LINE__);
+      return GL_FALSE;
    }
 
-   if (intel_get_param(spriv, I915_PARAM_NUM_FENCES_AVAIL, &num_fences))
-      intelScreen->kernel_exec_fencing = !!num_fences;
-   else
-      intelScreen->kernel_exec_fencing = GL_FALSE;
-
-   return GL_TRUE;
-}
-
-/**
- * This is the driver specific part of the createNewScreen entry point.
- * Called when using legacy DRI.
- * 
- * \todo maybe fold this into intelInitDriver
- *
- * \return the __GLcontextModes supported by this driver
- */
-static const __DRIconfig **intelInitScreen(__DRIscreenPrivate *psp)
-{
-   intelScreenPrivate *intelScreen;
-#ifdef I915
-   static const __DRIversion ddx_expected = { 1, 5, 0 };
-#else
-   static const __DRIversion ddx_expected = { 1, 6, 0 };
-#endif
-   static const __DRIversion dri_expected = { 4, 0, 0 };
-   static const __DRIversion drm_expected = { 1, 5, 0 };
-   I830DRIPtr dri_priv = (I830DRIPtr) psp->pDevPriv;
-
-   if (!driCheckDriDdxDrmVersions2("i915",
-                                   &psp->dri_version, &dri_expected,
-                                   &psp->ddx_version, &ddx_expected,
-                                   &psp->drm_version, &drm_expected)) {
-      return NULL;
+   if (!intel_get_param(spriv, I915_PARAM_NUM_FENCES_AVAIL, &num_fences) ||
+       num_fences == 0) {
+      fprintf(stderr, "[%s: %u] Kernel 2.6.29 required.\n", __func__, __LINE__);
+      return GL_FALSE;
    }
 
-   if (!intelInitDriver(psp))
-       return NULL;
+   drm_intel_bufmgr_gem_enable_fenced_relocs(intelScreen->bufmgr);
 
-   psp->extensions = intelScreenExtensions;
+   intelScreen->named_regions = _mesa_NewHashTable();
 
-   intelScreen = psp->private;
-   if (!intel_init_bufmgr(intelScreen))
-       return GL_FALSE;
-
-   return (const __DRIconfig **)
-       intelFillInModes(psp, dri_priv->cpp * 8,
-			(dri_priv->cpp == 2) ? 16 : 24,
-			(dri_priv->cpp == 2) ? 0  : 8, 1);
-}
-
-struct intel_context *intelScreenContext(intelScreenPrivate *intelScreen)
-{
-  /*
-   * This should probably change to have the screen allocate a dummy
-   * context at screen creation. For now just use the current context.
-   */
-
-  GET_CURRENT_CONTEXT(ctx);
-  if (ctx == NULL) {
-     _mesa_problem(NULL, "No current context in intelScreenContext\n");
-     return NULL;
-  }
-  return intel_context(ctx);
+   return GL_TRUE;
 }
 
 /**
@@ -733,23 +430,21 @@ struct intel_context *intelScreenContext(intelScreenPrivate *intelScreen)
  * \return the __GLcontextModes supported by this driver
  */
 static const
-__DRIconfig **intelInitScreen2(__DRIscreenPrivate *psp)
+__DRIconfig **intelInitScreen2(__DRIscreen *psp)
 {
-   intelScreenPrivate *intelScreen;
+   struct intel_screen *intelScreen;
    GLenum fb_format[3];
    GLenum fb_type[3];
-   /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't
-    * support pageflipping at all.
-    */
+
    static const GLenum back_buffer_modes[] = {
-      GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML
+       GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML
    };
    uint8_t depth_bits[4], stencil_bits[4], msaa_samples_array[1];
    int color;
    __DRIconfig **configs = NULL;
 
    /* Allocate the private area */
-   intelScreen = (intelScreenPrivate *) CALLOC(sizeof(intelScreenPrivate));
+   intelScreen = CALLOC(sizeof *intelScreen);
    if (!intelScreen) {
       fprintf(stderr, "\nERROR!  Allocating private area failed\n");
       return GL_FALSE;
@@ -761,8 +456,6 @@ __DRIconfig **intelInitScreen2(__DRIscreenPrivate *psp)
    intelScreen->driScrnPriv = psp;
    psp->private = (void *) intelScreen;
 
-   intelScreen->drmMinor = psp->drm_version.minor;
-
    /* Determine chipset ID */
    if (!intel_get_param(psp, I915_PARAM_CHIPSET_ID,
 			&intelScreen->deviceID))
@@ -771,18 +464,8 @@ __DRIconfig **intelInitScreen2(__DRIscreenPrivate *psp)
    if (!intel_init_bufmgr(intelScreen))
        return GL_FALSE;
 
-   intelScreen->irq_active = 1;
    psp->extensions = intelScreenExtensions;
 
-   depth_bits[0] = 0;
-   stencil_bits[0] = 0;
-   depth_bits[1] = 16;
-   stencil_bits[1] = 0;
-   depth_bits[2] = 24;
-   stencil_bits[2] = 0;
-   depth_bits[3] = 24;
-   stencil_bits[3] = 8;
-
    msaa_samples_array[0] = 0;
 
    fb_format[0] = GL_RGB;
@@ -797,27 +480,27 @@ __DRIconfig **intelInitScreen2(__DRIscreenPrivate *psp)
    depth_bits[0] = 0;
    stencil_bits[0] = 0;
 
+   /* Generate a rich set of useful configs that do not include an
+    * accumulation buffer.
+    */
    for (color = 0; color < ARRAY_SIZE(fb_format); color++) {
       __DRIconfig **new_configs;
       int depth_factor;
 
-      /* With DRI2 right now, GetBuffers always returns a depth/stencil buffer
-       * with the same cpp as the drawable.  So we can't support depth cpp !=
-       * color cpp currently.
+      /* Starting with DRI2 protocol version 1.1 we can request a depth/stencil
+       * buffer that has a diffferent number of bits per pixel than the color
+       * buffer.  This isn't yet supported here.
        */
       if (fb_type[color] == GL_UNSIGNED_SHORT_5_6_5) {
 	 depth_bits[1] = 16;
 	 stencil_bits[1] = 0;
-
-	 depth_factor = 2;
       } else {
 	 depth_bits[1] = 24;
-	 stencil_bits[1] = 0;
-	 depth_bits[2] = 24;
-	 stencil_bits[2] = 8;
-
-	 depth_factor = 3;
+	 stencil_bits[1] = 8;
       }
+
+      depth_factor = 2;
+
       new_configs = driCreateConfigs(fb_format[color], fb_type[color],
 				     depth_bits,
 				     stencil_bits,
@@ -825,7 +508,33 @@ __DRIconfig **intelInitScreen2(__DRIscreenPrivate *psp)
 				     back_buffer_modes,
 				     ARRAY_SIZE(back_buffer_modes),
 				     msaa_samples_array,
-				     ARRAY_SIZE(msaa_samples_array));
+				     ARRAY_SIZE(msaa_samples_array),
+				     GL_FALSE);
+      if (configs == NULL)
+	 configs = new_configs;
+      else
+	 configs = driConcatConfigs(configs, new_configs);
+   }
+
+   /* Generate the minimum possible set of configs that include an
+    * accumulation buffer.
+    */
+   for (color = 0; color < ARRAY_SIZE(fb_format); color++) {
+      __DRIconfig **new_configs;
+
+      if (fb_type[color] == GL_UNSIGNED_SHORT_5_6_5) {
+	 depth_bits[0] = 16;
+	 stencil_bits[0] = 0;
+      } else {
+	 depth_bits[0] = 24;
+	 stencil_bits[0] = 8;
+      }
+
+      new_configs = driCreateConfigs(fb_format[color], fb_type[color],
+				     depth_bits, stencil_bits, 1,
+				     back_buffer_modes + 1, 1,
+				     msaa_samples_array, 1,
+				     GL_TRUE);
       if (configs == NULL)
 	 configs = new_configs;
       else
@@ -842,19 +551,19 @@ __DRIconfig **intelInitScreen2(__DRIscreenPrivate *psp)
 }
 
 const struct __DriverAPIRec driDriverAPI = {
-   .InitScreen		 = intelInitScreen,
    .DestroyScreen	 = intelDestroyScreen,
    .CreateContext	 = intelCreateContext,
    .DestroyContext	 = intelDestroyContext,
    .CreateBuffer	 = intelCreateBuffer,
    .DestroyBuffer	 = intelDestroyBuffer,
-   .SwapBuffers		 = intelSwapBuffers,
    .MakeCurrent		 = intelMakeCurrent,
    .UnbindContext	 = intelUnbindContext,
-   .GetSwapInfo		 = intelGetSwapInfo,
-   .GetDrawableMSC	 = driDrawableGetMSC32,
-   .WaitForMSC		 = driWaitForMSC32,
-   .CopySubBuffer	 = intelCopySubBuffer,
-
    .InitScreen2		 = intelInitScreen2,
 };
+
+/* This is the table of extensions that the loader will dlsym() for. */
+PUBLIC const __DRIextension *__driDriverExtensions[] = {
+    &driCoreExtension.base,
+    &driDRI2Extension.base,
+    NULL
+};
diff --git a/shared/intel_screen.h b/shared/intel_screen.h
index a9b9e10..5863093 100644
--- a/shared/intel_screen.h
+++ b/shared/intel_screen.h
@@ -34,74 +34,35 @@
 #include "i915_drm.h"
 #include "xmlconfig.h"
 
-/* XXX: change name or eliminate to avoid conflict with "struct
- * intel_region"!!!
- */
-typedef struct
+struct intel_screen
 {
-   drm_handle_t handle;
-   drmSize size;                /* region size in bytes */
-   char *map;                   /* memory map */
-   int offset;                  /* from start of video mem, in bytes */
-   unsigned int bo_handle;	/* buffer object id if available, or -1 */
-   /**
-    * Flags if the region is tiled.
-    *
-    * Not included is Y versus X tiling.
-    */
-   GLboolean tiled;
-} intelRegion;
-
-typedef struct
-{
-   intelRegion front;
-   intelRegion back;
-   intelRegion depth;
-   intelRegion tex;
-
    int deviceID;
-   int width;
-   int height;
-   int pitch;                   /* common row stride, in pixels */
 
    int logTextureGranularity;
 
-   __DRIscreenPrivate *driScrnPriv;
-
-   volatile drm_i915_sarea_t *sarea;
-
-   int drmMinor;
-
-   int irq_active;
+   __DRIscreen *driScrnPriv;
 
    GLboolean no_hw;
 
    GLboolean no_vbo;
-   int ttm;
    dri_bufmgr *bufmgr;
-   GLboolean kernel_exec_fencing;
+   struct _mesa_HashTable *named_regions;
 
    /**
    * Configuration cache with default values for all contexts
    */
    driOptionCache optionCache;
-} intelScreenPrivate;
-
-
+};
 
-extern GLboolean intelMapScreenRegions(__DRIscreenPrivate * sPriv);
+extern GLboolean intelMapScreenRegions(__DRIscreen * sPriv);
 
-extern void intelUnmapScreenRegions(intelScreenPrivate * intelScreen);
+extern void intelDestroyContext(__DRIcontext * driContextPriv);
 
-extern void intelDestroyContext(__DRIcontextPrivate * driContextPriv);
-
-extern GLboolean intelUnbindContext(__DRIcontextPrivate * driContextPriv);
+extern GLboolean intelUnbindContext(__DRIcontext * driContextPriv);
 
 extern GLboolean
-intelMakeCurrent(__DRIcontextPrivate * driContextPriv,
-                 __DRIdrawablePrivate * driDrawPriv,
-                 __DRIdrawablePrivate * driReadPriv);
-
-extern struct intel_context *intelScreenContext(intelScreenPrivate *intelScreen);
+intelMakeCurrent(__DRIcontext * driContextPriv,
+                 __DRIdrawable * driDrawPriv,
+                 __DRIdrawable * driReadPriv);
 
 #endif
diff --git a/shared/intel_span.c b/shared/intel_span.c
index 34c3d9d..fb5c01b 100644
--- a/shared/intel_span.c
+++ b/shared/intel_span.c
@@ -43,252 +43,24 @@ static void
 intel_set_span_functions(struct intel_context *intel,
 			 struct gl_renderbuffer *rb);
 
-#define SPAN_CACHE_SIZE		4096
-
-static void
-get_span_cache(struct intel_renderbuffer *irb, uint32_t offset)
-{
-   if (irb->span_cache == NULL) {
-      irb->span_cache = _mesa_malloc(SPAN_CACHE_SIZE);
-      irb->span_cache_offset = -1;
-   }
-
-   if ((offset & ~(SPAN_CACHE_SIZE - 1)) != irb->span_cache_offset) {
-      irb->span_cache_offset = offset & ~(SPAN_CACHE_SIZE - 1);
-      dri_bo_get_subdata(irb->region->buffer, irb->span_cache_offset,
-			 SPAN_CACHE_SIZE, irb->span_cache);
-   }
-}
-
-static void
-clear_span_cache(struct intel_renderbuffer *irb)
-{
-   irb->span_cache_offset = -1;
-}
-
-static uint32_t
-pread_32(struct intel_renderbuffer *irb, uint32_t offset)
-{
-   get_span_cache(irb, offset);
-
-   return *(uint32_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1)));
-}
-
-static uint32_t
-pread_xrgb8888(struct intel_renderbuffer *irb, uint32_t offset)
-{
-   get_span_cache(irb, offset);
-
-   return *(uint32_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))) |
-      0xff000000;
-}
-
-static uint16_t
-pread_16(struct intel_renderbuffer *irb, uint32_t offset)
-{
-   get_span_cache(irb, offset);
-
-   return *(uint16_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1)));
-}
-
-static uint8_t
-pread_8(struct intel_renderbuffer *irb, uint32_t offset)
-{
-   get_span_cache(irb, offset);
-
-   return *(uint8_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1)));
-}
-
-static void
-pwrite_32(struct intel_renderbuffer *irb, uint32_t offset, uint32_t val)
-{
-   clear_span_cache(irb);
-
-   dri_bo_subdata(irb->region->buffer, offset, 4, &val);
-}
-
-static void
-pwrite_xrgb8888(struct intel_renderbuffer *irb, uint32_t offset, uint32_t val)
-{
-   clear_span_cache(irb);
-
-   dri_bo_subdata(irb->region->buffer, offset, 3, &val);
-}
-
-static void
-pwrite_16(struct intel_renderbuffer *irb, uint32_t offset, uint16_t val)
-{
-   clear_span_cache(irb);
-
-   dri_bo_subdata(irb->region->buffer, offset, 2, &val);
-}
-
-static void
-pwrite_8(struct intel_renderbuffer *irb, uint32_t offset, uint8_t val)
-{
-   clear_span_cache(irb);
-
-   dri_bo_subdata(irb->region->buffer, offset, 1, &val);
-}
-
-static uint32_t no_tile_swizzle(struct intel_renderbuffer *irb,
-				int x, int y)
-{
-	return (y * irb->region->pitch + x) * irb->region->cpp;
-}
-
-/*
- * Deal with tiled surfaces
- */
-
-static uint32_t x_tile_swizzle(struct intel_renderbuffer *irb,
-			       int x, int y)
-{
-	int	tile_stride;
-	int	xbyte;
-	int	x_tile_off, y_tile_off;
-	int	x_tile_number, y_tile_number;
-	int	tile_off, tile_base;
-	
-        x += irb->region->draw_x;
-        y += irb->region->draw_y;
-
-	tile_stride = (irb->region->pitch * irb->region->cpp) << 3;
-
-	xbyte = x * irb->region->cpp;
-
-	x_tile_off = xbyte & 0x1ff;
-	y_tile_off = y & 7;
-
-	x_tile_number = xbyte >> 9;
-	y_tile_number = y >> 3;
-
-	tile_off = (y_tile_off << 9) + x_tile_off;
-
-	switch (irb->region->bit_6_swizzle) {
-	case I915_BIT_6_SWIZZLE_NONE:
-	   break;
-	case I915_BIT_6_SWIZZLE_9:
-	   tile_off ^= ((tile_off >> 3) & 64);
-	   break;
-	case I915_BIT_6_SWIZZLE_9_10:
-	   tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64);
-	   break;
-	case I915_BIT_6_SWIZZLE_9_11:
-	   tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 5) & 64);
-	   break;
-	case I915_BIT_6_SWIZZLE_9_10_11:
-	   tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64) ^
-	      ((tile_off >> 5) & 64);
-	   break;
-	default:
-	   fprintf(stderr, "Unknown tile swizzling mode %d\n",
-		   irb->region->bit_6_swizzle);
-	   exit(1);
-	}
-
-	tile_base = (x_tile_number << 12) + y_tile_number * tile_stride;
-
-#if 0
-	printf("(%d,%d) -> %d + %d = %d (pitch = %d, tstride = %d)\n",
-	       x, y, tile_off, tile_base,
-	       tile_off + tile_base,
-	       irb->region->pitch, tile_stride);
-#endif
-
-	return tile_base + tile_off;
-}
-
-static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
-			       int x, int y)
-{
-	int	tile_stride;
-	int	xbyte;
-	int	x_tile_off, y_tile_off;
-	int	x_tile_number, y_tile_number;
-	int	tile_off, tile_base;
-	
-        x += irb->region->draw_x;
-        y += irb->region->draw_y;
-
-	tile_stride = (irb->region->pitch * irb->region->cpp) << 5;
-
-	xbyte = x * irb->region->cpp;
-
-	x_tile_off = xbyte & 0x7f;
-	y_tile_off = y & 0x1f;
-
-	x_tile_number = xbyte >> 7;
-	y_tile_number = y >> 5;
-
-	tile_off = ((x_tile_off & ~0xf) << 5) + (y_tile_off << 4) +
-	   (x_tile_off & 0xf);
-
-	switch (irb->region->bit_6_swizzle) {
-	case I915_BIT_6_SWIZZLE_NONE:
-	   break;
-	case I915_BIT_6_SWIZZLE_9:
-	   tile_off ^= ((tile_off >> 3) & 64);
-	   break;
-	case I915_BIT_6_SWIZZLE_9_10:
-	   tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64);
-	   break;
-	case I915_BIT_6_SWIZZLE_9_11:
-	   tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 5) & 64);
-	   break;
-	case I915_BIT_6_SWIZZLE_9_10_11:
-	   tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64) ^
-	      ((tile_off >> 5) & 64);
-	   break;
-	default:
-	   fprintf(stderr, "Unknown tile swizzling mode %d\n",
-		   irb->region->bit_6_swizzle);
-	   exit(1);
-	}
-
-	tile_base = (x_tile_number << 12) + y_tile_number * tile_stride;
-
-	return tile_base + tile_off;
-}
-
-/*
-  break intelWriteRGBASpan_ARGB8888
-*/
-
 #undef DBG
 #define DBG 0
 
 #define LOCAL_VARS							\
-   struct intel_context *intel = intel_context(ctx);			\
    struct intel_renderbuffer *irb = intel_renderbuffer(rb);		\
    const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;			\
    const GLint yBias = ctx->DrawBuffer->Name ? 0 : irb->Base.Height - 1;\
-   unsigned int num_cliprects;						\
-   struct drm_clip_rect *cliprects;					\
-   int x_off, y_off;							\
+   int minx = 0, miny = 0;						\
+   int maxx = ctx->DrawBuffer->Width;					\
+   int maxy = ctx->DrawBuffer->Height;					\
    int pitch = irb->region->pitch * irb->region->cpp;			\
    void *buf = irb->region->buffer->virtual;				\
    GLuint p;								\
    (void) p;								\
    (void)buf; (void)pitch; /* unused for non-gttmap. */			\
-   intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
 
-/* XXX FBO: this is identical to the macro in spantmp2.h except we get
- * the cliprect info from the context, not the driDrawable.
- * Move this into spantmp2.h someday.
- */
-#define HW_CLIPLOOP()							\
-   do {									\
-      int _nc = num_cliprects;						\
-      while ( _nc-- ) {							\
-	 int minx = cliprects[_nc].x1 - x_off;				\
-	 int miny = cliprects[_nc].y1 - y_off;				\
-	 int maxx = cliprects[_nc].x2 - x_off;				\
-	 int maxy = cliprects[_nc].y2 - y_off;
-	
-#if 0
-      }}
-#endif
+#define HW_CLIPLOOP()
+#define HW_ENDCLIPLOOP()
 
 #define Y_FLIP(_y) ((_y) * yScale + yBias)
 
@@ -296,114 +68,74 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
 
 #define HW_UNLOCK()
 
-/* Convenience macros to avoid typing the swizzle argument over and over */
-#define NO_TILE(_X, _Y) no_tile_swizzle(irb, (_X) + x_off, (_Y) + y_off)
-#define X_TILE(_X, _Y) x_tile_swizzle(irb, (_X) + x_off, (_Y) + y_off)
-#define Y_TILE(_X, _Y) y_tile_swizzle(irb, (_X) + x_off, (_Y) + y_off)
+/* Convenience macros to avoid typing the address argument over and over */
+#define NO_TILE(_X, _Y) (((_Y) * irb->region->pitch + (_X)) * irb->region->cpp)
 
 /* r5g6b5 color span and pixel functions */
-#define INTEL_PIXEL_FMT GL_RGB
-#define INTEL_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
-#define INTEL_READ_VALUE(offset) pread_16(irb, offset)
-#define INTEL_WRITE_VALUE(offset, v) pwrite_16(irb, offset, v)
-#define INTEL_TAG(x) x##_RGB565
-#include "intel_spantmp.h"
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+#define TAG(x) intel_##x##_RGB565
+#define TAG2(x,y) intel_##x##y_RGB565
+#include "spantmp2.h"
 
 /* a4r4g4b4 color span and pixel functions */
-#define INTEL_PIXEL_FMT GL_BGRA
-#define INTEL_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV
-#define INTEL_READ_VALUE(offset) pread_16(irb, offset)
-#define INTEL_WRITE_VALUE(offset, v) pwrite_16(irb, offset, v)
-#define INTEL_TAG(x) x##_ARGB4444
-#include "intel_spantmp.h"
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV
+#define TAG(x) intel_##x##_ARGB4444
+#define TAG2(x,y) intel_##x##y_ARGB4444
+#include "spantmp2.h"
 
 /* a1r5g5b5 color span and pixel functions */
-#define INTEL_PIXEL_FMT GL_BGRA
-#define INTEL_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV
-#define INTEL_READ_VALUE(offset) pread_16(irb, offset)
-#define INTEL_WRITE_VALUE(offset, v) pwrite_16(irb, offset, v)
-#define INTEL_TAG(x) x##_ARGB1555
-#include "intel_spantmp.h"
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV
+#define TAG(x) intel_##x##_ARGB1555
+#define TAG2(x,y) intel_##x##y##_ARGB1555
+#include "spantmp2.h"
 
 /* a8r8g8b8 color span and pixel functions */
-#define INTEL_PIXEL_FMT GL_BGRA
-#define INTEL_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
-#define INTEL_READ_VALUE(offset) pread_32(irb, offset)
-#define INTEL_WRITE_VALUE(offset, v) pwrite_32(irb, offset, v)
-#define INTEL_TAG(x) x##_ARGB8888
-#include "intel_spantmp.h"
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+#define TAG(x) intel_##x##_ARGB8888
+#define TAG2(x,y) intel_##x##y##_ARGB8888
+#include "spantmp2.h"
 
 /* x8r8g8b8 color span and pixel functions */
-#define INTEL_PIXEL_FMT GL_BGR
-#define INTEL_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
-#define INTEL_READ_VALUE(offset) pread_xrgb8888(irb, offset)
-#define INTEL_WRITE_VALUE(offset, v) pwrite_xrgb8888(irb, offset, v)
-#define INTEL_TAG(x) x##_xRGB8888
-#include "intel_spantmp.h"
+#define SPANTMP_PIXEL_FMT GL_BGR
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+#define TAG(x) intel_##x##_xRGB8888
+#define TAG2(x,y) intel_##x##y##_xRGB8888
+#include "spantmp2.h"
 
 #define LOCAL_DEPTH_VARS						\
-   struct intel_context *intel = intel_context(ctx);			\
    struct intel_renderbuffer *irb = intel_renderbuffer(rb);		\
    const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;			\
    const GLint yBias = ctx->DrawBuffer->Name ? 0 : irb->Base.Height - 1;\
-   unsigned int num_cliprects;						\
-   struct drm_clip_rect *cliprects;					\
-   int x_off, y_off;							\
+   int minx = 0, miny = 0;						\
+   int maxx = ctx->DrawBuffer->Width;					\
+   int maxy = ctx->DrawBuffer->Height;					\
    int pitch = irb->region->pitch * irb->region->cpp;			\
    void *buf = irb->region->buffer->virtual;				\
    (void)buf; (void)pitch; /* unused for non-gttmap. */			\
-   intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
-
 
 #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
 
 /* z16 depthbuffer functions. */
-#define INTEL_VALUE_TYPE GLushort
-#define INTEL_WRITE_DEPTH(offset, d) pwrite_16(irb, offset, d)
-#define INTEL_READ_DEPTH(offset) pread_16(irb, offset)
-#define INTEL_TAG(name) name##_z16
-#include "intel_depthtmp.h"
-
-/* z24x8 depthbuffer functions. */
-#define INTEL_VALUE_TYPE GLuint
-#define INTEL_WRITE_DEPTH(offset, d) pwrite_32(irb, offset, d)
-#define INTEL_READ_DEPTH(offset) pread_32(irb, offset)
-#define INTEL_TAG(name) name##_z24_x8
-#include "intel_depthtmp.h"
-
-
-/**
- ** 8-bit stencil function (XXX FBO: This is obsolete)
- **/
-/* XXX */
-#define WRITE_STENCIL(_x, _y, d) pwrite_8(irb, NO_TILE(_x, _y) + 3, d)
-#define READ_STENCIL(d, _x, _y) d = pread_8(irb, NO_TILE(_x, _y) + 3);
-#define TAG(x) intel_gttmap_##x##_z24_s8
-#include "stenciltmp.h"
-
-/**
- ** 8-bit stencil function (XXX FBO: This is obsolete)
- **/
-#define WRITE_STENCIL(_x, _y, d) pwrite_8(irb, NO_TILE(_x, _y) + 3, d)
-#define READ_STENCIL(d, _x, _y) d = pread_8(irb, NO_TILE(_x, _y) + 3);
-#define TAG(x) intel##x##_z24_s8
-#include "stenciltmp.h"
-
-/**
- ** 8-bit x-tile stencil function (XXX FBO: This is obsolete)
- **/
-#define WRITE_STENCIL(_x, _y, d) pwrite_8(irb, X_TILE(_x, _y) + 3, d)
-#define READ_STENCIL(d, _x, _y) d = pread_8(irb, X_TILE(_x, _y) + 3);
-#define TAG(x) intel_XTile_##x##_z24_s8
-#include "stenciltmp.h"
-
-/**
- ** 8-bit y-tile stencil function (XXX FBO: This is obsolete)
- **/
-#define WRITE_STENCIL(_x, _y, d) pwrite_8(irb, Y_TILE(_x, _y) + 3, d)
-#define READ_STENCIL(d, _x, _y) d = pread_8(irb, Y_TILE(_x, _y) + 3)
-#define TAG(x) intel_YTile_##x##_z24_s8
-#include "stenciltmp.h"
+#define VALUE_TYPE GLushort
+#define WRITE_DEPTH(_x, _y, d) \
+   (*(uint16_t *)(irb->region->buffer->virtual + NO_TILE(_x, _y)) = d)
+#define READ_DEPTH(d, _x, _y) \
+   d = *(uint16_t *)(irb->region->buffer->virtual + NO_TILE(_x, _y))
+#define TAG(x) intel_##x##_z16
+#include "depthtmp.h"
+
+/* z24_s8 and z24_x8 depthbuffer functions. */
+#define VALUE_TYPE GLuint
+#define WRITE_DEPTH(_x, _y, d) \
+   (*(uint32_t *)(irb->region->buffer->virtual + NO_TILE(_x, _y)) = d)
+#define READ_DEPTH(d, _x, _y) \
+   d = *(uint32_t *)(irb->region->buffer->virtual + NO_TILE(_x, _y))
+#define TAG(x) intel_##x##_z24_x8
+#include "depthtmp.h"
 
 void
 intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb)
@@ -413,8 +145,7 @@ intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb)
    if (irb == NULL || irb->region == NULL)
       return;
 
-   if (intel->intelScreen->kernel_exec_fencing)
-      drm_intel_gem_bo_map_gtt(irb->region->buffer);
+   drm_intel_gem_bo_map_gtt(irb->region->buffer);
 
    intel_set_span_functions(intel, rb);
 }
@@ -428,10 +159,7 @@ intel_renderbuffer_unmap(struct intel_context *intel,
    if (irb == NULL || irb->region == NULL)
       return;
 
-   if (intel->intelScreen->kernel_exec_fencing)
-      drm_intel_gem_bo_unmap_gtt(irb->region->buffer);
-   else
-      clear_span_cache(irb);
+   drm_intel_gem_bo_unmap_gtt(irb->region->buffer);
 
    rb->GetRow = NULL;
    rb->PutRow = NULL;
@@ -517,7 +245,7 @@ intelSpanRenderStart(GLcontext * ctx)
    GLuint i;
 
    intelFlush(&intel->ctx);
-   LOCK_HARDWARE(intel);
+   intel_prepare_render(intel);
 
    for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
       if (ctx->Texture.Unit[i]._ReallyEnabled) {
@@ -553,8 +281,6 @@ intelSpanRenderFinish(GLcontext * ctx)
    intel_map_unmap_framebuffer(intel, ctx->DrawBuffer, GL_FALSE);
    if (ctx->ReadBuffer != ctx->DrawBuffer)
       intel_map_unmap_framebuffer(intel, ctx->ReadBuffer, GL_FALSE);
-
-   UNLOCK_HARDWARE(intel);
 }
 
 
@@ -613,187 +339,34 @@ intel_set_span_functions(struct intel_context *intel,
 			 struct gl_renderbuffer *rb)
 {
    struct intel_renderbuffer *irb = (struct intel_renderbuffer *) rb;
-   uint32_t tiling;
-
-   /* If in GEM mode, we need to do the tile address swizzling ourselves,
-    * instead of the fence registers handling it.
-    */
-   if (intel->ttm)
-      tiling = irb->region->tiling;
-   else
-      tiling = I915_TILING_NONE;
-
-   if (intel->intelScreen->kernel_exec_fencing) {
-      switch (irb->Base.Format) {
-      case MESA_FORMAT_RGB565:
-	 intel_gttmap_InitPointers_RGB565(rb);
-	 break;
-      case MESA_FORMAT_ARGB4444:
-	 intel_gttmap_InitPointers_ARGB4444(rb);
-	 break;
-      case MESA_FORMAT_ARGB1555:
-	 intel_gttmap_InitPointers_ARGB1555(rb);
-	 break;
-      case MESA_FORMAT_XRGB8888:
-         intel_gttmap_InitPointers_xRGB8888(rb);
-	 break;
-      case MESA_FORMAT_ARGB8888:
-	 intel_gttmap_InitPointers_ARGB8888(rb);
-	 break;
-      case MESA_FORMAT_Z16:
-	 intel_gttmap_InitDepthPointers_z16(rb);
-	 break;
-      case MESA_FORMAT_X8_Z24:
-	 intel_gttmap_InitDepthPointers_z24_x8(rb);
-	 break;
-      case MESA_FORMAT_S8_Z24:
-	 /* There are a few different ways SW asks us to access the S8Z24 data:
-	  * Z24 depth-only depth reads
-	  * S8Z24 depth reads
-	  * S8Z24 stencil reads.
-	  */
-	 if (rb->Format == MESA_FORMAT_S8_Z24) {
-	    intel_gttmap_InitDepthPointers_z24_x8(rb);
-	 } else if (rb->Format == MESA_FORMAT_S8) {
-	    intel_gttmap_InitStencilPointers_z24_s8(rb);
-	 }
-	 break;
-      default:
-	 _mesa_problem(NULL,
-		       "Unexpected MesaFormat %d in intelSetSpanFunctions",
-		       irb->Base.Format);
-	 break;
-      }
-      return;
-   }
 
    switch (irb->Base.Format) {
    case MESA_FORMAT_RGB565:
-      switch (tiling) {
-      case I915_TILING_NONE:
-      default:
-	 intelInitPointers_RGB565(rb);
-	 break;
-      case I915_TILING_X:
-	 intel_XTile_InitPointers_RGB565(rb);
-	 break;
-      case I915_TILING_Y:
-	 intel_YTile_InitPointers_RGB565(rb);
-	 break;
-      }
+      intel_InitPointers_RGB565(rb);
       break;
    case MESA_FORMAT_ARGB4444:
-      switch (tiling) {
-      case I915_TILING_NONE:
-      default:
-	 intelInitPointers_ARGB4444(rb);
-	 break;
-      case I915_TILING_X:
-	 intel_XTile_InitPointers_ARGB4444(rb);
-	 break;
-      case I915_TILING_Y:
-	 intel_YTile_InitPointers_ARGB4444(rb);
-	 break;
-      }
+      intel_InitPointers_ARGB4444(rb);
       break;
    case MESA_FORMAT_ARGB1555:
-      switch (tiling) {
-      case I915_TILING_NONE:
-      default:
-	 intelInitPointers_ARGB1555(rb);
-	 break;
-      case I915_TILING_X:
-	 intel_XTile_InitPointers_ARGB1555(rb);
-	 break;
-      case I915_TILING_Y:
-	 intel_YTile_InitPointers_ARGB1555(rb);
-	 break;
-      }
+      intel_InitPointers_ARGB1555(rb);
       break;
    case MESA_FORMAT_XRGB8888:
-      switch (tiling) {
-      case I915_TILING_NONE:
-      default:
-         intelInitPointers_xRGB8888(rb);
-         break;
-      case I915_TILING_X:
-         intel_XTile_InitPointers_xRGB8888(rb);
-         break;
-      case I915_TILING_Y:
-         intel_YTile_InitPointers_xRGB8888(rb);
-         break;
-      }
+      intel_InitPointers_xRGB8888(rb);
       break;
    case MESA_FORMAT_ARGB8888:
-      /* 8888 RGBA */
-      switch (tiling) {
-      case I915_TILING_NONE:
-      default:
-	 intelInitPointers_ARGB8888(rb);
-	 break;
-      case I915_TILING_X:
-	 intel_XTile_InitPointers_ARGB8888(rb);
-	 break;
-      case I915_TILING_Y:
-	 intel_YTile_InitPointers_ARGB8888(rb);
-	 break;
-      }
+      intel_InitPointers_ARGB8888(rb);
       break;
    case MESA_FORMAT_Z16:
-      switch (tiling) {
-      case I915_TILING_NONE:
-      default:
-	 intelInitDepthPointers_z16(rb);
-	 break;
-      case I915_TILING_X:
-	 intel_XTile_InitDepthPointers_z16(rb);
-	 break;
-      case I915_TILING_Y:
-	 intel_YTile_InitDepthPointers_z16(rb);
-	 break;
-      }
+      intel_InitDepthPointers_z16(rb);
       break;
    case MESA_FORMAT_X8_Z24:
    case MESA_FORMAT_S8_Z24:
-      /* There are a few different ways SW asks us to access the S8Z24 data:
-       * Z24 depth-only depth reads
-       * S8Z24 depth reads
-       * S8Z24 stencil reads.
-       */
-      if (rb->Format == MESA_FORMAT_S8_Z24) {
-	 switch (tiling) {
-	 case I915_TILING_NONE:
-	 default:
-	    intelInitDepthPointers_z24_x8(rb);
-	    break;
-	 case I915_TILING_X:
-	    intel_XTile_InitDepthPointers_z24_x8(rb);
-	    break;
-	 case I915_TILING_Y:
-	    intel_YTile_InitDepthPointers_z24_x8(rb);
-	    break;
-	 }
-      } else if (rb->Format == MESA_FORMAT_S8) {
-	 switch (tiling) {
-	 case I915_TILING_NONE:
-	 default:
-	    intelInitStencilPointers_z24_s8(rb);
-	    break;
-	 case I915_TILING_X:
-	    intel_XTile_InitStencilPointers_z24_s8(rb);
-	    break;
-	 case I915_TILING_Y:
-	    intel_YTile_InitStencilPointers_z24_s8(rb);
-	    break;
-	 }
-      } else {
-	 _mesa_problem(NULL,
-		       "Unexpected ActualFormat in intelSetSpanFunctions");
-      }
+      intel_InitDepthPointers_z24_x8(rb);
       break;
    default:
       _mesa_problem(NULL,
-                    "Unexpected MesaFormat in intelSetSpanFunctions");
+		    "Unexpected MesaFormat %d in intelSetSpanFunctions",
+		    irb->Base.Format);
       break;
    }
 }
diff --git a/shared/intel_spantmp.h b/shared/intel_spantmp.h
deleted file mode 100644
index bad0339..0000000
--- a/shared/intel_spantmp.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright © 2009 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
- *
- */
-
-/**
- * Wrapper around the spantmp.h macrofest to generate spans code for
- * all the tiling styles.
- */
-
-#define SPANTMP_PIXEL_FMT INTEL_PIXEL_FMT
-#define SPANTMP_PIXEL_TYPE INTEL_PIXEL_TYPE
-#define TAG(x) INTEL_TAG(intel_gttmap_##x)
-#define TAG2(x, y) INTEL_TAG(intel_gttmap_##x##y)
-#include "spantmp2.h"
-
-#define SPANTMP_PIXEL_FMT INTEL_PIXEL_FMT
-#define SPANTMP_PIXEL_TYPE INTEL_PIXEL_TYPE
-#define PUT_VALUE(_x, _y, v) INTEL_WRITE_VALUE(NO_TILE(_x, _y), v)
-#define GET_VALUE(_x, _y) INTEL_READ_VALUE(NO_TILE(_x, _y))
-#define TAG(x) INTEL_TAG(intel##x)
-#define TAG2(x, y) INTEL_TAG(intel##x)##y
-#include "spantmp2.h"
-
-#define SPANTMP_PIXEL_FMT INTEL_PIXEL_FMT
-#define SPANTMP_PIXEL_TYPE INTEL_PIXEL_TYPE
-#define PUT_VALUE(_x, _y, v) INTEL_WRITE_VALUE(X_TILE(_x, _y), v)
-#define GET_VALUE(_x, _y) INTEL_READ_VALUE(X_TILE(_x, _y))
-#define TAG(x) INTEL_TAG(intel_XTile_##x)
-#define TAG2(x, y) INTEL_TAG(intel_XTile_##x)##y
-#include "spantmp2.h"
-
-#define SPANTMP_PIXEL_FMT INTEL_PIXEL_FMT
-#define SPANTMP_PIXEL_TYPE INTEL_PIXEL_TYPE
-#define PUT_VALUE(_x, _y, v) INTEL_WRITE_VALUE(Y_TILE(_x, _y), v)
-#define GET_VALUE(_x, _y) INTEL_READ_VALUE(Y_TILE(_x, _y))
-#define TAG(x) INTEL_TAG(intel_YTile_##x)
-#define TAG2(x, y) INTEL_TAG(intel_YTile_##x)##y
-#include "spantmp2.h"
-
-#undef INTEL_PIXEL_FMT
-#undef INTEL_PIXEL_TYPE
-#undef INTEL_WRITE_VALUE
-#undef INTEL_READ_VALUE
-#undef INTEL_TAG
diff --git a/shared/intel_state.c b/shared/intel_state.c
index 4ee7423..c5ef909 100644
--- a/shared/intel_state.c
+++ b/shared/intel_state.c
@@ -35,8 +35,6 @@
 
 #include "intel_screen.h"
 #include "intel_context.h"
-#include "intel_regions.h"
-#include "swrast/swrast.h"
 
 int
 intel_translate_shadow_compare_func(GLenum func)
@@ -196,25 +194,6 @@ intel_translate_logic_op(GLenum opcode)
    }
 }
 
-
-static void
-intelClearColor(GLcontext *ctx, const GLfloat color[4])
-{
-   struct intel_context *intel = intel_context(ctx);
-   GLubyte clear[4];
-
-   CLAMPED_FLOAT_TO_UBYTE(clear[0], color[0]);
-   CLAMPED_FLOAT_TO_UBYTE(clear[1], color[1]);
-   CLAMPED_FLOAT_TO_UBYTE(clear[2], color[2]);
-   CLAMPED_FLOAT_TO_UBYTE(clear[3], color[3]);
-
-   /* compute both 32 and 16-bit clear values */
-   intel->ClearColor8888 = INTEL_PACKCOLOR8888(clear[0], clear[1],
-                                               clear[2], clear[3]);
-   intel->ClearColor565 = INTEL_PACKCOLOR565(clear[0], clear[1], clear[2]);
-}
-
-
 /* Fallback to swrast for select and feedback.
  */
 static void
@@ -229,5 +208,4 @@ void
 intelInitStateFuncs(struct dd_function_table *functions)
 {
    functions->RenderMode = intelRenderMode;
-   functions->ClearColor = intelClearColor;
 }
diff --git a/shared/intel_swapbuffers.c b/shared/intel_swapbuffers.c
deleted file mode 100644
index 7d035b9..0000000
--- a/shared/intel_swapbuffers.c
+++ /dev/null
@@ -1,248 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "intel_blit.h"
-#include "intel_buffers.h"
-#include "intel_swapbuffers.h"
-#include "intel_fbo.h"
-#include "intel_batchbuffer.h"
-#include "drirenderbuffer.h"
-#include "vblank.h"
-#include "i915_drm.h"
-
-
-
-/*
- * Correct a drawablePrivate's set of vblank flags WRT the current context.
- * When considering multiple crtcs.
- */
-GLuint
-intelFixupVblank(struct intel_context *intel, __DRIdrawablePrivate *dPriv)
-{
-   if (!intel->intelScreen->driScrnPriv->dri2.enabled &&
-       intel->intelScreen->driScrnPriv->ddx_version.minor >= 7) {
-      volatile drm_i915_sarea_t *sarea = intel->sarea;
-      drm_clip_rect_t drw_rect = { .x1 = dPriv->x, .x2 = dPriv->x + dPriv->w,
-				   .y1 = dPriv->y, .y2 = dPriv->y + dPriv->h };
-      drm_clip_rect_t planeA_rect = { .x1 = sarea->planeA_x, .y1 = sarea->planeA_y,
-				     .x2 = sarea->planeA_x + sarea->planeA_w,
-				     .y2 = sarea->planeA_y + sarea->planeA_h };
-      drm_clip_rect_t planeB_rect = { .x1 = sarea->planeB_x, .y1 = sarea->planeB_y,
-				     .x2 = sarea->planeB_x + sarea->planeB_w,
-				     .y2 = sarea->planeB_y + sarea->planeB_h };
-      GLint areaA = driIntersectArea( drw_rect, planeA_rect );
-      GLint areaB = driIntersectArea( drw_rect, planeB_rect );
-      GLuint flags = dPriv->vblFlags;
-
-      /* Update vblank info
-       */
-      if (areaB > areaA || (areaA == areaB && areaB > 0)) {
-	 flags = dPriv->vblFlags | VBLANK_FLAG_SECONDARY;
-      } else {
-	 flags = dPriv->vblFlags & ~VBLANK_FLAG_SECONDARY;
-      }
-
-      /* Do the stupid test: Is one of them actually disabled?
-       */
-      if (sarea->planeA_w == 0 || sarea->planeA_h == 0) {
-	 flags = dPriv->vblFlags | VBLANK_FLAG_SECONDARY;
-      } else if (sarea->planeB_w == 0 || sarea->planeB_h == 0) {
-	 flags = dPriv->vblFlags & ~VBLANK_FLAG_SECONDARY;
-      }
-
-      return flags;
-   } else {
-      return dPriv->vblFlags & ~VBLANK_FLAG_SECONDARY;
-   }
-}
-
-
-/**
- * Called from driSwapBuffers()
- */
-void
-intelSwapBuffers(__DRIdrawablePrivate * dPriv)
-{
-   __DRIscreenPrivate *psp = dPriv->driScreenPriv;
-
-   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
-      GET_CURRENT_CONTEXT(ctx);
-      struct intel_context *intel;
-
-      if (ctx == NULL)
-	 return;
-
-      intel = intel_context(ctx);
-
-      if (ctx->Visual.doubleBufferMode) {
-	 GLboolean missed_target;
-	 struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
-	 int64_t ust;
-         
-	 _mesa_notifySwapBuffers(ctx);  /* flush pending rendering comands */
-
-	/*
-	 * The old swapping ioctl was incredibly racy, just wait for vblank
-	 * and do the swap ourselves.
-	 */
-	 driWaitForVBlank(dPriv, &missed_target);
-
-	 /*
-	  * Update each buffer's vbl_pending so we don't get too out of
-	  * sync
-	  */
-	 intel_get_renderbuffer(&intel_fb->Base,
-		   		BUFFER_BACK_LEFT)->vbl_pending = dPriv->vblSeq;
-         intel_get_renderbuffer(&intel_fb->Base,
-		   		BUFFER_FRONT_LEFT)->vbl_pending = dPriv->vblSeq;
-
-	 intelCopyBuffer(dPriv, NULL);
-
-	 intel_fb->swap_count++;
-	 (*psp->systemTime->getUST) (&ust);
-	 if (missed_target) {
-	    intel_fb->swap_missed_count++;
-	    intel_fb->swap_missed_ust = ust - intel_fb->swap_ust;
-	 }
-
-	 intel_fb->swap_ust = ust;
-      }
-      drmCommandNone(intel->driFd, DRM_I915_GEM_THROTTLE);
-   }
-   else {
-      /* XXX this shouldn't be an error but we can't handle it for now */
-      fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__);
-   }
-}
-
-
-/**
- * Called from driCopySubBuffer()
- */
-void
-intelCopySubBuffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h)
-{
-   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
-      struct intel_context *intel =
-         (struct intel_context *) dPriv->driContextPriv->driverPrivate;
-      GLcontext *ctx = &intel->ctx;
-
-      if (ctx->Visual.doubleBufferMode) {
-         drm_clip_rect_t rect;
-         rect.x1 = x + dPriv->x;
-         rect.y1 = (dPriv->h - y - h) + dPriv->y;
-         rect.x2 = rect.x1 + w;
-         rect.y2 = rect.y1 + h;
-         _mesa_notifySwapBuffers(ctx);  /* flush pending rendering comands */
-         intelCopyBuffer(dPriv, &rect);
-      }
-   }
-   else {
-      /* XXX this shouldn't be an error but we can't handle it for now */
-      fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__);
-   }
-}
-
-
-/**
- * This will be called whenever the currently bound window is moved/resized.
- * XXX: actually, it seems to NOT be called when the window is only moved (BP).
- */
-void
-intelWindowMoved(struct intel_context *intel)
-{
-   GLcontext *ctx = &intel->ctx;
-   __DRIdrawablePrivate *dPriv = intel->driDrawable;
-   struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
-
-   if (!intel->intelScreen->driScrnPriv->dri2.enabled &&
-       intel->intelScreen->driScrnPriv->ddx_version.minor >= 7) {
-      GLuint flags = intelFixupVblank(intel, dPriv);
-
-      /* Check to see if we changed pipes */
-      if (flags != dPriv->vblFlags && dPriv->vblFlags &&
-	  !(dPriv->vblFlags & VBLANK_FLAG_NO_IRQ)) {
-	 int64_t count;
-	 drmVBlank vbl;
-	 int i;
-
-	 /*
-	  * Deal with page flipping
-	  */
-	 vbl.request.type = DRM_VBLANK_ABSOLUTE;
-
-	 if ( dPriv->vblFlags & VBLANK_FLAG_SECONDARY ) {
-	    vbl.request.type |= DRM_VBLANK_SECONDARY;
-	 }
-
-	 for (i = 0; i < 2; i++) {
-	    if (!intel_fb->color_rb[i] ||
-		(intel_fb->vbl_waited - intel_fb->color_rb[i]->vbl_pending) <=
-		(1<<23))
-	       continue;
-
-	    vbl.request.sequence = intel_fb->color_rb[i]->vbl_pending;
-	    drmWaitVBlank(intel->driFd, &vbl);
-	 }
-
-	 /*
-	  * Update msc_base from old pipe
-	  */
-	 driDrawableGetMSC32(dPriv->driScreenPriv, dPriv, &count);
-	 dPriv->msc_base = count;
-	 /*
-	  * Then get new vblank_base and vblSeq values
-	  */
-	 dPriv->vblFlags = flags;
-	 driGetCurrentVBlank(dPriv);
-	 dPriv->vblank_base = dPriv->vblSeq;
-
-	 intel_fb->vbl_waited = dPriv->vblSeq;
-
-	 for (i = 0; i < 2; i++) {
-	    if (intel_fb->color_rb[i])
-	       intel_fb->color_rb[i]->vbl_pending = intel_fb->vbl_waited;
-	 }
-      }
-   } else {
-      dPriv->vblFlags &= ~VBLANK_FLAG_SECONDARY;
-   }
-
-   /* Update Mesa's notion of window size */
-   driUpdateFramebufferSize(ctx, dPriv);
-   intel_fb->Base.Initialized = GL_TRUE; /* XXX remove someday */
-
-   /* Update hardware scissor */
-   if (ctx->Driver.Scissor != NULL) {
-      ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y,
-			  ctx->Scissor.Width, ctx->Scissor.Height);
-   }
-
-   /* Re-calculate viewport related state */
-   if (ctx->Driver.DepthRange != NULL)
-      ctx->Driver.DepthRange( ctx, ctx->Viewport.Near, ctx->Viewport.Far );
-}
diff --git a/shared/intel_swapbuffers.h b/shared/intel_swapbuffers.h
deleted file mode 100644
index 75bb624..0000000
--- a/shared/intel_swapbuffers.h
+++ /dev/null
@@ -1,52 +0,0 @@
-
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef INTEL_SWAPBUFFERS_H
-#define INTEL_SWAPBUFFERS_H
-
-#include "dri_util.h"
-#include "drm.h"
-
-struct intel_context;
-struct intel_framebuffer;
-
-
-extern void
-intelSwapBuffers(__DRIdrawablePrivate * dPriv);
-
-extern void
-intelCopySubBuffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h);
-
-extern GLuint
-intelFixupVblank(struct intel_context *intel, __DRIdrawablePrivate *dPriv);
-
-extern void
-intelWindowMoved(struct intel_context *intel);
-
-
-#endif /* INTEL_SWAPBUFFERS_H */
diff --git a/shared/intel_syncobj.c b/shared/intel_syncobj.c
index 0d7889d..d67f0cb 100644
--- a/shared/intel_syncobj.c
+++ b/shared/intel_syncobj.c
@@ -50,7 +50,7 @@ intel_new_sync_object(GLcontext *ctx, GLuint id)
 {
    struct intel_sync_object *sync;
 
-   sync = _mesa_calloc(sizeof(struct intel_sync_object));
+   sync = calloc(1, sizeof(struct intel_sync_object));
 
    return &sync->Base;
 }
@@ -61,7 +61,7 @@ intel_delete_sync_object(GLcontext *ctx, struct gl_sync_object *s)
    struct intel_sync_object *sync = (struct intel_sync_object *)s;
 
    drm_intel_bo_unreference(sync->bo);
-   _mesa_free(sync);
+   free(sync);
 }
 
 static void
diff --git a/shared/intel_tex.c b/shared/intel_tex.c
index 215a534..8bb6ae9 100644
--- a/shared/intel_tex.c
+++ b/shared/intel_tex.c
@@ -146,7 +146,7 @@ timed_memcpy(void *dest, const void *src, size_t n)
    double rate;
 
    if ((((unsigned) src) & 63) || (((unsigned) dest) & 63))
-      _mesa_printf("Warning - non-aligned texture copy!\n");
+      printf("Warning - non-aligned texture copy!\n");
 
    t1 = fastrdtsc();
    ret = do_memcpy(dest, src, n);
@@ -154,7 +154,7 @@ timed_memcpy(void *dest, const void *src, size_t n)
 
    rate = time_diff(t1, t2);
    rate /= (double) n;
-   _mesa_printf("timed_memcpy: %u %u --> %f clocks/byte\n", t1, t2, rate);
+   printf("timed_memcpy: %u %u --> %f clocks/byte\n", t1, t2, rate);
    return ret;
 }
 #endif /* DO_DEBUG */
diff --git a/shared/intel_tex.h b/shared/intel_tex.h
index f3cc0ff..4bb012d 100644
--- a/shared/intel_tex.h
+++ b/shared/intel_tex.h
@@ -45,8 +45,6 @@ void intelInitTextureCopyImageFuncs(struct dd_function_table *functions);
 gl_format intelChooseTextureFormat(GLcontext *ctx, GLint internalFormat,
                                    GLenum format, GLenum type);
 
-void intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname,
-		       unsigned long long offset, GLint depth, GLuint pitch);
 void intelSetTexBuffer(__DRIcontext *pDRICtx,
 		       GLint target, __DRIdrawable *pDraw);
 void intelSetTexBuffer2(__DRIcontext *pDRICtx,
diff --git a/shared/intel_tex_copy.c b/shared/intel_tex_copy.c
index 767d04d..13b8bcf 100644
--- a/shared/intel_tex_copy.c
+++ b/shared/intel_tex_copy.c
@@ -36,7 +36,6 @@
 
 #include "intel_screen.h"
 #include "intel_context.h"
-#include "intel_batchbuffer.h"
 #include "intel_buffers.h"
 #include "intel_mipmap_tree.h"
 #include "intel_regions.h"
@@ -109,14 +108,12 @@ do_copy_texsubimage(struct intel_context *intel,
       return GL_FALSE;
    }
 
-   //   intelFlush(ctx);
-   LOCK_HARDWARE(intel);
+   /* intelFlush(ctx); */
+   intel_prepare_render(intel);
    {
       drm_intel_bo *dst_bo = intel_region_buffer(intel,
 						 intelImage->mt->region,
 						 INTEL_WRITE_PART);
-      const GLint orig_x = x;
-      const GLint orig_y = y;
       GLuint image_x, image_y;
       GLshort src_pitch;
 
@@ -126,19 +123,15 @@ do_copy_texsubimage(struct intel_context *intel,
 				     intelImage->face,
 				     0,
 				     &image_x, &image_y);
-      /* Update dst for clipped src.  Need to also clip the source rect. */
-      dstx += x - orig_x;
-      dsty += y - orig_y;
 
       /* Can't blit to tiled buffers with non-tile-aligned offset. */
       if (intelImage->mt->region->tiling == I915_TILING_Y) {
-	 UNLOCK_HARDWARE(intel);
 	 return GL_FALSE;
       }
 
       if (ctx->ReadBuffer->Name == 0) {
 	 /* reading from a window, adjust x, y */
-	 const __DRIdrawablePrivate *dPriv = intel->driReadDrawable;
+	 const __DRIdrawable *dPriv = intel->driReadDrawable;
 	 y = dPriv->y + (dPriv->h - (y + height));
 	 x += dPriv->x;
 
@@ -160,22 +153,20 @@ do_copy_texsubimage(struct intel_context *intel,
 			     intelImage->mt->cpp,
 			     src_pitch,
 			     src->buffer,
-			     src->draw_offset,
+			     0,
 			     src->tiling,
 			     intelImage->mt->pitch,
 			     dst_bo,
 			     0,
 			     intelImage->mt->region->tiling,
-			     x, y, image_x + dstx, image_y + dsty,
+			     src->draw_x + x, src->draw_y + y,
+			     image_x + dstx, image_y + dsty,
 			     width, height,
 			     GL_COPY)) {
-	 UNLOCK_HARDWARE(intel);
 	 return GL_FALSE;
       }
    }
 
-   UNLOCK_HARDWARE(intel);
-
    return GL_TRUE;
 }
 
diff --git a/shared/intel_tex_format.c b/shared/intel_tex_format.c
index 87efb72..7be5231 100644
--- a/shared/intel_tex_format.c
+++ b/shared/intel_tex_format.c
@@ -1,6 +1,5 @@
 #include "intel_context.h"
 #include "intel_tex.h"
-#include "intel_chipset.h"
 #include "main/enums.h"
 
 
@@ -173,13 +172,13 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
       return MESA_FORMAT_SARGB8;
    case GL_SLUMINANCE_EXT:
    case GL_SLUMINANCE8_EXT:
-      if (IS_G4X(intel->intelScreen->deviceID))
+      if (intel->has_luminance_srgb)
          return MESA_FORMAT_SL8;
       else
          return MESA_FORMAT_SARGB8;
    case GL_SLUMINANCE_ALPHA_EXT:
    case GL_SLUMINANCE8_ALPHA8_EXT:
-      if (IS_G4X(intel->intelScreen->deviceID))
+      if (intel->has_luminance_srgb)
          return MESA_FORMAT_SLA8;
       else
          return MESA_FORMAT_SARGB8;
diff --git a/shared/intel_tex_image.c b/shared/intel_tex_image.c
index 66d61f9..bac36ee 100644
--- a/shared/intel_tex_image.c
+++ b/shared/intel_tex_image.c
@@ -7,7 +7,6 @@
 #include "main/convolve.h"
 #include "main/context.h"
 #include "main/formats.h"
-#include "main/image.h"
 #include "main/texcompress.h"
 #include "main/texstore.h"
 #include "main/texgetimage.h"
@@ -178,6 +177,7 @@ check_pbo_format(GLint internalFormat,
    switch (internalFormat) {
    case 4:
    case GL_RGBA:
+   case GL_RGBA8:
       return (format == GL_BGRA &&
               (type == GL_UNSIGNED_BYTE ||
                type == GL_UNSIGNED_INT_8_8_8_8_REV) &&
@@ -187,6 +187,11 @@ check_pbo_format(GLint internalFormat,
       return (format == GL_RGB &&
               type == GL_UNSIGNED_SHORT_5_6_5 &&
               mesa_format == MESA_FORMAT_RGB565);
+   case 1:
+   case GL_LUMINANCE:
+      return (format == GL_LUMINANCE &&
+	      type == GL_UNSIGNED_BYTE &&
+	      mesa_format == MESA_FORMAT_L8);
    case GL_YCBCR_MESA:
       return (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE);
    default:
@@ -235,21 +240,20 @@ try_pbo_upload(struct intel_context *intel,
 
    if (drm_intel_bo_references(intel->batch->buf, dst_buffer))
       intelFlush(&intel->ctx);
-   LOCK_HARDWARE(intel);
+   intel_prepare_render(intel);
    {
       dri_bo *src_buffer = intel_bufferobj_buffer(intel, pbo, INTEL_READ);
 
       if (!intelEmitCopyBlit(intel,
 			     intelImage->mt->cpp,
 			     src_stride, src_buffer, src_offset, GL_FALSE,
-			     dst_stride, dst_buffer, 0, GL_FALSE,
+			     dst_stride, dst_buffer, 0,
+			     intelImage->mt->region->tiling,
 			     0, 0, dst_x, dst_y, width, height,
 			     GL_COPY)) {
-	 UNLOCK_HARDWARE(intel);
 	 return GL_FALSE;
       }
    }
-   UNLOCK_HARDWARE(intel);
 
    return GL_TRUE;
 }
@@ -469,7 +473,7 @@ intelTexImage(GLcontext * ctx,
 					   pixels, unpack, "glTexImage");
    }
 
-   LOCK_HARDWARE(intel);
+   intel_prepare_render(intel);
 
    if (intelImage->mt) {
       if (pixels != NULL) {
@@ -551,8 +555,6 @@ intelTexImage(GLcontext * ctx,
          intel_miptree_image_unmap(intel, intelImage->mt);
       texImage->Data = NULL;
    }
-
-   UNLOCK_HARDWARE(intel);
 }
 
 
@@ -704,35 +706,12 @@ intelGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level,
 		       texObj, texImage, GL_TRUE);
 }
 
-
-void
-intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname,
-		  unsigned long long offset, GLint depth, GLuint pitch)
-{
-   struct intel_context *intel = pDRICtx->driverPrivate;
-   struct gl_texture_object *tObj = _mesa_lookup_texture(&intel->ctx, texname);
-   struct intel_texture_object *intelObj = intel_texture_object(tObj);
-
-   if (!intelObj)
-      return;
-
-   if (intelObj->mt)
-      intel_miptree_release(intel, &intelObj->mt);
-
-   intelObj->imageOverride = GL_TRUE;
-   intelObj->depthOverride = depth;
-   intelObj->pitchOverride = pitch;
-
-   if (offset)
-      intelObj->textureOffset = offset;
-}
-
 void
 intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
-		   GLint glx_texture_format,
+		   GLint texture_format,
 		   __DRIdrawable *dPriv)
 {
-   struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
+   struct gl_framebuffer *fb = dPriv->driverPrivate;
    struct intel_context *intel = pDRICtx->driverPrivate;
    GLcontext *ctx = &intel->ctx;
    struct intel_texture_object *intelObj;
@@ -749,16 +728,17 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
    if (!intelObj)
       return;
 
-   intel_update_renderbuffers(pDRICtx, dPriv);
+   if (dPriv->lastStamp != dPriv->dri2.stamp)
+      intel_update_renderbuffers(pDRICtx, dPriv);
 
-   rb = intel_fb->color_rb[0];
+   rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
    /* If the region isn't set, then intel_update_renderbuffers was unable
     * to get the buffers for the drawable.
     */
    if (rb->region == NULL)
       return;
 
-   if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT)
+   if (texture_format == __DRI_TEXTURE_FORMAT_RGB)
       internalFormat = GL_RGB;
    else
       internalFormat = GL_RGBA;
@@ -788,7 +768,7 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
 
    intelImage->face = target_to_face(target);
    intelImage->level = level;
-   if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT)
+   if (texture_format == __DRI_TEXTURE_FORMAT_RGB)
       texImage->TexFormat = MESA_FORMAT_XRGB8888;
    else
       texImage->TexFormat = MESA_FORMAT_ARGB8888;
@@ -808,9 +788,57 @@ intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
    /* The old interface didn't have the format argument, so copy our
     * implementation's behavior at the time.
     */
-   intelSetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv);
+   intelSetTexBuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv);
 }
 
+#if FEATURE_OES_EGL_image
+static void
+intel_image_target_texture_2d(GLcontext *ctx, GLenum target,
+			      struct gl_texture_object *texObj,
+			      struct gl_texture_image *texImage,
+			      GLeglImageOES image_handle)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_texture_object *intelObj = intel_texture_object(texObj);
+   struct intel_texture_image *intelImage = intel_texture_image(texImage);
+   struct intel_mipmap_tree *mt;
+   __DRIscreen *screen;
+   __DRIimage *image;
+
+   screen = intel->intelScreen->driScrnPriv;
+   image = screen->dri2.image->lookupEGLImage(intel->driContext, image_handle,
+					      intel->driContext->loaderPrivate);
+   if (image == NULL)
+      return;
+
+   mt = intel_miptree_create_for_region(intel, target,
+					image->internal_format,
+					0, 0, image->region, 1, 0);
+   if (mt == NULL)
+       return;
+
+   if (intelImage->mt) {
+      intel_miptree_release(intel, &intelImage->mt);
+      assert(!texImage->Data);
+   }
+   if (intelObj->mt)
+      intel_miptree_release(intel, &intelObj->mt);
+
+   intelObj->mt = mt;
+   _mesa_init_teximage_fields(&intel->ctx, target, texImage,
+			      image->region->width, image->region->height, 1,
+			      0, image->internal_format);
+
+   intelImage->face = target_to_face(target);
+   intelImage->level = 0;
+   texImage->TexFormat = image->format;
+   texImage->RowStride = image->region->pitch;
+   intel_miptree_reference(&intelImage->mt, intelObj->mt);
+
+   if (!intel_miptree_match_image(intelObj->mt, &intelImage->base))
+      fprintf(stderr, "miptree doesn't match image\n");
+}
+#endif
 
 void
 intelInitTextureImageFuncs(struct dd_function_table *functions)
@@ -822,4 +850,8 @@ intelInitTextureImageFuncs(struct dd_function_table *functions)
 
    functions->CompressedTexImage2D = intelCompressedTexImage2D;
    functions->GetCompressedTexImage = intelGetCompressedTexImage;
+
+#if FEATURE_OES_EGL_image
+   functions->EGLImageTargetTexture2D = intel_image_target_texture_2d;
+#endif
 }
diff --git a/shared/intel_tex_obj.h b/shared/intel_tex_obj.h
index 3ad10d3..5f60e0e 100644
--- a/shared/intel_tex_obj.h
+++ b/shared/intel_tex_obj.h
@@ -46,10 +46,6 @@ struct intel_texture_object
     * regions will be copied to this region and the old storage freed.
     */
    struct intel_mipmap_tree *mt;
-
-   GLboolean imageOverride;
-   GLint depthOverride;
-   GLuint pitchOverride;
 };
 
 struct intel_texture_image
diff --git a/shared/intel_tex_subimage.c b/shared/intel_tex_subimage.c
index 1f68208..c35d2e8 100644
--- a/shared/intel_tex_subimage.c
+++ b/shared/intel_tex_subimage.c
@@ -72,7 +72,7 @@ intelTexSubimage(GLcontext * ctx,
    if (!pixels)
       return;
 
-   LOCK_HARDWARE(intel);
+   intel_prepare_render(intel);
 
    /* Map buffer if necessary.  Need to lock to prevent other contexts
     * from uploading the buffer under us.
@@ -129,8 +129,6 @@ intelTexSubimage(GLcontext * ctx,
       intel_miptree_image_unmap(intel, intelImage->mt);
       texImage->Data = NULL;
    }
-
-   UNLOCK_HARDWARE(intel);
 }
 
 
diff --git a/shared/intel_tex_validate.c b/shared/intel_tex_validate.c
index c9a24ac..ed5c5d8 100644
--- a/shared/intel_tex_validate.c
+++ b/shared/intel_tex_validate.c
@@ -2,10 +2,8 @@
 #include "main/macros.h"
 
 #include "intel_context.h"
-#include "intel_batchbuffer.h"
 #include "intel_mipmap_tree.h"
 #include "intel_tex.h"
-#include "intel_chipset.h"
 
 #define FILE_DEBUG_FLAG DEBUG_TEXTURE
author	Luc Verhaegen <libv@skynet.be>	2010-03-16 20:19:03 +0100
committer	Luc Verhaegen <libv@skynet.be>	2010-03-16 20:19:03 +0100
commit	54a8e9cc3988d908b5b846a752679127cacefd3b (patch)
tree	ee289172f13f246903151b12db7808b7cb0651e8
parent	6b263d1e2c599ddcc0ce4c84425dbc1ac8de020c (diff)