Import radeon, r200, r300 and r600 dri drivers from mesa 7.6.1.7.6.1

author: Luc Verhaegen <libv@skynet.be> 2010-03-14 08:30:53 +0100
committer: Luc Verhaegen <libv@skynet.be> 2010-03-14 08:30:53 +0100
commit: a682254179a2c3c2ec3ee4597bd71b12c05a778d (patch)
tree: c4d282e4927d130a4c0d4bad45080b03bbabc77d
parent: 0c8469d1892b441c38d1cb09d6bbf85692c89e92 (diff)
43 files changed, 961 insertions, 743 deletions
diff --git a/configure.ac b/configure.ac
index d9be032..b57720d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,7 +1,7 @@
 # Process this file with autoconf to produce a configure script
 
 AC_PREREQ(2.57)
-AC_INIT([mesa-dri-radeon], 7.6.0, [], mesa-dri-radeon)
+AC_INIT([mesa-dri-radeon], 7.6.1, [], mesa-dri-radeon)
 
 AM_INIT_AUTOMAKE([dist-bzip2])
 
@@ -21,7 +21,7 @@ PKG_CHECK_MODULES([DRI], [libmesadri >= 7.6.0 libmesadri < 7.7.0
 			  libmesadricommon >= 7.6.0 libmesadricommon < 7.7.0])
 
 # libdrm 2.4.17 changed the api significantly.
-PKG_CHECK_MODULES([LIBDRM_RADEON], [libdrm_radeon libdrm <= 2.4.16],
+PKG_CHECK_MODULES([LIBDRM_RADEON], [libdrm_radeon libdrm >= 2.4.17],
 		 HAVE_LIBDRM_RADEON=yes, HAVE_LIBDRM_RADEON=no)
 AM_CONDITIONAL(HAVE_LIBDRM_RADEON, test "x$HAVE_LIBDRM_RADEON" = xyes)
 
diff --git a/r200/Makefile.am b/r200/Makefile.am
index 804b285..0817c07 100644
--- a/r200/Makefile.am
+++ b/r200/Makefile.am
@@ -41,5 +41,7 @@ if HAVE_LIBDRM_RADEON
 r200_dri_la_CFLAGS += -DHAVE_LIBDRM_RADEON=1 $(LIBDRM_RADEON_CFLAGS)
 r200_dri_la_LDFLAGS += $(LIBDRM_RADEON_LIBS)
 r200_dri_la_SOURCES += \
-	../radeon/radeon_cs_space_drm.c
+	../radeon/radeon_cs_space_drm.c \
+	../radeon/radeon_bo.c \
+	../radeon/radeon_cs.c
 endif
diff --git a/r200/r200_context.c b/r200/r200_context.c
index 3ddb5bf..4e34e09 100644
--- a/r200/r200_context.c
+++ b/r200/r200_context.c
@@ -325,9 +325,9 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
    _mesa_init_driver_functions(&functions);
    r200InitDriverFuncs(&functions);
    r200InitIoctlFuncs(&functions);
-   r200InitStateFuncs(&functions, screen->kernel_mm);
+   r200InitStateFuncs(&functions);
    r200InitTextureFuncs(&functions);
-   r200InitShaderFuncs(&functions); 
+   r200InitShaderFuncs(&functions);
    radeonInitQueryObjFunctions(&functions);
 
    if (!radeonInitContext(&rmesa->radeon, &functions,
diff --git a/r200/r200_state.c b/r200/r200_state.c
index 76852e3..6d99c03 100644
--- a/r200/r200_state.c
+++ b/r200/r200_state.c
@@ -1578,13 +1578,6 @@ static void r200ClearStencil( GLcontext *ctx, GLint s )
  * Window position and viewport transformation
  */
 
-/*
- * To correctly position primitives:
- */
-#define SUBPIXEL_X 0.125
-#define SUBPIXEL_Y 0.125
-
-
 /**
  * Called when window size or position changes or viewport or depth range
  * state is changed.  We update the hardware viewport state here.
@@ -1609,9 +1602,9 @@ void r200UpdateWindow( GLcontext *ctx )
    }
 
    float_ui32_type sx = { v[MAT_SX] };
-   float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X };
+   float_ui32_type tx = { v[MAT_TX] + xoffset };
    float_ui32_type sy = { v[MAT_SY] * y_scale };
-   float_ui32_type ty = { (v[MAT_TY] * y_scale) + y_bias + SUBPIXEL_Y };
+   float_ui32_type ty = { (v[MAT_TY] * y_scale) + y_bias };
    float_ui32_type sz = { v[MAT_SZ] * depthScale };
    float_ui32_type tz = { v[MAT_TZ] * depthScale };
 
@@ -1680,8 +1673,8 @@ void r200UpdateViewportOffset( GLcontext *ctx )
    float_ui32_type tx;
    float_ui32_type ty;
 
-   tx.f = v[MAT_TX] + xoffset + SUBPIXEL_X;
-   ty.f = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y;
+   tx.f = v[MAT_TX] + xoffset;
+   ty.f = (- v[MAT_TY]) + yoffset;
 
    if ( rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != tx.ui32 ||
 	rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != ty.ui32 )
@@ -2483,7 +2476,7 @@ static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask )
 }
 /* Initialize the driver's state functions.
  */
-void r200InitStateFuncs( struct dd_function_table *functions, GLboolean dri2 )
+void r200InitStateFuncs( struct dd_function_table *functions )
 {
    functions->UpdateState		= r200InvalidateState;
    functions->LightingSpaceChange	= r200LightingSpaceChange;
@@ -2517,10 +2510,7 @@ void r200InitStateFuncs( struct dd_function_table *functions, GLboolean dri2 )
    functions->LogicOpcode		= r200LogicOpCode;
    functions->PolygonMode		= r200PolygonMode;
    functions->PolygonOffset		= r200PolygonOffset;
-   if (dri2)
-      functions->PolygonStipple		= r200PolygonStipple;
-   else
-      functions->PolygonStipple		= radeonPolygonStipplePreKMS;
+   functions->PolygonStipple		= r200PolygonStipple;
    functions->PointParameterfv		= r200PointParameter;
    functions->PointSize			= r200PointSize;
    functions->RenderMode		= r200RenderMode;
diff --git a/r200/r200_state.h b/r200/r200_state.h
index 9c62f0a..7b9b0c1 100644
--- a/r200/r200_state.h
+++ b/r200/r200_state.h
@@ -38,7 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "r200_context.h"
 
 extern void r200InitState( r200ContextPtr rmesa );
-extern void r200InitStateFuncs( struct dd_function_table *functions, GLboolean dri2 );
+extern void r200InitStateFuncs( struct dd_function_table *functions );
 extern void r200InitTnlFuncs( GLcontext *ctx );
 
 extern void r200UpdateMaterial( GLcontext *ctx );
diff --git a/r200/r200_state_init.c b/r200/r200_state_init.c
index 7697306..8553be0 100644
--- a/r200/r200_state_init.c
+++ b/r200/r200_state_init.c
@@ -885,10 +885,8 @@ void r200InitState( r200ContextPtr rmesa )
          }
       }
    }
-   /* polygon stipple is done with irq for non-kms */
-   if (rmesa->radeon.radeonScreen->kernel_mm) {
-       ALLOC_STATE( stp, always, STP_STATE_SIZE, "STP/stp", 0 );
-   }
+
+   ALLOC_STATE( stp, always, STP_STATE_SIZE, "STP/stp", 0 );
 
    for (i = 0; i < 6; i++)
       if (rmesa->radeon.radeonScreen->kernel_mm)
@@ -1120,12 +1118,11 @@ void r200InitState( r200ContextPtr rmesa )
    rmesa->hw.sci.cmd[SCI_CMD_1] = CP_PACKET0(R200_RE_TOP_LEFT, 0);
    rmesa->hw.sci.cmd[SCI_CMD_2] = CP_PACKET0(R200_RE_WIDTH_HEIGHT, 0);
 
-   if (rmesa->radeon.radeonScreen->kernel_mm) {
-
-	rmesa->hw.stp.cmd[STP_CMD_0] = CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0);
-	rmesa->hw.stp.cmd[STP_DATA_0] = 0;
-	rmesa->hw.stp.cmd[STP_CMD_1] = CP_PACKET0_ONE(RADEON_RE_STIPPLE_DATA, 31);
+   rmesa->hw.stp.cmd[STP_CMD_0] = CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0);
+   rmesa->hw.stp.cmd[STP_DATA_0] = 0;
+   rmesa->hw.stp.cmd[STP_CMD_1] = CP_PACKET0_ONE(RADEON_RE_STIPPLE_DATA, 31);
 
+   if (rmesa->radeon.radeonScreen->kernel_mm) {
         rmesa->hw.mtl[0].emit = mtl_emit;
         rmesa->hw.mtl[1].emit = mtl_emit;
 
diff --git a/r300/Makefile.am b/r300/Makefile.am
index a678ddb..0456666 100644
--- a/r300/Makefile.am
+++ b/r300/Makefile.am
@@ -44,5 +44,7 @@ if HAVE_LIBDRM_RADEON
 r300_dri_la_CFLAGS += -DHAVE_LIBDRM_RADEON=1 $(LIBDRM_RADEON_CFLAGS)
 r300_dri_la_LDFLAGS += $(LIBDRM_RADEON_LIBS)
 r300_dri_la_SOURCES += \
-	../radeon/radeon_cs_space_drm.c
+	../radeon/radeon_cs_space_drm.c \
+	../radeon/radeon_bo.c \
+	../radeon/radeon_cs.c
 endif
diff --git a/r300/compiler/radeon_code.c b/r300/compiler/radeon_code.c
index a9dedf7..7fadac9 100644
--- a/r300/compiler/radeon_code.c
+++ b/r300/compiler/radeon_code.c
@@ -138,10 +138,11 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da
 	unsigned index;
 	int free_index = -1;
 	struct rc_constant constant;
-	unsigned comp;
 
 	for(index = 0; index < c->Count; ++index) {
 		if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
+			unsigned comp;
+
 			for(comp = 0; comp < c->Constants[index].Size; ++comp) {
 				if (c->Constants[index].u.Immediate[comp] == data) {
 					*swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
@@ -155,7 +156,7 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da
 	}
 
 	if (free_index >= 0) {
-		comp = c->Constants[free_index].Size++;
+		unsigned comp = c->Constants[free_index].Size++;
 		c->Constants[free_index].u.Immediate[comp] = data;
 		*swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
 		return free_index;
diff --git a/r300/r300_context.c b/r300/r300_context.c
index 2ea1b82..9df3897 100644
--- a/r300/r300_context.c
+++ b/r300/r300_context.c
@@ -374,11 +374,21 @@ static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
 	if (screen->chip_family >= CHIP_FAMILY_RV515) {
 		ctx->Const.FragmentProgram.MaxNativeTemps = R500_PFS_NUM_TEMP_REGS;
 		ctx->Const.FragmentProgram.MaxNativeAttribs = 11;	/* copy i915... */
-		ctx->Const.FragmentProgram.MaxNativeParameters = R500_PFS_NUM_CONST_REGS;
-		ctx->Const.FragmentProgram.MaxNativeAluInstructions = R500_PFS_MAX_INST;
-		ctx->Const.FragmentProgram.MaxNativeTexInstructions = R500_PFS_MAX_INST;
-		ctx->Const.FragmentProgram.MaxNativeInstructions = R500_PFS_MAX_INST;
-		ctx->Const.FragmentProgram.MaxNativeTexIndirections = R500_PFS_MAX_INST;
+
+		/* The hardware limits are higher than this,
+		 * but the non-KMS DRM interface artificially limits us
+		 * to this many instructions.
+		 *
+		 * We could of course work around it in the KMS path,
+		 * but it would be a mess, so it seems wiser
+		 * to leave it as is. Going forward, the Gallium driver
+		 * will not be subject to these limitations.
+		 */
+		ctx->Const.FragmentProgram.MaxNativeParameters = 255;
+		ctx->Const.FragmentProgram.MaxNativeAluInstructions = 255;
+		ctx->Const.FragmentProgram.MaxNativeTexInstructions = 255;
+		ctx->Const.FragmentProgram.MaxNativeInstructions = 255;
+		ctx->Const.FragmentProgram.MaxNativeTexIndirections = 255;
 		ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
 	} else {
 		ctx->Const.FragmentProgram.MaxNativeTemps = R300_PFS_NUM_TEMP_REGS;
diff --git a/r300/r300_fragprog_common.c b/r300/r300_fragprog_common.c
index 0bdc90b..70c9252 100644
--- a/r300/r300_fragprog_common.c
+++ b/r300/r300_fragprog_common.c
@@ -239,6 +239,19 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog
 	rewriteFog(&compiler, fp);
 
 	r3xx_compile_fragment_program(&compiler);
+
+	if (compiler.is_r500) {
+		/* We need to support the non-KMS DRM interface, which
+		 * artificially limits the number of instructions and
+		 * constants which are available to us.
+		 *
+		 * See also the comment in r300_context.c where we
+		 * set the MAX_NATIVE_xxx values.
+		 */
+		if (fp->code.code.r500.inst_end >= 255 || fp->code.constants.Count > 255)
+			rc_error(&compiler.Base, "Program is too big (upgrade to r300g to avoid this limitation).\n");
+	}
+
 	fp->error = compiler.Base.Error;
 
 	fp->InputsRead = compiler.Base.Program.InputsRead;
diff --git a/r300/r300_texstate.c b/r300/r300_texstate.c
index f030451..65cabcc 100644
--- a/r300/r300_texstate.c
+++ b/r300/r300_texstate.c
@@ -225,10 +225,10 @@ static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t)
 	if (t->image_override && t->bo)
 		return;
 
-	t->pp_txsize = (((firstImage->Width - 1) << R300_TX_WIDTHMASK_SHIFT)
-			| ((firstImage->Height - 1) << R300_TX_HEIGHTMASK_SHIFT)
-			| ((firstImage->DepthLog2) << R300_TX_DEPTHMASK_SHIFT)
-			| ((t->mt->lastLevel - t->mt->firstLevel) << R300_TX_MAX_MIP_LEVEL_SHIFT));
+	t->pp_txsize = (((R300_TX_WIDTHMASK_MASK & ((firstImage->Width - 1) << R300_TX_WIDTHMASK_SHIFT)))
+			| ((R300_TX_HEIGHTMASK_MASK & ((firstImage->Height - 1) << R300_TX_HEIGHTMASK_SHIFT)))
+			| ((R300_TX_DEPTHMASK_MASK & ((firstImage->DepthLog2) << R300_TX_DEPTHMASK_SHIFT)))
+			| ((R300_TX_MAX_MIP_LEVEL_MASK & ((t->mt->lastLevel - t->mt->firstLevel) << R300_TX_MAX_MIP_LEVEL_SHIFT))));
 
 	t->tile_bits = 0;
 
@@ -248,8 +248,12 @@ static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t)
 	if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
 	    if (firstImage->Width > 2048)
 		t->pp_txpitch |= R500_TXWIDTH_BIT11;
+            else
+		t->pp_txpitch &= ~R500_TXWIDTH_BIT11;
 	    if (firstImage->Height > 2048)
 		t->pp_txpitch |= R500_TXHEIGHT_BIT11;
+            else
+		t->pp_txpitch &= ~R500_TXHEIGHT_BIT11;
 	}
 }
 
@@ -479,16 +483,20 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo
 		break;
 	}
 	pitch_val--;
-	t->pp_txsize = ((rb->base.Width - 1) << R300_TX_WIDTHMASK_SHIFT) |
-              ((rb->base.Height - 1) << R300_TX_HEIGHTMASK_SHIFT);
+	t->pp_txsize = (((R300_TX_WIDTHMASK_MASK & ((rb->base.Width - 1) << R300_TX_WIDTHMASK_SHIFT)))
+			| ((R300_TX_HEIGHTMASK_MASK & ((rb->base.Height - 1) << R300_TX_HEIGHTMASK_SHIFT))));
 	t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN;
 	t->pp_txpitch |= pitch_val;
 
 	if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
 	    if (rb->base.Width > 2048)
 		t->pp_txpitch |= R500_TXWIDTH_BIT11;
+            else
+		t->pp_txpitch &= ~R500_TXWIDTH_BIT11;
 	    if (rb->base.Height > 2048)
 		t->pp_txpitch |= R500_TXHEIGHT_BIT11;
+            else
+		t->pp_txpitch &= ~R500_TXHEIGHT_BIT11;
 	}
 	t->validated = GL_TRUE;
 	_mesa_unlock_texture(radeon->glCtx, texObj);
diff --git a/r300/r300_vertprog.c b/r300/r300_vertprog.c
index 2f7b67c..ed8b788 100644
--- a/r300/r300_vertprog.c
+++ b/r300/r300_vertprog.c
@@ -62,12 +62,6 @@ static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_progra
 		}
 	}
 
-	if (vp->code.constants.Count * 4 > VSF_MAX_FRAGMENT_LENGTH) {
-		/* Should have checked this earlier... */
-		fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
-		_mesa_exit(-1);
-	}
-
 	for(i = 0; i < vp->code.constants.Count; ++i) {
 		const float * src = 0;
 		const struct rc_constant * constant = &vp->code.constants.Constants[i];
@@ -281,6 +275,11 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
 	}
 
 	r3xx_compile_vertex_program(&compiler);
+
+	if (vp->code.constants.Count > ctx->Const.VertexProgram.MaxParameters) {
+		rc_error(&compiler.Base, "Program exceeds constant buffer size limit\n");
+	}
+
 	vp->error = compiler.Base.Error;
 
 	vp->Base->Base.InputsRead = vp->code.InputsRead;
@@ -334,7 +333,6 @@ struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx)
 #define bump_vpu_count(ptr, new_count)   do { \
 		drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr)); \
 		int _nc=(new_count)/4; \
-		assert(_nc < 256); \
 		if(_nc>_p->vpu.count)_p->vpu.count=_nc; \
 	} while(0)
 
diff --git a/r600/Makefile.am b/r600/Makefile.am
index 6b2091e..2f26a86 100644
--- a/r600/Makefile.am
+++ b/r600/Makefile.am
@@ -44,5 +44,7 @@ if HAVE_LIBDRM_RADEON
 r600_dri_la_CFLAGS += -DHAVE_LIBDRM_RADEON=1 $(LIBDRM_RADEON_CFLAGS)
 r600_dri_la_LDFLAGS += $(LIBDRM_RADEON_LIBS)
 r600_dri_la_SOURCES += \
-	../radeon/radeon_cs_space_drm.c
+	../radeon/radeon_cs_space_drm.c \
+	../radeon/radeon_bo.c \
+	../radeon/radeon_cs.c
 endif
diff --git a/r600/r600_cmdbuf.c b/r600/r600_cmdbuf.c
index 3cfe03a..370bb04 100644
--- a/r600/r600_cmdbuf.c
+++ b/r600/r600_cmdbuf.c
@@ -52,29 +52,49 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "radeon_mipmap_tree.h"
 #include "radeon_reg.h"
 
+#ifdef HAVE_LIBDRM_RADEON
+#include "radeon_cs_int.h"
+#else
+#include "radeon_cs_int_drm.h"
+#endif
 
+struct r600_cs_manager_legacy
+{
+    struct radeon_cs_manager    base;
+    struct radeon_context       *ctx;
+    /* hack for scratch stuff */
+    uint32_t                    pending_age;
+    uint32_t                    pending_count;
+};
+
+struct r600_cs_reloc_legacy {
+    struct radeon_cs_reloc  base;
+    uint32_t                cindices;
+    uint32_t                *indices;
+    uint32_t                *reloc_indices;
+};
 
-static struct radeon_cs * r600_cs_create(struct radeon_cs_manager *csm,
-                                   uint32_t ndw)
+static struct radeon_cs_int *r600_cs_create(struct radeon_cs_manager *csm,
+					    uint32_t ndw)
 {
-    struct radeon_cs *cs;
+    struct radeon_cs_int *csi;
 
-    cs = (struct radeon_cs*)calloc(1, sizeof(struct radeon_cs));
-    if (cs == NULL) {
+    csi = (struct radeon_cs_int*)calloc(1, sizeof(struct radeon_cs_int));
+    if (csi == NULL) {
         return NULL;
     }
-    cs->csm = csm;
-    cs->ndw = (ndw + 0x3FF) & (~0x3FF);
-    cs->packets = (uint32_t*)malloc(4*cs->ndw);
-    if (cs->packets == NULL) {
-        free(cs);
+    csi->csm = csm;
+    csi->ndw = (ndw + 0x3FF) & (~0x3FF);
+    csi->packets = (uint32_t*)malloc(4*csi->ndw);
+    if (csi->packets == NULL) {
+        free(csi);
         return NULL;
     }
-    cs->relocs_total_size = 0;
-    return cs;
+    csi->relocs_total_size = 0;
+    return csi;
 }
 
-static int r600_cs_write_reloc(struct radeon_cs *cs,
+static int r600_cs_write_reloc(struct radeon_cs_int *csi,
 			       struct radeon_bo *bo,
 			       uint32_t read_domain,
 			       uint32_t write_domain,
@@ -83,7 +103,7 @@ static int r600_cs_write_reloc(struct radeon_cs *cs,
     struct r600_cs_reloc_legacy *relocs;
     int i;
 
-    relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
+    relocs = (struct r600_cs_reloc_legacy *)csi->relocs;
     /* check domains */
     if ((read_domain && write_domain) || (!read_domain && !write_domain)) {
         /* in one CS a bo can only be in read or write domain but not
@@ -98,7 +118,7 @@ static int r600_cs_write_reloc(struct radeon_cs *cs,
         return -EINVAL;
     }
     /* check if bo is already referenced */
-    for(i = 0; i < cs->crelocs; i++) {
+    for(i = 0; i < csi->crelocs; i++) {
         uint32_t *indices;
         uint32_t *reloc_indices;
 
@@ -129,109 +149,108 @@ static int r600_cs_write_reloc(struct radeon_cs *cs,
             }
             relocs[i].indices = indices;
             relocs[i].reloc_indices = reloc_indices;
-            relocs[i].indices[relocs[i].cindices - 1] = cs->cdw;
-            relocs[i].reloc_indices[relocs[i].cindices - 1] = cs->cdw;
-            cs->section_cdw += 2;
-	    cs->cdw += 2;
+            relocs[i].indices[relocs[i].cindices - 1] = csi->cdw;
+            relocs[i].reloc_indices[relocs[i].cindices - 1] = csi->cdw;
+            csi->section_cdw += 2;
+	    csi->cdw += 2;
 
             return 0;
         }
     }
     /* add bo to reloc */
     relocs = (struct r600_cs_reloc_legacy*)
-             realloc(cs->relocs,
-                     sizeof(struct r600_cs_reloc_legacy) * (cs->crelocs + 1));
+             realloc(csi->relocs,
+                     sizeof(struct r600_cs_reloc_legacy) * (csi->crelocs + 1));
     if (relocs == NULL) {
         return -ENOMEM;
     }
-    cs->relocs = relocs;
-    relocs[cs->crelocs].base.bo = bo;
-    relocs[cs->crelocs].base.read_domain = read_domain;
-    relocs[cs->crelocs].base.write_domain = write_domain;
-    relocs[cs->crelocs].base.flags = flags;
-    relocs[cs->crelocs].indices = (uint32_t*)malloc(4);
-    relocs[cs->crelocs].reloc_indices = (uint32_t*)malloc(4);
-    if ( (relocs[cs->crelocs].indices == NULL) || (relocs[cs->crelocs].reloc_indices == NULL) )
+    csi->relocs = relocs;
+    relocs[csi->crelocs].base.bo = bo;
+    relocs[csi->crelocs].base.read_domain = read_domain;
+    relocs[csi->crelocs].base.write_domain = write_domain;
+    relocs[csi->crelocs].base.flags = flags;
+    relocs[csi->crelocs].indices = (uint32_t*)malloc(4);
+    relocs[csi->crelocs].reloc_indices = (uint32_t*)malloc(4);
+    if ( (relocs[csi->crelocs].indices == NULL) || (relocs[csi->crelocs].reloc_indices == NULL) )
     {
         return -ENOMEM;
     }
 
-    relocs[cs->crelocs].indices[0] = cs->cdw;
-    relocs[cs->crelocs].reloc_indices[0] = cs->cdw;
-    cs->section_cdw += 2;
-    cs->cdw += 2;
-    relocs[cs->crelocs].cindices = 1;
-    cs->relocs_total_size += radeon_bo_legacy_relocs_size(bo);
-    cs->crelocs++;
+    relocs[csi->crelocs].indices[0] = csi->cdw;
+    relocs[csi->crelocs].reloc_indices[0] = csi->cdw;
+    csi->section_cdw += 2;
+    csi->cdw += 2;
+    relocs[csi->crelocs].cindices = 1;
+    csi->relocs_total_size += radeon_bo_legacy_relocs_size(bo);
+    csi->crelocs++;
 
     radeon_bo_ref(bo);
 
     return 0;
 }
 
-static int r600_cs_begin(struct radeon_cs *cs,
+static int r600_cs_begin(struct radeon_cs_int *csi,
                     uint32_t ndw,
                     const char *file,
                     const char *func,
                     int line)
 {
-    if (cs->section) {
+    if (csi->section_ndw) {
         fprintf(stderr, "CS already in a section(%s,%s,%d)\n",
-                cs->section_file, cs->section_func, cs->section_line);
+                csi->section_file, csi->section_func, csi->section_line);
         fprintf(stderr, "CS can't start section(%s,%s,%d)\n",
                 file, func, line);
         return -EPIPE;
     }
 
-    cs->section = 1;
-    cs->section_ndw = ndw;
-    cs->section_cdw = 0;
-    cs->section_file = file;
-    cs->section_func = func;
-    cs->section_line = line;
+    csi->section_ndw = ndw;
+    csi->section_cdw = 0;
+    csi->section_file = file;
+    csi->section_func = func;
+    csi->section_line = line;
 
-    if (cs->cdw + ndw > cs->ndw) {
+    if (csi->cdw + ndw > csi->ndw) {
         uint32_t tmp, *ptr;
 	int num = (ndw > 0x400) ? ndw : 0x400;
 
-        tmp = (cs->cdw + num + 0x3FF) & (~0x3FF);
-        ptr = (uint32_t*)realloc(cs->packets, 4 * tmp);
+        tmp = (csi->cdw + num + 0x3FF) & (~0x3FF);
+        ptr = (uint32_t*)realloc(csi->packets, 4 * tmp);
         if (ptr == NULL) {
             return -ENOMEM;
         }
-        cs->packets = ptr;
-        cs->ndw = tmp;
+        csi->packets = ptr;
+        csi->ndw = tmp;
     }
 
     return 0;
 }
 
-static int r600_cs_end(struct radeon_cs *cs,
+static int r600_cs_end(struct radeon_cs_int *csi,
                   const char *file,
                   const char *func,
                   int line)
 
 {
-    if (!cs->section) {
+    if (!csi->section_ndw) {
         fprintf(stderr, "CS no section to end at (%s,%s,%d)\n",
                 file, func, line);
         return -EPIPE;
     }
-    cs->section = 0;
 
-    if ( cs->section_ndw != cs->section_cdw ) {
+    if ( csi->section_ndw != csi->section_cdw ) {
         fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n",
-                cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw);
-        fprintf(stderr, "cs->section_ndw = %d, cs->cdw = %d, cs->section_cdw = %d \n",
-                cs->section_ndw, cs->cdw, cs->section_cdw);
+                csi->section_file, csi->section_func, csi->section_line, csi->section_ndw, csi->section_cdw);
+        fprintf(stderr, "csi->section_ndw = %d, csi->cdw = %d, csi->section_cdw = %d \n",
+                csi->section_ndw, csi->cdw, csi->section_cdw);
         fprintf(stderr, "CS section end at (%s,%s,%d)\n",
                 file, func, line);
         return -EPIPE;
     }
+    csi->section_ndw = 0;
 
-    if (cs->cdw > cs->ndw) {
+    if (csi->cdw > csi->ndw) {
 	    fprintf(stderr, "CS section overflow at (%s,%s,%d) cdw %d ndw %d\n",
-		    cs->section_file, cs->section_func, cs->section_line,cs->cdw,cs->ndw);
+		    csi->section_file, csi->section_func, csi->section_line,csi->cdw,csi->ndw);
 	    fprintf(stderr, "CS section end at (%s,%s,%d)\n",
 		    file, func, line);
 	    assert(0);
@@ -240,21 +259,21 @@ static int r600_cs_end(struct radeon_cs *cs,
     return 0;
 }
 
-static int r600_cs_process_relocs(struct radeon_cs *cs, 
+static int r600_cs_process_relocs(struct radeon_cs_int *csi, 
                                   uint32_t * reloc_chunk,
                                   uint32_t * length_dw_reloc_chunk) 
 {
-    struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
+    struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm;
     struct r600_cs_reloc_legacy *relocs;
     int i, j, r;
 
     uint32_t offset_dw = 0;
 
-    csm = (struct r600_cs_manager_legacy*)cs->csm;
-    relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
+    csm = (struct r600_cs_manager_legacy*)csi->csm;
+    relocs = (struct r600_cs_reloc_legacy *)csi->relocs;
 restart:
-    for (i = 0; i < cs->crelocs; i++) {
-            uint32_t soffset, eoffset, asicoffset;
+    for (i = 0; i < csi->crelocs; i++) {
+            uint32_t soffset, eoffset;
 
             r = radeon_bo_legacy_validate(relocs[i].base.bo,
 					  &soffset, &eoffset);
@@ -262,32 +281,20 @@ restart:
 		    goto restart;
             }
             if (r) {
-		    fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
+		    fprintf(stderr, "invalid bo(%p) [0x%08X, 0x%08X]\n",
 			    relocs[i].base.bo, soffset, eoffset);
 		    return r;
             }
-            asicoffset = soffset;
 
 	    for (j = 0; j < relocs[i].cindices; j++) {
-		    if (asicoffset >= eoffset) {
-			    /*                radeon_bo_debug(relocs[i].base.bo, 12); */
-			    fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
-				    relocs[i].base.bo, soffset, eoffset);
-			    fprintf(stderr, "above end: %p 0x%08X 0x%08X\n",
-				    relocs[i].base.bo,
-				    cs->packets[relocs[i].indices[j]],
-				    eoffset);
-			    exit(0);
-			    return -EINVAL;
-		    }
 		    /* pkt3 nop header in ib chunk */
-		    cs->packets[relocs[i].reloc_indices[j]] = 0xC0001000;
+		    csi->packets[relocs[i].reloc_indices[j]] = 0xC0001000;
 		    /* reloc index in ib chunk */
-		    cs->packets[relocs[i].reloc_indices[j] + 1] = offset_dw;
+		    csi->packets[relocs[i].reloc_indices[j] + 1] = offset_dw;
 	    }
 
 	    /* asic offset in reloc chunk */ /* see alex drm r600_nomm_relocate */
-	    reloc_chunk[offset_dw] = asicoffset;
+	    reloc_chunk[offset_dw] = soffset;
 	    reloc_chunk[offset_dw + 3] = 0;
 
 	    offset_dw += 4;
@@ -298,14 +305,14 @@ restart:
     return 0;
 }
 
-static int r600_cs_set_age(struct radeon_cs *cs) /* -------------- */
+static int r600_cs_set_age(struct radeon_cs_int *csi) /* -------------- */
 {
-    struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
+    struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm;
     struct r600_cs_reloc_legacy *relocs;
     int i;
 
-    relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
-    for (i = 0; i < cs->crelocs; i++) {
+    relocs = (struct r600_cs_reloc_legacy *)csi->relocs;
+    for (i = 0; i < csi->crelocs; i++) {
         radeon_bo_legacy_pending(relocs[i].base.bo, csm->pending_age);
         radeon_bo_unref(relocs[i].base.bo);
     }
@@ -313,21 +320,21 @@ static int r600_cs_set_age(struct radeon_cs *cs) /* -------------- */
 }
 
 #if 0
-static void dump_cmdbuf(struct radeon_cs *cs)
+static void dump_cmdbuf(struct radeon_cs_int *csi)
 {
 	int i;
 	fprintf(stderr,"--start--\n");
-	for (i = 0; i < cs->cdw; i++){
-		fprintf(stderr,"0x%08x\n", cs->packets[i]);
+	for (i = 0; i < csi->cdw; i++){
+		fprintf(stderr,"0x%08x\n", csi->packets[i]);
 	}
 	fprintf(stderr,"--end--\n");
 
 }
 #endif
 
-static int r600_cs_emit(struct radeon_cs *cs)
+static int r600_cs_emit(struct radeon_cs_int *csi)
 {
-    struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
+    struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm;
     struct drm_radeon_cs       cs_cmd;
     struct drm_radeon_cs_chunk cs_chunk[2];
     uint32_t length_dw_reloc_chunk;
@@ -341,9 +348,9 @@ static int r600_cs_emit(struct radeon_cs *cs)
 
     csm->pending_count = 1;
 
-    reloc_chunk = (uint32_t*)calloc(1, cs->crelocs * 4 * 4);
+    reloc_chunk = (uint32_t*)calloc(1, csi->crelocs * 4 * 4);
 
-    r = r600_cs_process_relocs(cs, reloc_chunk, &length_dw_reloc_chunk);
+    r = r600_cs_process_relocs(csi, reloc_chunk, &length_dw_reloc_chunk);
     if (r) {
 	free(reloc_chunk);
         return 0;
@@ -351,8 +358,8 @@ static int r600_cs_emit(struct radeon_cs *cs)
 
     /* raw ib chunk */
     cs_chunk[0].chunk_id   = RADEON_CHUNK_ID_IB;
-    cs_chunk[0].length_dw  = cs->cdw;
-    cs_chunk[0].chunk_data = (unsigned long)(cs->packets);
+    cs_chunk[0].length_dw  = csi->cdw;
+    cs_chunk[0].chunk_data = (unsigned long)(csi->packets);
 
     /* reloc chaunk */
     cs_chunk[1].chunk_id   = RADEON_CHUNK_ID_RELOCS;
@@ -370,7 +377,7 @@ static int r600_cs_emit(struct radeon_cs *cs)
 
     do 
     {
-        r = drmCommandWriteRead(cs->csm->fd, DRM_RADEON_CS, &cs_cmd, sizeof(cs_cmd));
+        r = drmCommandWriteRead(csi->csm->fd, DRM_RADEON_CS, &cs_cmd, sizeof(cs_cmd));
         retry++;
     } while (r == -EAGAIN && retry < 1000);
 
@@ -381,11 +388,11 @@ static int r600_cs_emit(struct radeon_cs *cs)
 
     csm->pending_age = cs_cmd.cs_id;
 
-    r600_cs_set_age(cs);
+    r600_cs_set_age(csi);
 
-    cs->csm->read_used = 0;
-    cs->csm->vram_write_used = 0;
-    cs->csm->gart_write_used = 0;
+    csi->csm->read_used = 0;
+    csi->csm->vram_write_used = 0;
+    csi->csm->gart_write_used = 0;
 
     free(reloc_chunk);
 
@@ -405,35 +412,34 @@ static void inline r600_cs_free_reloc(void *relocs_p, int crelocs)
     }
 }
 
-static int r600_cs_destroy(struct radeon_cs *cs)
+static int r600_cs_destroy(struct radeon_cs_int *csi)
 {
-    r600_cs_free_reloc(cs->relocs, cs->crelocs);
-    free(cs->relocs);
-    free(cs->packets);
-    free(cs);
+    r600_cs_free_reloc(csi->relocs, csi->crelocs);
+    free(csi->relocs);
+    free(csi->packets);
+    free(csi);
     return 0;
 }
 
-static int r600_cs_erase(struct radeon_cs *cs)
+static int r600_cs_erase(struct radeon_cs_int *csi)
 {
-    r600_cs_free_reloc(cs->relocs, cs->crelocs);
-    free(cs->relocs);
-    cs->relocs_total_size = 0;
-    cs->relocs = NULL;
-    cs->crelocs = 0;
-    cs->cdw = 0;
-    cs->section = 0;
+    r600_cs_free_reloc(csi->relocs, csi->crelocs);
+    free(csi->relocs);
+    csi->relocs_total_size = 0;
+    csi->relocs = NULL;
+    csi->crelocs = 0;
+    csi->cdw = 0;
     return 0;
 }
 
-static int r600_cs_need_flush(struct radeon_cs *cs)
+static int r600_cs_need_flush(struct radeon_cs_int *csi)
 {
     /* this function used to flush when the BO usage got to
      * a certain size, now the higher levels handle this better */
     return 0;
 }
 
-static void r600_cs_print(struct radeon_cs *cs, FILE *file)
+static void r600_cs_print(struct radeon_cs_int *csi, FILE *file)
 {
 }
 
diff --git a/r600/r600_cmdbuf.h b/r600/r600_cmdbuf.h
index eba43d3..dff0009 100644
--- a/r600/r600_cmdbuf.h
+++ b/r600/r600_cmdbuf.h
@@ -118,22 +118,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define R600_IT_SET_CTL_CONST                     0x00006F00
 #define R600_IT_SURFACE_BASE_UPDATE               0x00007300
 
-struct r600_cs_manager_legacy
-{
-    struct radeon_cs_manager    base;
-    struct radeon_context       *ctx;
-    /* hack for scratch stuff */
-    uint32_t                    pending_age;
-    uint32_t                    pending_count;
-};
-
-struct r600_cs_reloc_legacy {
-    struct radeon_cs_reloc  base;
-    uint32_t                cindices;
-    uint32_t                *indices;
-    uint32_t                *reloc_indices;
-};
-
 struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_context *ctx);
 
 /**
diff --git a/r600/r600_context.c b/r600/r600_context.c
index e0b77d4..8dab57b 100644
--- a/r600/r600_context.c
+++ b/r600/r600_context.c
@@ -362,6 +362,7 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
 	    (&r600->radeon.optionCache, "disable_stencil_two_side"))
 		_mesa_disable_extension(ctx, "GL_EXT_stencil_two_side");
 
+#if 0
 	if (r600->radeon.glCtx->Mesa_DXTn
 	    && !driQueryOptionb(&r600->radeon.optionCache, "disable_s3tc")) {
 		_mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
@@ -371,6 +372,9 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
 	{
 		_mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
 	}
+#else
+	_mesa_disable_extension(ctx, "GL_ARB_texture_compression");
+#endif
 
 	radeon_fbo_init(&r600->radeon);
    	radeonInitSpanFuncs( ctx );
diff --git a/r600/r600_texstate.c b/r600/r600_texstate.c
index bcb8d7c..a30703e 100644
--- a/r600/r600_texstate.c
+++ b/r600/r600_texstate.c
@@ -721,7 +721,9 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
 	struct gl_texture_object *tObj =
 	    _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
 	radeonTexObjPtr t = radeon_tex_obj(tObj);
-	uint32_t pitch_val, size;
+	int firstlevel = t->mt ? t->mt->firstLevel : 0;
+	const struct gl_texture_image *firstImage;
+	uint32_t pitch_val, size, row_align, bpp;
 
 	if (!tObj)
 		return;
@@ -731,7 +733,13 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
 	if (!offset)
 		return;
 
-	size = pitch;//h * w * (depth / 8);
+	bpp = depth / 8;
+	if (bpp == 3) 
+		bpp = 4;
+
+	firstImage = t->base.Image[0][firstlevel];
+	row_align = rmesa->radeon.texture_row_align - 1;
+	size = ((firstImage->Width * bpp + row_align) & ~row_align) * firstImage->Height;
 	if (t->bo) {
 		radeon_bo_unref(t->bo);
 		t->bo = NULL;
diff --git a/r600/r700_assembler.c b/r600/r700_assembler.c
index 00eda54..be875ae 100644
--- a/r600/r700_assembler.c
+++ b/r600/r700_assembler.c
@@ -336,7 +336,8 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
 
     switch (pAsm->D.dst.opcode)
     {
-    case SQ_OP2_INST_ADD:                          
+    case SQ_OP2_INST_ADD:
+    case SQ_OP2_INST_KILLGT:
     case SQ_OP2_INST_MUL: 
     case SQ_OP2_INST_MAX:
     case SQ_OP2_INST_MIN:
@@ -354,9 +355,9 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
         return 2;  
 
     case SQ_OP2_INST_MOV: 
+    case SQ_OP2_INST_MOVA_FLOOR:
     case SQ_OP2_INST_FRACT:
     case SQ_OP2_INST_FLOOR:
-    case SQ_OP2_INST_KILLGT:
     case SQ_OP2_INST_EXP_IEEE:
     case SQ_OP2_INST_LOG_CLAMPED:
     case SQ_OP2_INST_LOG_IEEE:
@@ -1180,8 +1181,10 @@ GLboolean tex_src(r700_AssemblerBase *pAsm)
         case PROGRAM_INPUT:
             switch (pILInst->SrcReg[0].Index)
             {
+                case FRAG_ATTRIB_WPOS:
                 case FRAG_ATTRIB_COL0:
                 case FRAG_ATTRIB_COL1:
+                case FRAG_ATTRIB_FOGC:
                 case FRAG_ATTRIB_TEX0:
                 case FRAG_ATTRIB_TEX1:
                 case FRAG_ATTRIB_TEX2:
@@ -1194,7 +1197,16 @@ GLboolean tex_src(r700_AssemblerBase *pAsm)
                     pAsm->S[0].src.reg   =
                         pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
                     pAsm->S[0].src.rtype = SRC_REG_INPUT;
-                break;
+                    break;
+                case FRAG_ATTRIB_FACE:
+                    fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
+                    break;
+                case FRAG_ATTRIB_PNTC:
+                    fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
+                    break;
+                case FRAG_ATTRIB_VAR0:
+                    fprintf(stderr, "FRAG_ATTRIB_VAR0 unsupported\n");
+                    break;
             }
         break;
         }
@@ -2053,7 +2065,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
         }
 
         //other bits
-        alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP;
+        alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_AR_X;
 
         if(   (is_single_scalar_operation == GL_TRUE) 
            || (GL_TRUE == bSplitInst) )
@@ -2387,6 +2399,35 @@ GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
     return GL_TRUE;
 }
 
+GLboolean assemble_ARL(r700_AssemblerBase *pAsm)
+{ /* TODO: ar values dont' persist between clauses */
+    if( GL_FALSE == checkop1(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR;
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg = 0;
+    pAsm->D.dst.writex = 0;
+    pAsm->D.dst.writey = 0;
+    pAsm->D.dst.writez = 0;
+    pAsm->D.dst.writew = 0;
+
+    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
 GLboolean assemble_BAD(char *opcode_str) 
 {
     radeon_error("Not yet implemented instruction (%s)\n", opcode_str);
@@ -2508,7 +2549,7 @@ GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
     }
     else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH) 
     {
-        onecomp_PVSSRC(&(pAsm->S[1].src), 3);
+        onecomp_PVSSRC(&(pAsm->S[0].src), 3);
     } 
 
     if ( GL_FALSE == next_ins(pAsm) ) 
@@ -2617,15 +2658,15 @@ GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
  
 GLboolean assemble_KIL(r700_AssemblerBase *pAsm)
 {
+    /* TODO: doc says KILL has to be last(end) ALU clause */
+    
     checkop1(pAsm);
 
     pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT;  
-  
-    if ( GL_FALSE == assemble_dst(pAsm) )
-    {
-        return GL_FALSE;
-    }
 
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg   = 0;
     pAsm->D.dst.writex = 0;
     pAsm->D.dst.writey = 0;
     pAsm->D.dst.writez = 0;
@@ -2638,20 +2679,11 @@ GLboolean assemble_KIL(r700_AssemblerBase *pAsm)
     setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
     noneg_PVSSRC(&(pAsm->S[0].src));
 
-    pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
-
-    if(PROGRAM_TEMPORARY == pAsm->pILInst[pAsm->uiCurInst].DstReg.File)
+    if ( GL_FALSE == assemble_src(pAsm, 0, 1) )
     {
-        pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
-    }
-    else
-    {   //PROGRAM_OUTPUT
-        pAsm->S[1].src.reg = pAsm->uiFP_OutputMap[pAsm->pILInst[pAsm->uiCurInst].DstReg.Index];
+        return GL_FALSE;
     }
   
-    setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
-    noswizzle_PVSSRC(&(pAsm->S[1].src));
-  
     if ( GL_FALSE == next_ins(pAsm) )
     {
         return GL_FALSE;
@@ -2908,6 +2940,7 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
     pAsm->S[0].src.rtype = srcType;
     pAsm->S[0].src.reg   = srcReg;
     setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
     pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
     pAsm->S[1].src.reg   = tmp;
     setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
@@ -3417,22 +3450,6 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
 	    need_barrier = GL_TRUE;
     }
 
-    switch (pAsm->pILInst[pAsm->uiCurInst].Opcode)
-    {
-        case OPCODE_TEX:
-            break;
-        case OPCODE_TXB:
-            radeon_error("do not support TXB yet\n");
-            return GL_FALSE;
-            break;
-        case OPCODE_TXP:
-            break;
-        default:
-            radeon_error("Internal error: bad texture op (not TEX)\n");
-            return GL_FALSE;
-            break;
-    }
-
     if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
     {
         GLuint tmp = gethelpr(pAsm);
@@ -3611,7 +3628,15 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
 
     }
 
-    pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
+    if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXB)
+    {
+        pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
+    }
+    else
+    {
+        pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
+    }
+
     pAsm->is_tex = GL_TRUE;
     if ( GL_TRUE == need_barrier )
     {
@@ -3809,8 +3834,7 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
             break;  
 
         case OPCODE_ARL: 
-            radeon_error("Not yet implemented instruction OPCODE_ARL \n");
-            //if ( GL_FALSE == assemble_BAD("ARL") ) 
+            if ( GL_FALSE == assemble_ARL(pR700AsmCode) ) 
                 return GL_FALSE;
             break;
         case OPCODE_ARR: 
@@ -4155,6 +4179,7 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
                                    GLbitfield          OutputsWritten)  
 { 
     unsigned int unBit;
+    GLuint export_count = 0;
 
     if(pR700AsmCode->depth_export_register_number >= 0) 
     {
@@ -4176,6 +4201,7 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
         {
             return GL_FALSE;
         }
+        export_count++;
 	}
 	unBit = 1 << FRAG_RESULT_DEPTH;
 	if(OutputsWritten & unBit)
@@ -4189,8 +4215,15 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
         {
             return GL_FALSE;
         }
+        export_count++;
 	}
-
+    /* Need to export something, otherwise we'll hang
+     * results are undefined anyway */
+    if(export_count == 0)
+    {
+        Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE);
+    }
+    
     if(pR700AsmCode->cf_last_export_ptr != NULL) 
     {
         pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst        = SQ_CF_INST_EXPORT_DONE;
diff --git a/r600/r700_assembler.h b/r600/r700_assembler.h
index 73bb8ba..d639592 100644
--- a/r600/r700_assembler.h
+++ b/r600/r700_assembler.h
@@ -461,6 +461,7 @@ GLboolean next_ins(r700_AssemblerBase *pAsm);
 GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode);
 GLboolean assemble_ABS(r700_AssemblerBase *pAsm);
 GLboolean assemble_ADD(r700_AssemblerBase *pAsm);
+GLboolean assemble_ARL(r700_AssemblerBase *pAsm);
 GLboolean assemble_BAD(char *opcode_str);
 GLboolean assemble_CMP(r700_AssemblerBase *pAsm);
 GLboolean assemble_COS(r700_AssemblerBase *pAsm);
diff --git a/r600/r700_chip.c b/r600/r700_chip.c
index 06d7e9c..dacc2cc 100644
--- a/r600/r700_chip.c
+++ b/r600/r700_chip.c
@@ -794,8 +794,7 @@ static void r700SendDBState(GLcontext *ctx, struct radeon_state_atom *atom)
 	BATCH_LOCALS(&context->radeon);
 	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
 
-        BEGIN_BATCH_NO_AUTOSTATE(23);
-	R600_OUT_BATCH_REGVAL(DB_HTILE_DATA_BASE, r700->DB_HTILE_DATA_BASE.u32All);
+	BEGIN_BATCH_NO_AUTOSTATE(17);
 
 	R600_OUT_BATCH_REGSEQ(DB_STENCIL_CLEAR, 2);
 	R600_OUT_BATCH(r700->DB_STENCIL_CLEAR.u32All);
@@ -808,7 +807,6 @@ static void r700SendDBState(GLcontext *ctx, struct radeon_state_atom *atom)
 	R600_OUT_BATCH(r700->DB_RENDER_CONTROL.u32All);
 	R600_OUT_BATCH(r700->DB_RENDER_OVERRIDE.u32All);
 
-	R600_OUT_BATCH_REGVAL(DB_HTILE_SURFACE, r700->DB_HTILE_SURFACE.u32All);
 	R600_OUT_BATCH_REGVAL(DB_ALPHA_TO_MASK, r700->DB_ALPHA_TO_MASK.u32All);
 
 	END_BATCH();
@@ -1136,7 +1134,11 @@ static int check_blnd(GLcontext *ctx, struct radeon_state_atom *atom)
 		count += 3;
 
 	if (context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) {
-		for (ui = 0; ui < R700_MAX_RENDER_TARGETS; ui++) {
+		/* targets are enabled in r700SetRenderTarget but state
+		   size is calculated before that. Until MRT's are done
+		   hardcode target0 as enabled. */
+		count += 3;
+		for (ui = 1; ui < R700_MAX_RENDER_TARGETS; ui++) {
                         if (r700->render_target[ui].enabled)
 				count += 3;
 		}
@@ -1239,7 +1241,7 @@ void r600InitAtoms(context_t *context)
 	context->radeon.hw.atomlist.name = "atom-list";
 
 	ALLOC_STATE(sq, always, 34, r700SendSQConfig);
-	ALLOC_STATE(db, always, 23, r700SendDBState);
+	ALLOC_STATE(db, always, 17, r700SendDBState);
 	ALLOC_STATE(stencil, always, 4, r700SendStencilState);
 	ALLOC_STATE(db_target, always, 12, r700SendDepthTargetState);
 	ALLOC_STATE(sc, always, 15, r700SendSCState);
@@ -1252,9 +1254,9 @@ void r600InitAtoms(context_t *context)
 	ALLOC_STATE(poly, always, 10, r700SendPolyState);
 	ALLOC_STATE(cb, cb, 18, r700SendCBState);
 	ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState);
+	ALLOC_STATE(cb_target, always, 25, r700SendRenderTargetState);
 	ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState);
 	ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState);
-	ALLOC_STATE(cb_target, always, 25, r700SendRenderTargetState);
 	ALLOC_STATE(sx, always, 9, r700SendSXState);
 	ALLOC_STATE(vgt, always, 41, r700SendVGTState);
 	ALLOC_STATE(spi, always, (59 + R700_MAX_SHADER_EXPORTS), r700SendSPIState);
diff --git a/r600/r700_fragprog.c b/r600/r700_fragprog.c
index 78ce3ae..0f549ea 100644
--- a/r600/r700_fragprog.c
+++ b/r600/r700_fragprog.c
@@ -135,15 +135,19 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
 {
     GLuint i, j;
     GLint * puiTEMPwrites;
+    GLint * puiTEMPreads;
     struct prog_instruction * pILInst;
     InstDeps         *pInstDeps;
     struct prog_instruction * texcoord_DepInst;
     GLint              nDepInstID;
 
     puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries);
+    puiTEMPreads = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries);
+
     for(i=0; i<mesa_fp->Base.NumTemporaries; i++)
     {
         puiTEMPwrites[i] = -1;
+        puiTEMPreads[i] = -1;
     }
 
     pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_fp->Base.NumInstructions);
@@ -167,6 +171,11 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
             {
                 //Set dep.
                 pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
+                //Set first read
+                if(puiTEMPreads[pILInst->SrcReg[j].Index] < 0 )
+                {
+                    puiTEMPreads[pILInst->SrcReg[j].Index] = i;
+                }
             }
             else
             {
@@ -177,8 +186,6 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
 
     fp->r700AsmCode.pInstDeps = pInstDeps;
 
-    FREE(puiTEMPwrites);
-
     //Find dep for tex inst    
     for(i=0; i<mesa_fp->Base.NumInstructions; i++)
     {
@@ -203,9 +210,25 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
                 {   //... other deps?
                 }
             }
+            // make sure that we dont overwrite src used earlier
+            nDepInstID = puiTEMPreads[pILInst->DstReg.Index];
+            if(nDepInstID < i)
+            {
+                pInstDeps[i].nDstDep = puiTEMPreads[pILInst->DstReg.Index];
+                texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]);
+                if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) )
+                {
+                    pInstDeps[nDepInstID].nDstDep = i;
+                }
+ 
+            }
+
         }
 	}
 
+    FREE(puiTEMPwrites);
+    FREE(puiTEMPreads);
+
     return GL_TRUE;
 }
 
@@ -251,7 +274,15 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
 		number_of_colors_exported--;
 	}
 
-	fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled;
+	/* illegal to set this to 0 */
+	if(number_of_colors_exported || z_enabled)
+	{
+	    fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled;
+	}
+	else
+	{
+	    fp->r700Shader.exportMode = (1 << 1);
+	}
 
     fp->translated = GL_TRUE;
 
@@ -341,6 +372,11 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
         SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit);
         SETbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit);
     }
+    else
+    {
+        CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit);
+        CLEARbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit);
+    }
 
     ui = (unNumOfReg < ui) ? ui : unNumOfReg;
 
diff --git a/r600/r700_oglprog.c b/r600/r700_oglprog.c
index 5290ef3..0d476fc 100644
--- a/r600/r700_oglprog.c
+++ b/r600/r700_oglprog.c
@@ -40,6 +40,24 @@
 #include "r700_vertprog.h"
 
 
+static void freeVertProgCache(GLcontext *ctx, struct r700_vertex_program_cont *cache)
+{
+	struct r700_vertex_program *tmp, *vp = cache->progs;
+
+	while (vp) {
+		tmp = vp->next;
+		/* Release DMA region */
+		r600DeleteShader(ctx, vp->shaderbo);
+		/* Clean up */
+		Clean_Up_Assembler(&(vp->r700AsmCode));
+		Clean_Up_Shader(&(vp->r700Shader));
+		
+		_mesa_reference_vertprog(ctx, &vp->mesa_program, NULL);
+		_mesa_free(vp);
+		vp = tmp;
+	}
+}
+
 static struct gl_program *r700NewProgram(GLcontext * ctx, 
                                          GLenum target,
 					                     GLuint id)
@@ -84,8 +102,7 @@ static struct gl_program *r700NewProgram(GLcontext * ctx,
 
 static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog)
 {
-    struct r700_vertex_program_cont   * vpc;
-    struct r700_vertex_program *vp, *tmp;
+    struct r700_vertex_program_cont *vpc = (struct r700_vertex_program_cont *)prog;
     struct r700_fragment_program * fp;
 
 	radeon_print(RADEON_SHADER, RADEON_VERBOSE,
@@ -95,20 +112,7 @@ static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog)
     {
     case GL_VERTEX_STATE_PROGRAM_NV:
     case GL_VERTEX_PROGRAM_ARB:	    
-        vpc = (struct r700_vertex_program_cont*)prog;
-        vp = vpc->progs;
-	while (vp) {
-		tmp = vp->next;
-		/* Release DMA region */
-	 
-	        r600DeleteShader(ctx, vp->shaderbo);
-
-	        /* Clean up */
-	        Clean_Up_Assembler(&(vp->r700AsmCode));
-	        Clean_Up_Shader(&(vp->r700Shader));
-		_mesa_free(vp);
-		vp = tmp;
-	}
+	    freeVertProgCache(ctx, vpc);
 	    break;
     case GL_FRAGMENT_PROGRAM_NV:
     case GL_FRAGMENT_PROGRAM_ARB:
@@ -131,7 +135,24 @@ static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog)
 static void
 r700ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog)
 {
-
+	struct r700_vertex_program_cont *vpc = (struct r700_vertex_program_cont *)prog;
+	struct r700_fragment_program * fp = (struct r700_fragment_program*)prog;
+
+	switch (target) {
+	case GL_VERTEX_PROGRAM_ARB:
+		freeVertProgCache(ctx, vpc);
+		vpc->progs = NULL;
+		break;
+	case GL_FRAGMENT_PROGRAM_ARB:
+		r600DeleteShader(ctx, fp->shaderbo);
+		Clean_Up_Assembler(&(fp->r700AsmCode));
+		Clean_Up_Shader(&(fp->r700Shader));
+		fp->translated = GL_FALSE;
+		fp->loaded     = GL_FALSE;
+		fp->shaderbo   = NULL;
+		break;
+	}
+		
 }
 
 static GLboolean r700IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
diff --git a/r600/r700_state.c b/r600/r700_state.c
index 124469b..9b7f7a7 100644
--- a/r600/r700_state.c
+++ b/r600/r700_state.c
@@ -467,10 +467,10 @@ static void r700SetBlendState(GLcontext * ctx)
 		 eqn, COLOR_COMB_FCN_shift, COLOR_COMB_FCN_mask);
 
 	SETfield(blend_reg,
-		 blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE),
+		 blend_factor(ctx->Color.BlendSrcA, GL_TRUE),
 		 ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask);
 	SETfield(blend_reg,
-		 blend_factor(ctx->Color.BlendDstRGB, GL_FALSE),
+		 blend_factor(ctx->Color.BlendDstA, GL_FALSE),
 		 ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask);
 
 	switch (ctx->Color.BlendEquationA) {
@@ -655,6 +655,10 @@ static void r700UpdateCulling(GLcontext * ctx)
             CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); /* default: ccw */
             break;
     }
+
+    /* Winding is inverted when rendering to FBO */
+    if (ctx->DrawBuffer && ctx->DrawBuffer->Name)
+	    r700->PA_SU_SC_MODE_CNTL.u32All ^= FACE_bit;
 }
 
 static void r700UpdateLineStipple(GLcontext * ctx)
@@ -745,9 +749,9 @@ static void r700ColorMask(GLcontext * ctx,
 			     (b ? 4 : 0) |
 			     (a ? 8 : 0));
 
-	if (mask != r700->CB_SHADER_MASK.u32All) {
+	if (mask != r700->CB_TARGET_MASK.u32All) {
 		R600_STATECHANGE(context, cb);
-		SETfield(r700->CB_SHADER_MASK.u32All, mask, OUTPUT0_ENABLE_shift, OUTPUT0_ENABLE_mask);
+		SETfield(r700->CB_TARGET_MASK.u32All, mask, TARGET0_ENABLE_shift, TARGET0_ENABLE_mask);
 	}
 }
 
@@ -1164,13 +1168,8 @@ static void r700UpdatePolygonMode(GLcontext * ctx)
 		/* Handle GL_CW (clock wise and GL_CCW (counter clock wise)
 		 * correctly by selecting the correct front and back face
 		 */
-		if (ctx->Polygon.FrontFace == GL_CCW) {
-			f = ctx->Polygon.FrontMode;
-			b = ctx->Polygon.BackMode;
-		} else {
-			f = ctx->Polygon.BackMode;
-			b = ctx->Polygon.FrontMode;
-		}
+		f = ctx->Polygon.FrontMode;
+		b = ctx->Polygon.BackMode;
 
 		/* Enable polygon mode */
 		SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DUAL_MODE, POLY_MODE_shift, POLY_MODE_mask);
@@ -1269,11 +1268,15 @@ void r700SetScissor(context_t *context) //---------------
 		return;
 	}
 	if (context->radeon.state.scissor.enabled) {
-		/* r600 has exclusive scissors */
 		x1 = context->radeon.state.scissor.rect.x1;
 		y1 = context->radeon.state.scissor.rect.y1;
-		x2 = context->radeon.state.scissor.rect.x2 + 1;
-		y2 = context->radeon.state.scissor.rect.y2 + 1;
+		x2 = context->radeon.state.scissor.rect.x2;
+		y2 = context->radeon.state.scissor.rect.y2;
+		/* r600 has exclusive BR scissors */
+		if (context->radeon.radeonScreen->kernel_mm) {
+			x2++;
+			y2++;
+		}
 	} else {
 		if (context->radeon.radeonScreen->driScreen->dri2.enabled) {
 			x1 = 0;
@@ -1754,7 +1757,7 @@ void r700InitState(GLcontext * ctx) //-------------------
     r700->CB_CLRCMP_MSK.u32All = 0xFFFFFFFF;
 
     /* screen/window/view */
-    SETfield(r700->CB_TARGET_MASK.u32All, 0xF, (4 * id), TARGET0_ENABLE_mask);
+    SETfield(r700->CB_SHADER_MASK.u32All, 0xF, (4 * id), OUTPUT0_ENABLE_mask);
 
     context->radeon.hw.all_dirty = GL_TRUE;
 
diff --git a/radeon/Makefile.am b/radeon/Makefile.am
index 554f67e..85d7ff0 100644
--- a/radeon/Makefile.am
+++ b/radeon/Makefile.am
@@ -36,5 +36,7 @@ if HAVE_LIBDRM_RADEON
 radeon_dri_la_CFLAGS += -DHAVE_LIBDRM_RADEON=1 $(LIBDRM_RADEON_CFLAGS)
 radeon_dri_la_LDFLAGS += $(LIBDRM_RADEON_LIBS)
 radeon_dri_la_SOURCES += \
-	radeon_cs_space_drm.c
+	radeon_cs_space_drm.c \
+	radeon_bo.c \
+	radeon_cs.c
 endif
diff --git a/radeon/radeon_bo.c b/radeon/radeon_bo.c
new file mode 100644
index 0000000..393d156
--- /dev/null
+++ b/radeon/radeon_bo.c
@@ -0,0 +1,110 @@
+#include <radeon_bocs_wrapper.h>
+#include <radeon_bo_int_drm.h>
+
+void radeon_bo_debug(struct radeon_bo *bo,
+		     const char *op)
+{
+    struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+
+    fprintf(stderr, "%s %p 0x%08X 0x%08X 0x%08X\n",
+            op, bo, bo->handle, boi->size, boi->cref);
+}
+
+struct radeon_bo *radeon_bo_open(struct radeon_bo_manager *bom,
+				 uint32_t handle,
+				 uint32_t size,
+				 uint32_t alignment,
+				 uint32_t domains,
+				 uint32_t flags)
+{
+    struct radeon_bo *bo;
+    bo = bom->funcs->bo_open(bom, handle, size, alignment, domains, flags);
+    return bo;
+}
+
+void radeon_bo_ref(struct radeon_bo *bo)
+{
+    struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+    boi->cref++;
+    boi->bom->funcs->bo_ref(boi);
+}
+
+struct radeon_bo *radeon_bo_unref(struct radeon_bo *bo)
+{
+    struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+    boi->cref--;
+    return boi->bom->funcs->bo_unref(boi);
+}
+
+int radeon_bo_map(struct radeon_bo *bo, int write)
+{
+    struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+    return boi->bom->funcs->bo_map(boi, write);
+}
+
+int radeon_bo_unmap(struct radeon_bo *bo)
+{
+    struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+    return boi->bom->funcs->bo_unmap(boi);
+}
+
+int radeon_bo_wait(struct radeon_bo *bo)
+{
+    struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+    if (!boi->bom->funcs->bo_wait)
+	return 0;
+    return boi->bom->funcs->bo_wait(boi);
+}
+
+int radeon_bo_is_busy(struct radeon_bo *bo,
+		      uint32_t *domain)
+{
+    struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+    return boi->bom->funcs->bo_is_busy(boi, domain);
+}
+
+int radeon_bo_set_tiling(struct radeon_bo *bo,
+			 uint32_t tiling_flags, uint32_t pitch)
+{
+    struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+    return boi->bom->funcs->bo_set_tiling(boi, tiling_flags, pitch);
+}
+
+int radeon_bo_get_tiling(struct radeon_bo *bo,
+			  uint32_t *tiling_flags, uint32_t *pitch)
+{
+    struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+    return boi->bom->funcs->bo_get_tiling(boi, tiling_flags, pitch);
+}
+
+int radeon_bo_is_static(struct radeon_bo *bo)
+{
+    struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+    if (boi->bom->funcs->bo_is_static)
+	return boi->bom->funcs->bo_is_static(boi);
+    return 0;
+}
+
+int radeon_bo_is_referenced_by_cs(struct radeon_bo *bo,
+				  struct radeon_cs *cs)
+{
+    struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+    return boi->cref > 1;
+}
+
+uint32_t radeon_bo_get_handle(struct radeon_bo *bo)
+{
+    return bo->handle;
+}
+
+uint32_t radeon_bo_get_src_domain(struct radeon_bo *bo)
+{
+    struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+    uint32_t src_domain;
+
+    src_domain = boi->space_accounted & 0xffff;
+    if (!src_domain)
+	src_domain = boi->space_accounted >> 16;
+
+    return src_domain;
+}
diff --git a/radeon/radeon_bo_drm.h b/radeon/radeon_bo_drm.h
index 7141371..beb2369 100644
--- a/radeon/radeon_bo_drm.h
+++ b/radeon/radeon_bo_drm.h
@@ -32,188 +32,44 @@
 
 #include <stdio.h>
 #include <stdint.h>
-//#include "radeon_track.h"
 
 /* bo object */
 #define RADEON_BO_FLAGS_MACRO_TILE  1
 #define RADEON_BO_FLAGS_MICRO_TILE  2
 
 struct radeon_bo_manager;
+struct radeon_cs;
 
 struct radeon_bo {
-    uint32_t                    alignment;
+    void                        *ptr;
+    uint32_t                    flags;
     uint32_t                    handle;
     uint32_t                    size;
-    uint32_t                    domains;
-    uint32_t                    flags;
-    unsigned                    cref;
-#ifdef RADEON_BO_TRACK
-    struct radeon_track         *track;
-#endif
-    void                        *ptr;
-    struct radeon_bo_manager    *bom;
-    uint32_t                    space_accounted;
-};
-
-/* bo functions */
-struct radeon_bo_funcs {
-    struct radeon_bo *(*bo_open)(struct radeon_bo_manager *bom,
-                                 uint32_t handle,
-                                 uint32_t size,
-                                 uint32_t alignment,
-                                 uint32_t domains,
-                                 uint32_t flags);
-    void (*bo_ref)(struct radeon_bo *bo);
-    struct radeon_bo *(*bo_unref)(struct radeon_bo *bo);
-    int (*bo_map)(struct radeon_bo *bo, int write);
-    int (*bo_unmap)(struct radeon_bo *bo);
-    int (*bo_wait)(struct radeon_bo *bo);
-    int (*bo_is_static)(struct radeon_bo *bo);
-    int (*bo_set_tiling)(struct radeon_bo *bo, uint32_t tiling_flags,
-			  uint32_t pitch);
-    int (*bo_get_tiling)(struct radeon_bo *bo, uint32_t *tiling_flags,
-			  uint32_t *pitch);
-    int (*bo_is_busy)(struct radeon_bo *bo, uint32_t *domain);
 };
 
-struct radeon_bo_manager {
-    struct radeon_bo_funcs  *funcs;
-    int                     fd;
-
-#ifdef RADEON_BO_TRACK
-    struct radeon_tracker   tracker;
-#endif
-};
-    
-static inline void _radeon_bo_debug(struct radeon_bo *bo,
-                                    const char *op,
-                                    const char *file,
-                                    const char *func,
-                                    int line)
-{
-    fprintf(stderr, "%s %p 0x%08X 0x%08X 0x%08X [%s %s %d]\n",
-            op, bo, bo->handle, bo->size, bo->cref, file, func, line);
-}
-
-static inline struct radeon_bo *_radeon_bo_open(struct radeon_bo_manager *bom,
-                                                uint32_t handle,
-                                                uint32_t size,
-                                                uint32_t alignment,
-                                                uint32_t domains,
-                                                uint32_t flags,
-                                                const char *file,
-                                                const char *func,
-                                                int line)
-{
-    struct radeon_bo *bo;
-
-    bo = bom->funcs->bo_open(bom, handle, size, alignment, domains, flags);
-
-#ifdef RADEON_BO_TRACK
-    if (bo) {
-        bo->track = radeon_tracker_add_track(&bom->tracker, bo->handle);
-        radeon_track_add_event(bo->track, file, func, "open", line);
-    }
-#endif
-    return bo;
-}
-
-static inline void _radeon_bo_ref(struct radeon_bo *bo,
-                                  const char *file,
-                                  const char *func,
-                                  int line)
-{
-    bo->cref++;
-#ifdef RADEON_BO_TRACK
-    radeon_track_add_event(bo->track, file, func, "ref", line); 
-#endif
-    bo->bom->funcs->bo_ref(bo);
-}
-
-static inline struct radeon_bo *_radeon_bo_unref(struct radeon_bo *bo,
-                                                 const char *file,
-                                                 const char *func,
-                                                 int line)
-{
-    bo->cref--;
-#ifdef RADEON_BO_TRACK
-    radeon_track_add_event(bo->track, file, func, "unref", line);
-    if (bo->cref <= 0) {
-        radeon_tracker_remove_track(&bo->bom->tracker, bo->track);
-        bo->track = NULL;
-    }
-#endif
-    return bo->bom->funcs->bo_unref(bo);
-}
-
-static inline int _radeon_bo_map(struct radeon_bo *bo,
-                                 int write,
-                                 const char *file,
-                                 const char *func,
-                                 int line)
-{
-    return bo->bom->funcs->bo_map(bo, write);
-}
-
-static inline int _radeon_bo_unmap(struct radeon_bo *bo,
-                                   const char *file,
-                                   const char *func,
-                                   int line)
-{
-    return bo->bom->funcs->bo_unmap(bo);
-}
-
-static inline int _radeon_bo_wait(struct radeon_bo *bo,
-                                  const char *file,
-                                  const char *func,
-                                  int line)
-{
-    return bo->bom->funcs->bo_wait(bo);
-}
-
-static inline int _radeon_bo_is_busy(struct radeon_bo *bo,
-				     uint32_t *domain,
-                                     const char *file,
-                                     const char *func,
-                                     int line)
-{
-    return bo->bom->funcs->bo_is_busy(bo, domain);
-}
-
-static inline int radeon_bo_set_tiling(struct radeon_bo *bo,
-				       uint32_t tiling_flags, uint32_t pitch)
-{
-    return bo->bom->funcs->bo_set_tiling(bo, tiling_flags, pitch);
-}
-
-static inline int radeon_bo_get_tiling(struct radeon_bo *bo,
-				       uint32_t *tiling_flags, uint32_t *pitch)
-{
-    return bo->bom->funcs->bo_get_tiling(bo, tiling_flags, pitch);
-}
-
-static inline int radeon_bo_is_static(struct radeon_bo *bo)
-{
-	if (bo->bom->funcs->bo_is_static)
-		return bo->bom->funcs->bo_is_static(bo);
-	return 0;
-}
-
-#define radeon_bo_open(bom, h, s, a, d, f)\
-    _radeon_bo_open(bom, h, s, a, d, f, __FILE__, __FUNCTION__, __LINE__)
-#define radeon_bo_ref(bo)\
-    _radeon_bo_ref(bo, __FILE__, __FUNCTION__, __LINE__)
-#define radeon_bo_unref(bo)\
-    _radeon_bo_unref(bo, __FILE__, __FUNCTION__, __LINE__)
-#define radeon_bo_map(bo, w)\
-    _radeon_bo_map(bo, w, __FILE__, __FUNCTION__, __LINE__)
-#define radeon_bo_unmap(bo)\
-    _radeon_bo_unmap(bo, __FILE__, __FUNCTION__, __LINE__)
-#define radeon_bo_debug(bo, opcode)\
-    _radeon_bo_debug(bo, opcode, __FILE__, __FUNCTION__, __LINE__)
-#define radeon_bo_wait(bo) \
-    _radeon_bo_wait(bo, __FILE__, __func__, __LINE__)
-#define radeon_bo_is_busy(bo, domain) \
-    _radeon_bo_is_busy(bo, domain, __FILE__, __func__, __LINE__)
+struct radeon_bo_manager;
 
+void radeon_bo_debug(struct radeon_bo *bo,
+		     const char *op);
+
+struct radeon_bo *radeon_bo_open(struct radeon_bo_manager *bom,
+				  uint32_t handle,
+				  uint32_t size,
+				  uint32_t alignment,
+				  uint32_t domains,
+				  uint32_t flags);
+
+void radeon_bo_ref(struct radeon_bo *bo);
+struct radeon_bo *radeon_bo_unref(struct radeon_bo *bo);
+int radeon_bo_map(struct radeon_bo *bo, int write);
+int radeon_bo_unmap(struct radeon_bo *bo);
+int radeon_bo_wait(struct radeon_bo *bo);
+int radeon_bo_is_busy(struct radeon_bo *bo, uint32_t *domain);
+int radeon_bo_set_tiling(struct radeon_bo *bo, uint32_t tiling_flags, uint32_t pitch);
+int radeon_bo_get_tiling(struct radeon_bo *bo, uint32_t *tiling_flags, uint32_t *pitch);
+int radeon_bo_is_static(struct radeon_bo *bo);
+int radeon_bo_is_referenced_by_cs(struct radeon_bo *bo,
+				  struct radeon_cs *cs);
+uint32_t radeon_bo_get_handle(struct radeon_bo *bo);
+uint32_t radeon_bo_get_src_domain(struct radeon_bo *bo);
 #endif
diff --git a/radeon/radeon_bo_int_drm.h b/radeon/radeon_bo_int_drm.h
new file mode 100644
index 0000000..190c332
--- /dev/null
+++ b/radeon/radeon_bo_int_drm.h
@@ -0,0 +1,45 @@
+#ifndef RADEON_BO_INT
+#define RADEON_BO_INT
+
+struct radeon_bo_manager {
+    struct radeon_bo_funcs  *funcs;
+    int                     fd;
+};
+
+struct radeon_bo_int {
+    void                        *ptr;
+    uint32_t                    flags;
+    uint32_t                    handle;
+    uint32_t                    size;
+    /* private members */
+    uint32_t                    alignment;
+    uint32_t                    domains;
+    unsigned                    cref;
+    struct radeon_bo_manager    *bom;
+    uint32_t                    space_accounted;
+    uint32_t                    referenced_in_cs;
+};
+
+/* bo functions */
+struct radeon_bo_funcs {
+    struct radeon_bo *(*bo_open)(struct radeon_bo_manager *bom,
+                                 uint32_t handle,
+                                 uint32_t size,
+                                 uint32_t alignment,
+                                 uint32_t domains,
+                                 uint32_t flags);
+    void (*bo_ref)(struct radeon_bo_int *bo);
+    struct radeon_bo *(*bo_unref)(struct radeon_bo_int *bo);
+    int (*bo_map)(struct radeon_bo_int *bo, int write);
+    int (*bo_unmap)(struct radeon_bo_int *bo);
+    int (*bo_wait)(struct radeon_bo_int *bo);
+    int (*bo_is_static)(struct radeon_bo_int *bo);
+    int (*bo_set_tiling)(struct radeon_bo_int *bo, uint32_t tiling_flags,
+			  uint32_t pitch);
+    int (*bo_get_tiling)(struct radeon_bo_int *bo, uint32_t *tiling_flags,
+			  uint32_t *pitch);
+    int (*bo_is_busy)(struct radeon_bo_int *bo, uint32_t *domain);
+    int (*bo_is_referenced_by_cs)(struct radeon_bo_int *bo, struct radeon_cs *cs);
+};
+
+#endif
diff --git a/radeon/radeon_bo_legacy.c b/radeon/radeon_bo_legacy.c
index 3e7547d..cf12664 100644
--- a/radeon/radeon_bo_legacy.c
+++ b/radeon/radeon_bo_legacy.c
@@ -50,6 +50,12 @@
 #include "radeon_bocs_wrapper.h"
 #include "radeon_macros.h"
 
+#ifdef HAVE_LIBDRM_RADEON
+#include "radeon_bo_int.h"
+#else
+#include "radeon_bo_int_drm.h"
+#endif
+
 /* no seriously texmem.c is this screwed up */
 struct bo_legacy_texture_object {
     driTextureObject    base;
@@ -57,7 +63,7 @@ struct bo_legacy_texture_object {
 };
 
 struct bo_legacy {
-    struct radeon_bo    base;
+    struct radeon_bo_int    base;
     int                 map_count;
     uint32_t            pending;
     int                 is_pending;
@@ -187,10 +193,10 @@ static void legacy_get_current_age(struct bo_manager_legacy *boml)
     }
 }
 
-static int legacy_is_pending(struct radeon_bo *bo)
+static int legacy_is_pending(struct radeon_bo_int *boi)
 {
-    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
-    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)boi->bom;
+    struct bo_legacy *bo_legacy = (struct bo_legacy*)boi;
 
     if (bo_legacy->is_pending <= 0) {
         bo_legacy->is_pending = 0;
@@ -204,13 +210,13 @@ static int legacy_is_pending(struct radeon_bo *bo)
         if (bo_legacy->pnext) {
             bo_legacy->pnext->pprev = bo_legacy->pprev;
         }
-	assert(bo_legacy->is_pending <= bo->cref);
+	assert(bo_legacy->is_pending <= boi->cref);
         while (bo_legacy->is_pending--) {
-	    bo = radeon_bo_unref(bo);
-	    if (!bo)
+	    boi = (struct radeon_bo_int *)radeon_bo_unref((struct radeon_bo *)boi);
+	    if (!boi)
 	      break;
         }
-	if (bo)
+	if (boi)
 	  bo_legacy->is_pending = 0;
         boml->cpendings--;
         return 0;
@@ -218,7 +224,7 @@ static int legacy_is_pending(struct radeon_bo *bo)
     return 1;
 }
 
-static int legacy_wait_pending(struct radeon_bo *bo)
+static int legacy_wait_pending(struct radeon_bo_int *bo)
 {
     struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
     struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
@@ -323,7 +329,7 @@ static struct bo_legacy *bo_allocate(struct bo_manager_legacy *boml,
     return bo_legacy;
 }
 
-static int bo_dma_alloc(struct radeon_bo *bo)
+static int bo_dma_alloc(struct radeon_bo_int *bo)
 {
     struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
     struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
@@ -333,7 +339,7 @@ static int bo_dma_alloc(struct radeon_bo *bo)
     int r;
 
     /* align size on 4Kb */
-    size = (((4 * 1024) - 1) + bo->size) & ~((4 * 1024) - 1);
+    size = (((4 * 1024) - 1) + bo_legacy->base.size) & ~((4 * 1024) - 1);
     alloc.region = RADEON_MEM_REGION_GART;
     alloc.alignment = bo_legacy->base.alignment;
     alloc.size = size;
@@ -355,7 +361,7 @@ static int bo_dma_alloc(struct radeon_bo *bo)
     return 0;
 }
 
-static int bo_dma_free(struct radeon_bo *bo)
+static int bo_dma_free(struct radeon_bo_int *bo)
 {
     struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
     struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
@@ -428,7 +434,7 @@ static struct radeon_bo *bo_open(struct radeon_bo_manager *bom,
         bo_legacy = boml->bos.next;
         while (bo_legacy) {
             if (bo_legacy->base.handle == handle) {
-                radeon_bo_ref(&(bo_legacy->base));
+                radeon_bo_ref((struct radeon_bo *)&(bo_legacy->base));
                 return (struct radeon_bo*)bo_legacy;
             }
             bo_legacy = bo_legacy->next;
@@ -468,20 +474,20 @@ retry:
             return NULL;
         }
     }
-    radeon_bo_ref(&(bo_legacy->base));
+    radeon_bo_ref((struct radeon_bo *)&(bo_legacy->base));
 
     return (struct radeon_bo*)bo_legacy;
 }
 
-static void bo_ref(struct radeon_bo *bo)
+static void bo_ref(struct radeon_bo_int *bo)
 {
 }
 
-static struct radeon_bo *bo_unref(struct radeon_bo *bo)
+static struct radeon_bo *bo_unref(struct radeon_bo_int *boi)
 {
-    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+    struct bo_legacy *bo_legacy = (struct bo_legacy*)boi;
 
-    if (bo->cref <= 0) {
+    if (boi->cref <= 0) {
         bo_legacy->prev->next = bo_legacy->next;
         if (bo_legacy->next) {
             bo_legacy->next->prev = bo_legacy->prev;
@@ -491,10 +497,10 @@ static struct radeon_bo *bo_unref(struct radeon_bo *bo)
         }
         return NULL;
     }
-    return bo;
+    return (struct radeon_bo *)boi;
 }
 
-static int bo_map(struct radeon_bo *bo, int write)
+static int bo_map(struct radeon_bo_int *bo, int write)
 {
     struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
     struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
@@ -528,7 +534,7 @@ static int bo_map(struct radeon_bo *bo, int write)
     return 0;
 }
 
-static int bo_unmap(struct radeon_bo *bo)
+static int bo_unmap(struct radeon_bo_int *bo)
 {
     struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
 
@@ -542,7 +548,7 @@ static int bo_unmap(struct radeon_bo *bo)
     return 0;
 }
 
-static int bo_is_busy(struct radeon_bo *bo, uint32_t *domain)
+static int bo_is_busy(struct radeon_bo_int *bo, uint32_t *domain)
 {
     *domain = 0;
     if (bo->domains & RADEON_GEM_DOMAIN_GTT)
@@ -555,7 +561,7 @@ static int bo_is_busy(struct radeon_bo *bo, uint32_t *domain)
         return 0;
 }
 
-static int bo_is_static(struct radeon_bo *bo)
+static int bo_is_static(struct radeon_bo_int *bo)
 {
     struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
     return bo_legacy->static_bo;
@@ -574,7 +580,7 @@ static struct radeon_bo_funcs bo_legacy_funcs = {
     bo_is_busy
 };
 
-static int bo_vram_validate(struct radeon_bo *bo,
+static int bo_vram_validate(struct radeon_bo_int *bo,
                             uint32_t *soffset,
                             uint32_t *eoffset)
 {
@@ -700,25 +706,30 @@ int radeon_bo_legacy_validate(struct radeon_bo *bo,
                               uint32_t *soffset,
                               uint32_t *eoffset)
 {
-    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
+    struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)boi->bom;
     struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
     int r;
     int retries = 0;
 
     if (bo_legacy->map_count) {
         fprintf(stderr, "bo(%p, %d) is mapped (%d) can't valide it.\n",
-                bo, bo->size, bo_legacy->map_count);
+                bo, boi->size, bo_legacy->map_count);
+        return -EINVAL;
+    }
+    if(boi->size == 0) {
+        fprintf(stderr, "bo(%p) has size 0.\n", bo);
         return -EINVAL;
     }
     if (bo_legacy->static_bo || bo_legacy->validated) {
         *soffset = bo_legacy->offset;
-        *eoffset = bo_legacy->offset + bo->size;
+        *eoffset = bo_legacy->offset + boi->size;
 
         return 0;
     }
-    if (!(bo->domains & RADEON_GEM_DOMAIN_GTT)) {
+    if (!(boi->domains & RADEON_GEM_DOMAIN_GTT)) {
 
-        r = bo_vram_validate(bo, soffset, eoffset);
+        r = bo_vram_validate(boi, soffset, eoffset);
         if (r) {
 	    legacy_track_pending(&boml->base, 0);
 	    legacy_kick_all_buffers(boml);
@@ -732,7 +743,7 @@ int radeon_bo_legacy_validate(struct radeon_bo *bo,
         }
     }
     *soffset = bo_legacy->offset;
-    *eoffset = bo_legacy->offset + bo->size;
+    *eoffset = bo_legacy->offset + boi->size;
     bo_legacy->validated = 1;
 
     return 0;
@@ -740,7 +751,8 @@ int radeon_bo_legacy_validate(struct radeon_bo *bo,
 
 void radeon_bo_legacy_pending(struct radeon_bo *bo, uint32_t pending)
 {
-    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
+    struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)boi->bom;
     struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
 
     bo_legacy->pending = pending;
@@ -795,7 +807,7 @@ static struct bo_legacy *radeon_legacy_bo_alloc_static(struct bo_manager_legacy
     if (bo->base.handle > bom->nhandle) {
         bom->nhandle = bo->base.handle + 1;
     }
-    radeon_bo_ref(&(bo->base));
+    radeon_bo_ref((struct radeon_bo *)&(bo->base));
     return bo;
 }
 
@@ -890,12 +902,13 @@ void radeon_bo_legacy_texture_age(struct radeon_bo_manager *bom)
 
 unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo)
 {
+    struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
     struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
 
-    if (bo_legacy->static_bo || (bo->domains & RADEON_GEM_DOMAIN_GTT)) {
+    if (bo_legacy->static_bo || (boi->domains & RADEON_GEM_DOMAIN_GTT)) {
         return 0;
     }
-    return bo->size;
+    return boi->size;
 }
 
 /*
@@ -920,7 +933,7 @@ struct radeon_bo *radeon_legacy_bo_alloc_fake(struct radeon_bo_manager *bom,
     if (bo->base.handle > boml->nhandle) {
         boml->nhandle = bo->base.handle + 1;
     }
-    radeon_bo_ref(&(bo->base));
-    return &(bo->base);
+    radeon_bo_ref((struct radeon_bo *)&(bo->base));
+    return (struct radeon_bo *)&(bo->base);
 }
 
diff --git a/radeon/radeon_bocs_wrapper.h b/radeon/radeon_bocs_wrapper.h
index 4520a7d..6c2648b 100644
--- a/radeon/radeon_bocs_wrapper.h
+++ b/radeon/radeon_bocs_wrapper.h
@@ -18,8 +18,11 @@
 #define RADEON_TILING_MACRO 0x1
 #define RADEON_TILING_MICRO 0x2
 #define RADEON_TILING_SWAP 0x4
+
+#ifndef RADEON_TILING_SURFACE
 #define RADEON_TILING_SURFACE 0x8 /* this object requires a surface
 				   * when mapped - i.e. front buffer */
+#endif
 
 /* to be used to build locally in mesa with no libdrm bits */
 #include "../radeon/radeon_bo_drm.h"
diff --git a/radeon/radeon_common.c b/radeon/radeon_common.c
index 9817ff8..5b2bcfd 100644
--- a/radeon/radeon_common.c
+++ b/radeon/radeon_common.c
@@ -229,16 +229,15 @@ void radeonUpdateScissor( GLcontext *ctx )
 	}
 	if (!rmesa->radeonScreen->kernel_mm) {
 	   /* Fix scissors for dri 1 */
-
 	   __DRIdrawablePrivate *dPriv = radeon_get_drawable(rmesa);
 	   x1 += dPriv->x;
-	   x2 += dPriv->x;
+	   x2 += dPriv->x + 1;
 	   min_x += dPriv->x;
-	   max_x += dPriv->x;
+	   max_x += dPriv->x + 1;
 	   y1 += dPriv->y;
-	   y2 += dPriv->y;
+	   y2 += dPriv->y + 1;
 	   min_y += dPriv->y;
-	   max_y += dPriv->y;
+	   max_y += dPriv->y + 1;
 	}
 
 	rmesa->state.scissor.rect.x1 = CLAMP(x1,  min_x, max_x);
@@ -263,29 +262,6 @@ void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h)
 	}
 }
 
-void radeonPolygonStipplePreKMS( GLcontext *ctx, const GLubyte *mask )
-{
-   radeonContextPtr radeon = RADEON_CONTEXT(ctx);
-   GLuint i;
-   drm_radeon_stipple_t stipple;
-
-   /* Must flip pattern upside down.
-   */
-   for ( i = 0 ; i < 32 ; i++ ) {
-      stipple.mask[31 - i] = ((GLuint *) mask)[i];
-   }
-
-   /* TODO: push this into cmd mechanism
-   */
-   radeon_firevertices(radeon);
-   LOCK_HARDWARE( radeon );
-
-   drmCommandWrite( radeon->dri.fd, DRM_RADEON_STIPPLE,
-	 &stipple, sizeof(stipple) );
-   UNLOCK_HARDWARE( radeon );
-}
-
-
 /* ================================================================
  * SwapBuffers with client-side throttling
  */
diff --git a/radeon/radeon_common.h b/radeon/radeon_common.h
index f320191..a9e1ca4 100644
--- a/radeon/radeon_common.h
+++ b/radeon/radeon_common.h
@@ -10,7 +10,6 @@ void radeonRecalcScissorRects(radeonContextPtr radeon);
 void radeonSetCliprects(radeonContextPtr radeon);
 void radeonUpdateScissor( GLcontext *ctx );
 void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h);
-void radeonPolygonStipplePreKMS( GLcontext *ctx, const GLubyte *mask );
 
 void radeonWaitForIdleLocked(radeonContextPtr radeon);
 extern uint32_t radeonGetAge(radeonContextPtr radeon);
diff --git a/radeon/radeon_common_context.h b/radeon/radeon_common_context.h
index 0309345..fdf5e07 100644
--- a/radeon/radeon_common_context.h
+++ b/radeon/radeon_common_context.h
@@ -401,9 +401,6 @@ struct radeon_state {
 	struct radeon_depthbuffer_state depth;
 	struct radeon_scissor_state scissor;
 	struct radeon_stencilbuffer_state stencil;
-
-	struct radeon_cs_space_check bos[RADEON_MAX_BOS];
-	int validated_bo_count;
 };
 
 /**
diff --git a/radeon/radeon_context.h b/radeon/radeon_context.h
index 4e2c52c..12ab33a 100644
--- a/radeon/radeon_context.h
+++ b/radeon/radeon_context.h
@@ -331,8 +331,12 @@ struct r100_hw_state {
 	struct radeon_state_atom stp;
 };
 
+struct radeon_stipple_state {
+	GLuint mask[32];
+};
 
 struct r100_state {
+	struct radeon_stipple_state stipple;
 	struct radeon_texture_state texture;
 };
 
diff --git a/radeon/radeon_cs.c b/radeon/radeon_cs.c
new file mode 100644
index 0000000..17e7433
--- /dev/null
+++ b/radeon/radeon_cs.c
@@ -0,0 +1,95 @@
+
+#include <stdio.h>
+#include <stdint.h>
+#include "drm.h"
+#include "radeon_drm.h"
+#include "radeon_bocs_wrapper.h"
+#include "radeon_cs_int_drm.h"
+
+struct radeon_cs *radeon_cs_create(struct radeon_cs_manager *csm,
+			    uint32_t ndw)
+{
+    struct radeon_cs_int *csi = csm->funcs->cs_create(csm, ndw);
+    return (struct radeon_cs *)csi;
+}
+
+int radeon_cs_write_reloc(struct radeon_cs *cs,
+			  struct radeon_bo *bo,
+			  uint32_t read_domain,
+			  uint32_t write_domain,
+			  uint32_t flags)
+{
+    struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+
+    return csi->csm->funcs->cs_write_reloc(csi,
+					   bo,
+					   read_domain,
+					   write_domain,
+					   flags);
+}
+
+int radeon_cs_begin(struct radeon_cs *cs,
+		    uint32_t ndw,
+		    const char *file,
+		    const char *func,
+		    int line)
+{
+    struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+    return csi->csm->funcs->cs_begin(csi, ndw, file, func, line);
+}
+
+int radeon_cs_end(struct radeon_cs *cs,
+		  const char *file,
+		  const char *func,
+		  int line)
+{
+    struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+    return csi->csm->funcs->cs_end(csi, file, func, line);
+}
+
+int radeon_cs_emit(struct radeon_cs *cs)
+{
+    struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+    return csi->csm->funcs->cs_emit(csi);
+}
+
+int radeon_cs_destroy(struct radeon_cs *cs)
+{
+    struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+    return csi->csm->funcs->cs_destroy(csi);
+}
+
+int radeon_cs_erase(struct radeon_cs *cs)
+{
+    struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+    return csi->csm->funcs->cs_erase(csi);
+}
+
+int radeon_cs_need_flush(struct radeon_cs *cs)
+{
+    struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+    return csi->csm->funcs->cs_need_flush(csi);
+}
+
+void radeon_cs_print(struct radeon_cs *cs, FILE *file)
+{
+    struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+    csi->csm->funcs->cs_print(csi, file);
+}
+
+void radeon_cs_set_limit(struct radeon_cs *cs, uint32_t domain, uint32_t limit)
+{
+    struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+    if (domain == RADEON_GEM_DOMAIN_VRAM)
+	csi->csm->vram_limit = limit;
+    else
+	csi->csm->gart_limit = limit;
+}
+
+void radeon_cs_space_set_flush(struct radeon_cs *cs, void (*fn)(void *), void *data)
+{
+    struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+    csi->space_flush_fn = fn;
+    csi->space_flush_data = data;
+}
+
diff --git a/radeon/radeon_cs_drm.h b/radeon/radeon_cs_drm.h
index ab4eca3..a3f1750 100644
--- a/radeon/radeon_cs_drm.h
+++ b/radeon/radeon_cs_drm.h
@@ -36,6 +36,7 @@
 #include <string.h>
 #include "drm.h"
 #include "radeon_drm.h"
+#include "radeon_bo_drm.h"
 
 struct radeon_cs_reloc {
     struct radeon_bo    *bo;
@@ -49,173 +50,41 @@ struct radeon_cs_reloc {
 #define RADEON_CS_SPACE_OP_TO_BIG 1
 #define RADEON_CS_SPACE_FLUSH 2
 
-struct radeon_cs_space_check {
-    struct radeon_bo *bo;
-    uint32_t read_domains;
-    uint32_t write_domain;
-    uint32_t new_accounted;
-};
-
-#define MAX_SPACE_BOS (32)
-
-struct radeon_cs_manager;
-
 struct radeon_cs {
-    struct radeon_cs_manager    *csm;
-    void                        *relocs;
-    uint32_t                    *packets;
-    unsigned                    crelocs;
-    unsigned                    relocs_total_size;
-    unsigned                    cdw;
-    unsigned                    ndw;
-    int                         section;
+    uint32_t *packets;
+    unsigned cdw;
+    unsigned ndw;
     unsigned                    section_ndw;
     unsigned                    section_cdw;
-    const char                  *section_file;
-    const char                  *section_func;
-    int                         section_line;
-    struct radeon_cs_space_check bos[MAX_SPACE_BOS];
-    int                         bo_count;
-    void                        (*space_flush_fn)(void *);
-    void                        *space_flush_data;
-};
-
-/* cs functions */
-struct radeon_cs_funcs {
-    struct radeon_cs *(*cs_create)(struct radeon_cs_manager *csm,
-                                   uint32_t ndw);
-    int (*cs_write_reloc)(struct radeon_cs *cs,
-                          struct radeon_bo *bo,
-                          uint32_t read_domain,
-                          uint32_t write_domain,
-                          uint32_t flags);
-    int (*cs_begin)(struct radeon_cs *cs,
-                    uint32_t ndw,
-                    const char *file,
-                    const char *func,
-                    int line);
-    int (*cs_end)(struct radeon_cs *cs,
-                  const char *file,
-                  const char *func,
-                  int line);
-    int (*cs_emit)(struct radeon_cs *cs);
-    int (*cs_destroy)(struct radeon_cs *cs);
-    int (*cs_erase)(struct radeon_cs *cs);
-    int (*cs_need_flush)(struct radeon_cs *cs);
-    void (*cs_print)(struct radeon_cs *cs, FILE *file);
-};
-
-struct radeon_cs_manager {
-    struct radeon_cs_funcs  *funcs;
-    int                     fd;
-    int32_t vram_limit, gart_limit;
-    int32_t vram_write_used, gart_write_used;
-    int32_t read_used;
 };
 
-static inline struct radeon_cs *radeon_cs_create(struct radeon_cs_manager *csm,
-                                                 uint32_t ndw)
-{
-    return csm->funcs->cs_create(csm, ndw);
-}
-
-static inline int radeon_cs_write_reloc(struct radeon_cs *cs,
-                                        struct radeon_bo *bo,
-                                        uint32_t read_domain,
-                                        uint32_t write_domain,
-                                        uint32_t flags)
-{
-    return cs->csm->funcs->cs_write_reloc(cs,
-                                          bo,
-                                          read_domain,
-                                          write_domain,
-                                          flags);
-}
-
-static inline int radeon_cs_begin(struct radeon_cs *cs,
-                                  uint32_t ndw,
-                                  const char *file,
-                                  const char *func,
-                                  int line)
-{
-    return cs->csm->funcs->cs_begin(cs, ndw, file, func, line);
-}
-
-static inline int radeon_cs_end(struct radeon_cs *cs,
-                                const char *file,
-                                const char *func,
-                                int line)
-{
-    return cs->csm->funcs->cs_end(cs, file, func, line);
-}
-
-static inline int radeon_cs_emit(struct radeon_cs *cs)
-{
-    return cs->csm->funcs->cs_emit(cs);
-}
-
-static inline int radeon_cs_destroy(struct radeon_cs *cs)
-{
-    return cs->csm->funcs->cs_destroy(cs);
-}
-
-static inline int radeon_cs_erase(struct radeon_cs *cs)
-{
-    return cs->csm->funcs->cs_erase(cs);
-}
-
-static inline int radeon_cs_need_flush(struct radeon_cs *cs)
-{
-    return cs->csm->funcs->cs_need_flush(cs);
-}
-
-static inline void radeon_cs_print(struct radeon_cs *cs, FILE *file)
-{
-    cs->csm->funcs->cs_print(cs, file);
-}
-
-static inline void radeon_cs_set_limit(struct radeon_cs *cs, uint32_t domain, uint32_t limit)
-{
-    
-    if (domain == RADEON_GEM_DOMAIN_VRAM)
-	cs->csm->vram_limit = limit;
-    else
-	cs->csm->gart_limit = limit;
-}
-
-static inline void radeon_cs_write_dword(struct radeon_cs *cs, uint32_t dword)
-{
-    cs->packets[cs->cdw++] = dword;
-    if (cs->section) {
-        cs->section_cdw++;
-    }
-}
-
-static inline void radeon_cs_write_qword(struct radeon_cs *cs, uint64_t qword)
-{
-
-    memcpy(cs->packets + cs->cdw, &qword, sizeof(qword));
-    cs->cdw+=2;
-    if (cs->section) {
-        cs->section_cdw+=2;
-    }
-}
-
-static inline void radeon_cs_write_table(struct radeon_cs *cs, void *data, uint32_t size)
-{
-    memcpy(cs->packets + cs->cdw, data, size * 4);
-    cs->cdw += size;
-    if (cs->section) {
-	    cs->section_cdw += size;
-    }
-}
+#define MAX_SPACE_BOS (32)
 
-static inline void radeon_cs_space_set_flush(struct radeon_cs *cs, void (*fn)(void *), void *data)
-{
-    cs->space_flush_fn = fn;
-    cs->space_flush_data = data;
-}
+struct radeon_cs_manager;
 
+extern struct radeon_cs *radeon_cs_create(struct radeon_cs_manager *csm,
+					  uint32_t ndw);
+
+extern int radeon_cs_begin(struct radeon_cs *cs,
+			   uint32_t ndw,
+			   const char *file,
+			   const char *func, int line);
+extern int radeon_cs_end(struct radeon_cs *cs,
+			 const char *file,
+			 const char *func,
+			 int line);
+extern int radeon_cs_emit(struct radeon_cs *cs);
+extern int radeon_cs_destroy(struct radeon_cs *cs);
+extern int radeon_cs_erase(struct radeon_cs *cs);
+extern int radeon_cs_need_flush(struct radeon_cs *cs);
+extern void radeon_cs_print(struct radeon_cs *cs, FILE *file);
+extern void radeon_cs_set_limit(struct radeon_cs *cs, uint32_t domain, uint32_t limit);
+extern void radeon_cs_space_set_flush(struct radeon_cs *cs, void (*fn)(void *), void *data);
+extern int radeon_cs_write_reloc(struct radeon_cs *cs,
+				 struct radeon_bo *bo,
+				 uint32_t read_domain,
+				 uint32_t write_domain,
+				 uint32_t flags);
 
 /*
  * add a persistent BO to the list
@@ -243,4 +112,30 @@ int radeon_cs_space_check_with_bo(struct radeon_cs *cs,
 				  uint32_t read_domains,
 				  uint32_t write_domain);
 
+static inline void radeon_cs_write_dword(struct radeon_cs *cs, uint32_t dword)
+{
+    cs->packets[cs->cdw++] = dword;
+    if (cs->section_ndw) {
+        cs->section_cdw++;
+    }
+}
+
+static inline void radeon_cs_write_qword(struct radeon_cs *cs, uint64_t qword)
+{
+    memcpy(cs->packets + cs->cdw, &qword, sizeof(uint64_t));
+    cs->cdw += 2;
+    if (cs->section_ndw) {
+        cs->section_cdw += 2;
+    }
+}
+
+static inline void radeon_cs_write_table(struct radeon_cs *cs,
+					 void *data, uint32_t size)
+{
+    memcpy(cs->packets + cs->cdw, data, size * 4);
+    cs->cdw += size;
+    if (cs->section_ndw) {
+	cs->section_cdw += size;
+    }
+}
 #endif
diff --git a/radeon/radeon_cs_int_drm.h b/radeon/radeon_cs_int_drm.h
new file mode 100644
index 0000000..8ba76bf
--- /dev/null
+++ b/radeon/radeon_cs_int_drm.h
@@ -0,0 +1,66 @@
+
+#ifndef _RADEON_CS_INT_H_
+#define _RADEON_CS_INT_H_
+
+struct radeon_cs_space_check {
+    struct radeon_bo_int *bo;
+    uint32_t read_domains;
+    uint32_t write_domain;
+    uint32_t new_accounted;
+};
+
+struct radeon_cs_int {
+    /* keep first two in same place */
+    uint32_t                    *packets;    
+    unsigned                    cdw;
+    unsigned                    ndw;
+    unsigned                    section_ndw;
+    unsigned                    section_cdw;
+    /* private members */
+    struct radeon_cs_manager    *csm;
+    void                        *relocs;
+    unsigned                    crelocs;
+    unsigned                    relocs_total_size;
+    const char                  *section_file;
+    const char                  *section_func;
+    int                         section_line;
+    struct radeon_cs_space_check bos[MAX_SPACE_BOS];
+    int                         bo_count;
+    void                        (*space_flush_fn)(void *);
+    void                        *space_flush_data;
+};
+
+/* cs functions */
+struct radeon_cs_funcs {
+    struct radeon_cs_int *(*cs_create)(struct radeon_cs_manager *csm,
+                                   uint32_t ndw);
+    int (*cs_write_reloc)(struct radeon_cs_int *cs,
+                          struct radeon_bo *bo,
+                          uint32_t read_domain,
+                          uint32_t write_domain,
+                          uint32_t flags);
+    int (*cs_begin)(struct radeon_cs_int *cs,
+                    uint32_t ndw,
+		    const char *file,
+		    const char *func,
+		    int line);
+    int (*cs_end)(struct radeon_cs_int *cs,
+		  const char *file, const char *func,
+		  int line);
+
+
+    int (*cs_emit)(struct radeon_cs_int *cs);
+    int (*cs_destroy)(struct radeon_cs_int *cs);
+    int (*cs_erase)(struct radeon_cs_int *cs);
+    int (*cs_need_flush)(struct radeon_cs_int *cs);
+    void (*cs_print)(struct radeon_cs_int *cs, FILE *file);
+};
+
+struct radeon_cs_manager {
+    struct radeon_cs_funcs  *funcs;
+    int                     fd;
+    int32_t vram_limit, gart_limit;
+    int32_t vram_write_used, gart_write_used;
+    int32_t read_used;
+};
+#endif
diff --git a/radeon/radeon_cs_legacy.c b/radeon/radeon_cs_legacy.c
index f1addb2..45b608a 100644
--- a/radeon/radeon_cs_legacy.c
+++ b/radeon/radeon_cs_legacy.c
@@ -30,10 +30,18 @@
  *      Jérôme Glisse <glisse@freedesktop.org>
  */
 #include <errno.h>
+#include <unistd.h>
+#include <stdint.h>
+#include "drm.h"
+#include "radeon_drm.h"
 
 #include "radeon_bocs_wrapper.h"
 #include "radeon_common.h"
-
+#ifdef HAVE_LIBDRM_RADEON
+#include "radeon_cs_int.h"
+#else
+#include "radeon_cs_int_drm.h"
+#endif
 struct cs_manager_legacy {
     struct radeon_cs_manager    base;
     struct radeon_context       *ctx;
@@ -51,27 +59,27 @@ struct cs_reloc_legacy {
 };
 
 
-static struct radeon_cs *cs_create(struct radeon_cs_manager *csm,
-                                   uint32_t ndw)
+static struct radeon_cs_int *cs_create(struct radeon_cs_manager *csm,
+				       uint32_t ndw)
 {
-    struct radeon_cs *cs;
+    struct radeon_cs_int *csi;
 
-    cs = (struct radeon_cs*)calloc(1, sizeof(struct radeon_cs));
-    if (cs == NULL) {
+    csi = (struct radeon_cs_int*)calloc(1, sizeof(struct radeon_cs_int));
+    if (csi == NULL) {
         return NULL;
     }
-    cs->csm = csm;
-    cs->ndw = (ndw + 0x3FF) & (~0x3FF);
-    cs->packets = (uint32_t*)malloc(4*cs->ndw);
-    if (cs->packets == NULL) {
-        free(cs);
+    csi->csm = csm;
+    csi->ndw = (ndw + 0x3FF) & (~0x3FF);
+    csi->packets = (uint32_t*)malloc(4*csi->ndw);
+    if (csi->packets == NULL) {
+        free(csi);
         return NULL;
     }
-    cs->relocs_total_size = 0;
-    return cs;
+    csi->relocs_total_size = 0;
+    return csi;
 }
 
-static int cs_write_reloc(struct radeon_cs *cs,
+static int cs_write_reloc(struct radeon_cs_int *cs,
                           struct radeon_bo *bo,
                           uint32_t read_domain,
                           uint32_t write_domain,
@@ -150,20 +158,19 @@ static int cs_write_reloc(struct radeon_cs *cs,
     return 0;
 }
 
-static int cs_begin(struct radeon_cs *cs,
+static int cs_begin(struct radeon_cs_int *cs,
                     uint32_t ndw,
                     const char *file,
                     const char *func,
                     int line)
 {
-    if (cs->section) {
+    if (cs->section_ndw) {
         fprintf(stderr, "CS already in a section(%s,%s,%d)\n",
                 cs->section_file, cs->section_func, cs->section_line);
         fprintf(stderr, "CS can't start section(%s,%s,%d)\n",
                 file, func, line);
         return -EPIPE;
     }
-    cs->section = 1;
     cs->section_ndw = ndw;
     cs->section_cdw = 0;
     cs->section_file = file;
@@ -187,18 +194,17 @@ static int cs_begin(struct radeon_cs *cs,
     return 0;
 }
 
-static int cs_end(struct radeon_cs *cs,
+static int cs_end(struct radeon_cs_int *cs,
                   const char *file,
                   const char *func,
                   int line)
 
 {
-    if (!cs->section) {
+    if (!cs->section_ndw) {
         fprintf(stderr, "CS no section to end at (%s,%s,%d)\n",
                 file, func, line);
         return -EPIPE;
     }
-    cs->section = 0;
     if (cs->section_ndw != cs->section_cdw) {
         fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n",
                 cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw);
@@ -206,10 +212,12 @@ static int cs_end(struct radeon_cs *cs,
                 file, func, line);
         return -EPIPE;
     }
+    cs->section_ndw = 0;
+
     return 0;
 }
 
-static int cs_process_relocs(struct radeon_cs *cs)
+static int cs_process_relocs(struct radeon_cs_int *cs)
 {
     struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
     struct cs_reloc_legacy *relocs;
@@ -254,7 +262,7 @@ restart:
     return 0;
 }
 
-static int cs_set_age(struct radeon_cs *cs)
+static int cs_set_age(struct radeon_cs_int *cs)
 {
     struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
     struct cs_reloc_legacy *relocs;
@@ -268,7 +276,7 @@ static int cs_set_age(struct radeon_cs *cs)
     return 0;
 }
 
-static int cs_emit(struct radeon_cs *cs)
+static int cs_emit(struct radeon_cs_int *cs)
 {
     struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
     drm_radeon_cmd_buffer_t cmd;
@@ -276,7 +284,7 @@ static int cs_emit(struct radeon_cs *cs)
     uint64_t ull;
     int r;
 
-    csm->ctx->vtbl.emit_cs_header(cs, csm->ctx);
+    csm->ctx->vtbl.emit_cs_header((struct radeon_cs *)cs, csm->ctx);
 
     /* append buffer age */
     if ( IS_R300_CLASS(csm->ctx->radeonScreen) )
@@ -289,9 +297,9 @@ static int cs_emit(struct radeon_cs *cs)
       age.scratch.reg = 2;
       age.scratch.n_bufs = 1;
       age.scratch.flags = 0;
-      radeon_cs_write_dword(cs, age.u);
-      radeon_cs_write_qword(cs, ull);
-      radeon_cs_write_dword(cs, 0);
+      radeon_cs_write_dword((struct radeon_cs *)cs, age.u);
+      radeon_cs_write_qword((struct radeon_cs *)cs, ull);
+      radeon_cs_write_dword((struct radeon_cs *)cs, 0);
     }
 
     r = cs_process_relocs(cs);
@@ -342,7 +350,7 @@ static void inline cs_free_reloc(void *relocs_p, int crelocs)
       free(relocs[i].indices);
 }
 
-static int cs_destroy(struct radeon_cs *cs)
+static int cs_destroy(struct radeon_cs_int *cs)
 {
     cs_free_reloc(cs->relocs, cs->crelocs);
     free(cs->relocs);
@@ -351,7 +359,7 @@ static int cs_destroy(struct radeon_cs *cs)
     return 0;
 }
 
-static int cs_erase(struct radeon_cs *cs)
+static int cs_erase(struct radeon_cs_int *cs)
 {
     cs_free_reloc(cs->relocs, cs->crelocs);
     free(cs->relocs);
@@ -359,18 +367,18 @@ static int cs_erase(struct radeon_cs *cs)
     cs->relocs = NULL;
     cs->crelocs = 0;
     cs->cdw = 0;
-    cs->section = 0;
+    cs->section_ndw = 0;
     return 0;
 }
 
-static int cs_need_flush(struct radeon_cs *cs)
+static int cs_need_flush(struct radeon_cs_int *cs)
 {
     /* this function used to flush when the BO usage got to
      * a certain size, now the higher levels handle this better */
     return 0;
 }
 
-static void cs_print(struct radeon_cs *cs, FILE *file)
+static void cs_print(struct radeon_cs_int *cs, FILE *file)
 {
 }
 
diff --git a/radeon/radeon_cs_space_drm.c b/radeon/radeon_cs_space_drm.c
index 89cbbb5..e22b437 100644
--- a/radeon/radeon_cs_space_drm.c
+++ b/radeon/radeon_cs_space_drm.c
@@ -29,6 +29,8 @@
 #include <errno.h>
 #include <stdlib.h>
 #include "radeon_bocs_wrapper.h"
+#include "radeon_bo_int_drm.h"
+#include "radeon_cs_int_drm.h"
 
 struct rad_sizes {
     int32_t op_read;
@@ -39,7 +41,7 @@ struct rad_sizes {
 static inline int radeon_cs_setup_bo(struct radeon_cs_space_check *sc, struct rad_sizes *sizes)
 {
     uint32_t read_domains, write_domain;
-    struct radeon_bo *bo;
+    struct radeon_bo_int *bo;
 
     bo = sc->bo;
     sc->new_accounted = 0;
@@ -47,7 +49,7 @@ static inline int radeon_cs_setup_bo(struct radeon_cs_space_check *sc, struct ra
     write_domain = sc->write_domain;
 
     /* legacy needs a static check */
-    if (radeon_bo_is_static(bo)) {
+    if (radeon_bo_is_static((struct radeon_bo *)sc->bo)) {
 	bo->space_accounted = sc->new_accounted = (read_domains << 16) | write_domain;
 	return 0;
     }
@@ -100,11 +102,11 @@ static inline int radeon_cs_setup_bo(struct radeon_cs_space_check *sc, struct ra
     return 0;
 }
 
-static int radeon_cs_do_space_check(struct radeon_cs *cs, struct radeon_cs_space_check *new_tmp)
+static int radeon_cs_do_space_check(struct radeon_cs_int *cs, struct radeon_cs_space_check *new_tmp)
 {
     struct radeon_cs_manager *csm = cs->csm;
     int i;
-    struct radeon_bo *bo;
+    struct radeon_bo_int *bo;
     struct rad_sizes sizes;
     int ret;
 
@@ -158,25 +160,28 @@ static int radeon_cs_do_space_check(struct radeon_cs *cs, struct radeon_cs_space
 
 void radeon_cs_space_add_persistent_bo(struct radeon_cs *cs, struct radeon_bo *bo, uint32_t read_domains, uint32_t write_domain)
 {
+    struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+    struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
     int i;
-    for (i = 0; i < cs->bo_count; i++) {
-	if (cs->bos[i].bo == bo &&
-	    cs->bos[i].read_domains == read_domains &&
-	    cs->bos[i].write_domain == write_domain)
+    for (i = 0; i < csi->bo_count; i++) {
+	if (csi->bos[i].bo == boi &&
+	    csi->bos[i].read_domains == read_domains &&
+	    csi->bos[i].write_domain == write_domain)
 	    return;
     }
     radeon_bo_ref(bo);
-    i = cs->bo_count;
-    cs->bos[i].bo = bo;
-    cs->bos[i].read_domains = read_domains;
-    cs->bos[i].write_domain = write_domain;
-    cs->bos[i].new_accounted = 0;
-    cs->bo_count++;
-
-    assert(cs->bo_count < MAX_SPACE_BOS);
+    i = csi->bo_count;
+    csi->bos[i].bo = boi;
+    csi->bos[i].read_domains = read_domains;
+    csi->bos[i].write_domain = write_domain;
+    csi->bos[i].new_accounted = 0;
+    csi->bo_count++;
+
+    assert(csi->bo_count < MAX_SPACE_BOS);
 }
 
-static int radeon_cs_check_space_internal(struct radeon_cs *cs, struct radeon_cs_space_check *tmp_bo)
+static int radeon_cs_check_space_internal(struct radeon_cs_int *cs,
+					  struct radeon_cs_space_check *tmp_bo)
 {
     int ret;
     int flushed = 0;
@@ -198,37 +203,42 @@ again:
 int radeon_cs_space_check_with_bo(struct radeon_cs *cs,
 				  struct radeon_bo *bo,
 				  uint32_t read_domains, uint32_t write_domain)
-{									
+{
+    struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+    struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
     struct radeon_cs_space_check temp_bo;
+    
     int ret = 0;
 
     if (bo) {
-	temp_bo.bo = bo;
+	temp_bo.bo = boi;
 	temp_bo.read_domains = read_domains;
 	temp_bo.write_domain = write_domain;
 	temp_bo.new_accounted = 0;
     }
 
-    ret = radeon_cs_check_space_internal(cs, bo ? &temp_bo : NULL);
+    ret = radeon_cs_check_space_internal(csi, bo ? &temp_bo : NULL);
     return ret;
 }
 
 int radeon_cs_space_check(struct radeon_cs *cs)
 {
-    return radeon_cs_check_space_internal(cs, NULL);
+    struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+    return radeon_cs_check_space_internal(csi, NULL);
 }
 
 void radeon_cs_space_reset_bos(struct radeon_cs *cs)
 {
+    struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
     int i;
-    for (i = 0; i < cs->bo_count; i++) {
-	radeon_bo_unref(cs->bos[i].bo);
-	cs->bos[i].bo = NULL;
-	cs->bos[i].read_domains = 0;
-	cs->bos[i].write_domain = 0;
-	cs->bos[i].new_accounted = 0;
+    for (i = 0; i < csi->bo_count; i++) {
+	radeon_bo_unref((struct radeon_bo *)csi->bos[i].bo);
+	csi->bos[i].bo = NULL;
+	csi->bos[i].read_domains = 0;
+	csi->bos[i].write_domain = 0;
+	csi->bos[i].new_accounted = 0;
     }
-    cs->bo_count = 0;
+    csi->bo_count = 0;
 }
 
 
diff --git a/radeon/radeon_dma.c b/radeon/radeon_dma.c
index c6edbae..232972d 100644
--- a/radeon/radeon_dma.c
+++ b/radeon/radeon_dma.c
@@ -306,10 +306,6 @@ static int radeon_bo_is_idle(struct radeon_bo* bo)
 		WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
 			"This may cause small performance drop for you.\n");
 	}
-	/* Protect against bug in legacy bo handling that causes bos stay
-	 * referenced even after they should be freed */
-	if (bo->cref != 1)
-		return 0;
 	return ret != -EBUSY;
 }
 
@@ -346,9 +342,7 @@ void radeonReleaseDmaRegions(radeonContextPtr rmesa)
 	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
 		if (dma_bo->expire_counter == time) {
 			WARN_ONCE("Leaking dma buffer object!\n");
-			/* force free of buffer so we don't realy start
-			 * leaking stuff now*/
-			while ((dma_bo->bo = radeon_bo_unref(dma_bo->bo))) {}
+			radeon_bo_unref(dma_bo->bo);
 			remove_from_list(dma_bo);
 			FREE(dma_bo);
 			continue;
diff --git a/radeon/radeon_fbo.c b/radeon/radeon_fbo.c
index d83b166..2012cbc 100644
--- a/radeon/radeon_fbo.c
+++ b/radeon/radeon_fbo.c
@@ -445,7 +445,6 @@ restart:
 		goto restart;
 	}
 	
-	rrb->pitch = texImage->Width * rrb->cpp;
 	rrb->base.InternalFormat = rrb->base._ActualFormat;
 	rrb->base.Width = texImage->Width;
 	rrb->base.Height = texImage->Height;
@@ -555,8 +554,10 @@ radeon_render_texture(GLcontext * ctx,
       imageOffset += offsets[att->Zoffset];
    }
 
-   /* store that offset in the region */
+   /* store that offset in the region, along with the correct pitch for
+    * the image we are rendering to */
    rrb->draw_offset = imageOffset;
+   rrb->pitch = radeon_image->mt->levels[att->TextureLevel].rowstride;
 
    /* update drawing region, etc */
    radeon_draw_buffer(ctx, fb);
diff --git a/radeon/radeon_lock.c b/radeon/radeon_lock.c
index 02de8e5..7ad781b 100644
--- a/radeon/radeon_lock.c
+++ b/radeon/radeon_lock.c
@@ -62,8 +62,6 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
 	__DRIdrawablePrivate *const readable = radeon_get_readable(rmesa);
 	__DRIscreenPrivate *sPriv = rmesa->dri.screen;
 
-	assert(drawable != NULL);
-
 	drmGetLock(rmesa->dri.fd, rmesa->dri.hwContext, flags);
 
 	/* The window might have moved, so we might need to get new clip
@@ -74,12 +72,13 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
 	 * Since the hardware state depends on having the latest drawable
 	 * clip rects, all state checking must be done _after_ this call.
 	 */
-	DRI_VALIDATE_DRAWABLE_INFO(sPriv, drawable);
-	if (drawable != readable) {
+	if (drawable)
+		DRI_VALIDATE_DRAWABLE_INFO(sPriv, drawable);
+	if (readable && drawable != readable) {
 		DRI_VALIDATE_DRAWABLE_INFO(sPriv, readable);
 	}
 
-	if (rmesa->lastStamp != drawable->lastStamp) {
+	if (drawable && (rmesa->lastStamp != drawable->lastStamp)) {
 		radeon_window_moved(rmesa);
 		rmesa->lastStamp = drawable->lastStamp;
 	}
diff --git a/radeon/radeon_span.c b/radeon/radeon_span.c
index d603f52..5982ee4 100644
--- a/radeon/radeon_span.c
+++ b/radeon/radeon_span.c
@@ -755,8 +755,7 @@ static void map_unmap_rb(struct gl_renderbuffer *rb, int flag)
 		return;
 
 	if (flag) {
-		if (rrb->bo->bom->funcs->bo_wait)
-			radeon_bo_wait(rrb->bo);
+	        radeon_bo_wait(rrb->bo);
 		r = radeon_bo_map(rrb->bo, 1);
 		if (r) {
 			fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
diff --git a/radeon/radeon_state.c b/radeon/radeon_state.c
index 4d0d35e..f6c733a 100644
--- a/radeon/radeon_state.c
+++ b/radeon/radeon_state.c
@@ -550,6 +550,31 @@ static void radeonPolygonOffset( GLcontext *ctx,
    rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = constant.ui32;
 }
 
+static void radeonPolygonStipplePreKMS( GLcontext *ctx, const GLubyte *mask )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLuint i;
+   drm_radeon_stipple_t stipple;
+
+   /* Must flip pattern upside down.
+    */
+   for ( i = 0 ; i < 32 ; i++ ) {
+      rmesa->state.stipple.mask[31 - i] = ((GLuint *) mask)[i];
+   }
+
+   /* TODO: push this into cmd mechanism
+    */
+   radeon_firevertices(&rmesa->radeon);
+   LOCK_HARDWARE( &rmesa->radeon );
+
+   /* FIXME: Use window x,y offsets into stipple RAM.
+    */
+   stipple.mask = rmesa->state.stipple.mask;
+   drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE,
+		    &stipple, sizeof(drm_radeon_stipple_t) );
+   UNLOCK_HARDWARE( &rmesa->radeon );
+}
+
 static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
 {
    r100ContextPtr rmesa = R100_CONTEXT(ctx);
author	Luc Verhaegen <libv@skynet.be>	2010-03-14 08:30:53 +0100
committer	Luc Verhaegen <libv@skynet.be>	2010-03-14 08:30:53 +0100
commit	a682254179a2c3c2ec3ee4597bd71b12c05a778d (patch)
tree	c4d282e4927d130a4c0d4bad45080b03bbabc77d
parent	0c8469d1892b441c38d1cb09d6bbf85692c89e92 (diff)