From a198fe4074e8257c8c3e3a460c15ed108d8d07a8 Mon Sep 17 00:00:00 2001
From: Luc Verhaegen <libv@skynet.be>
Date: Fri, 2 Apr 2010 12:59:34 +0200
Subject: Import radeon, r200, r300 and r600 dri drivers from mesa 7.8.0.

---
 r200/r200_cmdbuf.c          |  3 ++-
 r200/r200_tcl.c             | 56 ++++++++++++++++++++++++++-------------------
 r300/r300_cmdbuf.c          | 42 ++++++++++++++++++++++++++++------
 r300/r300_fragprog_common.c | 13 +++++++++++
 r300/r300_vertprog.c        | 30 +++++++++++++++++-------
 r600/r700_state.c           |  2 +-
 radeon/radeon_dma.c         |  7 +-----
 7 files changed, 107 insertions(+), 46 deletions(-)

diff --git a/r200/r200_cmdbuf.c b/r200/r200_cmdbuf.c
index 2f2b8d9..ad43a8c 100644
--- a/r200/r200_cmdbuf.c
+++ b/r200/r200_cmdbuf.c
@@ -189,7 +189,8 @@ void r200FlushElts(GLcontext *ctx)
    if (R200_ELT_BUF_SZ > elt_used)
      radeonReturnDmaRegion(&rmesa->radeon, R200_ELT_BUF_SZ - elt_used);
 
-   if (radeon_is_debug_enabled(RADEON_SYNC, RADEON_CRITICAL)) {
+   if (radeon_is_debug_enabled(RADEON_SYNC, RADEON_CRITICAL)
+         && !rmesa->radeon.radeonScreen->kernel_mm) {
       radeon_print(RADEON_SYNC, RADEON_NORMAL, "%s: Syncing\n", __FUNCTION__);
       radeonFinish( rmesa->radeon.glCtx );
    }
diff --git a/r200/r200_tcl.c b/r200/r200_tcl.c
index f3f558b..d43e145 100644
--- a/r200/r200_tcl.c
+++ b/r200/r200_tcl.c
@@ -404,8 +404,9 @@ static GLuint r200EnsureEmitSize( GLcontext * ctx , GLubyte* vimap_rev )
          rendering code may decide convert to elts.
 	 In that case we have to make pessimistic prediction.
 	 and use larger of 2 paths. */
-      const GLuint elts = ELTS_BUFSZ(nr_aos);
-      const GLuint index = INDEX_BUFSZ;
+      const GLuint elt_count =(VB->Primitive[i].count/GET_MAX_HW_ELTS() + 1);
+      const GLuint elts = ELTS_BUFSZ(nr_aos) * elt_count;
+      const GLuint index = INDEX_BUFSZ * elt_count;
       const GLuint vbuf = VBUF_BUFSZ;
       if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE)
 	  || vbuf > index + elts)
@@ -687,25 +688,34 @@ static char *getFallbackString(GLuint bit)
 
 void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
 {
-   r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   GLuint oldfallback = rmesa->radeon.TclFallback;
-
-   if (mode) {
-      rmesa->radeon.TclFallback |= bit;
-      if (oldfallback == 0) {
-	 if (R200_DEBUG & RADEON_FALLBACKS)
-	    fprintf(stderr, "R200 begin tcl fallback %s\n",
-		    getFallbackString( bit ));
-	 transition_to_swtnl( ctx );
-      }
-   }
-   else {
-      rmesa->radeon.TclFallback &= ~bit;
-      if (oldfallback == bit) {
-	 if (R200_DEBUG & RADEON_FALLBACKS)
-	    fprintf(stderr, "R200 end tcl fallback %s\n",
-		    getFallbackString( bit ));
-	 transition_to_hwtnl( ctx );
-      }
-   }
+	r200ContextPtr rmesa = R200_CONTEXT(ctx); /* sets (mode) or clears (!mode) bit in TclFallback */
+	GLuint oldfallback = rmesa->radeon.TclFallback; /* snapshot taken before the mask is modified */
+
+	if (mode) {
+		if (oldfallback == 0) { /* no fallback bit was set before: switch to swtnl */
+			/* We have to flush before transition */
+			if ( rmesa->radeon.dma.flush )
+				rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
+
+			if (R200_DEBUG & RADEON_FALLBACKS)
+				fprintf(stderr, "R200 begin tcl fallback %s\n",
+						getFallbackString( bit ));
+			rmesa->radeon.TclFallback |= bit; /* mask is updated only after the flush above */
+			transition_to_swtnl( ctx );
+		} else
+			rmesa->radeon.TclFallback |= bit; /* a fallback was already active: record the bit only */
+	} else {
+		if (oldfallback == bit) { /* bit was the only fallback set: switch back to hwtnl */
+			/* We have to flush before transition */
+			if ( rmesa->radeon.dma.flush )
+				rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
+
+			if (R200_DEBUG & RADEON_FALLBACKS)
+				fprintf(stderr, "R200 end tcl fallback %s\n",
+						getFallbackString( bit ));
+			rmesa->radeon.TclFallback &= ~bit; /* mask is updated only after the flush above */
+			transition_to_hwtnl( ctx );
+		} else
+			rmesa->radeon.TclFallback &= ~bit; /* no transition: just drop the bit from the mask */
+	}
 }
diff --git a/r300/r300_cmdbuf.c b/r300/r300_cmdbuf.c
index 6cfa568..788dc2f 100644
--- a/r300/r300_cmdbuf.c
+++ b/r300/r300_cmdbuf.c
@@ -77,12 +77,29 @@ static int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom)
 	cnt = vpu_count(atom->cmd);
 
 	if (r300->radeon.radeonScreen->kernel_mm) {
-		extra = 5;
+		extra = 3;
 	}
 
 	return cnt ? (cnt * 4) + extra : 0;
 }
 
+static int check_vpp(GLcontext *ctx, struct radeon_state_atom *atom)
+{ /* Returns cnt * 4 + extra for the vpp atom, or 0 when cnt is 0. */
+    r300ContextPtr r300 = R300_CONTEXT(ctx);
+    int cnt;
+    int extra = 1; /* initial value is overwritten in both branches below */
+
+    if (r300->radeon.radeonScreen->kernel_mm) {
+        cnt = r300->selected_vp->code.constants.Count * 4; /* taken from the selected vertex program, not from atom->cmd */
+        extra = 3;
+    } else {
+        cnt = vpu_count(atom->cmd); /* count derived from atom->cmd by vpu_count() */
+        extra = 1;
+    }
+
+    return cnt ? (cnt * 4) + extra : 0; /* extra is added only when cnt is non-zero */
+}
+
 void r300_emit_vpu(struct r300_context *r300,
                    uint32_t *data,
                    unsigned len,
@@ -101,15 +118,26 @@ static void emit_vpu_state(GLcontext *ctx, struct radeon_state_atom * atom)
 {
     r300ContextPtr r300 = R300_CONTEXT(ctx);
     drm_r300_cmd_header_t cmd;
-    uint32_t addr, ndw;
+    uint32_t addr;
 
     cmd.u = atom->cmd[0];
     addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo;
-    ndw = atom->check(ctx, atom);
 
     r300_emit_vpu(r300, &atom->cmd[1], vpu_count(atom->cmd) * 4, addr);
 }
 
+static void emit_vpp_state(GLcontext *ctx, struct radeon_state_atom * atom)
+{ /* Emits the vpp atom via r300_emit_vpu(), with the length taken from the selected vertex program. */
+    r300ContextPtr r300 = R300_CONTEXT(ctx);
+    drm_r300_cmd_header_t cmd;
+    uint32_t addr;
+
+    cmd.u = atom->cmd[0]; /* view cmd[0] through the command-header union */
+    addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo; /* join the header's high and low address fields */
+
+    r300_emit_vpu(r300, &atom->cmd[1], r300->selected_vp->code.constants.Count * 4, addr); /* length is constants.Count * 4, not vpu_count(atom->cmd) * 4 */
+}
+
 void r500_emit_fp(struct r300_context *r300,
                   uint32_t *data,
                   unsigned len,
@@ -784,11 +812,11 @@ void r300InitCmdBuf(r300ContextPtr r300)
 			r300->hw.vpi.emit = emit_vpu_state;
 
 		if (is_r500) {
-			ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
+			ALLOC_STATE(vpp, vpp, R300_VPP_CMDSIZE, 0);
 			r300->hw.vpp.cmd[0] =
 				cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0);
 			if (r300->radeon.radeonScreen->kernel_mm)
-				r300->hw.vpp.emit = emit_vpu_state;
+				r300->hw.vpp.emit = emit_vpp_state;
 
 			ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
 			r300->hw.vps.cmd[0] =
@@ -805,11 +833,11 @@ void r300InitCmdBuf(r300ContextPtr r300)
 					r300->hw.vpucp[i].emit = emit_vpu_state;
 			}
 		} else {
-			ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
+			ALLOC_STATE(vpp, vpp, R300_VPP_CMDSIZE, 0);
 			r300->hw.vpp.cmd[0] =
 				cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0);
 			if (r300->radeon.radeonScreen->kernel_mm)
-				r300->hw.vpp.emit = emit_vpu_state;
+				r300->hw.vpp.emit = emit_vpp_state;
 
 			ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
 			r300->hw.vps.cmd[0] =
diff --git a/r300/r300_fragprog_common.c b/r300/r300_fragprog_common.c
index 61ea5e4..0646da4 100644
--- a/r300/r300_fragprog_common.c
+++ b/r300/r300_fragprog_common.c
@@ -256,6 +256,19 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog
 
 	fp->InputsRead = compiler.Base.Program.InputsRead;
 
+	/* Clear the fog/wpos_attr if code accessing these
+	 * attributes has been removed during compilation
+	 */
+	if (fp->fog_attr != FRAG_ATTRIB_MAX) {
+		if (!(fp->InputsRead & (1 << fp->fog_attr)))
+			fp->fog_attr = FRAG_ATTRIB_MAX;
+	}
+
+	if (fp->wpos_attr != FRAG_ATTRIB_MAX) {
+		if (!(fp->InputsRead & (1 << fp->wpos_attr)))
+			fp->wpos_attr = FRAG_ATTRIB_MAX;
+	}
+
 	rc_destroy(&compiler.Base);
 }
 
diff --git a/r300/r300_vertprog.c b/r300/r300_vertprog.c
index 129004f..a1fe378 100644
--- a/r300/r300_vertprog.c
+++ b/r300/r300_vertprog.c
@@ -263,15 +263,25 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
 	rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X);
 
 	if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) {
-		rc_copy_output(&compiler.Base,
-			VERT_RESULT_HPOS,
-			vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0);
+		unsigned int vp_wpos_attr = vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0;
+
+		/* Set empty writemask for instructions writing to vp_wpos_attr
+		 * before moving the wpos attr there.
+		 * Such instructions will be removed by DCE.
+		 */
+		rc_move_output(&compiler.Base, vp_wpos_attr, vp->key.WPosAttr, 0);
+		rc_copy_output(&compiler.Base, VERT_RESULT_HPOS, vp_wpos_attr);
 	}
 
 	if (vp->key.FogAttr != FRAG_ATTRIB_MAX) {
-		rc_move_output(&compiler.Base,
-			VERT_RESULT_FOGC,
-			vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X);
+		unsigned int vp_fog_attr = vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0;
+
+		/* Set empty writemask for instructions writing to vp_fog_attr
+		 * before moving the fog attr there.
+		 * Such instructions will be removed by DCE.
+		 */
+		rc_move_output(&compiler.Base, vp_fog_attr, vp->key.FogAttr, 0);
+		rc_move_output(&compiler.Base, VERT_RESULT_FOGC, vp_fog_attr, WRITEMASK_X);
 	}
 
 	r3xx_compile_vertex_program(&compiler);
@@ -382,7 +392,11 @@ void r300SetupVertexProgram(r300ContextPtr rmesa)
 	R300_STATECHANGE(rmesa, vap_cntl);
 	R300_STATECHANGE(rmesa, vpp);
 	param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
-	bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
+	if (!rmesa->radeon.radeonScreen->kernel_mm && param_count > 255 * 4) {
+		WARN_ONCE("Too many VP params, expect rendering errors\n");
+	}
+	/* Prevent the overflow (vpu.count is u8) */
+	bump_vpu_count(rmesa->hw.vpp.cmd, MIN2(255 * 4, param_count));
 	param_count /= 4;
 
 	r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code));
@@ -395,6 +409,6 @@ void r300SetupVertexProgram(r300ContextPtr rmesa)
 	rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
 				(inst_count << R300_PVS_LAST_INST_SHIFT);
 
-	rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT);
+	rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | ((param_count - 1) << R300_PVS_MAX_CONST_ADDR_SHIFT);
 	rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
 }
diff --git a/r600/r700_state.c b/r600/r700_state.c
index 6f156b5..12eaebb 100644
--- a/r600/r700_state.c
+++ b/r600/r700_state.c
@@ -614,7 +614,7 @@ static GLuint translate_logicop(GLenum logicop)
 	case GL_XOR:
 		return 0x66;
 	case GL_EQUIV:
-		return 0xaa;
+		return 0x99;
 	case GL_AND_REVERSE:
 		return 0x44;
 	case GL_AND_INVERTED:
diff --git a/radeon/radeon_dma.c b/radeon/radeon_dma.c
index 22499bc..6b7690c 100644
--- a/radeon/radeon_dma.c
+++ b/radeon/radeon_dma.c
@@ -184,9 +184,6 @@ void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
 	radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %d\n",
 			__FUNCTION__, size, rmesa->dma.minimum_size);
 
-	if (!is_empty_list(&rmesa->dma.reserved))
-		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);
-
 	if (is_empty_list(&rmesa->dma.free)
 	      || last_elem(&rmesa->dma.free)->bo->size < size) {
 		dma_bo = CALLOC_STRUCT(radeon_dma_bo);
@@ -336,9 +333,6 @@ void radeonReleaseDmaRegions(radeonContextPtr rmesa)
 		legacy_track_pending(rmesa->radeonScreen->bom, 0);
 	}
 
-	if (!is_empty_list(&rmesa->dma.reserved))
-		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);
-
 	/* move waiting bos to free list.
 	   wait list provides gpu time to handle data before reuse */
 	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
@@ -368,6 +362,7 @@ void radeonReleaseDmaRegions(radeonContextPtr rmesa)
 
 	/* move reserved to wait list */
 	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
+		radeon_bo_unmap(dma_bo->bo);
 		/* free objects that are too small to be used because of large request */
 		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
 		   radeon_bo_unref(dma_bo->bo);
-- 
cgit v1.2.3