Import i915 and i965 dri drivers from mesa 7.8.0.HEAD 7.8.1 7.8.0 7.8 master

author: Luc Verhaegen <libv@skynet.be> 2010-04-02 12:46:10 +0200
committer: Luc Verhaegen <libv@skynet.be> 2010-04-02 12:46:10 +0200
commit: e8bf66970f1ee08b214e5b88f8f4b62fc1a73509 (patch)
tree: 84830fb169ba2c393ca250d75341bee9eeb5ae6f
parent: 54a8e9cc3988d908b5b846a752679127cacefd3b (diff)
20 files changed, 228 insertions, 265 deletions
diff --git a/i915/intel_tris.c b/i915/intel_tris.c
index fb191fe..81c4ade 100644
--- a/i915/intel_tris.c
+++ b/i915/intel_tris.c
@@ -251,7 +251,7 @@ void intel_flush_prim(struct intel_context *intel)
       BEGIN_BATCH(5);
       OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
 		I1_LOAD_S(0) | I1_LOAD_S(1) | 1);
-      assert((offset & !S0_VB_OFFSET_MASK) == 0);
+      assert((offset & ~S0_VB_OFFSET_MASK) == 0);
       OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, offset);
       OUT_BATCH((intel->vertex_size << S1_VERTEX_WIDTH_SHIFT) |
 		(intel->vertex_size << S1_VERTEX_PITCH_SHIFT));
@@ -270,7 +270,7 @@ void intel_flush_prim(struct intel_context *intel)
       OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
 		I1_LOAD_S(0) | I1_LOAD_S(2) | 1);
       /* S0 */
-      assert((offset & !S0_VB_OFFSET_MASK_830) == 0);
+      assert((offset & ~S0_VB_OFFSET_MASK_830) == 0);
       OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0,
 		offset | (intel->vertex_size << S0_VB_PITCH_SHIFT_830) |
 		S0_VB_ENABLE_830);
diff --git a/i965/brw_eu_emit.c b/i965/brw_eu_emit.c
index f69d529..82f2fda 100644
--- a/i965/brw_eu_emit.c
+++ b/i965/brw_eu_emit.c
@@ -1290,7 +1290,7 @@ void brw_SAMPLE(struct brw_compile *p,
 		GLuint simd_mode)
 {
    GLboolean need_stall = 0;
-   
+
    if (writemask == 0) {
       /*printf("%s: zero writemask??\n", __FUNCTION__); */
       return;
@@ -1327,8 +1327,14 @@ void brw_SAMPLE(struct brw_compile *p,
          /* printf("need stall %x %x\n", newmask , writemask); */
       }
       else {
+	 GLboolean dispatch_16 = GL_FALSE;
+
 	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
-	 
+
+	 guess_execution_size(p->current, dest);
+	 if (p->current->header.execution_size == BRW_EXECUTE_16)
+	    dispatch_16 = GL_TRUE;
+
 	 newmask = ~newmask & WRITEMASK_XYZW;
 
 	 brw_push_insn_state(p);
@@ -1343,7 +1349,13 @@ void brw_SAMPLE(struct brw_compile *p,
 
   	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); 
 	 dest = offset(dest, dst_offset);
-	 response_length = len * 2;
+
+	 /* For 16-wide dispatch, masked channels are skipped in the
+	  * response.  For 8-wide, masked channels still take up slots,
+	  * and are just not written to.
+	  */
+	 if (dispatch_16)
+	    response_length = len * 2;
       }
    }
 
diff --git a/i965/brw_program.c b/i965/brw_program.c
index c78f7b3..1fd957b 100644
--- a/i965/brw_program.c
+++ b/i965/brw_program.c
@@ -95,9 +95,17 @@ static void brwDeleteProgram( GLcontext *ctx,
 			      struct gl_program *prog )
 {
    if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
-      struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
-      struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog);
-      dri_bo_unreference(brw_fprog->const_buffer);
+      struct gl_fragment_program *fp = (struct gl_fragment_program *) prog;
+      struct brw_fragment_program *brw_fp = brw_fragment_program(fp);
+
+      dri_bo_unreference(brw_fp->const_buffer);
+   }
+
+   if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
+      struct gl_vertex_program *vp = (struct gl_vertex_program *) prog;
+      struct brw_vertex_program *brw_vp = brw_vertex_program(vp);
+
+      dri_bo_unreference(brw_vp->const_buffer);
    }
 
    _mesa_delete_program( ctx, prog );
diff --git a/i965/brw_sf.c b/i965/brw_sf.c
index 8e6839b..57d1c29 100644
--- a/i965/brw_sf.c
+++ b/i965/brw_sf.c
@@ -46,7 +46,6 @@
 static void compile_sf_prog( struct brw_context *brw,
 			     struct brw_sf_prog_key *key )
 {
-   GLcontext *ctx = &brw->intel.ctx;
    struct brw_sf_compile c;
    const GLuint *program;
    GLuint program_size;
@@ -69,20 +68,14 @@ static void compile_sf_prog( struct brw_context *brw,
 
    /* Construct map from attribute number to position in the vertex.
     */
-   for (i = idx = 0; i < VERT_RESULT_MAX; i++) 
+   for (i = idx = 0; i < VERT_RESULT_MAX; i++) {
       if (c.key.attrs & BITFIELD64_BIT(i)) {
 	 c.attr_to_idx[i] = idx;
 	 c.idx_to_attr[idx] = i;
-	 if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) {
-            c.point_attrs[i].CoordReplace = 
-               ctx->Point.CoordReplace[i - VERT_RESULT_TEX0];
-	 }
-         else {
-            c.point_attrs[i].CoordReplace = GL_FALSE;
-         }
 	 idx++;
       }
-   
+   }
+
    /* Which primitive?  Or all three? 
     */
    switch (key->primitive) {
@@ -162,6 +155,14 @@ static void upload_sf_prog(struct brw_context *brw)
    }
 
    key.do_point_sprite = ctx->Point.PointSprite;
+   if (key.do_point_sprite) {
+      int i;
+
+      for (i = 0; i < 8; i++) {
+	 if (ctx->Point.CoordReplace[i])
+	    key.point_sprite_coord_replace |= (1 << i);
+      }
+   }
    key.sprite_origin_lower_left = (ctx->Point.SpriteOrigin == GL_LOWER_LEFT);
    /* _NEW_LIGHT */
    key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
diff --git a/i965/brw_sf.h b/i965/brw_sf.h
index 0ba731f..a0680a5 100644
--- a/i965/brw_sf.h
+++ b/i965/brw_sf.h
@@ -46,6 +46,7 @@
 
 struct brw_sf_prog_key {
    GLbitfield64 attrs;
+   uint8_t point_sprite_coord_replace;
    GLuint primitive:2;
    GLuint do_twoside_color:1;
    GLuint do_flat_shading:1;
@@ -56,10 +57,6 @@ struct brw_sf_prog_key {
    GLuint pad:24;
 };
 
-struct brw_sf_point_tex {
-   GLboolean CoordReplace;	
-};
-
 struct brw_sf_compile {
    struct brw_compile func;
    struct brw_sf_prog_key key;
@@ -100,7 +97,6 @@ struct brw_sf_compile {
 
    GLubyte attr_to_idx[VERT_RESULT_MAX];   
    GLubyte idx_to_attr[VERT_RESULT_MAX];   
-   struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX];
 };
 
  
diff --git a/i965/brw_sf_emit.c b/i965/brw_sf_emit.c
index bb08055..56f7c98 100644
--- a/i965/brw_sf_emit.c
+++ b/i965/brw_sf_emit.c
@@ -354,6 +354,33 @@ static GLboolean calculate_masks( struct brw_sf_compile *c,
    return is_last_attr;
 }
 
+/* Calculates the predicate control for which channels of a reg
+ * (containing 2 attrs) to do point sprite coordinate replacement on.
+ */
+static uint16_t
+calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg)
+{
+   int attr1, attr2;
+   uint16_t pc = 0;
+
+   attr1 = c->idx_to_attr[reg * 2];
+   if (attr1 >= VERT_RESULT_TEX0 && attr1 <= VERT_RESULT_TEX7) {
+      if (c->key.point_sprite_coord_replace & (1 << (attr1 - VERT_RESULT_TEX0)))
+	 pc |= 0x0f;
+   }
+
+   if (reg * 2 + 1 < c->nr_setup_attrs) {
+       attr2 = c->idx_to_attr[reg * 2 + 1];
+       if (attr2 >= VERT_RESULT_TEX0 && attr2 <= VERT_RESULT_TEX7) {
+	  if (c->key.point_sprite_coord_replace & (1 << (attr2 -
+							 VERT_RESULT_TEX0)))
+	     pc |= 0xf0;
+       }
+   }
+
+   return pc;
+}
+
 
 
 void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate)
@@ -529,22 +556,27 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
    copy_z_inv_w(c);
    for (i = 0; i < c->nr_setup_regs; i++)
    {
-      struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]];
       struct brw_reg a0 = offset(c->vert[0], i);
-      GLushort pc, pc_persp, pc_linear;
+      GLushort pc, pc_persp, pc_linear, pc_coord_replace;
       GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
-            
-      if (pc_persp)
-      {				
-	  if (!tex->CoordReplace) {
-	      brw_set_predicate_control_flag_value(p, pc_persp);
-	      brw_MUL(p, a0, a0, c->inv_w[0]);
-	  }
+
+      pc_coord_replace = calculate_point_sprite_mask(c, i);
+      pc_persp &= ~pc_coord_replace;
+
+      if (pc_persp) {
+	 brw_set_predicate_control_flag_value(p, pc_persp);
+	 brw_MUL(p, a0, a0, c->inv_w[0]);
       }
 
-      if (tex->CoordReplace) {
-	  /* Caculate 1.0/PointWidth */
-	  brw_math(&c->func,
+      /* Point sprite coordinate replacement: A texcoord with this
+       * enabled gets replaced with the value (x, y, 0, 1) where x and
+       * y vary from 0 to 1 across the horizontal and vertical of the
+       * point.
+       */
+      if (pc_coord_replace) {
+	 brw_set_predicate_control_flag_value(p, pc_coord_replace);
+	 /* Caculate 1.0/PointWidth */
+	 brw_math(&c->func,
 		  c->tmp,
 		  BRW_MATH_FUNCTION_INV,
 		  BRW_MATH_SATURATE_NONE,
@@ -553,50 +585,51 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
 		  BRW_MATH_DATA_SCALAR,
 		  BRW_MATH_PRECISION_FULL);
 
-	  if (c->key.sprite_origin_lower_left) {
-	   	brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
-		brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
-	  	brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0]));
-		brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
-	  } else {
-	   	brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
-		brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
-	  	brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]);
-		brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
-	  }
-      } else {
-	  brw_MOV(p, c->m1Cx, brw_imm_ud(0));
-	  brw_MOV(p, c->m2Cy, brw_imm_ud(0));
-      }
+	 brw_set_access_mode(p, BRW_ALIGN_16);
 
-      {
-	 brw_set_predicate_control_flag_value(p, pc); 
-	 if (tex->CoordReplace) {
-	     if (c->key.sprite_origin_lower_left) {
-		 brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0));
-		 brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0));
-	     }
-	     else
-		 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
+	 /* dA/dx, dA/dy */
+	 brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
+	 brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
+	 brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
+	 if (c->key.sprite_origin_lower_left) {
+	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
 	 } else {
-	 	brw_MOV(p, c->m3C0, a0); /* constant value */
+	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
 	 }
 
-	 /* Copy m0..m3 to URB. 
-	  */
-	 brw_urb_WRITE(p, 
-		       brw_null_reg(),
-		       0,
-		       brw_vec8_grf(0, 0),
-		       0, 	/* allocate */
-		       1,	/* used */
-		       4, 	/* msg len */
-		       0,	/* response len */
-		       last, 	/* eot */
-		       last, 	/* writes complete */
-		       i*4,	/* urb destination offset */
-		       BRW_URB_SWIZZLE_TRANSPOSE);
+	 /* attribute constant offset */
+	 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
+	 if (c->key.sprite_origin_lower_left) {
+	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
+	 } else {
+	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
+	 }
+
+	 brw_set_access_mode(p, BRW_ALIGN_1);
       }
+
+      if (pc & ~pc_coord_replace) {
+	 brw_set_predicate_control_flag_value(p, pc & ~pc_coord_replace);
+	 brw_MOV(p, c->m1Cx, brw_imm_ud(0));
+	 brw_MOV(p, c->m2Cy, brw_imm_ud(0));
+	 brw_MOV(p, c->m3C0, a0); /* constant value */
+      }
+
+
+      brw_set_predicate_control_flag_value(p, pc);
+      /* Copy m0..m3 to URB. */
+      brw_urb_WRITE(p,
+		    brw_null_reg(),
+		    0,
+		    brw_vec8_grf(0, 0),
+		    0, 	/* allocate */
+		    1,	/* used */
+		    4, 	/* msg len */
+		    0,	/* response len */
+		    last, 	/* eot */
+		    last, 	/* writes complete */
+		    i*4,	/* urb destination offset */
+		    BRW_URB_SWIZZLE_TRANSPOSE);
    }
 }
 
diff --git a/i965/brw_vs_emit.c b/i965/brw_vs_emit.c
index a7c4b58..a48804a 100644
--- a/i965/brw_vs_emit.c
+++ b/i965/brw_vs_emit.c
@@ -1717,11 +1717,13 @@ void brw_vs_emit(struct brw_vs_compile *c )
             /* patch all the BREAK/CONT instructions from last BEGINLOOP */
             while (inst0 > loop_inst[loop_depth]) {
                inst0--;
-               if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+               if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+		   inst0->bits3.if_else.jump_count == 0) {
                   inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
                   inst0->bits3.if_else.pop_count = 0;
                }
-               else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+               else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+			inst0->bits3.if_else.jump_count == 0) {
                   inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
                   inst0->bits3.if_else.pop_count = 0;
                }
diff --git a/i965/brw_wm.h b/i965/brw_wm.h
index 88d84ee..47b764d 100644
--- a/i965/brw_wm.h
+++ b/i965/brw_wm.h
@@ -328,6 +328,12 @@ void emit_cinterp(struct brw_compile *p,
 		  const struct brw_reg *dst,
 		  GLuint mask,
 		  const struct brw_reg *arg0);
+void emit_cmp(struct brw_compile *p,
+	      const struct brw_reg *dst,
+	      GLuint mask,
+	      const struct brw_reg *arg0,
+	      const struct brw_reg *arg1,
+	      const struct brw_reg *arg2);
 void emit_ddxy(struct brw_compile *p,
 	       const struct brw_reg *dst,
 	       GLuint mask,
diff --git a/i965/brw_wm_emit.c b/i965/brw_wm_emit.c
index 9315bca..c7d87b9 100644
--- a/i965/brw_wm_emit.c
+++ b/i965/brw_wm_emit.c
@@ -566,12 +566,12 @@ static void emit_sne( struct brw_compile *p,
    emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
 }
 
-static void emit_cmp( struct brw_compile *p, 
-		      const struct brw_reg *dst,
-		      GLuint mask,
-		      const struct brw_reg *arg0,
-		      const struct brw_reg *arg1,
-		      const struct brw_reg *arg2 )
+void emit_cmp(struct brw_compile *p,
+	      const struct brw_reg *dst,
+	      GLuint mask,
+	      const struct brw_reg *arg0,
+	      const struct brw_reg *arg1,
+	      const struct brw_reg *arg2)
 {
    GLuint i;
 
diff --git a/i965/brw_wm_glsl.c b/i965/brw_wm_glsl.c
index 562608e..315b030 100644
--- a/i965/brw_wm_glsl.c
+++ b/i965/brw_wm_glsl.c
@@ -614,112 +614,6 @@ static void invoke_subroutine( struct brw_wm_compile *c,
     }
 }
 
-/* Workaround for using brw_wm_emit.c's emit functions, which expect
- * destination regs to be uniquely written.  Moves arguments out to
- * temporaries as necessary for instructions which use their destination as
- * a temporary.
- */
-static void
-unalias3(struct brw_wm_compile *c,
-	 void (*func)(struct brw_compile *c,
-		      const struct brw_reg *dst,
-		      GLuint mask,
-		      const struct brw_reg *arg0,
-		      const struct brw_reg *arg1,
-		      const struct brw_reg *arg2),
-	 const struct brw_reg *dst,
-	 GLuint mask,
-	 const struct brw_reg *arg0,
-	 const struct brw_reg *arg1,
-	 const struct brw_reg *arg2)
-{
-    struct brw_compile *p = &c->func;
-    struct brw_reg tmp_arg0[4], tmp_arg1[4], tmp_arg2[4];
-    int i, j;
-    int mark = mark_tmps(c);
-
-    for (j = 0; j < 4; j++) {
-	tmp_arg0[j] = arg0[j];
-	tmp_arg1[j] = arg1[j];
-	tmp_arg2[j] = arg2[j];
-    }
-
-    for (i = 0; i < 4; i++) {
-	if (mask & (1<<i)) {
-	    for (j = 0; j < 4; j++) {
-		if (arg0[j].file == dst[i].file &&
-		    dst[i].nr == arg0[j].nr) {
-		    tmp_arg0[j] = alloc_tmp(c);
-		    brw_MOV(p, tmp_arg0[j], arg0[j]);
-		}
-		if (arg1[j].file == dst[i].file &&
-		    dst[i].nr == arg1[j].nr) {
-		    tmp_arg1[j] = alloc_tmp(c);
-		    brw_MOV(p, tmp_arg1[j], arg1[j]);
-		}
-		if (arg2[j].file == dst[i].file &&
-		    dst[i].nr == arg2[j].nr) {
-		    tmp_arg2[j] = alloc_tmp(c);
-		    brw_MOV(p, tmp_arg2[j], arg2[j]);
-		}
-	    }
-	}
-    }
-
-    func(p, dst, mask, tmp_arg0, tmp_arg1, tmp_arg2);
-
-    release_tmps(c, mark);
-}
-
-/* Workaround for using brw_wm_emit.c's emit functions, which expect
- * destination regs to be uniquely written.  Moves arguments out to
- * temporaries as necessary for instructions which use their destination as
- * a temporary.
- */
-static void
-unalias2(struct brw_wm_compile *c,
-	 void (*func)(struct brw_compile *c,
-		      const struct brw_reg *dst,
-		      GLuint mask,
-		      const struct brw_reg *arg0,
-		      const struct brw_reg *arg1),
-	 const struct brw_reg *dst,
-	 GLuint mask,
-	 const struct brw_reg *arg0,
-	 const struct brw_reg *arg1)
-{
-    struct brw_compile *p = &c->func;
-    struct brw_reg tmp_arg0[4], tmp_arg1[4];
-    int i, j;
-    int mark = mark_tmps(c);
-
-    for (j = 0; j < 4; j++) {
-	tmp_arg0[j] = arg0[j];
-	tmp_arg1[j] = arg1[j];
-    }
-
-    for (i = 0; i < 4; i++) {
-	if (mask & (1<<i)) {
-	    for (j = 0; j < 4; j++) {
-		if (arg0[j].file == dst[i].file &&
-		    dst[i].nr == arg0[j].nr) {
-		    tmp_arg0[j] = alloc_tmp(c);
-		    brw_MOV(p, tmp_arg0[j], arg0[j]);
-		}
-		if (arg1[j].file == dst[i].file &&
-		    dst[i].nr == arg1[j].nr) {
-		    tmp_arg1[j] = alloc_tmp(c);
-		    brw_MOV(p, tmp_arg1[j], arg1[j]);
-		}
-	    }
-	}
-    }
-
-    func(p, dst, mask, tmp_arg0, tmp_arg1);
-
-    release_tmps(c, mark);
-}
-
 static void emit_arl(struct brw_wm_compile *c,
                      const struct prog_instruction *inst)
 {
@@ -1813,14 +1707,29 @@ static void
 get_argument_regs(struct brw_wm_compile *c,
 		  const struct prog_instruction *inst,
 		  int index,
+		  struct brw_reg *dst,
 		  struct brw_reg *regs,
 		  int mask)
 {
-    int i;
+    struct brw_compile *p = &c->func;
+    int i, j;
 
     for (i = 0; i < 4; i++) {
-	if (mask & (1 << i))
+	if (mask & (1 << i)) {
 	    regs[i] = get_src_reg(c, inst, index, i);
+
+	    /* Unalias destination registers from our sources. */
+	    if (regs[i].file == BRW_GENERAL_REGISTER_FILE) {
+	       for (j = 0; j < 4; j++) {
+		   if (memcmp(&regs[i], &dst[j], sizeof(regs[0])) == 0) {
+		       struct brw_reg tmp = alloc_tmp(c);
+		       brw_MOV(p, tmp, regs[i]);
+		       regs[i] = tmp;
+		       break;
+		   }
+	       }
+	    }
+	}
     }
 }
 
@@ -1845,6 +1754,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 	int dst_flags;
 	struct brw_reg args[3][4], dst[4];
 	int j;
+	int mark = mark_tmps( c );
 
         c->cur_inst = i;
 
@@ -1866,7 +1776,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 	   }
 	}
 	for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++)
-	    get_argument_regs(c, inst, j, args[j], WRITEMASK_XYZW);
+	    get_argument_regs(c, inst, j, dst, args[j], WRITEMASK_XYZW);
 
 	dst_flags = inst->DstReg.WriteMask;
 	if (inst->SaturateMode == SATURATE_ZERO_ONE)
@@ -1920,8 +1830,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 		emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
 		break;
 	    case OPCODE_LRP:
-		unalias3(c, emit_lrp,
-			 dst, dst_flags, args[0], args[1], args[2]);
+		emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
 		break;
 	    case OPCODE_TRUNC:
 		emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
@@ -1960,11 +1869,14 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 	    case OPCODE_LG2:
 		emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
 		break;
+	    case OPCODE_CMP:
+		emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
+		break;
 	    case OPCODE_MIN:	
-		unalias2(c, emit_min, dst, dst_flags, args[0], args[1]);
+		emit_min(p, dst, dst_flags, args[0], args[1]);
 		break;
 	    case OPCODE_MAX:	
-		unalias2(c, emit_max, dst, dst_flags, args[0], args[1]);
+		emit_max(p, dst, dst_flags, args[0], args[1]);
 		break;
 	    case OPCODE_DDX:
 	    case OPCODE_DDY:
@@ -2103,11 +2015,13 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
                   /* patch all the BREAK/CONT instructions from last BGNLOOP */
                   while (inst0 > loop_inst[loop_depth]) {
                      inst0--;
-                     if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+                     if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+			 inst0->bits3.if_else.jump_count == 0) {
 			inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
 			inst0->bits3.if_else.pop_count = 0;
                      }
-                     else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+                     else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+			      inst0->bits3.if_else.jump_count == 0) {
                         inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
                         inst0->bits3.if_else.pop_count = 0;
                      }
@@ -2119,6 +2033,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 			inst->Opcode);
 	}
 
+	/* Release temporaries containing any unaliased source regs. */
+	release_tmps( c, mark );
+
 	if (inst->CondUpdate)
 	    brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
 	else
diff --git a/shared/intel_blit.c b/shared/intel_blit.c
index f2769aa..4ad42a7 100644
--- a/shared/intel_blit.c
+++ b/shared/intel_blit.c
@@ -119,8 +119,6 @@ intelEmitCopyBlit(struct intel_context *intel,
            break;
    } while (pass < 2);
 
-   intel_prepare_render(intel);
-
    if (pass >= 2) {
       drm_intel_gem_bo_map_gtt(dst_buffer);
       drm_intel_gem_bo_map_gtt(src_buffer);
diff --git a/shared/intel_buffers.c b/shared/intel_buffers.c
index b106930..0480770 100644
--- a/shared/intel_buffers.c
+++ b/shared/intel_buffers.c
@@ -226,7 +226,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
        * only changes with _NEW_STENCIL (which seems sensible).  So flag it
        * here since this is the _NEW_BUFFERS path.
        */
-      ctx->NewState |= (_NEW_DEPTH | _NEW_STENCIL);
+      intel->NewGLState |= (_NEW_DEPTH | _NEW_STENCIL);
    }
 
    intel->vtbl.set_draw_region(intel, colorRegions, depthRegion, 
@@ -236,7 +236,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
 #ifdef I915
    intelCalcViewport(ctx);
 #else
-   ctx->NewState |= _NEW_VIEWPORT;
+   intel->NewGLState |= _NEW_VIEWPORT;
 #endif
    /* Set state we know depends on drawable parameters:
     */
@@ -256,7 +256,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
    if (ctx->Driver.FrontFace)
       ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace);
    else
-      ctx->NewState |= _NEW_POLYGON;
+      intel->NewGLState |= _NEW_POLYGON;
 }
 
 
diff --git a/shared/intel_context.c b/shared/intel_context.c
index d6a1ba6..0a7dcb8 100644
--- a/shared/intel_context.c
+++ b/shared/intel_context.c
@@ -63,7 +63,7 @@ int INTEL_DEBUG = (0);
 #endif
 
 
-#define DRIVER_DATE                     "20091221 DEVELOPMENT"
+#define DRIVER_DATE                     "20100328 2010Q1"
 #define DRIVER_DATE_GEM                 "GEM " DRIVER_DATE
 
 
@@ -880,12 +880,12 @@ intelMakeCurrent(__DRIcontext * driContextPriv,
       struct gl_framebuffer *fb = driDrawPriv->driverPrivate;
       struct gl_framebuffer *readFb = driReadPriv->driverPrivate;
 
-      _mesa_make_current(&intel->ctx, fb, readFb);
       intel->driReadDrawable = driReadPriv;
       intel->driDrawable = driDrawPriv;
       driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
       driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
       intel_prepare_render(intel);
+      _mesa_make_current(&intel->ctx, fb, readFb);
    }
    else {
       _mesa_make_current(NULL, NULL, NULL);
diff --git a/shared/intel_fbo.c b/shared/intel_fbo.c
index a429f8d..ba3bb8f 100644
--- a/shared/intel_fbo.c
+++ b/shared/intel_fbo.c
@@ -104,7 +104,6 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
    struct intel_context *intel = intel_context(ctx);
    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
    int cpp;
-   GLuint pitch;
 
    ASSERT(rb->Name != 0);
 
@@ -176,15 +175,11 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
 
    /* allocate new memory region/renderbuffer */
 
-   /* Choose a pitch to match hardware requirements:
-    */
-   pitch = ((cpp * width + 63) & ~63) / cpp;
-
    /* alloc hardware renderbuffer */
-   DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width, height, pitch);
+   DBG("Allocating %d x %d Intel RBO\n", width, height);
 
    irb->region = intel_region_alloc(intel, I915_TILING_NONE, cpp,
-				    width, height, pitch, GL_TRUE);
+				    width, height, GL_TRUE);
    if (!irb->region)
       return GL_FALSE;       /* out of memory? */
 
diff --git a/shared/intel_mipmap_tree.c b/shared/intel_mipmap_tree.c
index 4f14946..5b6b4b2 100644
--- a/shared/intel_mipmap_tree.c
+++ b/shared/intel_mipmap_tree.c
@@ -146,8 +146,8 @@ intel_miptree_create(struct intel_context *intel,
 				   mt->cpp,
 				   mt->pitch,
 				   mt->total_height,
-				   mt->pitch,
 				   expect_accelerated_upload);
+   mt->pitch = mt->region->pitch;
 
    if (!mt->region) {
        free(mt);
@@ -177,20 +177,11 @@ intel_miptree_create_for_region(struct intel_context *intel,
 				      I915_TILING_NONE);
    if (!mt)
       return mt;
-#if 0
-   if (mt->pitch != region->pitch) {
-      fprintf(stderr,
-	      "region pitch (%d) doesn't match mipmap tree pitch (%d)\n",
-	      region->pitch, mt->pitch);
-      free(mt);
-      return NULL;
-   }
-#else
+
    /* The mipmap tree pitch is aligned to 64 bytes to make sure render
     * to texture works, but we don't need that for texturing from a
     * pixmap.  Just override it here. */
    mt->pitch = region->pitch;
-#endif
 
    intel_region_reference(&mt->region, region);
 
@@ -520,12 +511,15 @@ intel_miptree_image_copy(struct intel_context *intel,
        width = ALIGN(width, align_w);
    }
 
+   intel_prepare_render(intel);
+
    for (i = 0; i < depth; i++) {
       intel_miptree_get_image_offset(src, level, face, i, &src_x, &src_y);
       intel_miptree_get_image_offset(dst, level, face, i, &dst_x, &dst_y);
       success = intel_region_copy(intel,
 				  dst->region, 0, dst_x, dst_y,
-				  src->region, 0, src_x, src_y, width, height,
+				  src->region, 0, src_x, src_y,
+				  width, height, GL_FALSE,
 				  GL_COPY);
       if (!success) {
 	 GLubyte *src_ptr, *dst_ptr;
diff --git a/shared/intel_pixel_copy.c b/shared/intel_pixel_copy.c
index f4f3fd6..56faf07 100644
--- a/shared/intel_pixel_copy.c
+++ b/shared/intel_pixel_copy.c
@@ -108,14 +108,15 @@ do_blit_copypixels(GLcontext * ctx,
                    GLint dstx, GLint dsty, GLenum type)
 {
    struct intel_context *intel = intel_context(ctx);
-   struct intel_region *dst = intel_drawbuf_region(intel);
-   struct intel_region *src = copypix_src_region(intel, type);
+   struct intel_region *dst;
+   struct intel_region *src;
    struct gl_framebuffer *fb = ctx->DrawBuffer;
    struct gl_framebuffer *read_fb = ctx->ReadBuffer;
    GLint orig_dstx;
    GLint orig_dsty;
    GLint orig_srcx;
    GLint orig_srcy;
+   GLboolean flip = GL_FALSE;
 
    if (type == GL_DEPTH || type == GL_STENCIL) {
       if (INTEL_DEBUG & DEBUG_FALLBACKS)
@@ -133,15 +134,16 @@ do_blit_copypixels(GLcontext * ctx,
        ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F)
       return GL_FALSE;
 
+   intel_prepare_render(intel);
+
+   dst = intel_drawbuf_region(intel);
+   src = copypix_src_region(intel, type);
+
    if (!src || !dst)
       return GL_FALSE;
 
    intelFlush(&intel->ctx);
 
-   intel_prepare_render(intel);
-
-   /* XXX: We fail to handle different inversion between read and draw framebuffer. */
-
    /* Clip to destination buffer. */
    orig_dstx = dstx;
    orig_dsty = dsty;
@@ -164,23 +166,23 @@ do_blit_copypixels(GLcontext * ctx,
    dstx += srcx - orig_srcx;
    dsty += srcy - orig_srcy;
 
-   /* Convert from GL to hardware coordinates: */
+   /* Flip dest Y if it's a window system framebuffer. */
    if (fb->Name == 0) {
-      /* copypixels to a system framebuffer */
+      /* copypixels to a window system framebuffer */
       dsty = fb->Height - dsty - height;
-   } else {
-      /* copypixels to a user framebuffer object */
-      dsty = dsty;
+      flip = !flip;
    }
 
-   /* Flip source Y if it's a system framebuffer. */
-   if (read_fb->Name == 0)
-      srcy = fb->Height - srcy - height;
+   /* Flip source Y if it's a window system framebuffer. */
+   if (read_fb->Name == 0) {
+      srcy = read_fb->Height - srcy - height;
+      flip = !flip;
+   }
 
    if (!intel_region_copy(intel,
 			  dst, 0, dstx, dsty,
 			  src, 0, srcx, srcy,
-			  width, height,
+			  width, height, flip,
 			  ctx->Color.ColorLogicOpEnabled ?
 			  ctx->Color.LogicOp : GL_COPY)) {
       DBG("%s: blit failure\n", __FUNCTION__);
diff --git a/shared/intel_reg.h b/shared/intel_reg.h
index d19f1ba..36d8180 100644
--- a/shared/intel_reg.h
+++ b/shared/intel_reg.h
@@ -70,8 +70,10 @@
 
 /** @{
  * 915 definitions
+ *
+ * 915 documents say that bits 31:28 and 1 are "undefined, must be zero."
  */
-#define S0_VB_OFFSET_MASK		0xffffffc0
+#define S0_VB_OFFSET_MASK		0x0ffffffc
 #define S0_AUTO_CACHE_INV_DISABLE	(1<<0)
 /** @} */
 
diff --git a/shared/intel_regions.c b/shared/intel_regions.c
index f042bcb..1172de9 100644
--- a/shared/intel_regions.c
+++ b/shared/intel_regions.c
@@ -164,7 +164,6 @@ intel_region_alloc_internal(struct intel_context *intel,
 
    /* Default to no tiling */
    region->tiling = I915_TILING_NONE;
-   region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE;
 
    _DBG("%s <-- %p\n", __FUNCTION__, region);
    return region;
@@ -173,7 +172,7 @@ intel_region_alloc_internal(struct intel_context *intel,
 struct intel_region *
 intel_region_alloc(struct intel_context *intel,
 		   uint32_t tiling,
-                   GLuint cpp, GLuint width, GLuint height, GLuint pitch,
+                   GLuint cpp, GLuint width, GLuint height,
 		   GLboolean expect_accelerated_upload)
 {
    dri_bo *buffer;
@@ -187,19 +186,10 @@ intel_region_alloc(struct intel_context *intel,
    buffer = drm_intel_bo_alloc_tiled(intel->bufmgr, "region",
 				     width, height, cpp,
 				     &tiling, &aligned_pitch, flags);
-   /* We've already chosen a pitch as part of miptree layout.  It had
-    * better be the same.
-    */
-   assert(aligned_pitch == pitch * cpp);
 
    region = intel_region_alloc_internal(intel, cpp, width, height,
-					pitch, buffer);
-
-   if (tiling != I915_TILING_NONE) {
-      assert(((pitch * cpp) & 127) == 0);
-      drm_intel_bo_set_tiling(buffer, &tiling, pitch * cpp);
-      drm_intel_bo_get_tiling(buffer, &region->tiling, &region->bit_6_swizzle);
-   }
+					aligned_pitch / cpp, buffer);
+   region->tiling = tiling;
 
    return region;
 }
@@ -213,6 +203,7 @@ intel_region_alloc_for_handle(struct intel_context *intel,
    struct intel_region *region, *dummy;
    dri_bo *buffer;
    int ret;
+   uint32_t bit_6_swizzle;
 
    region = _mesa_HashLookup(intel->intelScreen->named_regions, handle);
    if (region != NULL) {
@@ -236,7 +227,7 @@ intel_region_alloc_for_handle(struct intel_context *intel,
       return region;
 
    ret = dri_bo_get_tiling(region->buffer, &region->tiling,
-			   &region->bit_6_swizzle);
+			   &bit_6_swizzle);
    if (ret != 0) {
       fprintf(stderr, "Couldn't get tiling of buffer %d (%s): %s\n",
 	      handle, name, strerror(-ret));
@@ -316,7 +307,7 @@ _mesa_copy_rect(GLubyte * dst,
    dst += dst_x * cpp;
    src += src_x * cpp;
    dst += dst_y * dst_pitch;
-   src += src_y * dst_pitch;
+   src += src_y * src_pitch;
    width *= cpp;
 
    if (width == dst_pitch && width == src_pitch)
@@ -380,8 +371,11 @@ intel_region_copy(struct intel_context *intel,
                   struct intel_region *src,
                   GLuint src_offset,
                   GLuint srcx, GLuint srcy, GLuint width, GLuint height,
+		  GLboolean flip,
 		  GLenum logicop)
 {
+   uint32_t src_pitch = src->pitch;
+
    _DBG("%s\n", __FUNCTION__);
 
    if (intel == NULL)
@@ -397,9 +391,12 @@ intel_region_copy(struct intel_context *intel,
 
    assert(src->cpp == dst->cpp);
 
+   if (flip)
+      src_pitch = -src_pitch;
+
    return intelEmitCopyBlit(intel,
 			    dst->cpp,
-			    src->pitch, src->buffer, src_offset, src->tiling,
+			    src_pitch, src->buffer, src_offset, src->tiling,
 			    dst->pitch, dst->buffer, dst_offset, dst->tiling,
 			    srcx, srcy, dstx, dsty, width, height,
 			    logicop);
diff --git a/shared/intel_regions.h b/shared/intel_regions.h
index 7ee6a98..2459c9a 100644
--- a/shared/intel_regions.h
+++ b/shared/intel_regions.h
@@ -65,7 +65,6 @@ struct intel_region
    GLuint draw_x, draw_y; /**< Offset of drawing within the region */
 
    uint32_t tiling; /**< Which tiling mode the region is in */
-   uint32_t bit_6_swizzle; /**< GEM flag for address swizzling requirement */
    struct intel_buffer_object *pbo;     /* zero-copy uploads */
 
    uint32_t name; /**< Global name for the bo */
@@ -79,7 +78,7 @@ struct intel_region
 struct intel_region *intel_region_alloc(struct intel_context *intel,
                                         uint32_t tiling,
 					GLuint cpp, GLuint width,
-                                        GLuint height, GLuint pitch,
+                                        GLuint height,
 					GLboolean expect_accelerated_upload);
 
 struct intel_region *
@@ -122,6 +121,7 @@ intel_region_copy(struct intel_context *intel,
 		  struct intel_region *src,
 		  GLuint src_offset,
 		  GLuint srcx, GLuint srcy, GLuint width, GLuint height,
+		  GLboolean flip,
 		  GLenum logicop);
 
 /* Helpers for zerocopy uploads, particularly texture image uploads:
diff --git a/shared/intel_span.c b/shared/intel_span.c
index fb5c01b..377f3a8 100644
--- a/shared/intel_span.c
+++ b/shared/intel_span.c
@@ -48,11 +48,11 @@ intel_set_span_functions(struct intel_context *intel,
 
 #define LOCAL_VARS							\
    struct intel_renderbuffer *irb = intel_renderbuffer(rb);		\
-   const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;			\
-   const GLint yBias = ctx->DrawBuffer->Name ? 0 : irb->Base.Height - 1;\
+   const GLint yScale = rb->Name ? 1 : -1;				\
+   const GLint yBias = rb->Name ? 0 : rb->Height - 1;			\
    int minx = 0, miny = 0;						\
-   int maxx = ctx->DrawBuffer->Width;					\
-   int maxy = ctx->DrawBuffer->Height;					\
+   int maxx = rb->Width;						\
+   int maxy = rb->Height;						\
    int pitch = irb->region->pitch * irb->region->cpp;			\
    void *buf = irb->region->buffer->virtual;				\
    GLuint p;								\
@@ -108,11 +108,11 @@ intel_set_span_functions(struct intel_context *intel,
 
 #define LOCAL_DEPTH_VARS						\
    struct intel_renderbuffer *irb = intel_renderbuffer(rb);		\
-   const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;			\
-   const GLint yBias = ctx->DrawBuffer->Name ? 0 : irb->Base.Height - 1;\
+   const GLint yScale = rb->Name ? 1 : -1;				\
+   const GLint yBias = rb->Name ? 0 : rb->Height - 1;			\
    int minx = 0, miny = 0;						\
-   int maxx = ctx->DrawBuffer->Width;					\
-   int maxy = ctx->DrawBuffer->Height;					\
+   int maxx = rb->Width;						\
+   int maxy = rb->Height;						\
    int pitch = irb->region->pitch * irb->region->cpp;			\
    void *buf = irb->region->buffer->virtual;				\
    (void)buf; (void)pitch; /* unused for non-gttmap. */			\
author	Luc Verhaegen <libv@skynet.be>	2010-04-02 12:46:10 +0200
committer	Luc Verhaegen <libv@skynet.be>	2010-04-02 12:46:10 +0200
commit	e8bf66970f1ee08b214e5b88f8f4b62fc1a73509 (patch)
tree	84830fb169ba2c393ca250d75341bee9eeb5ae6f
parent	54a8e9cc3988d908b5b846a752679127cacefd3b (diff)