softpipe: fix depth testing problems

The optimized Z-test functions assumed that the quads in the incoming array are adjacent, but that's not always true. The fragment shader can cull intermediate quads, for example. Now these Z-test functions can cope with non-adjacent quads. A little bit of performance is probably lost, but it's probably not worth worrying about. This fixes broken glBitmap() Z testing, among other things.
author: Brian Paul <brianp@vmware.com> 2010-02-18 16:47:27 -0700
committer: Brian Paul <brianp@vmware.com> 2010-02-18 16:47:27 -0700
commit: d437d905e6924ebc05ec9efe87e1e2c48d75bc13 (patch)
tree: 45fd3acf85281c539f021792bd3a10e1f65353a1
parent: 551c96979e643b409535afe868c42cac0d2285ad (diff)
1 files changed, 53 insertions, 36 deletions
diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
index 5cb17c5ae7..6b93ebbda5 100644
--- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c
+++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
@@ -714,9 +714,14 @@ depth_test_quads_fallback(struct quad_stage *qs,
       qs->next->run(qs->next, quads, nr);
 }
 
-/* XXX: this function assumes setup function actually emits linear
- * spans of quads.  It seems a lot more natural to do (early)
- * depth-testing on spans rather than quads.
+
+/**
+ * Special-case Z testing for 16-bit Zbuffer, PIPE_FUNC_LESS and
+ * Z buffer writes enabled.
+ *
+ * NOTE: there's no guarantee that the quads are sequentially side by
+ * side.  The fragment shader may have culled some quads, etc.  Sliver
+ * triangles may generate non-sequential quads.
  */
 static void
 depth_interp_z16_less_write(struct quad_stage *qs, 
@@ -733,25 +738,33 @@ depth_interp_z16_less_write(struct quad_stage *qs,
    const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy;
    struct softpipe_cached_tile *tile;
    ushort (*depth16)[TILE_SIZE];
-   ushort idepth[4], depth_step;
+   ushort init_idepth[4], idepth[4], depth_step;
    const float scale = 65535.0;
 
-   idepth[0] = (ushort)((z0) * scale);
-   idepth[1] = (ushort)((z0 + dzdx) * scale);
-   idepth[2] = (ushort)((z0 + dzdy) * scale);
-   idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale);
+   /* compute scaled depth of the four pixels in first quad */
+   init_idepth[0] = (ushort)((z0) * scale);
+   init_idepth[1] = (ushort)((z0 + dzdx) * scale);
+   init_idepth[2] = (ushort)((z0 + dzdy) * scale);
+   init_idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale);
 
-   depth_step = (ushort)(dzdx * 2 * scale);
+   depth_step = (ushort)(dzdx * scale);
 
    tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, ix, iy);
 
-   depth16 = (ushort (*)[TILE_SIZE])
-      &tile->data.depth16[iy % TILE_SIZE][ix % TILE_SIZE];
-
    for (i = 0; i < nr; i++) {
-      unsigned outmask = quads[i]->inout.mask;
+      const unsigned outmask = quads[i]->inout.mask;
+      const int dx = quads[i]->input.x0 - ix;
       unsigned mask = 0;
-      
+
+      /* compute depth for this quad */
+      idepth[0] = init_idepth[0] + dx * depth_step;
+      idepth[1] = init_idepth[1] + dx * depth_step;
+      idepth[2] = init_idepth[2] + dx * depth_step;
+      idepth[3] = init_idepth[3] + dx * depth_step;
+
+      depth16 = (ushort (*)[TILE_SIZE])
+         &tile->data.depth16[iy % TILE_SIZE][(ix + dx)% TILE_SIZE];
+
       if ((outmask & 1) && idepth[0] < depth16[0][0]) {
          depth16[0][0] = idepth[0];
          mask |= (1 << 0);
@@ -772,13 +785,6 @@ depth_interp_z16_less_write(struct quad_stage *qs,
          mask |= (1 << 3);
       }
 
-      idepth[0] += depth_step;
-      idepth[1] += depth_step;
-      idepth[2] += depth_step;
-      idepth[3] += depth_step;
-
-      depth16 = (ushort (*)[TILE_SIZE]) &depth16[0][2];
-
       quads[i]->inout.mask = mask;
       if (quads[i]->inout.mask)
          quads[pass++] = quads[i];
@@ -790,6 +796,14 @@ depth_interp_z16_less_write(struct quad_stage *qs,
 }
 
 
+/**
+ * Special-case Z testing for 16-bit Zbuffer, PIPE_FUNC_LEQUAL and
+ * Z buffer writes enabled.
+ *
+ * NOTE: there's no guarantee that the quads are sequentially side by
+ * side.  The fragment shader may have culled some quads, etc.  Sliver
+ * triangles may generate non-sequential quads.
+ */
 static void
 depth_interp_z16_lequal_write(struct quad_stage *qs, 
                             struct quad_header *quads[],
@@ -805,25 +819,33 @@ depth_interp_z16_lequal_write(struct quad_stage *qs,
    const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy;
    struct softpipe_cached_tile *tile;
    ushort (*depth16)[TILE_SIZE];
-   ushort idepth[4], depth_step;
+   ushort init_idepth[4], idepth[4], depth_step;
    const float scale = 65535.0;
 
-   idepth[0] = (ushort)((z0) * scale);
-   idepth[1] = (ushort)((z0 + dzdx) * scale);
-   idepth[2] = (ushort)((z0 + dzdy) * scale);
-   idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale);
+   /* compute scaled depth of the four pixels in first quad */
+   init_idepth[0] = (ushort)((z0) * scale);
+   init_idepth[1] = (ushort)((z0 + dzdx) * scale);
+   init_idepth[2] = (ushort)((z0 + dzdy) * scale);
+   init_idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale);
 
-   depth_step = (ushort)(dzdx * 2 * scale);
+   depth_step = (ushort)(dzdx * scale);
 
    tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, ix, iy);
 
-   depth16 = (ushort (*)[TILE_SIZE])
-      &tile->data.depth16[iy % TILE_SIZE][ix % TILE_SIZE];
-
    for (i = 0; i < nr; i++) {
-      unsigned outmask = quads[i]->inout.mask;
+      const unsigned outmask = quads[i]->inout.mask;
+      const int dx = quads[i]->input.x0 - ix;
       unsigned mask = 0;
       
+      /* compute depth for this quad */
+      idepth[0] = init_idepth[0] + dx * depth_step;
+      idepth[1] = init_idepth[1] + dx * depth_step;
+      idepth[2] = init_idepth[2] + dx * depth_step;
+      idepth[3] = init_idepth[3] + dx * depth_step;
+
+      depth16 = (ushort (*)[TILE_SIZE])
+         &tile->data.depth16[iy % TILE_SIZE][(ix + dx)% TILE_SIZE];
+
       if ((outmask & 1) && idepth[0] <= depth16[0][0]) {
          depth16[0][0] = idepth[0];
          mask |= (1 << 0);
@@ -844,11 +866,6 @@ depth_interp_z16_lequal_write(struct quad_stage *qs,
          mask |= (1 << 3);
       }
 
-      idepth[0] += depth_step;
-      idepth[1] += depth_step;
-      idepth[2] += depth_step;
-      idepth[3] += depth_step;
-
       depth16 = (ushort (*)[TILE_SIZE]) &depth16[0][2];
 
       quads[i]->inout.mask = mask;
author	Brian Paul <brianp@vmware.com>	2010-02-18 16:47:27 -0700
committer	Brian Paul <brianp@vmware.com>	2010-02-18 16:47:27 -0700
commit	d437d905e6924ebc05ec9efe87e1e2c48d75bc13 (patch)
tree	45fd3acf85281c539f021792bd3a10e1f65353a1
parent	551c96979e643b409535afe868c42cac0d2285ad (diff)