diff options
author | Brian Paul <brianp@vmware.com> | 2010-02-18 16:47:27 -0700 |
---|---|---|
committer | Brian Paul <brianp@vmware.com> | 2010-02-18 16:47:27 -0700 |
commit | d437d905e6924ebc05ec9efe87e1e2c48d75bc13 (patch) | |
tree | 45fd3acf85281c539f021792bd3a10e1f65353a1 | |
parent | 551c96979e643b409535afe868c42cac0d2285ad (diff) |
softpipe: fix depth testing problems
The optimized Z-test functions assumed that the array of incoming quads
are adjacent, but that's not always true. The fragment shader can cull
intermediate quads, for example.
Now these Z-test functions can cope with non-adjacent quads. A little bit
of performance is probably lost, but it's probably not worth worring about.
This fixes broken glBitmap() Z testing, among other things.
-rw-r--r-- | src/gallium/drivers/softpipe/sp_quad_depth_test.c | 89 |
1 files changed, 53 insertions, 36 deletions
diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 5cb17c5ae7..6b93ebbda5 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -714,9 +714,14 @@ depth_test_quads_fallback(struct quad_stage *qs, qs->next->run(qs->next, quads, nr); } -/* XXX: this function assumes setup function actually emits linear - * spans of quads. It seems a lot more natural to do (early) - * depth-testing on spans rather than quads. + +/** + * Special-case Z testing for 16-bit Zbuffer, PIPE_FUNC_LESS and + * Z buffer writes enabled. + * + * NOTE: there's no guarantee that the quads are sequentially side by + * side. The fragment shader may have culled some quads, etc. Sliver + * triangles may generate non-sequential quads. */ static void depth_interp_z16_less_write(struct quad_stage *qs, @@ -733,25 +738,33 @@ depth_interp_z16_less_write(struct quad_stage *qs, const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy; struct softpipe_cached_tile *tile; ushort (*depth16)[TILE_SIZE]; - ushort idepth[4], depth_step; + ushort init_idepth[4], idepth[4], depth_step; const float scale = 65535.0; - idepth[0] = (ushort)((z0) * scale); - idepth[1] = (ushort)((z0 + dzdx) * scale); - idepth[2] = (ushort)((z0 + dzdy) * scale); - idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale); + /* compute scaled depth of the four pixels in first quad */ + init_idepth[0] = (ushort)((z0) * scale); + init_idepth[1] = (ushort)((z0 + dzdx) * scale); + init_idepth[2] = (ushort)((z0 + dzdy) * scale); + init_idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale); - depth_step = (ushort)(dzdx * 2 * scale); + depth_step = (ushort)(dzdx * scale); tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, ix, iy); - depth16 = (ushort (*)[TILE_SIZE]) - &tile->data.depth16[iy % TILE_SIZE][ix % TILE_SIZE]; - for (i = 0; i < nr; i++) { - unsigned outmask = quads[i]->inout.mask; + const unsigned outmask = quads[i]->inout.mask; + const int dx = quads[i]->input.x0 - ix; unsigned mask = 0; - + + /* compute depth for this quad */ + idepth[0] = init_idepth[0] + dx * depth_step; + idepth[1] = init_idepth[1] + dx * depth_step; + idepth[2] = init_idepth[2] + dx * depth_step; + idepth[3] = init_idepth[3] + dx * depth_step; + + depth16 = (ushort (*)[TILE_SIZE]) + &tile->data.depth16[iy % TILE_SIZE][(ix + dx)% TILE_SIZE]; + if ((outmask & 1) && idepth[0] < depth16[0][0]) { depth16[0][0] = idepth[0]; mask |= (1 << 0); @@ -772,13 +785,6 @@ depth_interp_z16_less_write(struct quad_stage *qs, mask |= (1 << 3); } - idepth[0] += depth_step; - idepth[1] += depth_step; - idepth[2] += depth_step; - idepth[3] += depth_step; - - depth16 = (ushort (*)[TILE_SIZE]) &depth16[0][2]; - quads[i]->inout.mask = mask; if (quads[i]->inout.mask) quads[pass++] = quads[i]; @@ -790,6 +796,14 @@ depth_interp_z16_less_write(struct quad_stage *qs, } +/** + * Special-case Z testing for 16-bit Zbuffer, PIPE_FUNC_LEQUAL and + * Z buffer writes enabled. + * + * NOTE: there's no guarantee that the quads are sequentially side by + * side. The fragment shader may have culled some quads, etc. Sliver + * triangles may generate non-sequential quads. + */ static void depth_interp_z16_lequal_write(struct quad_stage *qs, struct quad_header *quads[], @@ -805,25 +819,33 @@ depth_interp_z16_lequal_write(struct quad_stage *qs, const float z0 = quads[0]->posCoef->a0[2] + dzdx * fx + dzdy * fy; struct softpipe_cached_tile *tile; ushort (*depth16)[TILE_SIZE]; - ushort idepth[4], depth_step; + ushort init_idepth[4], idepth[4], depth_step; const float scale = 65535.0; - idepth[0] = (ushort)((z0) * scale); - idepth[1] = (ushort)((z0 + dzdx) * scale); - idepth[2] = (ushort)((z0 + dzdy) * scale); - idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale); + /* compute scaled depth of the four pixels in first quad */ + init_idepth[0] = (ushort)((z0) * scale); + init_idepth[1] = (ushort)((z0 + dzdx) * scale); + init_idepth[2] = (ushort)((z0 + dzdy) * scale); + init_idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale); - depth_step = (ushort)(dzdx * 2 * scale); + depth_step = (ushort)(dzdx * scale); tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache, ix, iy); - depth16 = (ushort (*)[TILE_SIZE]) - &tile->data.depth16[iy % TILE_SIZE][ix % TILE_SIZE]; - for (i = 0; i < nr; i++) { - unsigned outmask = quads[i]->inout.mask; + const unsigned outmask = quads[i]->inout.mask; + const int dx = quads[i]->input.x0 - ix; unsigned mask = 0; + /* compute depth for this quad */ + idepth[0] = init_idepth[0] + dx * depth_step; + idepth[1] = init_idepth[1] + dx * depth_step; + idepth[2] = init_idepth[2] + dx * depth_step; + idepth[3] = init_idepth[3] + dx * depth_step; + + depth16 = (ushort (*)[TILE_SIZE]) + &tile->data.depth16[iy % TILE_SIZE][(ix + dx)% TILE_SIZE]; + if ((outmask & 1) && idepth[0] <= depth16[0][0]) { depth16[0][0] = idepth[0]; mask |= (1 << 0); @@ -844,11 +866,6 @@ depth_interp_z16_lequal_write(struct quad_stage *qs, mask |= (1 << 3); } - idepth[0] += depth_step; - idepth[1] += depth_step; - idepth[2] += depth_step; - idepth[3] += depth_step; - depth16 = (ushort (*)[TILE_SIZE]) &depth16[0][2]; quads[i]->inout.mask = mask; |