diff options
author | José Fonseca <jfonseca@vmware.com> | 2010-09-04 19:51:54 +0100 |
---|---|---|
committer | José Fonseca <jfonseca@vmware.com> | 2010-09-05 10:17:51 +0100 |
commit | d278ddc00966b6348eb4703b12166c05cf539635 (patch) | |
tree | 11b1b4ef26c8b8224097a3487509bc6c32470337 | |
parent | b8684b2458bc9bdcfd6b43dc7c2b8c2d485105fd (diff) |
llvmpipe: Fix perspective divide interpolation.
Intuition != mathematics, so this time I actually worked out the right
formula for the first-order approximation of perspective interpolation.
Ironically, per-quad divide actually makes things slower when compared
with per-pixel divide -- probably because the divide hardware unit is
rarely used, whereas the multiply unit is typically already saturated
and the first-order approximation implies more multiplications.
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_interp.c | 83 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_interp.h | 2 |
2 files changed, 76 insertions, 9 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index 2cf6f38c4b..2a374f8c39 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -75,6 +75,33 @@ */ +/** + * Do one perspective divide per quad. + * + * For perspective interpolation, the final attribute value is given + * + * a' = a/w = a * oow + * + * where + * + * a = a0 + dadx*x + dady*y + * w = w0 + dwdx*x + dwdy*y + * oow = 1/w = 1/(w0 + dwdx*x + dwdy*y) + * + * Instead of computing the division per pixel, with this macro we compute the + * division on the upper left pixel of each quad, and use a linear + * approximation in the remaining pixels, given by: + * + * da'dx = (dadx - dwdx*a)*oow + * da'dy = (dady - dwdy*a)*oow + * + * Ironically, this actually makes things slower -- probably because the + * divide hardware unit is rarely used, whereas the multiply unit is typically + * already saturated. + */ +#define PERSPECTIVE_DIVIDE_PER_QUAD 0 + + static const unsigned char quad_offset_x[4] = {0, 1, 0, 1}; static const unsigned char quad_offset_y[4] = {0, 0, 1, 1}; @@ -107,7 +134,6 @@ coeffs_init(struct lp_build_interp_soa_context *bld, LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0); LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0); LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0); - LLVMValueRef oow = NULL; unsigned attrib; unsigned chan; @@ -213,22 +239,22 @@ coeffs_init(struct lp_build_interp_soa_context *bld, a = LLVMBuildFAdd(builder, a, dadq2, ""); +#if PERSPECTIVE_DIVIDE_PER_QUAD /* - * a *= 1 / w - * dadq *= 1 / w + * a *= 1 / w */ if (interp == LP_INTERP_PERSPECTIVE) { LLVMValueRef w = bld->a[0][3]; assert(attrib != 0); assert(bld->mask[0] & TGSI_WRITEMASK_W); - if (!oow) { - oow = lp_build_rcp(coeff_bld, w); - lp_build_name(oow, "oow"); + if (!bld->oow) { + bld->oow = lp_build_rcp(coeff_bld, w); + lp_build_name(bld->oow, "oow"); } - a = lp_build_mul(coeff_bld, a, oow); - 
dadq = lp_build_mul(coeff_bld, dadq, oow); + a = lp_build_mul(coeff_bld, a, bld->oow); } +#endif attrib_name(a, attrib, chan, ".a"); attrib_name(dadq, attrib, chan, ".dadq"); @@ -250,6 +276,7 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) { struct lp_build_context *coeff_bld = &bld->coeff_bld; LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index); + LLVMValueRef oow = NULL; unsigned attrib; unsigned chan; @@ -270,6 +297,8 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) a = bld->attribs[0][chan]; } else { + LLVMValueRef dadq; + a = bld->a[attrib][chan]; /* @@ -280,10 +309,46 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) a, coeff_bld->undef, shuffle, ""); /* + * Get the derivatives. + */ + + dadq = bld->dadq[attrib][chan]; + +#if PERSPECTIVE_DIVIDE_PER_QUAD + if (interp == LP_INTERP_PERSPECTIVE) { + LLVMValueRef dwdq = bld->dadq[0][3]; + + if (oow == NULL) { + assert(bld->oow); + oow = LLVMBuildShuffleVector(coeff_bld->builder, + bld->oow, coeff_bld->undef, + shuffle, ""); + } + + dadq = lp_build_sub(coeff_bld, + dadq, + lp_build_mul(coeff_bld, a, dwdq)); + dadq = lp_build_mul(coeff_bld, dadq, oow); + } +#endif + + /* * Add the derivatives */ - a = lp_build_add(coeff_bld, a, bld->dadq[attrib][chan]); + a = lp_build_add(coeff_bld, a, dadq); + +#if !PERSPECTIVE_DIVIDE_PER_QUAD + if (interp == LP_INTERP_PERSPECTIVE) { + if (oow == NULL) { + LLVMValueRef w = bld->attribs[0][3]; + assert(attrib != 0); + assert(bld->mask[0] & TGSI_WRITEMASK_W); + oow = lp_build_rcp(coeff_bld, w); + } + a = lp_build_mul(coeff_bld, a, oow); + } +#endif attrib_name(a, attrib, chan, ""); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index 2905513301..3054030f73 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -64,6 +64,8 @@ struct lp_build_interp_soa_context 
LLVMValueRef a [1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; LLVMValueRef dadq[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + LLVMValueRef oow; + LLVMValueRef attribs[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; /* |