summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJosé Fonseca <jfonseca@vmware.com>2010-09-04 19:51:54 +0100
committerJosé Fonseca <jfonseca@vmware.com>2010-09-05 10:17:51 +0100
commitd278ddc00966b6348eb4703b12166c05cf539635 (patch)
tree11b1b4ef26c8b8224097a3487509bc6c32470337
parentb8684b2458bc9bdcfd6b43dc7c2b8c2d485105fd (diff)
llvmpipe: Fix perspective divide interpolation.
Intuition != mathematics, so this time I actually worked out the right formula for the first-order approximation of perspective interpolation. Ironically, the per-quad divide actually makes things slower when compared with the per-pixel divide -- probably because the divide hardware unit is rarely used, whereas the multiply unit is typically already saturated and the first-order approximation implies more multiplications.
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.c83
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.h2
2 files changed, 76 insertions, 9 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index 2cf6f38c4b..2a374f8c39 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -75,6 +75,33 @@
*/
+/**
+ * Do one perspective divide per quad.
+ *
+ * For perspective interpolation, the final attribute value is given by
+ *
+ * a' = a/w = a * oow
+ *
+ * where
+ *
+ * a = a0 + dadx*x + dady*y
+ * w = w0 + dwdx*x + dwdy*y
+ * oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
+ *
+ * Instead of computing the division per pixel, with this macro we compute the
+ * division on the upper left pixel of each quad, and use a linear
+ * approximation in the remaining pixels, given by:
+ *
+ * da'dx = (dadx - dwdx*a')*oow
+ * da'dy = (dady - dwdy*a')*oow
+ *
+ * Ironically, this actually makes things slower -- probably because the
+ * divide hardware unit is rarely used, whereas the multiply unit is typically
+ * already saturated.
+ */
+#define PERSPECTIVE_DIVIDE_PER_QUAD 0
+
+
static const unsigned char quad_offset_x[4] = {0, 1, 0, 1};
static const unsigned char quad_offset_y[4] = {0, 0, 1, 1};
@@ -107,7 +134,6 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
- LLVMValueRef oow = NULL;
unsigned attrib;
unsigned chan;
@@ -213,22 +239,22 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
a = LLVMBuildFAdd(builder, a, dadq2, "");
+#if PERSPECTIVE_DIVIDE_PER_QUAD
/*
- * a *= 1 / w
- * dadq *= 1 / w
+ * a *= 1 / w
*/
if (interp == LP_INTERP_PERSPECTIVE) {
LLVMValueRef w = bld->a[0][3];
assert(attrib != 0);
assert(bld->mask[0] & TGSI_WRITEMASK_W);
- if (!oow) {
- oow = lp_build_rcp(coeff_bld, w);
- lp_build_name(oow, "oow");
+ if (!bld->oow) {
+ bld->oow = lp_build_rcp(coeff_bld, w);
+ lp_build_name(bld->oow, "oow");
}
- a = lp_build_mul(coeff_bld, a, oow);
- dadq = lp_build_mul(coeff_bld, dadq, oow);
+ a = lp_build_mul(coeff_bld, a, bld->oow);
}
+#endif
attrib_name(a, attrib, chan, ".a");
attrib_name(dadq, attrib, chan, ".dadq");
@@ -250,6 +276,7 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
{
struct lp_build_context *coeff_bld = &bld->coeff_bld;
LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index);
+ LLVMValueRef oow = NULL;
unsigned attrib;
unsigned chan;
@@ -270,6 +297,8 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
a = bld->attribs[0][chan];
}
else {
+ LLVMValueRef dadq;
+
a = bld->a[attrib][chan];
/*
@@ -280,10 +309,46 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
a, coeff_bld->undef, shuffle, "");
/*
+ * Get the derivatives.
+ */
+
+ dadq = bld->dadq[attrib][chan];
+
+#if PERSPECTIVE_DIVIDE_PER_QUAD
+ if (interp == LP_INTERP_PERSPECTIVE) {
+ LLVMValueRef dwdq = bld->dadq[0][3];
+
+ if (oow == NULL) {
+ assert(bld->oow);
+ oow = LLVMBuildShuffleVector(coeff_bld->builder,
+ bld->oow, coeff_bld->undef,
+ shuffle, "");
+ }
+
+ dadq = lp_build_sub(coeff_bld,
+ dadq,
+ lp_build_mul(coeff_bld, a, dwdq));
+ dadq = lp_build_mul(coeff_bld, dadq, oow);
+ }
+#endif
+
+ /*
* Add the derivatives
*/
- a = lp_build_add(coeff_bld, a, bld->dadq[attrib][chan]);
+ a = lp_build_add(coeff_bld, a, dadq);
+
+#if !PERSPECTIVE_DIVIDE_PER_QUAD
+ if (interp == LP_INTERP_PERSPECTIVE) {
+ if (oow == NULL) {
+ LLVMValueRef w = bld->attribs[0][3];
+ assert(attrib != 0);
+ assert(bld->mask[0] & TGSI_WRITEMASK_W);
+ oow = lp_build_rcp(coeff_bld, w);
+ }
+ a = lp_build_mul(coeff_bld, a, oow);
+ }
+#endif
attrib_name(a, attrib, chan, "");
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
index 2905513301..3054030f73 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
@@ -64,6 +64,8 @@ struct lp_build_interp_soa_context
LLVMValueRef a [1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
LLVMValueRef dadq[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+ LLVMValueRef oow;
+
LLVMValueRef attribs[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
/*