summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSøren Sandmann Pedersen <ssp@redhat.com>2013-02-14 08:06:19 -0500
committerSøren Sandmann Pedersen <ssp@redhat.com>2013-03-08 17:47:00 -0500
commitf6ad427233fbdcf116eb6850bc6f23b651df9a87 (patch)
tree718230635f4fd06362145ef2afb34919ad9c6759
parente6e2c400b7ccb77fe135b7e6abbcf3dbd5b328d4 (diff)
Improve precision of calculations in pixman-gradient-walker.cgradient-precision
The computations in pixman-gradient-walker.c currently take place at very limited 8 bit precision which results in quite visible artefacts in gradients. An example is the one produced by demos/linear-gradient which currently looks like this: http://i.imgur.com/kQbX8nd.png With the changes in this commit, the gradient looks like this: http://i.imgur.com/nUlyuKI.png The images are also available here: http://people.freedesktop.org/~sandmann/gradients/before.png http://people.freedesktop.org/~sandmann/gradients/after.png This patch computes pixels using floating point, but uses a faster algorithm, which makes up for the loss of performance. == Theory: In both the new and the old algorithm, the various gradient implementations compute a parameter x that indicates how far along the gradient the current scanline is. The current algorithm has a cache of the two color stops surrounding the last parameter; those are used in a SIMD-within-register fashion in this way: t1 = walker->left_rb * idist + walker->right_rb * dist; where dist and idist are the distances to the left and right color stops respectively normalized to the distance between the left and right stops. The normalization (which involves a division) is captured in another cached variable "stepper". The cached values are recomputed whenever the parameter moves in between two different stops (called "reset" in the implementation). Because idist and dist are computed in 8 bits only, a lot of information is lost, which is quite visible as the image linked above shows. The new algorithm caches more information in the following way. When interpolating between stops, the formula to be used is this: t = ((x - left) / (right - left)); result = lc * (1 - t) + rc * t; where - x is the parameter as computed by the main gradient code, - left is the position of the left color stop, - right is the position of the right color stop - lc is the color of the left color stop - rc is the color of the right color stop That formula can also be written like this: result = lc * (1 - t) + rc * t; = lc + (rc - lc) * t = lc + (rc - lc) * ((x - left) / (right - left)) = (rc - lc) / (right - left) * x + lc - (left * (rc - lc)) / (right - left) = s * x + b where s = (rc - lc) / (right - left) and b = lc - left * (rc - lc) / (right - left) = (lc * (right - left) - left * (rc - lc)) / (right - left) = (lc * right - rc * left) / (right - left) To summarize, setting w = (right - left): s = (rc - lc) / w b = (lc * right - rc * left) / w r = s * x + b Since s and b only depend on the two active stops, both can be cached so that the computation only needs to do one multiplication and one addition per pixel (followed by premultiplication of the alpha channel). That is, seven multiplications in total, which is the same number as the old SIMD-within-register implementation had. == Implementation notes: The new formula described above is implemented in single precision floating point, and the eight divisions necessary to compute the cached values are done by multiplication with the reciprocal of the distance between the color stops. The alpha values used in the cached computation are scaled by 255.0, whereas the RGB values are kept in the [0, 1] interval. The ensures that after premultiplication, all values will be in the [0, 255] interval. == Performance impact (median of three runs of radial-perf-test): == Intel Sandy Bridge, Core i5 @ 1.2GHz Before: 0.014516 After: 0.014511 Change: 0.0% == AMD Barcelona @ 1.2 GHz Before: 0.021854 seconds After: 0.022073 seconds Change: 1.0% slowdown Ie., little to no change, though conceivably the impact could be bigger on machines with a bigger difference between integer and floating point performance.
-rw-r--r--pixman/pixman-gradient-walker.c116
-rw-r--r--pixman/pixman-private.h9
2 files changed, 80 insertions, 45 deletions
diff --git a/pixman/pixman-gradient-walker.c b/pixman/pixman-gradient-walker.c
index e7e724fa..c2694c66 100644
--- a/pixman/pixman-gradient-walker.c
+++ b/pixman/pixman-gradient-walker.c
@@ -27,6 +27,7 @@
#include <config.h>
#endif
#include "pixman-private.h"
+#include "pixman-combine32.h"
void
_pixman_gradient_walker_init (pixman_gradient_walker_t *walker,
@@ -38,10 +39,15 @@ _pixman_gradient_walker_init (pixman_gradient_walker_t *walker,
walker->left_x = 0;
walker->right_x = 0x10000;
walker->stepper = 0;
- walker->left_ag = 0;
- walker->left_rb = 0;
- walker->right_ag = 0;
- walker->right_rb = 0;
+ walker->a_m = 0;
+ walker->a_d = 0;
+ walker->r_m = 0;
+ walker->r_d = 0;
+ walker->g_m = 0;
+ walker->g_d = 0;
+ walker->b_m = 0;
+ walker->b_d = 0;
+ walker->offset = 0;
walker->repeat = repeat;
walker->need_reset = TRUE;
@@ -83,10 +89,10 @@ gradient_walker_reset (pixman_gradient_walker_t *walker,
right_x = stops[n].x;
right_c = &stops[n].color;
+ walker->offset = 0;
if (walker->repeat == PIXMAN_REPEAT_NORMAL)
{
- left_x += (pos - x);
- right_x += (pos - x);
+ walker->offset = x - pos;
}
else if (walker->repeat == PIXMAN_REPEAT_REFLECT)
{
@@ -105,8 +111,7 @@ gradient_walker_reset (pixman_gradient_walker_t *walker,
x = 0x10000 - x;
}
- left_x += (pos - x);
- right_x += (pos - x);
+ walker->offset = x - pos;
}
else if (walker->repeat == PIXMAN_REPEAT_NONE)
{
@@ -116,24 +121,48 @@ gradient_walker_reset (pixman_gradient_walker_t *walker,
left_c = right_c;
}
- walker->left_x = left_x;
- walker->right_x = right_x;
- walker->left_ag = ((left_c->alpha >> 8) << 16) | (left_c->green >> 8);
- walker->left_rb = ((left_c->red & 0xff00) << 8) | (left_c->blue >> 8);
- walker->right_ag = ((right_c->alpha >> 8) << 16) | (right_c->green >> 8);
- walker->right_rb = ((right_c->red & 0xff00) << 8) | (right_c->blue >> 8);
-
- if (walker->left_x == walker->right_x ||
- (walker->left_ag == walker->right_ag &&
- walker->left_rb == walker->right_rb))
+ float la, lr, lg, lb;
+ float ra, rr, rg, rb;
+ float lx, rx;
+
+ la = (left_c->alpha * (1.0f/257.0f));
+ lr = (left_c->red * (1.0f/257.0f));
+ lg = (left_c->green * (1.0f/257.0f));
+ lb = (left_c->blue * (1.0f/257.0f));
+
+ ra = (right_c->alpha * (1.0f/257.0f));
+ rr = (right_c->red * (1.0f/257.0f));
+ rg = (right_c->green * (1.0f/257.0f));
+ rb = (right_c->blue * (1.0f/257.0f));
+
+ lx = left_x * (1.0f/65536.0f);
+ rx = right_x * (1.0f/65536.0f);
+
+ if (FLOAT_IS_ZERO (rx - lx) || left_x == INT32_MIN || right_x == INT32_MAX)
{
- walker->stepper = 0;
+ walker->a_m = walker->r_m = walker->g_m = walker->b_m = 0.0f;
+ walker->a_d = (la + ra) * (1.0 / 2.0f);
+ walker->r_d = (lr + rr) * (1.0 / 510.0f);
+ walker->g_d = (lg + rg) * (1.0 / 510.0f);
+ walker->b_d = (lb + rb) * (1.0 / 510.0f);
}
else
{
- int32_t width = right_x - left_x;
- walker->stepper = ((1 << 24) + width / 2) / width;
+ float w_rec = 1.0f / (rx - lx);
+
+ walker->a_d = (la * rx - ra * lx) * w_rec;
+ walker->r_d = (lr * rx - rr * lx) * w_rec * (1.0f/255.0f);
+ walker->g_d = (lg * rx - rg * lx) * w_rec * (1.0f/255.0f);
+ walker->b_d = (lb * rx - rb * lx) * w_rec * (1.0f/255.0f);
+
+ walker->a_m = (ra - la) * w_rec;
+ walker->r_m = (rr - lr) * w_rec * (1.0f/255.0f);
+ walker->g_m = (rg - lg) * w_rec * (1.0f/255.0f);
+ walker->b_m = (rb - lb) * w_rec * (1.0f/255.0f);
}
+
+ walker->left_x = left_x;
+ walker->right_x = right_x;
walker->need_reset = FALSE;
}
@@ -142,31 +171,36 @@ uint32_t
_pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker,
pixman_fixed_48_16_t x)
{
- int dist, idist;
- uint32_t t1, t2, a, color;
-
- if (walker->need_reset || x < walker->left_x || x >= walker->right_x)
- gradient_walker_reset (walker, x);
+ float a, r, g, b;
+ uint8_t a8, r8, g8, b8;
+ uint32_t v;
+ float y;
- dist = ((int)(x - walker->left_x) * walker->stepper) >> 16;
- idist = 256 - dist;
+ x += walker->offset;
- /* combined INTERPOLATE and premultiply */
- t1 = walker->left_rb * idist + walker->right_rb * dist;
- t1 = (t1 >> 8) & 0xff00ff;
+ if (walker->need_reset || x < walker->left_x || x >= walker->right_x)
+ {
+ x -= walker->offset;
+ gradient_walker_reset (walker, x);
+ x += walker->offset;
+ }
- t2 = walker->left_ag * idist + walker->right_ag * dist;
- t2 &= 0xff00ff00;
+ y = x * (1.0f / 65536.0f);
- color = t2 & 0xff000000;
- a = t2 >> 24;
+ a = walker->a_m * y + walker->a_d;
+ r = a * (walker->r_m * y + walker->r_d);
+ g = a * (walker->g_m * y + walker->g_d);
+ b = a * (walker->b_m * y + walker->b_d);
- t1 = t1 * a + 0x800080;
- t1 = (t1 + ((t1 >> 8) & 0xff00ff)) >> 8;
+ a8 = a + 0.5f;
+ r8 = r + 0.5f;
+ g8 = g + 0.5f;
+ b8 = b + 0.5f;
- t2 = (t2 >> 8) * a + 0x800080;
- t2 = (t2 + ((t2 >> 8) & 0xff00ff));
+ v = ((a8 << 24) & 0xff000000) |
+ ((r8 << 16) & 0x00ff0000) |
+ ((g8 << 8) & 0x0000ff00) |
+ ((b8 >> 0) & 0x000000ff);
- return (color | (t1 & 0xff00ff) | (t2 & 0xff00));
+ return v;
}
-
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 91e329f2..8c2c7f53 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -319,10 +319,11 @@ _pixman_image_validate (pixman_image_t *image);
*/
typedef struct
{
- uint32_t left_ag;
- uint32_t left_rb;
- uint32_t right_ag;
- uint32_t right_rb;
+ float a_m, a_d;
+ float r_m, r_d;
+ float g_m, g_d;
+ float b_m, b_d;
+ int32_t offset;
pixman_fixed_t left_x;
pixman_fixed_t right_x;
pixman_fixed_t stepper;