diff options
author | André Tupinambá <andrelrt@gmail.com> | 2009-09-19 09:32:37 -0400 |
---|---|---|
committer | Søren Sandmann Pedersen <sandmann@redhat.com> | 2009-10-26 13:04:21 -0400 |
commit | 88323c5abe68906472049537b54b0e7eea343f43 (patch) | |
tree | 2048231e5519690cdd31b88aebaff8541473f610 | |
parent | f0c157f888185279681bad305973f246dca2e535 (diff) |
Speed up bilinear interpolation.
Speed up bilinear interpolation by processing more than one component
at a time on 64 bit architectures, and by precomputing the dist{ixiy}
products on 32 bit architectures.
Previously bilinear interpolation for one pixel would take 24
multiplications. With this improvement it takes 12 on 64 bit, and 20
on 32 bit.
This is a small but consistent speedup on the swfdec-youtube
benchmark:
[ # ] backend test min(s) median(s) stddev. count
Before:
[ 0] image swfdec-youtube 18.010 18.020 0.09% 4/5
After:
[ 0] image swfdec-youtube 17.488 17.584 0.22% 5/6
Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com>
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | pixman/pixman-bits-image.c | 115 |
2 files changed, 95 insertions, 22 deletions
diff --git a/configure.ac b/configure.ac index c3e339c8..12c7f751 100644 --- a/configure.ac +++ b/configure.ac @@ -76,6 +76,8 @@ AC_CHECK_FUNCS([getisax]) AC_C_BIGENDIAN AC_C_INLINE +AC_CHECK_SIZEOF(long) + # Checks for Sun Studio compilers AC_CHECK_DECL([__SUNPRO_C], [SUNCC="yes"], [SUNCC="no"]) AC_CHECK_DECL([__amd64], [AMD64_ABI="yes"], [AMD64_ABI="no"]) diff --git a/pixman/pixman-bits-image.c b/pixman/pixman-bits-image.c index 9e93623e..288edf40 100644 --- a/pixman/pixman-bits-image.c +++ b/pixman/pixman-bits-image.c @@ -184,6 +184,95 @@ bits_image_fetch_pixel_nearest (bits_image_t *image, } } +#if SIZEOF_LONG > 4 + +static inline uint32_t +bilinear_interpolation (uint32_t tl, uint32_t tr, + uint32_t bl, uint32_t br, + int distx, int disty) +{ + uint64_t distxy, distxiy, distixy, distixiy; + uint64_t tl64, tr64, bl64, br64; + uint64_t f, r; + + distxy = distx * disty; + distxiy = distx * (256 - disty); + distixy = (256 - distx) * disty; + distixiy = (256 - distx) * (256 - disty); + + /* Alpha and Blue */ + tl64 = tl & 0xff0000ff; + tr64 = tr & 0xff0000ff; + bl64 = bl & 0xff0000ff; + br64 = br & 0xff0000ff; + + f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; + r = f & 0x0000ff0000ff0000ull; + + /* Red and Green */ + tl64 = tl; + tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull); + + tr64 = tr; + tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull); + + bl64 = bl; + bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull); + + br64 = br; + br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull); + + f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; + r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull); + + return (uint32_t)(r >> 16); +} + +#else + +static inline uint32_t +bilinear_interpolation (uint32_t tl, uint32_t tr, + uint32_t bl, uint32_t br, + int distx, int disty) +{ + int distxy, distxiy, distixy, distixiy; + uint32_t f, r; + + distxy = distx * disty; + distxiy = distx * (256 - disty); + distixy = (256 - distx) * disty; + distixiy = (256 - distx) * (256 - disty); + + /* Blue */ + r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy + + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy; + + /* Green */ + f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy + + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy; + r |= f & 0xff000000; + + tl >>= 16; + tr >>= 16; + bl >>= 16; + br >>= 16; + r >>= 16; + + /* Red */ + f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy + + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy; + r |= f & 0x00ff0000; + + /* Alpha */ + f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy + + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy; + r |= f & 0xff000000; + + return r; +} + +#endif + static force_inline uint32_t bits_image_fetch_pixel_bilinear (bits_image_t *image, pixman_fixed_t x, @@ -193,9 +282,8 @@ bits_image_fetch_pixel_bilinear (bits_image_t *image, int width = image->width; int height = image->height; int x1, y1, x2, y2; - uint32_t tl, tr, bl, br, r; - int32_t distx, disty, idistx, idisty; - uint32_t ft, fb; + uint32_t tl, tr, bl, br; + int32_t distx, disty; x1 = x - pixman_fixed_1 / 2; y1 = y - pixman_fixed_1 / 2; @@ -214,7 +302,7 @@ bits_image_fetch_pixel_bilinear (bits_image_t *image, repeat (repeat_mode, height, &y1); repeat (repeat_mode, width, &x2); repeat (repeat_mode, height, &y2); - + tl = get_pixel (image, x1, y1, FALSE); bl = get_pixel (image, x1, y2, FALSE); tr = get_pixel (image, x2, y1, FALSE); @@ -228,24 +316,7 @@ bits_image_fetch_pixel_bilinear (bits_image_t *image, br = get_pixel (image, x2, y2, TRUE); } - idistx = 256 - distx; - idisty = 256 - disty; - -#define GET8(v, i) ((uint16_t) (uint8_t) ((v) >> i)) - ft = GET8 (tl, 0) * idistx + GET8 (tr, 0) * distx; - fb = GET8 (bl, 0) * idistx + GET8 (br, 0) * distx; - r = (((ft * idisty + fb * disty) >> 16) & 0xff); - ft = GET8 (tl, 8) * idistx + GET8 (tr, 8) * distx; - fb = GET8 (bl, 8) * idistx + GET8 (br, 8) * distx; - r |= (((ft * idisty + fb * disty) >> 8) & 0xff00); - ft = GET8 (tl, 16) * idistx + GET8 (tr, 16) * distx; - fb = GET8 (bl, 16) * idistx + GET8 (br, 16) * distx; - r |= (((ft * idisty + fb * disty)) & 0xff0000); - ft = GET8 (tl, 24) * idistx + GET8 (tr, 24) * distx; - fb = GET8 (bl, 24) * idistx + GET8 (br, 24) * distx; - r |= (((ft * idisty + fb * disty) << 8) & 0xff000000); - - return r; + return bilinear_interpolation (tl, tr, bl, br, distx, disty); } static force_inline uint32_t |