summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndré Tupinambá <andrelrt@gmail.com>2009-09-19 09:32:37 -0400
committerSøren Sandmann Pedersen <sandmann@redhat.com>2009-10-26 13:04:21 -0400
commit88323c5abe68906472049537b54b0e7eea343f43 (patch)
tree2048231e5519690cdd31b88aebaff8541473f610
parentf0c157f888185279681bad305973f246dca2e535 (diff)
Speed up bilinear interpolation.
Speed up bilinear interpolation by processing more than one component at a time on 64 bit architectures, and by precomputing the dist{ixiy} products on 32 bit architectures. Previously bilinear interpolation for one pixel would take 24 multiplications. With this improvement it takes 12 on 64 bit, and 20 on 32 bit. This is a small but consistent speedup on the swfdec-youtube benchmark: [ # ] backend test min(s) median(s) stddev. count Before: [ 0] image swfdec-youtube 18.010 18.020 0.09% 4/5 After: [ 0] image swfdec-youtube 17.488 17.584 0.22% 5/6 Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com>
-rw-r--r--configure.ac2
-rw-r--r--pixman/pixman-bits-image.c115
2 files changed, 95 insertions, 22 deletions
diff --git a/configure.ac b/configure.ac
index c3e339c8..12c7f751 100644
--- a/configure.ac
+++ b/configure.ac
@@ -76,6 +76,8 @@ AC_CHECK_FUNCS([getisax])
AC_C_BIGENDIAN
AC_C_INLINE
+AC_CHECK_SIZEOF(long)
+
# Checks for Sun Studio compilers
AC_CHECK_DECL([__SUNPRO_C], [SUNCC="yes"], [SUNCC="no"])
AC_CHECK_DECL([__amd64], [AMD64_ABI="yes"], [AMD64_ABI="no"])
diff --git a/pixman/pixman-bits-image.c b/pixman/pixman-bits-image.c
index 9e93623e..288edf40 100644
--- a/pixman/pixman-bits-image.c
+++ b/pixman/pixman-bits-image.c
@@ -184,6 +184,95 @@ bits_image_fetch_pixel_nearest (bits_image_t *image,
}
}
+#if SIZEOF_LONG > 4
+
+static inline uint32_t
+bilinear_interpolation (uint32_t tl, uint32_t tr,
+ uint32_t bl, uint32_t br,
+ int distx, int disty)
+{
+ uint64_t distxy, distxiy, distixy, distixiy;
+ uint64_t tl64, tr64, bl64, br64;
+ uint64_t f, r;
+
+ distxy = distx * disty;
+ distxiy = distx * (256 - disty);
+ distixy = (256 - distx) * disty;
+ distixiy = (256 - distx) * (256 - disty);
+
+ /* Alpha and Blue */
+ tl64 = tl & 0xff0000ff;
+ tr64 = tr & 0xff0000ff;
+ bl64 = bl & 0xff0000ff;
+ br64 = br & 0xff0000ff;
+
+ f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
+ r = f & 0x0000ff0000ff0000ull;
+
+ /* Red and Green */
+ tl64 = tl;
+ tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
+
+ tr64 = tr;
+ tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
+
+ bl64 = bl;
+ bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
+
+ br64 = br;
+ br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
+
+ f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
+ r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
+
+ return (uint32_t)(r >> 16);
+}
+
+#else
+
+static inline uint32_t
+bilinear_interpolation (uint32_t tl, uint32_t tr,
+ uint32_t bl, uint32_t br,
+ int distx, int disty)
+{
+ int distxy, distxiy, distixy, distixiy;
+ uint32_t f, r;
+
+ distxy = distx * disty;
+ distxiy = distx * (256 - disty);
+ distixy = (256 - distx) * disty;
+ distixiy = (256 - distx) * (256 - disty);
+
+ /* Blue */
+ r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
+ + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy;
+
+ /* Green */
+ f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
+ + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy;
+ r |= f & 0xff000000;
+
+ tl >>= 16;
+ tr >>= 16;
+ bl >>= 16;
+ br >>= 16;
+ r >>= 16;
+
+ /* Red */
+ f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
+ + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy;
+ r |= f & 0x00ff0000;
+
+ /* Alpha */
+ f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
+ + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy;
+ r |= f & 0xff000000;
+
+ return r;
+}
+
+#endif
+
static force_inline uint32_t
bits_image_fetch_pixel_bilinear (bits_image_t *image,
pixman_fixed_t x,
@@ -193,9 +282,8 @@ bits_image_fetch_pixel_bilinear (bits_image_t *image,
int width = image->width;
int height = image->height;
int x1, y1, x2, y2;
- uint32_t tl, tr, bl, br, r;
- int32_t distx, disty, idistx, idisty;
- uint32_t ft, fb;
+ uint32_t tl, tr, bl, br;
+ int32_t distx, disty;
x1 = x - pixman_fixed_1 / 2;
y1 = y - pixman_fixed_1 / 2;
@@ -214,7 +302,7 @@ bits_image_fetch_pixel_bilinear (bits_image_t *image,
repeat (repeat_mode, height, &y1);
repeat (repeat_mode, width, &x2);
repeat (repeat_mode, height, &y2);
-
+
tl = get_pixel (image, x1, y1, FALSE);
bl = get_pixel (image, x1, y2, FALSE);
tr = get_pixel (image, x2, y1, FALSE);
@@ -228,24 +316,7 @@ bits_image_fetch_pixel_bilinear (bits_image_t *image,
br = get_pixel (image, x2, y2, TRUE);
}
- idistx = 256 - distx;
- idisty = 256 - disty;
-
-#define GET8(v, i) ((uint16_t) (uint8_t) ((v) >> i))
- ft = GET8 (tl, 0) * idistx + GET8 (tr, 0) * distx;
- fb = GET8 (bl, 0) * idistx + GET8 (br, 0) * distx;
- r = (((ft * idisty + fb * disty) >> 16) & 0xff);
- ft = GET8 (tl, 8) * idistx + GET8 (tr, 8) * distx;
- fb = GET8 (bl, 8) * idistx + GET8 (br, 8) * distx;
- r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
- ft = GET8 (tl, 16) * idistx + GET8 (tr, 16) * distx;
- fb = GET8 (bl, 16) * idistx + GET8 (br, 16) * distx;
- r |= (((ft * idisty + fb * disty)) & 0xff0000);
- ft = GET8 (tl, 24) * idistx + GET8 (tr, 24) * distx;
- fb = GET8 (bl, 24) * idistx + GET8 (br, 24) * distx;
- r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
-
- return r;
+ return bilinear_interpolation (tl, tr, bl, br, distx, disty);
}
static force_inline uint32_t