summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSiarhei Siamashka <siarhei.siamashka@gmail.com>2012-11-30 12:00:47 +0200
committerSiarhei Siamashka <siarhei.siamashka@gmail.com>2012-12-10 20:02:08 +0200
commit2bc59006d7fe91abf68a2061ad86c06e1b2964ab (patch)
tree61a43cd15ca8c3596ff5c1cbba345fca70029a43
parent8ca4e144724ba2041bc5ef077ccf6d24e7cf4d1f (diff)
Improve performance of combine_over_u
The generic C over_u combiner can be a lot faster with the addition of special shortcuts for 0xFF and 0x00 alpha/mask values. This is already implemented in C and SSE2 fast paths. Profiling the run of cairo-perf-trace benchmarks with PIXMAN_DISABLE environment variable set to "fast mmx sse2" on Intel Core i7: === before === 37.32% cairo-perf-trac libpixman-1.so.0.29.1 [.] combine_over_u 21.37% cairo-perf-trac libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_no_repeat_8888 13.51% cairo-perf-trac libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_affine_none_a8r8g8b8 2.96% cairo-perf-trac libpixman-1.so.0.29.1 [.] radial_compute_color 2.74% cairo-perf-trac libpixman-1.so.0.29.1 [.] fetch_scanline_a8 2.71% cairo-perf-trac libpixman-1.so.0.29.1 [.] fetch_scanline_x8r8g8b8 2.17% cairo-perf-trac libpixman-1.so.0.29.1 [.] _pixman_gradient_walker_pixel 1.86% cairo-perf-trac libcairo.so.2.11200.0 [.] _cairo_tor_scan_converter_generate 1.57% cairo-perf-trac libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_affine_pad_a8r8g8b8 0.97% cairo-perf-trac libpixman-1.so.0.29.1 [.] combine_in_reverse_u 0.96% cairo-perf-trac libpixman-1.so.0.29.1 [.] combine_over_ca === after === 28.79% cairo-perf-trac libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_no_repeat_8888 18.44% cairo-perf-trac libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_affine_none_a8r8g8b8 15.54% cairo-perf-trac libpixman-1.so.0.29.1 [.] combine_over_u 3.94% cairo-perf-trac libpixman-1.so.0.29.1 [.] radial_compute_color 3.69% cairo-perf-trac libpixman-1.so.0.29.1 [.] fetch_scanline_a8 3.69% cairo-perf-trac libpixman-1.so.0.29.1 [.] fetch_scanline_x8r8g8b8 2.94% cairo-perf-trac libpixman-1.so.0.29.1 [.] _pixman_gradient_walker_pixel 2.52% cairo-perf-trac libcairo.so.2.11200.0 [.] _cairo_tor_scan_converter_generate 2.08% cairo-perf-trac libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_affine_pad_a8r8g8b8 1.31% cairo-perf-trac libpixman-1.so.0.29.1 [.] combine_in_reverse_u 1.29% cairo-perf-trac libpixman-1.so.0.29.1 [.] combine_over_ca
-rw-r--r--pixman/pixman-combine32.c58
1 files changed, 51 insertions, 7 deletions
diff --git a/pixman/pixman-combine32.c b/pixman/pixman-combine32.c
index 54cc877..3ac7576 100644
--- a/pixman/pixman-combine32.c
+++ b/pixman/pixman-combine32.c
@@ -196,14 +196,58 @@ combine_over_u (pixman_implementation_t *imp,
{
int i;
- for (i = 0; i < width; ++i)
+ if (!mask)
{
- uint32_t s = combine_mask (src, mask, i);
- uint32_t d = *(dest + i);
- uint32_t ia = ALPHA_8 (~s);
-
- UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
- *(dest + i) = d;
+ for (i = 0; i < width; ++i)
+ {
+ uint32_t s = *(src + i);
+ uint32_t a = ALPHA_8 (s);
+ if (a == 0xFF)
+ {
+ *(dest + i) = s;
+ }
+ else if (s)
+ {
+ uint32_t d = *(dest + i);
+ uint32_t ia = a ^ 0xFF;
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
+ *(dest + i) = d;
+ }
+ }
+ }
+ else
+ {
+ for (i = 0; i < width; ++i)
+ {
+ uint32_t m = ALPHA_8 (*(mask + i));
+ if (m == 0xFF)
+ {
+ uint32_t s = *(src + i);
+ uint32_t a = ALPHA_8 (s);
+ if (a == 0xFF)
+ {
+ *(dest + i) = s;
+ }
+ else if (s)
+ {
+ uint32_t d = *(dest + i);
+ uint32_t ia = a ^ 0xFF;
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
+ *(dest + i) = d;
+ }
+ }
+ else if (m)
+ {
+ uint32_t s = *(src + i);
+ if (s)
+ {
+ uint32_t d = *(dest + i);
+ UN8x4_MUL_UN8 (s, m);
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, ALPHA_8 (~s), s);
+ *(dest + i) = d;
+ }
+ }
+ }
}
}