author     Chris Wilson <chris@chris-wilson.co.uk>   2013-01-24 08:55:54 +0000
committer  Chris Wilson <chris@chris-wilson.co.uk>   2013-01-26 16:10:47 +0000
commit     c986a7310bb06582b7d8a566d5f007ba4e5e75bf (patch)
tree       b1f01fb7896726f69beffd369bcaf936041c4668
parent     cfe0e59663c71a6ecd0c976797ac32339e363af2 (diff)
image: Enable inplace compositing with opacities for general routines
On a SNB i5-2500:
Speedups
========
firefox-chalkboard 34284.16 -> 19637.40: 1.74x speedup
swfdec-giant-steps 778.35 -> 665.37: 1.17x speedup
ocitysmap 485.64 -> 431.94: 1.12x speedup
Slowdowns
=========
firefox-fishbowl 46878.98 -> 54407.14: 1.16x slowdown
That slowdown is due to the overhead of the increased number of calls to
pixman_image_composite32() (pixman_transform_point for analyzing the
source extents, in particular) outweighing any advantage gained by
performing the rasterisation in a single pass and eliding gaps. The
solution floated in the past is an interface into pixman that performs
the analysis only once and then returns a kernel to use for all spans.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r--  src/cairo-image-compositor.c          190
-rw-r--r--  src/cairo-spans-compositor-private.h    2
2 files changed, 162 insertions(+), 30 deletions(-)
diff --git a/src/cairo-image-compositor.c b/src/cairo-image-compositor.c
index 97152502..7e905ce7 100644
--- a/src/cairo-image-compositor.c
+++ b/src/cairo-image-compositor.c
@@ -1547,7 +1547,8 @@ typedef struct _cairo_image_span_renderer {
 	    uint8_t *data;
 	} mask;
     } u;
-    uint8_t buf[sizeof(cairo_abstract_span_renderer_t)-128];
+    uint8_t _buf[0];
+#define SZ_BUF (sizeof (cairo_abstract_span_renderer_t) - sizeof (cairo_image_span_renderer_t))
 } cairo_image_span_renderer_t;
 COMPILE_TIME_ASSERT (sizeof (cairo_image_span_renderer_t) <= sizeof (cairo_abstract_span_renderer_t));
 
@@ -2251,7 +2252,7 @@ _fill_a8_lerp_spans (void *abstract_renderer, int y, int h,
 
     if (likely(h == 1)) {
 	do {
-	    uint8_t a = mul8_8 (spans[0].coverage, r->op);
+	    uint8_t a = mul8_8 (spans[0].coverage, r->bpp);
 	    if (a) {
 		int len = spans[1].x - spans[0].x;
 		uint8_t *d = r->u.fill.data + r->u.fill.stride*y + spans[0].x;
@@ -2266,7 +2267,7 @@
 	} while (--num_spans > 1);
     } else {
 	do {
-	    uint8_t a = mul8_8 (spans[0].coverage, r->op);
+	    uint8_t a = mul8_8 (spans[0].coverage, r->bpp);
 	    if (a) {
 		int yy = y, hh = h;
 		uint16_t p = (uint16_t)a * r->u.fill.pixel + 0x7f;
@@ -2299,7 +2300,7 @@ _fill_xrgb32_lerp_spans (void *abstract_renderer, int y, int h,
 
     if (likely(h == 1)) {
 	do {
-	    uint8_t a = mul8_8 (spans[0].coverage, r->op);
+	    uint8_t a = mul8_8 (spans[0].coverage, r->bpp);
 	    if (a) {
 		int len = spans[1].x - spans[0].x;
 		uint32_t *d = (uint32_t*)(r->u.fill.data + r->u.fill.stride*y + spans[0].x*4);
@@ -2312,7 +2313,7 @@
 	} while (--num_spans > 1);
     } else {
 	do {
-	    uint8_t a = mul8_8 (spans[0].coverage, r->op);
+	    uint8_t a = mul8_8 (spans[0].coverage, r->bpp);
 	    if (a) {
 		int yy = y, hh = h;
 		do {
@@ -2345,7 +2346,7 @@ _blit_xrgb32_lerp_spans (void *abstract_renderer, int y, int h,
 	uint8_t *src = r->u.blit.src_data + y*r->u.blit.src_stride;
 	uint8_t *dst = r->u.blit.data + y*r->u.blit.stride;
 	do {
-	    uint8_t a = mul8_8 (spans[0].coverage, r->op);
+	    uint8_t a = mul8_8 (spans[0].coverage, r->bpp);
 	    if (a) {
 		uint32_t *s = (uint32_t*)src + spans[0].x;
 		uint32_t *d = (uint32_t*)dst + spans[0].x;
@@ -2366,7 +2367,7 @@
 	} while (--num_spans > 1);
     } else {
 	do {
-	    uint8_t a = mul8_8 (spans[0].coverage, r->op);
+	    uint8_t a = mul8_8 (spans[0].coverage, r->bpp);
 	    if (a) {
 		int yy = y, hh = h;
 		do {
@@ -2441,7 +2442,7 @@ _inplace_spans (void *abstract_renderer,
 	    mask = (uint8_t *)pixman_image_get_data (r->mask);
 	    x0 = spans[1].x;
 	} else if (spans[0].coverage == 0x0) {
-	    if (x1 != x0) {
+	    if (x1 - x0 > r->u.composite.run_length) {
 		pixman_image_composite32 (r->op, r->src, r->mask, r->u.composite.dst,
					  x0 + r->u.composite.src_x,
					  y + r->u.composite.src_y,
@@ -2473,8 +2474,58 @@
 }
 
 static cairo_status_t
-_inplace_src_spans (void *abstract_renderer,
-		    int y, int h,
+_inplace_opacity_spans (void *abstract_renderer, int y, int h,
+			const cairo_half_open_span_t *spans,
+			unsigned num_spans)
+{
+    cairo_image_span_renderer_t *r = abstract_renderer;
+    uint8_t *mask;
+    int x0, x1;
+
+    if (num_spans == 0)
+	return CAIRO_STATUS_SUCCESS;
+
+    mask = (uint8_t *)pixman_image_get_data (r->mask);
+    x1 = x0 = spans[0].x;
+    do {
+	int len = spans[1].x - spans[0].x;
+	uint8_t m = mul8_8(spans[0].coverage, r->bpp);
+	*mask++ = m;
+	if (len > 1) {
+	    if (m == 0) {
+		if (x1 - x0 > r->u.composite.run_length) {
+		    pixman_image_composite32 (r->op, r->src, r->mask, r->u.composite.dst,
+					      x0 + r->u.composite.src_x,
+					      y + r->u.composite.src_y,
+					      0, 0,
+					      x0, y,
+					      x1 - x0, h);
+		}
+		mask = (uint8_t *)pixman_image_get_data (r->mask);
+		x0 = spans[1].x;
+	    } else {
+		memset (mask, m, --len);
+		mask += len;
+	    }
+	}
+	x1 = spans[1].x;
+	spans++;
+    } while (--num_spans > 1);
+
+    if (x1 != x0) {
+	pixman_image_composite32 (r->op, r->src, r->mask, r->u.composite.dst,
+				  x0 + r->u.composite.src_x,
+				  y + r->u.composite.src_y,
+				  0, 0,
+				  x0, y,
+				  x1 - x0, h);
+    }
+
+    return CAIRO_STATUS_SUCCESS;
+}
+
+static cairo_status_t
+_inplace_src_spans (void *abstract_renderer, int y, int h,
 		    const cairo_half_open_span_t *spans,
 		    unsigned num_spans)
 {
@@ -2486,7 +2537,7 @@ _inplace_src_spans (void *abstract_renderer,
 	return CAIRO_STATUS_SUCCESS;
 
     x0 = spans[0].x;
-    m = r->buf;
+    m = r->_buf;
     do {
 	int len = spans[1].x - spans[0].x;
 	if (len >= r->u.composite.run_length && spans[0].coverage == 0xff) {
@@ -2524,7 +2575,7 @@
 					  spans[0].x, y,
 					  spans[1].x - spans[0].x, h);
 
-	    m = r->buf;
+	    m = r->_buf;
 	    x0 = spans[1].x;
 	} else if (spans[0].coverage == 0x0) {
 	    if (spans[0].x != x0) {
@@ -2553,7 +2604,7 @@
 #endif
 	    }
 
-	    m = r->buf;
+	    m = r->_buf;
 	    x0 = spans[1].x;
 	} else {
 	    *m++ = spans[0].coverage;
@@ -2594,6 +2645,91 @@
     return CAIRO_STATUS_SUCCESS;
 }
 
+static cairo_status_t
+_inplace_src_opacity_spans (void *abstract_renderer, int y, int h,
+			    const cairo_half_open_span_t *spans,
+			    unsigned num_spans)
+{
+    cairo_image_span_renderer_t *r = abstract_renderer;
+    uint8_t *mask;
+    int x0;
+
+    if (num_spans == 0)
+	return CAIRO_STATUS_SUCCESS;
+
+    x0 = spans[0].x;
+    mask = (uint8_t *)pixman_image_get_data (r->mask);
+    do {
+	int len = spans[1].x - spans[0].x;
+	uint8_t m = mul8_8(spans[0].coverage, r->bpp);
+	if (m == 0) {
+	    if (spans[0].x != x0) {
+#if PIXMAN_HAS_OP_LERP
+		pixman_image_composite32 (PIXMAN_OP_LERP_SRC,
+					  r->src, r->mask, r->u.composite.dst,
+					  x0 + r->u.composite.src_x,
+					  y + r->u.composite.src_y,
+					  0, 0,
+					  x0, y,
+					  spans[0].x - x0, h);
+#else
+		pixman_image_composite32 (PIXMAN_OP_OUT_REVERSE,
+					  r->mask, NULL, r->u.composite.dst,
+					  0, 0,
+					  0, 0,
+					  x0, y,
+					  spans[0].x - x0, h);
+		pixman_image_composite32 (PIXMAN_OP_ADD,
+					  r->src, r->mask, r->u.composite.dst,
+					  x0 + r->u.composite.src_x,
+					  y + r->u.composite.src_y,
+					  0, 0,
+					  x0, y,
+					  spans[0].x - x0, h);
+#endif
+	    }
+
+	    mask = (uint8_t *)pixman_image_get_data (r->mask);
+	    x0 = spans[1].x;
+	} else {
+	    *mask++ = m;
+	    if (len > 1) {
+		memset (mask, m, --len);
+		mask += len;
+	    }
+	}
+	spans++;
+    } while (--num_spans > 1);
+
+    if (spans[0].x != x0) {
+#if PIXMAN_HAS_OP_LERP
+	pixman_image_composite32 (PIXMAN_OP_LERP_SRC,
+				  r->src, r->mask, r->u.composite.dst,
+				  x0 + r->u.composite.src_x,
+				  y + r->u.composite.src_y,
+				  0, 0,
+				  x0, y,
+				  spans[0].x - x0, h);
+#else
+	pixman_image_composite32 (PIXMAN_OP_OUT_REVERSE,
+				  r->mask, NULL, r->u.composite.dst,
+				  0, 0,
+				  0, 0,
+				  x0, y,
+				  spans[0].x - x0, h);
+	pixman_image_composite32 (PIXMAN_OP_ADD,
+				  r->src, r->mask, r->u.composite.dst,
+				  x0 + r->u.composite.src_x,
+				  y + r->u.composite.src_y,
+				  0, 0,
+				  x0, y,
+				  spans[0].x - x0, h);
+#endif
+    }
+
+    return CAIRO_STATUS_SUCCESS;
+}
+
 static void free_pixels (pixman_image_t *image, void *data)
 {
     free (data);
@@ -2612,7 +2748,7 @@ inplace_renderer_init (cairo_image_span_renderer_t *r,
 	return CAIRO_INT_STATUS_UNSUPPORTED;
 
     r->base.render_rows = NULL;
-    r->op = composite->mask_pattern.solid.color.alpha_short >> 8;
+    r->bpp = composite->mask_pattern.solid.color.alpha_short >> 8;
 
     if (composite->source_pattern.base.type == CAIRO_PATTERN_TYPE_SOLID) {
 	const cairo_color_t *color;
@@ -2627,7 +2763,7 @@ inplace_renderer_init (cairo_image_span_renderer_t *r,
 	 * typically small, too small to payback the startup overheads of
 	 * using SSE2 etc.
 	 */
-	if (r->op == 0xff) {
+	if (r->bpp == 0xff) {
 	    switch (dst->format) {
 	    case CAIRO_FORMAT_A8:
 		r->base.render_rows = _fill_a8_lerp_opaque_spans;
@@ -2689,17 +2825,15 @@ inplace_renderer_init (cairo_image_span_renderer_t *r,
 	}
     }
     if (r->base.render_rows == NULL) {
-	unsigned int width;
 	const cairo_pattern_t *src = &composite->source_pattern.base;
-
-	if (r->op != 0xff)
-	    return CAIRO_INT_STATUS_UNSUPPORTED;
+	unsigned int width;
 
 	if (composite->is_bounded == 0)
 	    return CAIRO_INT_STATUS_UNSUPPORTED;
 
+	r->base.render_rows = r->bpp == 0xff ? _inplace_spans : _inplace_opacity_spans;
 	width = (composite->bounded.width + 3) & ~3;
-	r->base.render_rows = _inplace_spans;
+	r->u.composite.run_length = 8;
 
 	if (src->type == CAIRO_PATTERN_TYPE_LINEAR ||
 	    src->type == CAIRO_PATTERN_TYPE_RADIAL)
@@ -2710,7 +2844,7 @@ inplace_renderer_init (cairo_image_span_renderer_t *r,
 		       composite->op == CAIRO_OPERATOR_ADD)) {
 	    r->op = PIXMAN_OP_SRC;
 	} else if (composite->op == CAIRO_OPERATOR_SOURCE) {
-	    r->base.render_rows = _inplace_src_spans;
+	    r->base.render_rows = r->bpp == 0xff ? _inplace_src_spans : _inplace_src_opacity_spans;
 	    r->u.composite.mask_y = r->composite->unbounded.y;
 	    width = (composite->unbounded.width + 3) & ~3;
 	} else if (composite->op == CAIRO_OPERATOR_CLEAR) {
@@ -2728,8 +2862,8 @@ inplace_renderer_init (cairo_image_span_renderer_t *r,
 	    return _cairo_error (CAIRO_STATUS_NO_MEMORY);
 
 	/* Create an effectively unbounded mask by repeating the single line */
-	buf = r->buf;
-	if (width > sizeof (r->buf)) {
+	buf = r->_buf;
+	if (width > SZ_BUF) {
 	    buf = malloc (width);
 	    if (unlikely (buf == NULL)) {
 		pixman_image_unref (r->src);
@@ -2741,19 +2875,17 @@ inplace_renderer_init (cairo_image_span_renderer_t *r,
 				       (uint32_t *)buf, 0);
 	if (unlikely (r->mask == NULL)) {
 	    pixman_image_unref (r->src);
-	    if (buf != r->buf)
+	    if (buf != r->_buf)
 		free (buf);
 	    return _cairo_error(CAIRO_STATUS_NO_MEMORY);
 	}
 
-	if (buf != r->buf)
+	if (buf != r->_buf)
 	    pixman_image_set_destroy_function (r->mask, free_pixels, buf);
 
 	r->u.composite.dst = dst->pixman_image;
     }
 
-    r->bpp = PIXMAN_FORMAT_BPP(dst->pixman_format);
-
     return CAIRO_INT_STATUS_SUCCESS;
 }
 
@@ -2855,7 +2987,7 @@ span_renderer_init (cairo_abstract_span_renderer_t *_r,
 
 	r->u.mask.extents = composite->unbounded;
 	r->u.mask.stride = (r->u.mask.extents.width + 3) & ~3;
-	if (r->u.mask.extents.height * r->u.mask.stride > (int)sizeof (r->buf)) {
+	if (r->u.mask.extents.height * r->u.mask.stride > (int) SZ_BUF) {
 	    r->mask = pixman_image_create_bits (PIXMAN_a8,
 						r->u.mask.extents.width,
 						r->u.mask.extents.height,
@@ -2867,7 +2999,7 @@ span_renderer_init (cairo_abstract_span_renderer_t *_r,
 	    r->mask = pixman_image_create_bits (PIXMAN_a8,
 						r->u.mask.extents.width,
 						r->u.mask.extents.height,
-						(uint32_t *)r->buf, r->u.mask.stride);
+						(uint32_t *)r->_buf, r->u.mask.stride);
 
 	r->base.render_rows = _cairo_image_spans_and_zero;
 	r->base.finish = _cairo_image_finish_spans_and_zero;
diff --git a/src/cairo-spans-compositor-private.h b/src/cairo-spans-compositor-private.h
index d8b94fba..0babebd2 100644
--- a/src/cairo-spans-compositor-private.h
+++ b/src/cairo-spans-compositor-private.h
@@ -46,7 +46,7 @@ CAIRO_BEGIN_DECLS
 
 typedef struct _cairo_abstract_span_renderer {
     cairo_span_renderer_t base;
-    char data[2048];
+    char data[4096];
 } cairo_abstract_span_renderer_t;
 
 struct cairo_spans_compositor {
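
Note on the new opacity paths: _inplace_opacity_spans and
_inplace_src_opacity_spans fold the constant mask alpha (stashed in
r->bpp by inplace_renderer_init) into each span's coverage via mul8_8
before writing the inline a8 mask. For reference, a minimal standalone
sketch of that rounded 8-bit multiply, modelled on pixman's MUL_UN8
(the 0x80 rounding bias is assumed here, not taken from this patch):

    #include <stdint.h>

    /* Rounded fixed-point multiply of two 8-bit alphas: computes
     * round (a * b / 255) without a division. */
    static uint8_t
    mul8_8 (uint8_t a, uint8_t b)
    {
	uint16_t t = a * (uint16_t) b + 0x80;
	return ((t >> 8) + t) >> 8;
    }

    /* Example: 50% coverage under a 50% opaque mask gives
     * mul8_8 (0x80, 0x80) == 0x40, i.e. ~25% effective alpha. */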