summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Chris Wilson <chris@chris-wilson.co.uk> 2013-01-24 08:55:54 +0000
committer: Chris Wilson <chris@chris-wilson.co.uk> 2013-01-26 16:10:47 +0000
commit: c986a7310bb06582b7d8a566d5f007ba4e5e75bf (patch)
tree: b1f01fb7896726f69beffd369bcaf936041c4668
parent: cfe0e59663c71a6ecd0c976797ac32339e363af2 (diff)
image: Enable inplace compositing with opacities for general routines
On a SNB i5-2500:

Speedups
========
firefox-chalkboard  34284.16 -> 19637.40:  1.74x speedup
swfdec-giant-steps    778.35 ->   665.37:  1.17x speedup
ocitysmap             485.64 ->   431.94:  1.12x speedup

Slowdowns
=========
firefox-fishbowl    46878.98 -> 54407.14:  1.16x slowdown

That slowdown is due to the overhead of the increased number of calls to
pixman_image_composite32() (pixman_transform_point for analyzing the
source extents in particular) outweighing any advantage gained by
performing the rasterisation in a single pass and eliding gaps. The
solution that has been floated in the past is for an interface into
pixman to only perform the analysis once and then to return a kernel to
use for all spans.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- src/cairo-image-compositor.c 190
-rw-r--r-- src/cairo-spans-compositor-private.h 2
2 files changed, 162 insertions, 30 deletions
diff --git a/src/cairo-image-compositor.c b/src/cairo-image-compositor.c
index 97152502..7e905ce7 100644
--- a/src/cairo-image-compositor.c
+++ b/src/cairo-image-compositor.c
@@ -1547,7 +1547,8 @@ typedef struct _cairo_image_span_renderer {
uint8_t *data;
} mask;
} u;
- uint8_t buf[sizeof(cairo_abstract_span_renderer_t)-128];
+ uint8_t _buf[0];
+#define SZ_BUF (sizeof (cairo_abstract_span_renderer_t) - sizeof (cairo_image_span_renderer_t))
} cairo_image_span_renderer_t;
COMPILE_TIME_ASSERT (sizeof (cairo_image_span_renderer_t) <= sizeof (cairo_abstract_span_renderer_t));
@@ -2251,7 +2252,7 @@ _fill_a8_lerp_spans (void *abstract_renderer, int y, int h,
if (likely(h == 1)) {
do {
- uint8_t a = mul8_8 (spans[0].coverage, r->op);
+ uint8_t a = mul8_8 (spans[0].coverage, r->bpp);
if (a) {
int len = spans[1].x - spans[0].x;
uint8_t *d = r->u.fill.data + r->u.fill.stride*y + spans[0].x;
@@ -2266,7 +2267,7 @@ _fill_a8_lerp_spans (void *abstract_renderer, int y, int h,
} while (--num_spans > 1);
} else {
do {
- uint8_t a = mul8_8 (spans[0].coverage, r->op);
+ uint8_t a = mul8_8 (spans[0].coverage, r->bpp);
if (a) {
int yy = y, hh = h;
uint16_t p = (uint16_t)a * r->u.fill.pixel + 0x7f;
@@ -2299,7 +2300,7 @@ _fill_xrgb32_lerp_spans (void *abstract_renderer, int y, int h,
if (likely(h == 1)) {
do {
- uint8_t a = mul8_8 (spans[0].coverage, r->op);
+ uint8_t a = mul8_8 (spans[0].coverage, r->bpp);
if (a) {
int len = spans[1].x - spans[0].x;
uint32_t *d = (uint32_t*)(r->u.fill.data + r->u.fill.stride*y + spans[0].x*4);
@@ -2312,7 +2313,7 @@ _fill_xrgb32_lerp_spans (void *abstract_renderer, int y, int h,
} while (--num_spans > 1);
} else {
do {
- uint8_t a = mul8_8 (spans[0].coverage, r->op);
+ uint8_t a = mul8_8 (spans[0].coverage, r->bpp);
if (a) {
int yy = y, hh = h;
do {
@@ -2345,7 +2346,7 @@ _blit_xrgb32_lerp_spans (void *abstract_renderer, int y, int h,
uint8_t *src = r->u.blit.src_data + y*r->u.blit.src_stride;
uint8_t *dst = r->u.blit.data + y*r->u.blit.stride;
do {
- uint8_t a = mul8_8 (spans[0].coverage, r->op);
+ uint8_t a = mul8_8 (spans[0].coverage, r->bpp);
if (a) {
uint32_t *s = (uint32_t*)src + spans[0].x;
uint32_t *d = (uint32_t*)dst + spans[0].x;
@@ -2366,7 +2367,7 @@ _blit_xrgb32_lerp_spans (void *abstract_renderer, int y, int h,
} while (--num_spans > 1);
} else {
do {
- uint8_t a = mul8_8 (spans[0].coverage, r->op);
+ uint8_t a = mul8_8 (spans[0].coverage, r->bpp);
if (a) {
int yy = y, hh = h;
do {
@@ -2441,7 +2442,7 @@ _inplace_spans (void *abstract_renderer,
mask = (uint8_t *)pixman_image_get_data (r->mask);
x0 = spans[1].x;
} else if (spans[0].coverage == 0x0) {
- if (x1 != x0) {
+ if (x1 - x0 > r->u.composite.run_length) {
pixman_image_composite32 (r->op, r->src, r->mask, r->u.composite.dst,
x0 + r->u.composite.src_x,
y + r->u.composite.src_y,
@@ -2473,8 +2474,58 @@ _inplace_spans (void *abstract_renderer,
}
static cairo_status_t
-_inplace_src_spans (void *abstract_renderer,
- int y, int h,
+_inplace_opacity_spans (void *abstract_renderer, int y, int h,
+ const cairo_half_open_span_t *spans,
+ unsigned num_spans)
+{
+ cairo_image_span_renderer_t *r = abstract_renderer;
+ uint8_t *mask;
+ int x0, x1;
+
+ if (num_spans == 0)
+ return CAIRO_STATUS_SUCCESS;
+
+ mask = (uint8_t *)pixman_image_get_data (r->mask);
+ x1 = x0 = spans[0].x;
+ do {
+ int len = spans[1].x - spans[0].x;
+ uint8_t m = mul8_8(spans[0].coverage, r->bpp);
+ *mask++ = m;
+ if (len > 1) {
+ if (m == 0) {
+ if (x1 - x0 > r->u.composite.run_length) {
+ pixman_image_composite32 (r->op, r->src, r->mask, r->u.composite.dst,
+ x0 + r->u.composite.src_x,
+ y + r->u.composite.src_y,
+ 0, 0,
+ x0, y,
+ x1 - x0, h);
+ }
+ mask = (uint8_t *)pixman_image_get_data (r->mask);
+ x0 = spans[1].x;
+ }else {
+ memset (mask, m, --len);
+ mask += len;
+ }
+ }
+ x1 = spans[1].x;
+ spans++;
+ } while (--num_spans > 1);
+
+ if (x1 != x0) {
+ pixman_image_composite32 (r->op, r->src, r->mask, r->u.composite.dst,
+ x0 + r->u.composite.src_x,
+ y + r->u.composite.src_y,
+ 0, 0,
+ x0, y,
+ x1 - x0, h);
+ }
+
+ return CAIRO_STATUS_SUCCESS;
+}
+
+static cairo_status_t
+_inplace_src_spans (void *abstract_renderer, int y, int h,
const cairo_half_open_span_t *spans,
unsigned num_spans)
{
@@ -2486,7 +2537,7 @@ _inplace_src_spans (void *abstract_renderer,
return CAIRO_STATUS_SUCCESS;
x0 = spans[0].x;
- m = r->buf;
+ m = r->_buf;
do {
int len = spans[1].x - spans[0].x;
if (len >= r->u.composite.run_length && spans[0].coverage == 0xff) {
@@ -2524,7 +2575,7 @@ _inplace_src_spans (void *abstract_renderer,
spans[0].x, y,
spans[1].x - spans[0].x, h);
- m = r->buf;
+ m = r->_buf;
x0 = spans[1].x;
} else if (spans[0].coverage == 0x0) {
if (spans[0].x != x0) {
@@ -2553,7 +2604,7 @@ _inplace_src_spans (void *abstract_renderer,
#endif
}
- m = r->buf;
+ m = r->_buf;
x0 = spans[1].x;
} else {
*m++ = spans[0].coverage;
@@ -2594,6 +2645,91 @@ _inplace_src_spans (void *abstract_renderer,
return CAIRO_STATUS_SUCCESS;
}
+static cairo_status_t
+_inplace_src_opacity_spans (void *abstract_renderer, int y, int h,
+ const cairo_half_open_span_t *spans,
+ unsigned num_spans)
+{
+ cairo_image_span_renderer_t *r = abstract_renderer;
+ uint8_t *mask;
+ int x0;
+
+ if (num_spans == 0)
+ return CAIRO_STATUS_SUCCESS;
+
+ x0 = spans[0].x;
+ mask = (uint8_t *)pixman_image_get_data (r->mask);
+ do {
+ int len = spans[1].x - spans[0].x;
+ uint8_t m = mul8_8(spans[0].coverage, r->bpp);
+ if (m == 0) {
+ if (spans[0].x != x0) {
+#if PIXMAN_HAS_OP_LERP
+ pixman_image_composite32 (PIXMAN_OP_LERP_SRC,
+ r->src, r->mask, r->u.composite.dst,
+ x0 + r->u.composite.src_x,
+ y + r->u.composite.src_y,
+ 0, 0,
+ x0, y,
+ spans[0].x - x0, h);
+#else
+ pixman_image_composite32 (PIXMAN_OP_OUT_REVERSE,
+ r->mask, NULL, r->u.composite.dst,
+ 0, 0,
+ 0, 0,
+ x0, y,
+ spans[0].x - x0, h);
+ pixman_image_composite32 (PIXMAN_OP_ADD,
+ r->src, r->mask, r->u.composite.dst,
+ x0 + r->u.composite.src_x,
+ y + r->u.composite.src_y,
+ 0, 0,
+ x0, y,
+ spans[0].x - x0, h);
+#endif
+ }
+
+ mask = (uint8_t *)pixman_image_get_data (r->mask);
+ x0 = spans[1].x;
+ } else {
+ *mask++ = m;
+ if (len > 1) {
+ memset (mask, m, --len);
+ mask += len;
+ }
+ }
+ spans++;
+ } while (--num_spans > 1);
+
+ if (spans[0].x != x0) {
+#if PIXMAN_HAS_OP_LERP
+ pixman_image_composite32 (PIXMAN_OP_LERP_SRC,
+ r->src, r->mask, r->u.composite.dst,
+ x0 + r->u.composite.src_x,
+ y + r->u.composite.src_y,
+ 0, 0,
+ x0, y,
+ spans[0].x - x0, h);
+#else
+ pixman_image_composite32 (PIXMAN_OP_OUT_REVERSE,
+ r->mask, NULL, r->u.composite.dst,
+ 0, 0,
+ 0, 0,
+ x0, y,
+ spans[0].x - x0, h);
+ pixman_image_composite32 (PIXMAN_OP_ADD,
+ r->src, r->mask, r->u.composite.dst,
+ x0 + r->u.composite.src_x,
+ y + r->u.composite.src_y,
+ 0, 0,
+ x0, y,
+ spans[0].x - x0, h);
+#endif
+ }
+
+ return CAIRO_STATUS_SUCCESS;
+}
+
static void free_pixels (pixman_image_t *image, void *data)
{
free (data);
@@ -2612,7 +2748,7 @@ inplace_renderer_init (cairo_image_span_renderer_t *r,
return CAIRO_INT_STATUS_UNSUPPORTED;
r->base.render_rows = NULL;
- r->op = composite->mask_pattern.solid.color.alpha_short >> 8;
+ r->bpp = composite->mask_pattern.solid.color.alpha_short >> 8;
if (composite->source_pattern.base.type == CAIRO_PATTERN_TYPE_SOLID) {
const cairo_color_t *color;
@@ -2627,7 +2763,7 @@ inplace_renderer_init (cairo_image_span_renderer_t *r,
* typically small, too small to payback the startup overheads of
* using SSE2 etc.
*/
- if (r->op == 0xff) {
+ if (r->bpp == 0xff) {
switch (dst->format) {
case CAIRO_FORMAT_A8:
r->base.render_rows = _fill_a8_lerp_opaque_spans;
@@ -2689,17 +2825,15 @@ inplace_renderer_init (cairo_image_span_renderer_t *r,
}
}
if (r->base.render_rows == NULL) {
- unsigned int width;
const cairo_pattern_t *src = &composite->source_pattern.base;
-
- if (r->op != 0xff)
- return CAIRO_INT_STATUS_UNSUPPORTED;
+ unsigned int width;
if (composite->is_bounded == 0)
return CAIRO_INT_STATUS_UNSUPPORTED;
+ r->base.render_rows = r->bpp == 0xff ? _inplace_spans : _inplace_opacity_spans;
width = (composite->bounded.width + 3) & ~3;
- r->base.render_rows = _inplace_spans;
+
r->u.composite.run_length = 8;
if (src->type == CAIRO_PATTERN_TYPE_LINEAR ||
src->type == CAIRO_PATTERN_TYPE_RADIAL)
@@ -2710,7 +2844,7 @@ inplace_renderer_init (cairo_image_span_renderer_t *r,
composite->op == CAIRO_OPERATOR_ADD)) {
r->op = PIXMAN_OP_SRC;
} else if (composite->op == CAIRO_OPERATOR_SOURCE) {
- r->base.render_rows = _inplace_src_spans;
+ r->base.render_rows = r->bpp == 0xff ? _inplace_src_spans : _inplace_src_opacity_spans;
r->u.composite.mask_y = r->composite->unbounded.y;
width = (composite->unbounded.width + 3) & ~3;
} else if (composite->op == CAIRO_OPERATOR_CLEAR) {
@@ -2728,8 +2862,8 @@ inplace_renderer_init (cairo_image_span_renderer_t *r,
return _cairo_error (CAIRO_STATUS_NO_MEMORY);
/* Create an effectively unbounded mask by repeating the single line */
- buf = r->buf;
- if (width > sizeof (r->buf)) {
+ buf = r->_buf;
+ if (width > SZ_BUF) {
buf = malloc (width);
if (unlikely (buf == NULL)) {
pixman_image_unref (r->src);
@@ -2741,19 +2875,17 @@ inplace_renderer_init (cairo_image_span_renderer_t *r,
(uint32_t *)buf, 0);
if (unlikely (r->mask == NULL)) {
pixman_image_unref (r->src);
- if (buf != r->buf)
+ if (buf != r->_buf)
free (buf);
return _cairo_error(CAIRO_STATUS_NO_MEMORY);
}
- if (buf != r->buf)
+ if (buf != r->_buf)
pixman_image_set_destroy_function (r->mask, free_pixels, buf);
r->u.composite.dst = dst->pixman_image;
}
- r->bpp = PIXMAN_FORMAT_BPP(dst->pixman_format);
-
return CAIRO_INT_STATUS_SUCCESS;
}
@@ -2855,7 +2987,7 @@ span_renderer_init (cairo_abstract_span_renderer_t *_r,
r->u.mask.extents = composite->unbounded;
r->u.mask.stride = (r->u.mask.extents.width + 3) & ~3;
- if (r->u.mask.extents.height * r->u.mask.stride > (int)sizeof (r->buf)) {
+ if (r->u.mask.extents.height * r->u.mask.stride > (int)sizeof (r->_buf)) {
r->mask = pixman_image_create_bits (PIXMAN_a8,
r->u.mask.extents.width,
r->u.mask.extents.height,
@@ -2867,7 +2999,7 @@ span_renderer_init (cairo_abstract_span_renderer_t *_r,
r->mask = pixman_image_create_bits (PIXMAN_a8,
r->u.mask.extents.width,
r->u.mask.extents.height,
- (uint32_t *)r->buf, r->u.mask.stride);
+ (uint32_t *)r->_buf, r->u.mask.stride);
r->base.render_rows = _cairo_image_spans_and_zero;
r->base.finish = _cairo_image_finish_spans_and_zero;
diff --git a/src/cairo-spans-compositor-private.h b/src/cairo-spans-compositor-private.h
index d8b94fba..0babebd2 100644
--- a/src/cairo-spans-compositor-private.h
+++ b/src/cairo-spans-compositor-private.h
@@ -46,7 +46,7 @@ CAIRO_BEGIN_DECLS
typedef struct _cairo_abstract_span_renderer {
cairo_span_renderer_t base;
- char data[2048];
+ char data[4096];
} cairo_abstract_span_renderer_t;
struct cairo_spans_compositor {