From a43946a51fbbdc76be1af9bc25fe7c5c2a6314bb Mon Sep 17 00:00:00 2001 From: Taekyun Kim Date: Thu, 22 Sep 2011 18:42:38 +0900 Subject: Simple repeat: Extend too short source scanlines into temporary buffer Too short scanlines can cause repeat handling overhead. Since optimized pixman composite functions usually process a bunch of pixels in a single loop iteration, it might be beneficial to pre-extend source scanlines. The temporary buffers will usually reside in cache, so accessing them should be quite efficient. --- pixman/pixman-fast-path.c | 95 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 92 insertions(+), 3 deletions(-) diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c index 89c973a..038dcf7 100644 --- a/pixman/pixman-fast-path.c +++ b/pixman/pixman-fast-path.c @@ -1191,6 +1191,8 @@ FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE) FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD) FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL) +#define REPEAT_MIN_WIDTH 32 + static void fast_composite_tiled_repeat (pixman_implementation_t *imp, pixman_composite_info_t *info) @@ -1224,27 +1226,111 @@ fast_composite_tiled_repeat (pixman_implementation_t *imp, int32_t sx, sy; int32_t width_remain; int32_t num_pixels; + int32_t src_width; + int32_t i, j; + pixman_image_t extended_src_image; + uint32_t extended_src[REPEAT_MIN_WIDTH * 2]; + pixman_bool_t need_src_extension; + uint32_t *src_line; + int32_t src_stride; + int32_t src_bpp; pixman_composite_info_t info2 = *info; + src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format); + + if (src_image->bits.width < REPEAT_MIN_WIDTH && + (src_bpp == 32 || src_bpp == 16 || src_bpp == 8)) + { + sx = src_x; + sx = MOD (sx, src_image->bits.width); + sx += width; + src_width = 0; + + while (src_width < REPEAT_MIN_WIDTH && src_width <= sx) + src_width += src_image->bits.width; + + src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof 
(uint32_t); + + /* Initialize/validate stack-allocated temporary image */ + _pixman_bits_image_init (&extended_src_image, src_image->bits.format, + src_width, 1, &extended_src[0], src_stride); + _pixman_image_validate (&extended_src_image); + + info2.src_image = &extended_src_image; + need_src_extension = TRUE; + } + else + { + src_width = src_image->bits.width; + need_src_extension = FALSE; + } + sx = src_x; sy = src_y; while (--height >= 0) { - sx = MOD (sx, src_image->bits.width); + sx = MOD (sx, src_width); sy = MOD (sy, src_image->bits.height); + if (need_src_extension) + { + if (src_bpp == 32) + { + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1); + + for (i = 0; i < src_width; ) + { + for (j = 0; j < src_image->bits.width; j++, i++) + extended_src[i] = src_line[j]; + } + } + else if (src_bpp == 16) + { + uint16_t *src_line_16; + + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride, + src_line_16, 1); + src_line = (uint32_t*)src_line_16; + + for (i = 0; i < src_width; ) + { + for (j = 0; j < src_image->bits.width; j++, i++) + ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j]; + } + } + else if (src_bpp == 8) + { + uint8_t *src_line_8; + + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride, + src_line_8, 1); + src_line = (uint32_t*)src_line_8; + + for (i = 0; i < src_width; ) + { + for (j = 0; j < src_image->bits.width; j++, i++) + ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j]; + } + } + + info2.src_y = 0; + } + else + { + info2.src_y = sy; + } + width_remain = width; while (width_remain > 0) { - num_pixels = src_image->bits.width - sx; + num_pixels = src_width - sx; if (num_pixels > width_remain) num_pixels = width_remain; info2.src_x = sx; - info2.src_y = sy; info2.width = num_pixels; info2.height = 1; @@ -1263,6 +1349,9 @@ fast_composite_tiled_repeat (pixman_implementation_t *imp, info2.dest_x = info->dest_x; info2.dest_y++; } + + if (need_src_extension) + _pixman_image_fini 
(&extended_src_image); } else { -- cgit v1.2.3