diff options
author | Siarhei Siamashka <siarhei.siamashka@nokia.com> | 2010-09-16 17:10:40 +0300 |
---|---|---|
committer | Siarhei Siamashka <siarhei.siamashka@nokia.com> | 2010-09-21 13:32:11 +0300 |
commit | 45833d5b198507e9e69b918459eaaf6088e5de00 (patch) | |
tree | 2b9d0d4faf7880c48952a05784d6f4de1d111bb8 | |
parent | 3db0cc5c75a4a764726059511fa6d67082fbeb64 (diff) |
PAD repeat support for fast scaling with nearest filter
When processing pixels from the left and right padding, the same
scanline function is used with 'unit_x' set to 0.
It actually appears that gcc can handle this quite efficiently. When
the 'restrict' keyword is used, it is able to optimize the whole operation
performed on left or right padding pixels to a small unrolled loop
(the code is reduced to a simple fill implementation):
9b30: 89 08 mov %ecx,(%rax)
9b32: 89 48 04 mov %ecx,0x4(%rax)
9b35: 48 83 c0 08 add $0x8,%rax
9b39: 49 39 c0 cmp %rax,%r8
9b3c: 75 f2 jne 9b30
Without the 'restrict' keyword, there is one more instruction: reloading
the source pixel data from memory at the beginning of each iteration. That
is slower, but also acceptable.
-rw-r--r-- | pixman/pixman-fast-path.c | 5 | ||||
-rw-r--r-- | pixman/pixman-fast-path.h | 104 |
2 files changed, 105 insertions, 4 deletions
diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c index c0607496..5b10d65c 100644 --- a/pixman/pixman-fast-path.c +++ b/pixman/pixman-fast-path.c @@ -1388,14 +1388,19 @@ fast_composite_src_memcpy (pixman_implementation_t *imp, } FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER); +FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD); FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL); FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER); +FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD); FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL); FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER); +FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD); FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL); FAST_NEAREST (565_565_cover, 0565, 0565, uint16_t, uint16_t, SRC, COVER); +FAST_NEAREST (565_565_pad, 0565, 0565, uint16_t, uint16_t, SRC, PAD); FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL); FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER); +FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD); FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL); static force_inline uint32_t diff --git a/pixman/pixman-fast-path.h b/pixman/pixman-fast-path.h index 287b7535..7c14379b 100644 --- a/pixman/pixman-fast-path.h +++ b/pixman/pixman-fast-path.h @@ -58,6 +58,63 @@ repeat (pixman_repeat_t repeat, int *c, int size) return TRUE; } +/* + * For each scanline fetched from source image with PAD repeat: + * - calculate how many pixels need to be padded on the left side + * - calculate how many pixels need to be padded on the right side + * - update width to only count pixels which are fetched from the image + * All this information is returned via 'width', 'left_pad', 
'right_pad' + * arguments. The code is assuming that 'unit_x' is positive. + * + * Note: 64-bit math is used in order to avoid potential overflows, which + * is probably excessive in many cases. This particular function + * may need its own correctness test and performance tuning. + */ +static force_inline void +pad_repeat_get_scanline_bounds (int32_t source_image_width, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + int32_t * width, + int32_t * left_pad, + int32_t * right_pad) +{ + int64_t max_vx = (int64_t) source_image_width << 16; + int64_t tmp; + if (vx < 0) + { + tmp = ((int64_t) unit_x - 1 - vx) / unit_x; + if (tmp > *width) + { + *left_pad = *width; + *width = 0; + } + else + { + *left_pad = (int32_t) tmp; + *width -= (int32_t) tmp; + } + } + else + { + *left_pad = 0; + } + tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad; + if (tmp < 0) + { + *right_pad = *width; + *width = 0; + } + else if (tmp >= *width) + { + *right_pad = 0; + } + else + { + *right_pad = *width - (int32_t) tmp; + *width = (int32_t) tmp; + } +} + /* A macroified version of specialized nearest scalers for some * common 8888 and 565 formats. It supports SRC and OVER ops. 
* @@ -213,6 +270,7 @@ fast_composite_scaled_nearest_ ## scale_func_name (pixman_implementation_t *imp, pixman_vector_t v; \ pixman_fixed_t vx, vy; \ pixman_fixed_t unit_x, unit_y; \ + int32_t left_pad, right_pad; \ \ src_type_t *src; \ dst_type_t *dst; \ @@ -251,6 +309,13 @@ fast_composite_scaled_nearest_ ## scale_func_name (pixman_implementation_t *imp, repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ } \ \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ + { \ + pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \ + &width, &left_pad, &right_pad); \ + vx += left_pad * unit_x; \ + } \ + \ while (--height >= 0) \ { \ dst = dst_line; \ @@ -260,10 +325,29 @@ fast_composite_scaled_nearest_ ## scale_func_name (pixman_implementation_t *imp, vy += unit_y; \ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ - \ - src = src_first_line + src_stride * y; \ - \ - scanline_func (dst, src, width, vx, unit_x, max_vx); \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ + { \ + repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \ + src = src_first_line + src_stride * y; \ + if (left_pad > 0) \ + { \ + scanline_func (dst, src, left_pad, 0, 0, 0); \ + } \ + if (width > 0) \ + { \ + scanline_func (dst + left_pad, src, width, vx, unit_x, 0); \ + } \ + if (right_pad > 0) \ + { \ + scanline_func (dst + left_pad + width, src + src_image->bits.width - 1, \ + right_pad, 0, 0, 0); \ + } \ + } \ + else \ + { \ + src = src_first_line + src_stride * y; \ + scanline_func (dst, src, width, vx, unit_x, max_vx); \ + } \ } \ } @@ -295,6 +379,17 @@ fast_composite_scaled_nearest_ ## scale_func_name (pixman_implementation_t *imp, fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ } +#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## 
d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ + } + #define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \ { PIXMAN_OP_ ## op, \ PIXMAN_ ## s, \ @@ -307,6 +402,7 @@ fast_composite_scaled_nearest_ ## scale_func_name (pixman_implementation_t *imp, /* Prefer the use of 'cover' variant, because it is faster */ #define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \ SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func) #endif |