diff options
author | Taekyun Kim <tkq.kim@samsung.com> | 2011-06-13 19:53:49 +0900 |
---|---|---|
committer | Taekyun Kim <tkq.kim@samsung.com> | 2011-06-28 23:20:32 +0900 |
commit | eff7c8efabe2da33edbf0bdc06e101352981286b (patch) | |
tree | fa8f8ce22c20dadc9736025081e77b202a591fb2 | |
parent | 828794d328e7ad1efc860baee8d6e72450b486b9 (diff) |
Bilinear REPEAT_NORMAL source line extension for too short src_width
To avoid function call and other calculation overhead, extend source
scanline into temporary buffer when source width is too small.
Temporary buffer will be repeatedly accessed, so extension cost is
very small due to cache effect.
-rw-r--r-- | pixman/pixman-fast-path.h | 50 |
1 files changed, 47 insertions, 3 deletions
diff --git a/pixman/pixman-fast-path.h b/pixman/pixman-fast-path.h index 8bc1d8a..e94591a 100644 --- a/pixman/pixman-fast-path.h +++ b/pixman/pixman-fast-path.h @@ -48,6 +48,11 @@ #define FLAG_HAVE_SOLID_MASK (1 << 1) #define FLAG_HAVE_NON_SOLID_MASK (1 << 2) +/* To avoid too short repeated scanline function calls, extend source + * scanlines having width less than below constant value. + */ +#define REPEAT_NORMAL_MIN_WIDTH 64 + static force_inline pixman_bool_t repeat (pixman_repeat_t repeat, int *c, int size) { @@ -692,6 +697,8 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \ int src_width; \ pixman_fixed_t src_width_fixed; \ + int max_x; \ + pixman_bool_t need_src_extension; \ \ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \ if (flags & FLAG_HAVE_SOLID_MASK) \ @@ -743,7 +750,25 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ { \ - src_width = src_image->bits.width; \ + vx = v.vector[0]; \ + repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width)); \ + max_x = pixman_fixed_to_int (vx + (width - 1) * unit_x) + 1; \ + \ + if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH) \ + { \ + src_width = 0; \ + \ + while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x) \ + src_width += src_image->bits.width; \ + \ + need_src_extension = TRUE; \ + } \ + else \ + { \ + src_width = src_image->bits.width; \ + need_src_extension = FALSE; \ + } \ + \ src_width_fixed = pixman_int_to_fixed (src_width); \ } \ \ @@ -901,22 +926,41 @@ fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, src_type_t * src_line_bottom; \ src_type_t buf1[2]; \ src_type_t buf2[2]; \ + src_type_t extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2]; \ + src_type_t extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2]; \ + int i, j; \ \ repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); \ repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); \ src_line_top = src_first_line + src_stride * y1; \ src_line_bottom = src_first_line + src_stride * y2; \ \ + if (need_src_extension) \ + { \ + for (i=0; i<src_width;) \ + { \ + for (j=0; j<src_image->bits.width; j++, i++) \ + { \ + extended_src_line0[i] = src_line_top[j]; \ + extended_src_line1[i] = src_line_bottom[j]; \ + } \ + } \ + \ + src_line_top = &extended_src_line0[0]; \ + src_line_bottom = &extended_src_line1[0]; \ + } \ + \ /* Top & Bottom wrap around buffer */ \ - buf1[0] = src_line_top[src_image->bits.width - 1]; \ + buf1[0] = src_line_top[src_width - 1]; \ buf1[1] = src_line_top[0]; \ - buf2[0] = src_line_bottom[src_image->bits.width - 1]; \ + buf2[0] = src_line_bottom[src_width - 1]; \ buf2[1] = src_line_bottom[0]; \ \ width_remain = width; \ \ while (width_remain > 0) \ { \ + /* We use src_width_fixed because it can make vx in original source range */ \ repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \ \ /* Wrap around part */ \ |