summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTaekyun Kim <tkq.kim@samsung.com>2011-08-29 19:03:55 +0900
committerTaekyun Kim <tkq.kim@samsung.com>2011-08-29 19:41:00 +0900
commit8d628e413fcbf79568ff0bfa065617ab991a0012 (patch)
tree1a752bf6ee64fb50b950586acd546fce187b92ad
parent511798debef735a5e19426bbeefa17291c17b4b5 (diff)
Simple repeat: Extend too short source scanline into temporary buffer (branch: normal_repeat)
Scanlines that are too short cause repeat-handling overhead, and because optimized pixman composite functions usually process a bunch of pixels in a single loop iteration, it can be beneficial to pre-extend the source scanlines. The temporary buffers will usually reside in cache, so accessing them should be quite efficient.
-rw-r--r--pixman/pixman-arm-neon.c46
-rw-r--r--pixman/pixman-arm-simd.c6
-rw-r--r--pixman/pixman-fast-path.c22
-rw-r--r--pixman/pixman-inlines.h56
-rw-r--r--pixman/pixman-sse2.c26
5 files changed, 102 insertions, 54 deletions
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 5d5c754..c17e939 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -263,29 +263,29 @@ pixman_blt_neon (uint32_t *src_bits,
}
/* Simple repeat fast path functions. */
-FAST_COMPOSITE_SIMPLE_REPEAT (uint16_t, neon_composite_src_0565_0565)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_src_8888_0565)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint16_t, neon_composite_src_0565_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_src_8888_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_src_x888_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_over_8888_n_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_over_8888_n_0565)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint16_t, neon_composite_over_0565_n_0565)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_over_8888_8_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_over_8888_8_0565)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint16_t, neon_composite_over_0565_8_0565)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_over_8888_8888_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_over_8888_0565)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_over_8888_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint8_t, neon_composite_add_8_8_8)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint16_t, neon_composite_add_0565_8_0565)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_add_8888_8_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_add_8888_8888_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_add_8888_n_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint8_t, neon_composite_add_8_8)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_add_8888_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint8_t, neon_composite_out_reverse_8_0565)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint8_t, neon_composite_out_reverse_8_8888)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint16_t, neon_composite_src_0565_0565, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_src_8888_0565, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint16_t, neon_composite_src_0565_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_src_8888_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_src_x888_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_over_8888_n_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_over_8888_n_0565, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint16_t, neon_composite_over_0565_n_0565, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_over_8888_8_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_over_8888_8_0565, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint16_t, neon_composite_over_0565_8_0565, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_over_8888_8888_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_over_8888_0565, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_over_8888_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint8_t, neon_composite_add_8_8_8, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint16_t, neon_composite_add_0565_8_0565, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_add_8888_8_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_add_8888_8888_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_add_8888_n_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint8_t, neon_composite_add_8_8, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, neon_composite_add_8888_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint8_t, neon_composite_out_reverse_8_0565, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint8_t, neon_composite_out_reverse_8_8888, 32)
static const pixman_fast_path_t arm_neon_fast_paths[] =
{
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index 59c00b1..7a4d899 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -393,9 +393,9 @@ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 8888_8888, SRC,
uint32_t, uint32_t)
/* Simple repeat fast path functions. */
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, armv6_composite_over_8888_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, armv6_composite_over_8888_n_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint8_t, armv6_composite_add_8_8)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, armv6_composite_over_8888_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, armv6_composite_over_8888_n_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint8_t, armv6_composite_add_8_8, 32)
static const pixman_fast_path_t arm_simd_fast_paths[] =
{
diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index 61597a7..351098e 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -1686,17 +1686,17 @@ FAST_SIMPLE_ROTATE (565, uint16_t)
FAST_SIMPLE_ROTATE (8888, uint32_t)
/* Simple repeat fast path functions. */
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, fast_composite_over_x888_8_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, fast_composite_over_8888_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, fast_composite_over_8888_0565)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, fast_composite_add_8888_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint8_t, fast_composite_add_8_8)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, fast_composite_src_x888_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, fast_composite_src_8888_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint16_t, fast_composite_src_0565_0565)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint8_t, fast_composite_src_8_8)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, fast_composite_src_x888_0565)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint8_t, fast_composite_in_8_8)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, fast_composite_over_x888_8_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, fast_composite_over_8888_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, fast_composite_over_8888_0565, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, fast_composite_add_8888_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint8_t, fast_composite_add_8_8, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, fast_composite_src_x888_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, fast_composite_src_8888_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint16_t, fast_composite_src_0565_0565, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint8_t, fast_composite_src_8_8, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, fast_composite_src_x888_0565, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint8_t, fast_composite_in_8_8, 32)
static const pixman_fast_path_t c_fast_paths[] =
{
diff --git a/pixman/pixman-inlines.h b/pixman/pixman-inlines.h
index fef8fbe..0513f44 100644
--- a/pixman/pixman-inlines.h
+++ b/pixman/pixman-inlines.h
@@ -233,7 +233,7 @@ pad_repeat_get_scanline_bounds (int32_t source_image_width,
* We can stitch source scanlines horizontally to produce best memory access patterns.
* By giving height of 1 to composite functions, we can use them as scanline functions.
*/
-#define FAST_COMPOSITE_SIMPLE_REPEAT(src_type, composite_func) \
+#define FAST_COMPOSITE_SIMPLE_REPEAT(src_type, composite_func, REPEAT_MIN_WIDTH) \
static void \
fast_composite_simple_repeat_##composite_func (pixman_implementation_t *imp, \
pixman_composite_info_t *info) \
@@ -243,6 +243,36 @@ fast_composite_simple_repeat_##composite_func (pixman_implementation_t *imp, \
int32_t width_remain; \
int32_t num_pixels; \
pixman_composite_info_t info2 = *info; \
+ int32_t src_width; \
+ int32_t i, j; \
+ pixman_image_t extended_src_image; \
+ src_type extended_src[REPEAT_MIN_WIDTH*2]; \
+ pixman_bool_t need_src_extension; \
+ src_type *src_line; \
+ int32_t src_stride; \
+ \
+ if (src_image->bits.width < REPEAT_MIN_WIDTH) \
+ { \
+ sx = src_x; \
+ repeat (PIXMAN_REPEAT_NORMAL, &sx, src_image->bits.width); \
+ sx += width; \
+ src_width = 0; \
+ \
+ while (src_width < REPEAT_MIN_WIDTH && src_width <= sx) \
+ src_width += src_image->bits.width; \
+ \
+ need_src_extension = TRUE; \
+ extended_src_image.bits = src_image->bits; \
+ extended_src_image.bits.bits = (uint32_t*)(&extended_src[0]); \
+ extended_src_image.bits.width = src_width; \
+ extended_src_image.bits.height = 1; \
+ info2.src_image = &extended_src_image; \
+ } \
+ else \
+ { \
+ src_width = src_image->bits.width; \
+ need_src_extension = FALSE; \
+ } \
\
sx = src_x; \
sy = src_y; \
@@ -250,19 +280,37 @@ fast_composite_simple_repeat_##composite_func (pixman_implementation_t *imp, \
while (--height >= 0) \
{ \
repeat (PIXMAN_REPEAT_NORMAL, &sy, src_image->bits.height); \
- repeat (PIXMAN_REPEAT_NORMAL, &sx, src_image->bits.width); \
+ repeat (PIXMAN_REPEAT_NORMAL, &sx, src_width); \
+ \
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, src_type, src_stride, src_line, 1); \
+ \
+ if (need_src_extension) \
+ { \
+ for (i=0; i<src_width; ) \
+ { \
+ for (j=0; j<src_image->bits.width; j++, i++) \
+ { \
+ extended_src[i] = src_line[j]; \
+ } \
+ } \
+ \
+ info2.src_y = 0; \
+ } \
+ else \
+ { \
+ info2.src_y = sy; \
+ } \
\
width_remain = width; \
\
while (width_remain > 0) \
{ \
- num_pixels = src_image->bits.width - sx; \
+ num_pixels = src_width - sx; \
\
if (num_pixels > width_remain) \
num_pixels = width_remain; \
\
info2.src_x = sx; \
- info2.src_y = sy; \
info2.width = num_pixels; \
info2.height = 1; \
\
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 022ed88..4a57abf 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -5414,19 +5414,19 @@ FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC,
NORMAL, FLAG_NONE)
/* Tiled repeat fast path functions. */
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, sse2_composite_over_8888_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, sse2_composite_over_8888_0565)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, sse2_composite_over_8888_8888_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, sse2_composite_over_8888_8_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, sse2_composite_over_x888_8_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, sse2_composite_over_x888_n_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, sse2_composite_over_8888_n_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, sse2_composite_src_8888_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint16_t, sse2_composite_src_0565_0565)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, sse2_composite_src_x888_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint8_t, sse2_composite_add_8_8)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, sse2_composite_add_8888_8888)
-FAST_COMPOSITE_SIMPLE_REPEAT (uint8_t, sse2_composite_in_8_8)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, sse2_composite_over_8888_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, sse2_composite_over_8888_0565, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, sse2_composite_over_8888_8888_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, sse2_composite_over_8888_8_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, sse2_composite_over_x888_8_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, sse2_composite_over_x888_n_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, sse2_composite_over_8888_n_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, sse2_composite_src_8888_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint16_t, sse2_composite_src_0565_0565, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, sse2_composite_src_x888_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint8_t, sse2_composite_add_8_8, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint32_t, sse2_composite_add_8888_8888, 32)
+FAST_COMPOSITE_SIMPLE_REPEAT (uint8_t, sse2_composite_in_8_8, 32)
static const pixman_fast_path_t sse2_fast_paths[] =
{