diff options
author | Søren Sandmann Pedersen <ssp@redhat.com> | 2010-04-25 20:25:50 -0400 |
---|---|---|
committer | Søren Sandmann Pedersen <ssp@redhat.com> | 2010-07-14 06:06:01 -0400 |
commit | 6c7ffde9ce8efc657458425c40885f9e2180ee6d (patch) | |
tree | 55089e6e4bbe8df4ad37265b9245a47ce223188f | |
parent | e439a8f6ea6ad3fa1ffb9b22fa78c066699ad7eb (diff) |
[sse2] Add sse2_composite_add_n_8() [sse2_in_n_8]
This shows up on the same Epiphany page.
-rw-r--r-- | pixman/pixman-sse2.c | 98 |
1 files changed, 98 insertions, 0 deletions
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 822fffe4..3dd7967a 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -5386,6 +5386,103 @@ sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
     _mm_empty ();
 }
 
+/* -------------------------------------------------------------------------
+ * composite_add_n_8: add the top byte of a solid source to every byte of the
+ * 8-bit destination rectangle with unsigned saturation; no mask is read. */
+
+static void
+sse2_composite_add_n_8 (pixman_implementation_t *imp,
+                        pixman_op_t              op,
+                        pixman_image_t *         src_image,
+                        pixman_image_t *         mask_image, /* not read by this function */
+                        pixman_image_t *         dst_image,
+                        int32_t                  src_x,
+                        int32_t                  src_y,
+                        int32_t                  mask_x,
+                        int32_t                  mask_y,
+                        int32_t                  dest_x,
+                        int32_t                  dest_y,
+                        int32_t                  width,
+                        int32_t                  height)
+{
+    uint8_t *dst_line, *dst;
+    int dst_stride;
+    int32_t w;
+    uint32_t src;
+
+    __m128i xmm_src;
+
+    PIXMAN_IMAGE_GET_LINE (
+        dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); /* macro defined outside this patch; presumably sets dst_line to the first row and dst_stride to the row step -- confirm */
+
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format); /* solid source value, requested in the destination's format */
+
+    src >>= 24; /* keep only bits 24-31 of the solid value */
+
+    if (src == 0x00) /* adding zero would leave every destination byte unchanged */
+        return;
+
+    if (src == 0xff) /* a saturating add of 0xff always yields 0xff, so fill instead */
+    {
+        pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride,
+                     8, dest_x, dest_y, width, height, 0xff); /* 8 bpp fill of the rectangle with 0xff */
+
+        return;
+    }
+
+    src = (src << 24) | (src << 16) | (src << 8) | src; /* replicate the byte into all four bytes of the word */
+    xmm_src = _mm_set_epi32 (src, src, src, src); /* ...and so into all 16 bytes of the 128-bit register */
+
+    while (height--)
+    {
+        dst = dst_line;
+        dst_line += dst_stride;
+        w = width;
+
+        /* prefetch hint for the start of this row (cache_prefetch is defined outside this patch) */
+        cache_prefetch ((__m128i*)dst);
+
+        while (w && ((unsigned long)dst & 15)) /* one byte at a time until dst is 16-byte aligned; NOTE(review): only the low 4 bits are tested, but uintptr_t would avoid truncating the pointer where long is narrower -- confirm target ABIs */
+        {
+            *dst = (uint8_t)_mm_cvtsi64_si32 (
+                _mm_adds_pu8 (
+                    _mm_movepi64_pi64 (xmm_src),
+                    _mm_cvtsi32_si64 (*dst))); /* 64-bit MMX saturating add; only the low result byte is stored */
+
+            w--;
+            dst++;
+        }
+
+        /* prefetch hint again from the current position, ahead of the 16-byte loop */
+        cache_prefetch ((__m128i*)dst);
+
+        while (w >= 16) /* 16 destination bytes per iteration */
+        {
+            /* prefetch hint for upcoming memory (cache_prefetch_next is defined outside this patch) */
+            cache_prefetch_next ((__m128i*)dst);
+
+            save_128_aligned (
+                (__m128i*)dst, _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst))); /* dst is 16-byte aligned here: the loop above ran until (dst & 15) == 0 or w == 0 */
+
+            dst += 16;
+            w -= 16;
+        }
+
+        while (w) /* tail of fewer than 16 bytes, handled like the alignment loop */
+        {
+            *dst = (uint8_t)_mm_cvtsi64_si32 (
+                _mm_adds_pu8 (
+                    _mm_movepi64_pi64 (xmm_src),
+                    _mm_cvtsi32_si64 (*dst))); /* 64-bit MMX saturating add; only the low result byte is stored */
+
+            w--;
+            dst++;
+        }
+    }
+
+    _mm_empty (); /* EMMS: reset MMX state after the __m64 intrinsics used in the byte loops */
+}
+
 /* ----------------------------------------------------------------------
  * composite_add_8000_8000
  */
@@ -6309,6 +6406,7 @@ static const pixman_fast_path_t sse2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, sse2_composite_add_8888_8888),
     PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888),
     PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8),
+    PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, sse2_composite_add_n_8),
 
     /* PIXMAN_OP_SRC */
     PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, sse2_composite_src_n_8_8888), |