diff options
author | Søren Sandmann Pedersen <ssp@redhat.com> | 2011-01-23 16:53:26 -0500 |
---|---|---|
committer | Søren Sandmann Pedersen <ssp@redhat.com> | 2011-01-28 14:37:04 -0500 |
commit | b7ff0749d0016be83f460294957d875f11af5802 (patch) | |
tree | 47749be8db3a3628362ebec0912215e40b8c5509 | |
parent | 89c9b5a6a7cbe5831bff01801b27e1d1b29a2a5e (diff) |
Add SSE2 fetcher for 0565 (branch: sse2-fetchers)
Before:
add_0565_0565 = L1: 61.08 L2: 61.03 M: 60.57 ( 10.95%) HT: 46.85 VT: 45.25 R: 39.99 RT: 20.41 ( 233Kops/s)
After:
add_0565_0565 = L1: 77.84 L2: 76.25 M: 75.38 ( 13.71%) HT: 55.99 VT: 54.56 R: 45.41 RT: 21.95 ( 255Kops/s)
-rw-r--r-- | pixman/pixman-sse2.c | 47 |
1 files changed, 47 insertions, 0 deletions
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index e28b9866..91adc056 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -6082,6 +6082,52 @@ sse2_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
 }
 
 static uint32_t *
+sse2_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
+{
+    int w = iter->width;
+    uint32_t *dst = iter->buffer;
+    uint16_t *src = (uint16_t *)iter->bits;
+    __m128i ff000000 = mask_ff000000;
+
+    iter->bits += iter->stride;
+
+    while (w && ((unsigned long)dst) & 0x0f)
+    {
+        uint16_t s = *src++;
+
+        *dst++ = CONVERT_0565_TO_8888 (s);
+        w--;
+    }
+
+    while (w >= 8)
+    {
+        __m128i lo, hi, s;
+
+        s = _mm_loadu_si128 ((__m128i *)src);
+
+        lo = unpack_565_to_8888 (_mm_unpacklo_epi16 (s, _mm_setzero_si128 ()));
+        hi = unpack_565_to_8888 (_mm_unpackhi_epi16 (s, _mm_setzero_si128 ()));
+
+        save_128_aligned ((__m128i *)(dst + 0), _mm_or_si128 (lo, ff000000));
+        save_128_aligned ((__m128i *)(dst + 4), _mm_or_si128 (hi, ff000000));
+
+        dst += 8;
+        src += 8;
+        w -= 8;
+    }
+
+    while (w)
+    {
+        uint16_t s = *src++;
+
+        *dst++ = CONVERT_0565_TO_8888 (s);
+        w--;
+    }
+
+    return iter->buffer;
+}
+
+static uint32_t *
 sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
 {
     int w = iter->width;
@@ -6136,6 +6182,7 @@ typedef struct
 static const fetcher_info_t fetchers[] =
 {
     { PIXMAN_x8r8g8b8, sse2_fetch_x8r8g8b8 },
+    { PIXMAN_r5g6b5, sse2_fetch_r5g6b5 },
     { PIXMAN_a8, sse2_fetch_a8 },
     { PIXMAN_null }
 };