summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSøren Sandmann Pedersen <ssp@redhat.com>2011-01-23 16:53:26 -0500
committerSøren Sandmann Pedersen <ssp@redhat.com>2011-01-28 14:37:04 -0500
commitb7ff0749d0016be83f460294957d875f11af5802 (patch)
tree47749be8db3a3628362ebec0912215e40b8c5509
parent89c9b5a6a7cbe5831bff01801b27e1d1b29a2a5e (diff)
Add SSE2 fetcher for 0565sse2-fetchers
Before: add_0565_0565 = L1: 61.08 L2: 61.03 M: 60.57 ( 10.95%) HT: 46.85 VT: 45.25 R: 39.99 RT: 20.41 ( 233Kops/s) After: add_0565_0565 = L1: 77.84 L2: 76.25 M: 75.38 ( 13.71%) HT: 55.99 VT: 54.56 R: 45.41 RT: 21.95 ( 255Kops/s)
-rw-r--r--pixman/pixman-sse2.c47
1 files changed, 47 insertions, 0 deletions
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index e28b9866..91adc056 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -6082,6 +6082,52 @@ sse2_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
}
static uint32_t *
+sse2_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
+{
+ int w = iter->width;
+ uint32_t *dst = iter->buffer;
+ uint16_t *src = (uint16_t *)iter->bits;
+ __m128i ff000000 = mask_ff000000;
+
+ iter->bits += iter->stride;
+
+ while (w && ((unsigned long)dst) & 0x0f)
+ {
+ uint16_t s = *src++;
+
+ *dst++ = CONVERT_0565_TO_8888 (s);
+ w--;
+ }
+
+ while (w >= 8)
+ {
+ __m128i lo, hi, s;
+
+ s = _mm_loadu_si128 ((__m128i *)src);
+
+ lo = unpack_565_to_8888 (_mm_unpacklo_epi16 (s, _mm_setzero_si128 ()));
+ hi = unpack_565_to_8888 (_mm_unpackhi_epi16 (s, _mm_setzero_si128 ()));
+
+ save_128_aligned ((__m128i *)(dst + 0), _mm_or_si128 (lo, ff000000));
+ save_128_aligned ((__m128i *)(dst + 4), _mm_or_si128 (hi, ff000000));
+
+ dst += 8;
+ src += 8;
+ w -= 8;
+ }
+
+ while (w)
+ {
+ uint16_t s = *src++;
+
+ *dst++ = CONVERT_0565_TO_8888 (s);
+ w--;
+ }
+
+ return iter->buffer;
+}
+
+static uint32_t *
sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
{
int w = iter->width;
@@ -6136,6 +6182,7 @@ typedef struct
static const fetcher_info_t fetchers[] =
{
{ PIXMAN_x8r8g8b8, sse2_fetch_x8r8g8b8 },
+ { PIXMAN_r5g6b5, sse2_fetch_r5g6b5 },
{ PIXMAN_a8, sse2_fetch_a8 },
{ PIXMAN_null }
};