diff options
author | Søren Sandmann Pedersen <ssp@redhat.com> | 2011-01-12 06:38:54 -0500 |
---|---|---|
committer | Søren Sandmann Pedersen <ssp@redhat.com> | 2011-01-28 14:36:58 -0500 |
commit | b1e7dc3730429362ff0ccc6468b6d54163c5e4b7 (patch) | |
tree | bf4c3317fc8e61d377ebf62c0a19cd30be0f682d | |
parent | 13aed37758d1af5b5bc2a80d886b764d4c45827e (diff) |
Add SSE2 fetcher for x8r8g8b8
New output of lowlevel-blt-bench over_x888_8_0565:
over_x888_8_0565 = L1: 55.68 L2: 55.11 M: 52.83 ( 19.04%) HT: 39.62 VT: 37.70 R: 30.88 RT: 14.62 ( 174Kops/s)
The fetcher is looked up in a table, so that other fetchers can easily
be added.
-rw-r--r-- | pixman/pixman-private.h | 18 |
-rw-r--r-- | pixman/pixman-sse2.c | 90 |
2 files changed, 102 insertions, 6 deletions
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 664260b9..f5d0ba16 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -183,6 +183,9 @@ union pixman_image
 };
 
 typedef struct pixman_iter_t pixman_iter_t;
+typedef uint32_t *(* pixman_iter_get_scanline_t) (pixman_iter_t *iter, const uint32_t *mask);
+typedef void (* pixman_iter_write_back_t) (pixman_iter_t *iter);
+
 typedef enum
 {
     ITER_NARROW = (1 << 0),
@@ -209,13 +212,16 @@ typedef enum
 
 struct pixman_iter_t
 {
-    uint32_t *(* get_scanline) (pixman_iter_t *iter, const uint32_t *mask);
-    void (* write_back) (pixman_iter_t *iter);
+    pixman_iter_get_scanline_t get_scanline;
+    pixman_iter_write_back_t write_back;
+
+    pixman_image_t * image;
+    uint32_t * buffer;
+    int x, y;
+    int width;
 
-    pixman_image_t * image;
-    uint32_t * buffer;
-    int x, y;
-    int width;
+    uint8_t * bits;
+    int stride;
 };
 
 void
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index ae554560..10a3dd06 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -5953,6 +5953,94 @@ sse2_fill (pixman_implementation_t *imp,
     return TRUE;
 }
 
+static uint32_t *
+sse2_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
+{
+    int w = iter->width;
+    __m128i ff000000 = mask_ff000000;
+    uint32_t *dst = iter->buffer;
+    uint32_t *src = (uint32_t *)iter->bits;
+
+    iter->bits += iter->stride;
+
+    while (w && ((unsigned long)dst) & 0x0f)
+    {
+        *dst++ = (*src++) | 0xff000000;
+        w--;
+    }
+
+    while (w >= 4)
+    {
+        save_128_aligned (
+            (__m128i *)dst, _mm_or_si128 (
+                load_128_unaligned ((__m128i *)src), ff000000));
+
+        dst += 4;
+        src += 4;
+        w -= 4;
+    }
+
+    while (w)
+    {
+        *dst++ = (*src++) | 0xff000000;
+        w--;
+    }
+
+    return iter->buffer;
+}
+
+typedef struct
+{
+    pixman_format_code_t format;
+    pixman_iter_get_scanline_t get_scanline;
+} fetcher_info_t;
+
+static const fetcher_info_t fetchers[] =
+{
+    { PIXMAN_x8r8g8b8, sse2_fetch_x8r8g8b8 },
+    { PIXMAN_null }
+};
+
+static void
+sse2_src_iter_init (pixman_implementation_t *imp,
+                    pixman_iter_t *iter,
+                    pixman_image_t *image,
+                    int x, int y, int width, int height,
+                    uint8_t *buffer, iter_flags_t flags)
+{
+#define FLAGS \
+    (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM)
+
+    if ((flags & ITER_NARROW) &&
+        (image->common.flags & FLAGS) == FLAGS &&
+        x >= 0 && y >= 0 &&
+        x + width <= image->bits.width &&
+        y + height <= image->bits.height)
+    {
+        const fetcher_info_t *f;
+
+        for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
+        {
+            if (image->common.extended_format_code == f->format)
+            {
+                uint8_t *b = (uint8_t *)image->bits.bits;
+                int s = image->bits.rowstride * 4;
+
+                iter->bits = b + s * y + x * PIXMAN_FORMAT_BPP (f->format) / 8;
+                iter->stride = s;
+                iter->width = width;
+                iter->buffer = (uint32_t *)buffer;
+
+                iter->get_scanline = f->get_scanline;
+                return;
+            }
+        }
+    }
+
+    _pixman_implementation_src_iter_init (
+        imp->delegate, iter, image, x, y, width, height, buffer, flags);
+}
+
 #if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
 __attribute__((__force_align_arg_pointer__))
 #endif
@@ -6020,6 +6108,8 @@ _pixman_implementation_create_sse2 (pixman_implementation_t *fallback)
     imp->blt = sse2_blt;
     imp->fill = sse2_fill;
 
+    imp->src_iter_init = sse2_src_iter_init;
+
     return imp;
 }
 