summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Søren Sandmann Pedersen <ssp@redhat.com> 2011-01-12 06:38:54 -0500
committer: Søren Sandmann Pedersen <ssp@redhat.com> 2011-01-28 14:36:58 -0500
commit: b1e7dc3730429362ff0ccc6468b6d54163c5e4b7 (patch)
tree: bf4c3317fc8e61d377ebf62c0a19cd30be0f682d
parent: 13aed37758d1af5b5bc2a80d886b764d4c45827e (diff)
Add SSE2 fetcher for x8r8g8b8
New output of lowlevel-blt-bench over_x888_8_0565:

    over_x888_8_0565 = L1: 55.68 L2: 55.11 M: 52.83 ( 19.04%) HT: 39.62 VT: 37.70 R: 30.88 RT: 14.62 ( 174Kops/s)

The fetcher is looked up in a table, so that other fetchers can easily be added.
-rw-r--r-- pixman/pixman-private.h 18
-rw-r--r-- pixman/pixman-sse2.c 90
2 files changed, 102 insertions, 6 deletions
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 664260b9..f5d0ba16 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -183,6 +183,9 @@ union pixman_image
};
typedef struct pixman_iter_t pixman_iter_t;
+typedef uint32_t *(* pixman_iter_get_scanline_t) (pixman_iter_t *iter, const uint32_t *mask);
+typedef void (* pixman_iter_write_back_t) (pixman_iter_t *iter);
+
typedef enum
{
ITER_NARROW = (1 << 0),
@@ -209,13 +212,16 @@ typedef enum
struct pixman_iter_t
{
- uint32_t *(* get_scanline) (pixman_iter_t *iter, const uint32_t *mask);
- void (* write_back) (pixman_iter_t *iter);
+ pixman_iter_get_scanline_t get_scanline;
+ pixman_iter_write_back_t write_back;
+
+ pixman_image_t * image;
+ uint32_t * buffer;
+ int x, y;
+ int width;
- pixman_image_t * image;
- uint32_t * buffer;
- int x, y;
- int width;
+ uint8_t * bits;
+ int stride;
};
void
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index ae554560..10a3dd06 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -5953,6 +5953,94 @@ sse2_fill (pixman_implementation_t *imp,
return TRUE;
}
+static uint32_t *
+sse2_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
+{
+ int w = iter->width;
+ __m128i ff000000 = mask_ff000000;
+ uint32_t *dst = iter->buffer;
+ uint32_t *src = (uint32_t *)iter->bits;
+
+ iter->bits += iter->stride;
+
+ while (w && ((unsigned long)dst) & 0x0f)
+ {
+ *dst++ = (*src++) | 0xff000000;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ save_128_aligned (
+ (__m128i *)dst, _mm_or_si128 (
+ load_128_unaligned ((__m128i *)src), ff000000));
+
+ dst += 4;
+ src += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ *dst++ = (*src++) | 0xff000000;
+ w--;
+ }
+
+ return iter->buffer;
+}
+
+typedef struct
+{
+ pixman_format_code_t format;
+ pixman_iter_get_scanline_t get_scanline;
+} fetcher_info_t;
+
+static const fetcher_info_t fetchers[] =
+{
+ { PIXMAN_x8r8g8b8, sse2_fetch_x8r8g8b8 },
+ { PIXMAN_null }
+};
+
+static void
+sse2_src_iter_init (pixman_implementation_t *imp,
+ pixman_iter_t *iter,
+ pixman_image_t *image,
+ int x, int y, int width, int height,
+ uint8_t *buffer, iter_flags_t flags)
+{
+#define FLAGS \
+ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM)
+
+ if ((flags & ITER_NARROW) &&
+ (image->common.flags & FLAGS) == FLAGS &&
+ x >= 0 && y >= 0 &&
+ x + width <= image->bits.width &&
+ y + height <= image->bits.height)
+ {
+ const fetcher_info_t *f;
+
+ for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
+ {
+ if (image->common.extended_format_code == f->format)
+ {
+ uint8_t *b = (uint8_t *)image->bits.bits;
+ int s = image->bits.rowstride * 4;
+
+ iter->bits = b + s * y + x * PIXMAN_FORMAT_BPP (f->format) / 8;
+ iter->stride = s;
+ iter->width = width;
+ iter->buffer = (uint32_t *)buffer;
+
+ iter->get_scanline = f->get_scanline;
+ return;
+ }
+ }
+ }
+
+ _pixman_implementation_src_iter_init (
+ imp->delegate, iter, image, x, y, width, height, buffer, flags);
+}
+
#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
__attribute__((__force_align_arg_pointer__))
#endif
@@ -6020,6 +6108,8 @@ _pixman_implementation_create_sse2 (pixman_implementation_t *fallback)
imp->blt = sse2_blt;
imp->fill = sse2_fill;
+ imp->src_iter_init = sse2_src_iter_init;
+
return imp;
}