summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Siarhei Siamashka <siarhei.siamashka@gmail.com> 2012-12-03 17:42:21 +0200
committer: Siarhei Siamashka <siarhei.siamashka@gmail.com> 2012-12-03 23:50:29 +0200
commit: b4c1c69aa2e918ab095701b6786a63265ffdd7d5 (patch)
tree: 8f7cb5b58a4114525087be0b2043115dc8a2a581
parent: 4c36cc7e033f3d73336fc0b9c6c735e0e167ef7d (diff)
Faster fetch for the C variant of r5g6b5 src/dest iterator [iterators-r5g6b5]
Processing two pixels at once is used to reduce the number of arithmetic operations.

The speedup relative to the generic fetch_scanline_r5g6b5() from "pixman-access.c" (pixman was compiled with gcc 4.7.2):

    MIPS 74K         480MHz  :   20.32 MPix/s ->   26.47 MPix/s
    ARM11            700MHz  :   34.95 MPix/s ->   38.22 MPix/s
    ARM Cortex-A8   1000MHz  :   87.44 MPix/s ->  100.92 MPix/s
    ARM Cortex-A9   1700MHz  :  150.95 MPix/s ->  158.13 MPix/s
    ARM Cortex-A15  1700MHz  :  148.91 MPix/s ->  155.42 MPix/s
    IBM Cell PPU    3200MHz  :   75.29 MPix/s ->   98.33 MPix/s
    Intel Core i7   2800MHz  :  257.02 MPix/s ->  376.93 MPix/s

That's the performance for C code (SIMD and assembly optimizations are disabled via the PIXMAN_DISABLE environment variable).
-rw-r--r-- pixman/pixman-fast-path.c 31
1 files changed, 30 insertions, 1 deletions
diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index 4aa6d73..696aca9 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -2176,11 +2176,40 @@ fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
iter->bits += iter->stride;
- while (w > 0)
+ /* Align the source buffer at 4 bytes boundary */
+ if (w > 0 && ((uintptr_t)src & 3))
{
*dst++ = convert_0565_to_8888 (*src++);
w--;
}
+ /* Process two pixels per iteration */
+ while ((w -= 2) >= 0)
+ {
+ uint32_t sr, sb, sg, t0, t1;
+ uint32_t s = *(const uint32_t *)src;
+ src += 2;
+ sr = (s >> 8) & 0x00F800F8;
+ sb = (s << 3) & 0x00F800F8;
+ sg = (s >> 3) & 0x00FC00FC;
+ sr |= sr >> 5;
+ sb |= sb >> 5;
+ sg |= sg >> 6;
+ t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) |
+ (sb & 0xFF) | 0xFF000000;
+ t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) |
+ (sb >> 16) | 0xFF000000;
+#ifdef WORDS_BIGENDIAN
+ *dst++ = t1;
+ *dst++ = t0;
+#else
+ *dst++ = t0;
+ *dst++ = t1;
+#endif
+ }
+ if (w & 1)
+ {
+ *dst = convert_0565_to_8888 (*src);
+ }
return iter->buffer;
}