summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Matt Turner <mattst88@gmail.com> 2012-04-18 16:24:28 -0400
committer Matt Turner <mattst88@gmail.com> 2012-04-18 20:29:51 -0400
commit bf80274df97362713773ac5b9752d713ebe610f2 (patch)
tree 0ee0b052fc1133c781c18bac89cc6ec344db4466
parent 000a32aa38e6bff799e9515674bef1bad04325d8 (diff)
mmx: add x8r8g8b8 fetcher (branch: mmx-fetchers)
Loongson:
add_x888_x888 = L1: 29.36 L2: 27.81 M: 14.05 ( 38.74%) HT: 12.45 VT: 11.78 R: 11.52 RT: 7.23 ( 75Kops/s)
add_x888_x888 = L1: 36.06 L2: 34.55 M: 14.81 ( 41.03%) HT: 14.01 VT: 13.41 R: 13.06 RT: 9.06 ( 90Kops/s)
src_x888_8_x888 = L1: 21.92 L2: 20.15 M: 13.35 ( 41.42%) HT: 11.70 VT: 10.95 R: 10.53 RT: 6.18 ( 65Kops/s)
src_x888_8_x888 = L1: 25.43 L2: 23.51 M: 14.12 ( 44.00%) HT: 13.14 VT: 12.50 R: 11.86 RT: 7.49 ( 76Kops/s)
over_x888_8_0565 = L1: 10.64 L2: 10.17 M: 7.74 ( 21.35%) HT: 6.83 VT: 6.55 R: 6.34 RT: 4.03 ( 46Kops/s)
over_x888_8_0565 = L1: 11.41 L2: 10.97 M: 8.07 ( 22.36%) HT: 7.42 VT: 7.18 R: 6.92 RT: 4.62 ( 52Kops/s)

ARM/iwMMXt:
add_x888_x888 = L1: 22.10 L2: 18.93 M: 13.48 ( 32.29%) HT: 11.32 VT: 10.64 R: 10.36 RT: 6.51 ( 61Kops/s)
add_x888_x888 = L1: 24.26 L2: 20.83 M: 14.52 ( 35.64%) HT: 12.66 VT: 12.98 R: 11.34 RT: 7.69 ( 72Kops/s)
src_x888_8_x888 = L1: 19.33 L2: 17.66 M: 14.26 ( 38.43%) HT: 11.53 VT: 10.83 R: 10.57 RT: 6.12 ( 58Kops/s)
src_x888_8_x888 = L1: 21.23 L2: 19.60 M: 15.41 ( 42.55%) HT: 12.66 VT: 13.30 R: 11.55 RT: 7.32 ( 67Kops/s)
over_x888_8_0565 = L1: 8.15 L2: 7.56 M: 6.50 ( 15.58%) HT: 5.73 VT: 5.49 R: 5.50 RT: 3.53 ( 38Kops/s)
over_x888_8_0565 = L1: 8.35 L2: 7.85 M: 6.68 ( 16.40%) HT: 6.12 VT: 5.97 R: 5.78 RT: 4.03 ( 43Kops/s)
-rw-r--r--pixman/pixman-mmx.c42
1 files changed, 42 insertions, 0 deletions
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index c127f4a..7511216 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -3210,6 +3210,47 @@ mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp,
}
+/*
+ * Scanline fetcher registered for PIXMAN_x8r8g8b8.
+ *
+ * Copies iter->width 32-bit pixels from iter->bits into iter->buffer,
+ * ORing 0xff000000 into every pixel so the top byte is always 0xff, then
+ * advances iter->bits by iter->stride ready for the next call.
+ *
+ * The mask argument is not used.  Returns iter->buffer.
+ */
static uint32_t *
+mmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
+{
+    int w = iter->width;
+    uint32_t *dst = iter->buffer;
+    uint32_t *src = (uint32_t *)iter->bits;
+
+    iter->bits += iter->stride;
+
+    /* Scalar head: one pixel at a time until dst is 8-byte aligned, because
+     * the vector loop below stores through plain __m64 pointers.
+     */
+    while (w && ((unsigned long)dst & 7))
+    {
+        *dst++ = (*src++) | 0xff000000;
+        w--;
+    }
+
+    /* Vector body: eight pixels (four 64-bit words) per iteration.  src is
+     * read through ldq_u, presumably an unaligned-tolerant load (verify
+     * against its definition); MC (ff000000) is presumably 0xff000000
+     * replicated in both 32-bit lanes (verify against its definition).
+     */
+    while (w >= 8)
+    {
+        __m64 vsrc1 = ldq_u ((__m64 *)(src + 0));
+        __m64 vsrc2 = ldq_u ((__m64 *)(src + 2));
+        __m64 vsrc3 = ldq_u ((__m64 *)(src + 4));
+        __m64 vsrc4 = ldq_u ((__m64 *)(src + 6));
+
+        *(__m64 *)(dst + 0) = _mm_or_si64 (vsrc1, MC (ff000000));
+        *(__m64 *)(dst + 2) = _mm_or_si64 (vsrc2, MC (ff000000));
+        *(__m64 *)(dst + 4) = _mm_or_si64 (vsrc3, MC (ff000000));
+        *(__m64 *)(dst + 6) = _mm_or_si64 (vsrc4, MC (ff000000));
+
+        dst += 8;
+        src += 8;
+        w -= 8;
+    }
+
+    /* Scalar tail: the remaining 0..7 pixels. */
+    while (w)
+    {
+        *dst++ = (*src++) | 0xff000000;
+        w--;
+    }
+
+    /* Reset the MMX state before returning: on x86 the MMX registers alias
+     * the x87 register stack, so floating-point code run by the caller
+     * afterwards would otherwise misbehave.
+     */
+    _mm_empty ();
+
+    return iter->buffer;
+}
+
+static uint32_t *
mmx_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
{
int w = iter->width;
@@ -3307,6 +3348,7 @@ typedef struct
static const fetcher_info_t fetchers[] =
{
+ { PIXMAN_x8r8g8b8, mmx_fetch_x8r8g8b8 },
{ PIXMAN_r5g6b5, mmx_fetch_r5g6b5 },
{ PIXMAN_a8, mmx_fetch_a8 },
{ PIXMAN_null }