summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Matt Turner <mattst88@gmail.com> 2012-02-24 15:23:09 -0500
committer: Matt Turner <mattst88@gmail.com> 2012-04-14 19:33:50 -0400
commit: b6782c8b935d2afc455f66a400f62b9334fbec10 (patch)
tree: 01d4d7e143b9f32b9be5fcd53f5b2ea32aca806a
parent: 237142b2f851dd7e10abec544f9c694d0b0e63ab (diff)
mmx: don't pack and unpack src unnecessarily
The combine function was store8888'ing the result, and all consumers were immediately load8888'ing it, causing lots of unnecessary pack and unpack instructions. It's a very straightforward conversion, except for mmx_combine_over_u and mmx_combine_saturate_u. mmx_combine_over_u was testing the integer result to skip pixels, so we use the is_* functions to test the __m64 data directly without loading it into an integer register. For mmx_combine_saturate_u there's not a lot we can do, since it uses DIV_UN8; the combined source is still stored to an integer there so that its alpha can be extracted.
-rw-r--r-- pixman/pixman-mmx.c 82
1 files changed, 35 insertions, 47 deletions
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 54045dd..fcd01e3 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -564,23 +564,20 @@ pix_add_mul (__m64 x, __m64 a, __m64 y, __m64 b)
/* --------------- MMX code patch for fbcompose.c --------------------- */
-static force_inline uint32_t
+static force_inline __m64
combine (const uint32_t *src, const uint32_t *mask)
{
- uint32_t ssrc = *src;
+ __m64 vsrc = load8888 (src);
if (mask)
{
__m64 m = load8888 (mask);
- __m64 s = load8888 (&ssrc);
m = expand_alpha (m);
- s = pix_multiply (s, m);
-
- store8888 (&ssrc, s);
+ vsrc = pix_multiply (vsrc, m);
}
- return ssrc;
+ return vsrc;
}
static void
@@ -595,19 +592,16 @@ mmx_combine_over_u (pixman_implementation_t *imp,
while (dest < end)
{
- uint32_t ssrc = combine (src, mask);
- uint32_t a = ssrc >> 24;
+ __m64 vsrc = combine (src, mask);
- if (a == 0xff)
+ if (is_opaque (vsrc))
{
- *dest = ssrc;
+ store8888 (dest, vsrc);
}
- else if (ssrc)
+ else if (!is_zero (vsrc))
{
- __m64 s, sa;
- s = load8888 (&ssrc);
- sa = expand_alpha (s);
- store8888 (dest, over (s, sa, load8888 (dest)));
+ __m64 sa = expand_alpha (vsrc);
+ store8888 (dest, over (vsrc, sa, load8888 (dest)));
}
++dest;
@@ -631,11 +625,11 @@ mmx_combine_over_reverse_u (pixman_implementation_t *imp,
while (dest < end)
{
__m64 d, da;
- uint32_t s = combine (src, mask);
+ __m64 s = combine (src, mask);
d = load8888 (dest);
da = expand_alpha (d);
- store8888 (dest, over (d, da, load8888 (&s)));
+ store8888 (dest, over (d, da, s));
++dest;
++src;
@@ -657,10 +651,9 @@ mmx_combine_in_u (pixman_implementation_t *imp,
while (dest < end)
{
- __m64 x, a;
- uint32_t ssrc = combine (src, mask);
+ __m64 a;
+ __m64 x = combine (src, mask);
- x = load8888 (&ssrc);
a = load8888 (dest);
a = expand_alpha (a);
x = pix_multiply (x, a);
@@ -687,11 +680,10 @@ mmx_combine_in_reverse_u (pixman_implementation_t *imp,
while (dest < end)
{
- __m64 x, a;
- uint32_t ssrc = combine (src, mask);
+ __m64 a = combine (src, mask);
+ __m64 x;
x = load8888 (dest);
- a = load8888 (&ssrc);
a = expand_alpha (a);
x = pix_multiply (x, a);
store8888 (dest, x);
@@ -716,10 +708,9 @@ mmx_combine_out_u (pixman_implementation_t *imp,
while (dest < end)
{
- __m64 x, a;
- uint32_t ssrc = combine (src, mask);
+ __m64 a;
+ __m64 x = combine (src, mask);
- x = load8888 (&ssrc);
a = load8888 (dest);
a = expand_alpha (a);
a = negate (a);
@@ -746,11 +737,10 @@ mmx_combine_out_reverse_u (pixman_implementation_t *imp,
while (dest < end)
{
- __m64 x, a;
- uint32_t ssrc = combine (src, mask);
+ __m64 a = combine (src, mask);
+ __m64 x;
x = load8888 (dest);
- a = load8888 (&ssrc);
a = expand_alpha (a);
a = negate (a);
x = pix_multiply (x, a);
@@ -777,10 +767,9 @@ mmx_combine_atop_u (pixman_implementation_t *imp,
while (dest < end)
{
- __m64 s, da, d, sia;
- uint32_t ssrc = combine (src, mask);
+ __m64 da, d, sia;
+ __m64 s = combine (src, mask);
- s = load8888 (&ssrc);
d = load8888 (dest);
sia = expand_alpha (s);
sia = negate (sia);
@@ -810,10 +799,9 @@ mmx_combine_atop_reverse_u (pixman_implementation_t *imp,
while (dest < end)
{
- __m64 s, dia, d, sa;
- uint32_t ssrc = combine (src, mask);
+ __m64 dia, d, sa;
+ __m64 s = combine (src, mask);
- s = load8888 (&ssrc);
d = load8888 (dest);
sa = expand_alpha (s);
dia = expand_alpha (d);
@@ -841,10 +829,9 @@ mmx_combine_xor_u (pixman_implementation_t *imp,
while (dest < end)
{
- __m64 s, dia, d, sia;
- uint32_t ssrc = combine (src, mask);
+ __m64 dia, d, sia;
+ __m64 s = combine (src, mask);
- s = load8888 (&ssrc);
d = load8888 (dest);
sia = expand_alpha (s);
dia = expand_alpha (d);
@@ -873,10 +860,9 @@ mmx_combine_add_u (pixman_implementation_t *imp,
while (dest < end)
{
- __m64 s, d;
- uint32_t ssrc = combine (src, mask);
+ __m64 d;
+ __m64 s = combine (src, mask);
- s = load8888 (&ssrc);
d = load8888 (dest);
s = pix_add (s, d);
store8888 (dest, s);
@@ -901,12 +887,14 @@ mmx_combine_saturate_u (pixman_implementation_t *imp,
while (dest < end)
{
- uint32_t s = combine (src, mask);
+ uint32_t s, sa, da;
uint32_t d = *dest;
- __m64 ms = load8888 (&s);
- __m64 md = load8888 (&d);
- uint32_t sa = s >> 24;
- uint32_t da = ~d >> 24;
+ __m64 ms = combine (src, mask);
+ __m64 md = load8888 (dest);
+
+ store8888(&s, ms);
+ da = ~d >> 24;
+ sa = s >> 24;
if (sa > da)
{