summary | refs | log | tree | commit | diff
path: root/fb/fbmmx.c
diff options
context:
space:
mode:
Diffstat (limited to 'fb/fbmmx.c')
-rw-r--r-- fb/fbmmx.c 139
1 files changed, 94 insertions, 45 deletions
diff --git a/fb/fbmmx.c b/fb/fbmmx.c
index 875fec01f..7f1a7b12b 100644
--- a/fb/fbmmx.c
+++ b/fb/fbmmx.c
@@ -85,30 +85,40 @@
typedef unsigned long long ullong;
+#ifdef __GNUC__
+typedef ullong mmxdatafield;
+#endif
+#ifdef _MSC_VER
+typedef unsigned __int64 ullong;
+typedef __m64 mmxdatafield;
+#endif
+
typedef struct
{
- ullong mmx_4x00ff;
- ullong mmx_4x0080;
- ullong mmx_565_rgb;
- ullong mmx_565_unpack_multiplier;
- ullong mmx_565_r;
- ullong mmx_565_g;
- ullong mmx_565_b;
- ullong mmx_mask_0;
- ullong mmx_mask_1;
- ullong mmx_mask_2;
- ullong mmx_mask_3;
- ullong mmx_full_alpha;
- ullong mmx_ffff0000ffff0000;
- ullong mmx_0000ffff00000000;
- ullong mmx_000000000000ffff;
+ mmxdatafield mmx_4x00ff;
+ mmxdatafield mmx_4x0080;
+ mmxdatafield mmx_565_rgb;
+ mmxdatafield mmx_565_unpack_multiplier;
+ mmxdatafield mmx_565_r;
+ mmxdatafield mmx_565_g;
+ mmxdatafield mmx_565_b;
+ mmxdatafield mmx_mask_0;
+ mmxdatafield mmx_mask_1;
+ mmxdatafield mmx_mask_2;
+ mmxdatafield mmx_mask_3;
+ mmxdatafield mmx_full_alpha;
+ mmxdatafield mmx_ffff0000ffff0000;
+ mmxdatafield mmx_0000ffff00000000;
+ mmxdatafield mmx_000000000000ffff;
} MMXData;
static const MMXData c =
{
+#ifdef __GNUC__
.mmx_4x00ff = 0x00ff00ff00ff00ffULL,
.mmx_4x0080 = 0x0080008000800080ULL,
.mmx_565_rgb = 0x000001f0003f001fULL,
+ .mmx_565_unpack_multiplier = 0x0000008404100840ULL,
.mmx_565_r = 0x000000f800000000ULL,
.mmx_565_g = 0x0000000000fc0000ULL,
.mmx_565_b = 0x00000000000000f8ULL,
@@ -117,15 +127,42 @@ static const MMXData c =
.mmx_mask_2 = 0xffff0000ffffffffULL,
.mmx_mask_3 = 0x0000ffffffffffffULL,
.mmx_full_alpha = 0x00ff000000000000ULL,
- .mmx_565_unpack_multiplier = 0x0000008404100840ULL,
.mmx_ffff0000ffff0000 = 0xffff0000ffff0000ULL,
.mmx_0000ffff00000000 = 0x0000ffff00000000ULL,
.mmx_000000000000ffff = 0x000000000000ffffULL,
+#endif
+#ifdef _MSC_VER
+ { 0x00ff00ff00ff00ffUI64 },
+ { 0x0080008000800080UI64 },
+ { 0x000001f0003f001fUI64 },
+ { 0x0000008404100840UI64 },
+ { 0x000000f800000000UI64 },
+ { 0x0000000000fc0000UI64 },
+ { 0x00000000000000f8UI64 },
+ { 0xffffffffffff0000UI64 },
+ { 0xffffffff0000ffffUI64 },
+ { 0xffff0000ffffffffUI64 },
+ { 0x0000ffffffffffffUI64 },
+ { 0x00ff000000000000UI64 },
+ { 0xffff0000ffff0000UI64 },
+ { 0x0000ffff00000000UI64 },
+ { 0x000000000000ffffUI64 },
+#endif
};
+#ifdef _MSC_VER
+#undef inline
+#define inline __forceinline
+#endif
+
+#ifdef __GNUC__
#define MC(x) ((__m64) c.mmx_##x)
+#endif
+#ifdef _MSC_VER
+#define MC(x) c.mmx_##x
+#endif
-static __inline__ __m64
+static inline __m64
shift (__m64 v, int s)
{
if (s > 0)
@@ -136,13 +173,13 @@ shift (__m64 v, int s)
return v;
}
-static __inline__ __m64
+static inline __m64
negate (__m64 mask)
{
return _mm_xor_si64 (mask, MC(4x00ff));
}
-static __inline__ __m64
+static inline __m64
pix_multiply (__m64 a, __m64 b)
{
__m64 res;
@@ -155,7 +192,7 @@ pix_multiply (__m64 a, __m64 b)
return res;
}
-static __inline__ __m64
+static inline __m64
pix_add (__m64 a, __m64 b)
{
return _mm_adds_pu8 (a, b);
@@ -163,19 +200,19 @@ pix_add (__m64 a, __m64 b)
#ifdef USE_SSE
-static __inline__ __m64
+static inline __m64
expand_alpha (__m64 pixel)
{
return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE(3, 3, 3, 3));
}
-static __inline__ __m64
+static inline __m64
expand_alpha_rev (__m64 pixel)
{
return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE(0, 0, 0, 0));
}
-static __inline__ __m64
+static inline __m64
invert_colors (__m64 pixel)
{
return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE(3, 0, 1, 2));
@@ -183,7 +220,7 @@ invert_colors (__m64 pixel)
#else
-static __inline__ __m64
+static inline __m64
expand_alpha (__m64 pixel)
{
__m64 t1, t2;
@@ -197,7 +234,7 @@ expand_alpha (__m64 pixel)
return t1;
}
-static __inline__ __m64
+static inline __m64
expand_alpha_rev (__m64 pixel)
{
__m64 t1, t2;
@@ -214,7 +251,7 @@ expand_alpha_rev (__m64 pixel)
return t1;
}
-static __inline__ __m64
+static inline __m64
invert_colors (__m64 pixel)
{
__m64 x, y, z;
@@ -236,13 +273,13 @@ invert_colors (__m64 pixel)
#endif
-static __inline__ __m64
+static inline __m64
over (__m64 src, __m64 srca, __m64 dest)
{
return _mm_adds_pu8 (src, pix_multiply(dest, negate(srca)));
}
-static __inline__ __m64
+static inline __m64
over_rev_non_pre (__m64 src, __m64 dest)
{
__m64 srca = expand_alpha (src);
@@ -251,14 +288,15 @@ over_rev_non_pre (__m64 src, __m64 dest)
return over(pix_multiply(invert_colors(src), srcfaaa), srca, dest);
}
-static __inline__ __m64
+static inline __m64
in (__m64 src,
__m64 mask)
{
return pix_multiply (src, mask);
}
-static __inline__ __m64
+#ifndef _MSC_VER
+static inline __m64
in_over (__m64 src,
__m64 srca,
__m64 mask,
@@ -266,20 +304,23 @@ in_over (__m64 src,
{
return over(in(src, mask), pix_multiply(srca, mask), dest);
}
+#else
+#define in_over(src, srca, mask, dest) over(in(src, mask), pix_multiply(srca, mask), dest)
+#endif
-static __inline__ __m64
+static inline __m64
load8888 (CARD32 v)
{
return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (v), _mm_setzero_si64());
}
-static __inline__ __m64
+static inline __m64
pack8888 (__m64 lo, __m64 hi)
{
return _mm_packs_pu16 (lo, hi);
}
-static __inline__ CARD32
+static inline CARD32
store8888 (__m64 v)
{
return _mm_cvtsi64_si32(pack8888(v, _mm_setzero_si64()));
@@ -299,7 +340,7 @@ store8888 (__m64 v)
* Note the trick here - the top word is shifted by another nibble to
* avoid it bumping into the middle word
*/
-static __inline__ __m64
+static inline __m64
expand565 (__m64 pixel, int pos)
{
__m64 p = pixel;
@@ -319,7 +360,7 @@ expand565 (__m64 pixel, int pos)
return _mm_srli_pi16 (pixel, 8);
}
-static __inline__ __m64
+static inline __m64
expand8888 (__m64 in, int pos)
{
if (pos == 0)
@@ -328,7 +369,7 @@ expand8888 (__m64 in, int pos)
return _mm_unpackhi_pi8 (in, _mm_setzero_si64());
}
-static __inline__ __m64
+static inline __m64
pack565 (__m64 pixel, __m64 target, int pos)
{
__m64 p = pixel;
@@ -358,20 +399,28 @@ pack565 (__m64 pixel, __m64 target, int pos)
return _mm_or_si64 (b, p);
}
-static __inline__ __m64
+#ifndef _MSC_VER
+static inline __m64
pix_add_mul (__m64 x, __m64 a, __m64 y, __m64 b)
{
- x = _mm_mullo_pi16 (x, a);
- y = _mm_mullo_pi16 (y, b);
- x = _mm_srli_pi16(x, 1);
- y = _mm_srli_pi16(y, 1);
- x = _mm_adds_pu16 (x, y);
- x = _mm_adds_pu16 (x, _mm_srli_pi16 (x, 8));
+ x = _mm_mullo_pi16 (x, a);
+ y = _mm_mullo_pi16 (y, b);
x = _mm_adds_pu16 (x, MC(4x0080));
- x = _mm_srli_pi16 (x, 7);
+ x = _mm_adds_pu16 (x, y);
+ x = _mm_adds_pu16 (x, _mm_srli_pi16 (x, 8));
+ x = _mm_srli_pi16 (x, 8);
return x;
}
+#else
+#define pix_add_mul(x, a, y, b) \
+( x = _mm_mullo_pi16 (x, a), \
+ y = _mm_mullo_pi16 (y, b), \
+ x = _mm_adds_pu16 (x, MC(4x0080)), \
+ x = _mm_adds_pu16 (x, y), \
+ x = _mm_adds_pu16 (x, _mm_srli_pi16 (x, 8)), \
+ _mm_srli_pi16 (x, 8) )
+#endif
/* --------------- MMX code patch for fbcompose.c --------------------- */
@@ -590,7 +639,7 @@ mmxCombineSaturateU (CARD32 *dest, const CARD32 *src, int width)
CARD32 da = ~d >> 24;
if (sa > da) {
- __m64 msa = load8888(FbIntDiv(da, sa));
+ __m64 msa = load8888(FbIntDiv(da, sa))<<24;
msa = expand_alpha_rev(msa);
ms = pix_multiply(ms, msa);
}