diff options
-rw-r--r-- | pixman/ChangeLog | 53 | ||||
-rw-r--r-- | pixman/configure.in | 32 | ||||
-rw-r--r-- | pixman/src/Makefile.am | 10 | ||||
-rw-r--r-- | pixman/src/fbmmx.c | 2503 | ||||
-rw-r--r-- | pixman/src/fbmmx.h | 228 | ||||
-rw-r--r-- | pixman/src/fbpict.c (renamed from pixman/src/ic.c) | 647 | ||||
-rw-r--r-- | pixman/src/icint.h | 30 |
7 files changed, 3286 insertions, 217 deletions
diff --git a/pixman/ChangeLog b/pixman/ChangeLog index 6817dc88..467ee93e 100644 --- a/pixman/ChangeLog +++ b/pixman/ChangeLog @@ -1,3 +1,56 @@ +2005-08-16 Billy Biggs <vektor@dumbterm.net> + + * src/fbmmx.c: (shift), (negate), (pix_multiply), (pix_add), + (expand_alpha), (expand_alpha_rev), (invert_colors), (over), + (over_rev_non_pre), (in), (in_over), (load8888), (pack8888), + (store8888), (expand565), (expand8888), (pack565), (pix_add_mul), + (mmxCombineMaskU), (mmxCombineOverU), (mmxCombineOverReverseU), + (mmxCombineInU), (mmxCombineInReverseU), (mmxCombineOutU), + (mmxCombineOutReverseU), (mmxCombineAtopU), + (mmxCombineAtopReverseU), (mmxCombineXorU), (mmxCombineAddU), + (mmxCombineSaturateU), (mmxCombineSrcC), (mmxCombineOverC), + (mmxCombineOverReverseC), (mmxCombineInC), (mmxCombineInReverseC), + (mmxCombineOutC), (mmxCombineOutReverseC), (mmxCombineAtopC), + (mmxCombineAtopReverseC), (mmxCombineXorC), (mmxCombineAddC), + (fbComposeSetupMMX), (fbCompositeSolid_nx8888mmx), + (fbCompositeSolid_nx0565mmx), + (fbCompositeSolidMask_nx8888x8888Cmmx), + (fbCompositeSrc_8888x8x8888mmx), (fbCompositeSrc_x888x8x8888mmx), + (fbCompositeSolidMask_nx8x8888mmx), + (fbCompositeSolidMaskSrc_nx8x8888mmx), + (fbCompositeSolidMask_nx8x0565mmx), + (fbCompositeSrc_8888RevNPx0565mmx), + (fbCompositeSrc_8888RevNPx8888mmx), + (fbCompositeSolidMask_nx8888x0565Cmmx), + (fbCompositeSrcAdd_8000x8000mmx), (fbCompositeSrcAdd_8888x8888mmx), + (fbSolidFillmmx), (fbCopyAreammx), (fbCompositeCopyAreammx), + (detectCPUFeatures), (fbHaveMMX): + * src/fbmmx.h: Port MMX code from xserver to pixman. 
+ + * src/fbpict.c: (fbOver), (fbOver24), (fbIn), (fbIn24), + (fbCompositeSolidMask_nx8x8888), + (fbCompositeSolidMask_nx8888x8888C), + (fbCompositeSolidMask_nx8x0888), (fbCompositeSolidMask_nx8x0565), + (fbCompositeSolidMask_nx8888x0565), + (fbCompositeSolidMask_nx8888x0565C), (fbCompositeSrc_8888x8888), + (fbCompositeSrc_8888x0888), (fbCompositeSrc_8888x0565), + (fbCompositeSrcAdd_8000x8000), (fbCompositeSrcAdd_8888x8888), + (fbCompositeSrcAdd_1000x1000), (fbCompositeSolidMask_nx1xn), + (fbCompositeTrans_0565xnx0565), (fbCompositeTrans_0888xnx0888), + (fbCompositeSrcSrc_nxn), (pixman_composite): Add an fbpict.c ported + from the latest in xserver/fb, including hooks to the MMX code + where appropriate. This replaces the old ic.c file. + + * src/ic.c: Replaced by fbpict.c. + + * src/icint.h: Move some X server macros from fbpict.c up into + icint.h to keep the diff small. + + * src/Makefile.am: Add fbmmx and fbpict.c, remove ic.c. + + * configure.in: Add a check for the MMX intrinsics. gcc >= 3.4 + is required. 
+ 2005-08-12 Billy Biggs <vektor@dumbterm.net> * src/icrect.c: (pixman_fill_rect_1bpp): Fix to be correct for diff --git a/pixman/configure.in b/pixman/configure.in index 1ee59697..c9cdae8f 100644 --- a/pixman/configure.in +++ b/pixman/configure.in @@ -51,6 +51,38 @@ fi AC_SUBST(WARN_CFLAGS) dnl =========================================================================== +dnl Check for MMX + +MMX_CFLAGS="-mmmx -msse -Winline --param inline-unit-growth=10000 --param large-function-growth=10000" + +have_mmx_intrinsics=no +AC_MSG_CHECKING(For MMX/SSE intrinsics in the compiler) +xserver_save_CFLAGS=$CFLAGS +CFLAGS="$CFLAGS $MMX_CFLAGS" +AC_COMPILE_IFELSE([ +#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4)) +#error "Need GCC >= 3.4 for MMX intrinsics" +#endif +#include <mmintrin.h> +#include <xmmintrin.h> +int main () { + __m64 v = _mm_cvtsi32_si64 (1); + v = _mm_shuffle_pi16 (v, _MM_SHUFFLE(3, 3, 3, 3)); + return _mm_cvtsi64_si32 (v); +}], have_mmx_intrinsics=yes) +CFLAGS=$xserver_save_CFLAGS +AC_MSG_RESULT($have_mmx_intrinsics) + +if test $have_mmx_intrinsics = yes ; then + AC_DEFINE(USE_MMX, 1, [use MMX compiler intrinsics]) +else + MMX_CFLAGS= +fi +AC_SUBST(MMX_CFLAGS) + +AM_CONDITIONAL(USE_MMX, test $have_mmx_intrinsics = yes) + +dnl =========================================================================== AC_OUTPUT([ libpixman.pc diff --git a/pixman/src/Makefile.am b/pixman/src/Makefile.am index 50b3516a..819543ab 100644 --- a/pixman/src/Makefile.am +++ b/pixman/src/Makefile.am @@ -7,7 +7,7 @@ libpixman_la_SOURCES = \ pixregion.c \ pixregionint.h \ fbpict.h \ - ic.c \ + fbpict.c \ icblt.c \ icbltone.c \ iccolor.c \ @@ -31,4 +31,10 @@ libpixman_la_SOURCES = \ renderedge.h \ slim_internal.h -INCLUDES = -I$(top_srcdir) -I$(srcdir) $(WARN_CFLAGS) +if USE_MMX +libpixman_la_SOURCES += \ + fbmmx.c \ + fbmmx.h +endif + +INCLUDES = -I$(top_srcdir) -I$(srcdir) $(WARN_CFLAGS) @MMX_CFLAGS@ diff --git a/pixman/src/fbmmx.c b/pixman/src/fbmmx.c 
new file mode 100644 index 00000000..347a5121 --- /dev/null +++ b/pixman/src/fbmmx.c @@ -0,0 +1,2503 @@ +/* + * Copyright © 2004 Red Hat, Inc. + * Copyright © 2004 Nicholas Miell + * Copyright © 2005 Trolltech AS + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Red Hat not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. Red Hat makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. 
+ * + * Author: Søren Sandmann (sandmann@redhat.com) + * Minor Improvements: Nicholas Miell (nmiell@gmail.com) + * MMX code paths for fbcompose.c by Lars Knoll (lars@trolltech.com) + * + * Based on work by Owen Taylor + */ + + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <assert.h> +#include "fbpict.h" +#include "pixman-xserver-compat.h" +#include "fbmmx.h" + +#if defined(__amd64__) || defined(__x86_64__) +#define USE_SSE +#endif + +#include <mmintrin.h> +#include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */ + +#ifdef RENDER + +#include "fbpict.h" + +#define noVERBOSE + +#ifdef VERBOSE +#define CHECKPOINT() ErrorF ("at %s %d\n", __FUNCTION__, __LINE__) +#else +#define CHECKPOINT() +#endif + +/* Notes about writing mmx code + * + * give memory operands as the second operand. If you give it as the + * first, gcc will first load it into a register, then use that + * register + * + * ie. use + * + * _mm_mullo_pi16 (x, mmx_constant); + * + * not + * + * _mm_mullo_pi16 (mmx_constant, x); + * + * Also try to minimize dependencies. i.e. when you need a value, try + * to calculate it from a value that was calculated as early as + * possible. 
+ */ + +/* --------------- MMX primitives ------------------------------------- */ + +typedef unsigned long long ullong; + +typedef struct +{ + ullong mmx_4x00ff; + ullong mmx_4x0080; + ullong mmx_565_rgb; + ullong mmx_565_unpack_multiplier; + ullong mmx_565_r; + ullong mmx_565_g; + ullong mmx_565_b; + ullong mmx_mask_0; + ullong mmx_mask_1; + ullong mmx_mask_2; + ullong mmx_mask_3; + ullong mmx_full_alpha; + ullong mmx_ffff0000ffff0000; + ullong mmx_0000ffff00000000; + ullong mmx_000000000000ffff; +} MMXData; + +static const MMXData c = +{ + .mmx_4x00ff = 0x00ff00ff00ff00ffULL, + .mmx_4x0080 = 0x0080008000800080ULL, + .mmx_565_rgb = 0x000001f0003f001fULL, + .mmx_565_r = 0x000000f800000000ULL, + .mmx_565_g = 0x0000000000fc0000ULL, + .mmx_565_b = 0x00000000000000f8ULL, + .mmx_mask_0 = 0xffffffffffff0000ULL, + .mmx_mask_1 = 0xffffffff0000ffffULL, + .mmx_mask_2 = 0xffff0000ffffffffULL, + .mmx_mask_3 = 0x0000ffffffffffffULL, + .mmx_full_alpha = 0x00ff000000000000ULL, + .mmx_565_unpack_multiplier = 0x0000008404100840ULL, + .mmx_ffff0000ffff0000 = 0xffff0000ffff0000ULL, + .mmx_0000ffff00000000 = 0x0000ffff00000000ULL, + .mmx_000000000000ffff = 0x000000000000ffffULL, +}; + +#define MC(x) ((__m64) c.mmx_##x) /* MC(name): the constant c.mmx_<name> cast to __m64 */ + +static __inline__ __m64 +shift (__m64 v, int s) +{ + if (s > 0) + return _mm_slli_si64 (v, s); /* s > 0: shift the whole 64-bit value left by s bits */ + else if (s < 0) + return _mm_srli_si64 (v, -s); /* s < 0: logical shift right by -s bits */ + else + return v; +} + +static __inline__ __m64 +negate (__m64 mask) +{ + return _mm_xor_si64 (mask, MC(4x00ff)); /* flips the low 8 bits of each 16-bit word */ +} + +static __inline__ __m64 +pix_multiply (__m64 a, __m64 b) +{ + __m64 res; + + res = _mm_mullo_pi16 (a, b); /* per 16-bit word: t = a*b + 0x80; result = (t + (t >> 8)) >> 8 */ + res = _mm_adds_pu16 (res, MC(4x0080)); + res = _mm_adds_pu16 (res, _mm_srli_pi16 (res, 8)); + res = _mm_srli_pi16 (res, 8); + + return res; +} + +static __inline__ __m64 +pix_add (__m64 a, __m64 b) +{ + return _mm_adds_pu8 (a, b); /* unsigned saturating add, byte by byte */ +} + +#ifdef USE_SSE + +static __inline__ __m64 +expand_alpha (__m64 pixel) +{ + return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE(3, 3, 3, 3)); /* copy 16-bit word 3 into all four words */ +} + +static __inline__ __m64 
+expand_alpha_rev (__m64 pixel) +{ + return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE(0, 0, 0, 0)); +} + +static __inline__ __m64 +invert_colors (__m64 pixel) +{ + return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE(3, 0, 1, 2)); +} + +#else + +static __inline__ __m64 +expand_alpha (__m64 pixel) +{ + __m64 t1, t2; + + t1 = shift (pixel, -48); + t2 = shift (t1, 16); + t1 = _mm_or_si64 (t1, t2); + t2 = shift (t1, 32); + t1 = _mm_or_si64 (t1, t2); + + return t1; +} + +static __inline__ __m64 +expand_alpha_rev (__m64 pixel) +{ + __m64 t1, t2; + + /* move alpha to low 16 bits and zero the rest */ + t1 = shift (pixel, 48); + t1 = shift (t1, -48); + + t2 = shift (t1, 16); + t1 = _mm_or_si64 (t1, t2); + t2 = shift (t1, 32); + t1 = _mm_or_si64 (t1, t2); + + return t1; +} + +static __inline__ __m64 +invert_colors (__m64 pixel) +{ + __m64 x, y, z; + + x = y = z = pixel; + + x = _mm_and_si64 (x, MC(ffff0000ffff0000)); + y = _mm_and_si64 (y, MC(000000000000ffff)); + z = _mm_and_si64 (z, MC(0000ffff00000000)); + + y = shift (y, 32); + z = shift (z, -32); + + x = _mm_or_si64 (x, y); + x = _mm_or_si64 (x, z); + + return x; +} + +#endif + +static __inline__ __m64 +over (__m64 src, __m64 srca, __m64 dest) +{ + return _mm_adds_pu8 (src, pix_multiply(dest, negate(srca))); +} + +static __inline__ __m64 +over_rev_non_pre (__m64 src, __m64 dest) +{ + __m64 srca = expand_alpha (src); + __m64 srcfaaa = _mm_or_si64 (srca, MC(full_alpha)); + + return over(pix_multiply(invert_colors(src), srcfaaa), srca, dest); +} + +static __inline__ __m64 +in (__m64 src, + __m64 mask) +{ + return pix_multiply (src, mask); +} + +static __inline__ __m64 +in_over (__m64 src, + __m64 srca, + __m64 mask, + __m64 dest) +{ + return over(in(src, mask), pix_multiply(srca, mask), dest); +} + +static __inline__ __m64 +load8888 (CARD32 v) +{ + return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (v), _mm_setzero_si64()); +} + +static __inline__ __m64 +pack8888 (__m64 lo, __m64 hi) +{ + return _mm_packs_pu16 (lo, hi); +} + +static __inline__ 
CARD32 +store8888 (__m64 v) +{ + return _mm_cvtsi64_si32(pack8888(v, _mm_setzero_si64())); +} + +/* Expand 16 bits positioned at @pos (0-3) of a mmx register into + * + * 00RR00GG00BB + * + * --- Expanding 565 in the low word --- + * + * m = (m << (32 - 3)) | (m << (16 - 5)) | m; + * m = m & (01f0003f001f); + * m = m * (008404100840); + * m = m >> 8; + * + * Note the trick here - the top word is shifted by another nibble to + * avoid it bumping into the middle word + */ +static __inline__ __m64 +expand565 (__m64 pixel, int pos) +{ + __m64 p = pixel; + __m64 t1, t2; + + /* move pixel to low 16 bit and zero the rest */ + p = shift (shift (p, (3 - pos) * 16), -48); + + t1 = shift (p, 36 - 11); + t2 = shift (p, 16 - 5); + + p = _mm_or_si64 (t1, p); + p = _mm_or_si64 (t2, p); + p = _mm_and_si64 (p, MC(565_rgb)); + + pixel = _mm_mullo_pi16 (p, MC(565_unpack_multiplier)); + return _mm_srli_pi16 (pixel, 8); +} + +static __inline__ __m64 +expand8888 (__m64 in, int pos) +{ + if (pos == 0) + return _mm_unpacklo_pi8 (in, _mm_setzero_si64()); + else + return _mm_unpackhi_pi8 (in, _mm_setzero_si64()); +} + +static __inline__ __m64 +pack565 (__m64 pixel, __m64 target, int pos) +{ + __m64 p = pixel; + __m64 t = target; + __m64 r, g, b; + + r = _mm_and_si64 (p, MC(565_r)); + g = _mm_and_si64 (p, MC(565_g)); + b = _mm_and_si64 (p, MC(565_b)); + + r = shift (r, - (32 - 8) + pos * 16); + g = shift (g, - (16 - 3) + pos * 16); + b = shift (b, - (0 + 3) + pos * 16); + + if (pos == 0) + t = _mm_and_si64 (t, MC(mask_0)); + else if (pos == 1) + t = _mm_and_si64 (t, MC(mask_1)); + else if (pos == 2) + t = _mm_and_si64 (t, MC(mask_2)); + else if (pos == 3) + t = _mm_and_si64 (t, MC(mask_3)); + + p = _mm_or_si64 (r, t); + p = _mm_or_si64 (g, p); + + return _mm_or_si64 (b, p); +} + +static __inline__ __m64 +pix_add_mul (__m64 x, __m64 a, __m64 y, __m64 b) +{ + x = _mm_mullo_pi16 (x, a); + y = _mm_mullo_pi16 (y, b); + x = _mm_adds_pu16 (x, MC(4x0080)); + x = _mm_adds_pu16 (x, y); + x = 
_mm_adds_pu16 (x, _mm_srli_pi16 (x, 8)); + x = _mm_srli_pi16 (x, 8); + + return x; +} + +/* --------------- MMX code paths for fbcompose.c --------------------- */ + +static FASTCALL void +mmxCombineMaskU (CARD32 *src, const CARD32 *mask, int width) +{ + const CARD32 *end = mask + width; + while (mask < end) { + __m64 a = load8888(*mask); + __m64 s = load8888(*src); + a = expand_alpha(a); /* only the mask's top (alpha) channel is used, replicated to all four */ + s = pix_multiply(s, a); + *src = store8888(s); /* the result is written back into src, not into mask */ + ++src; + ++mask; + } + _mm_empty(); /* NOTE(review): presumably resets MMX state (EMMS) for later x87 use - confirm against the intrinsics reference */ +} + + +static FASTCALL void +mmxCombineOverU (CARD32 *dest, const CARD32 *src, int width) +{ + const CARD32 *end = dest + width; + + while (dest < end) { + __m64 s, sa; + s = load8888(*src); + sa = expand_alpha(s); + *dest = store8888(over(s, sa, load8888(*dest))); /* src over dest */ + ++dest; + ++src; + } + _mm_empty(); +} + +static FASTCALL void +mmxCombineOverReverseU (CARD32 *dest, const CARD32 *src, int width) +{ + const CARD32 *end = dest + width; + + while (dest < end) { + __m64 d, da; + d = load8888(*dest); + da = expand_alpha(d); + *dest = store8888(over (d, da, load8888(*src))); /* dest over src */ + ++dest; + ++src; + } + _mm_empty(); +} + +static FASTCALL void +mmxCombineInU (CARD32 *dest, const CARD32 *src, int width) +{ + const CARD32 *end = dest + width; + + while (dest < end) { + __m64 x, a; + x = load8888(*src); + a = load8888(*dest); + a = expand_alpha(a); + x = pix_multiply(x, a); /* src scaled by dest alpha */ + *dest = store8888(x); + ++dest; + ++src; + } + _mm_empty(); +} + +static FASTCALL void +mmxCombineInReverseU (CARD32 *dest, const CARD32 *src, int width) +{ + const CARD32 *end = dest + width; + + while (dest < end) { + __m64 x, a; + x = load8888(*dest); + a = load8888(*src); + a = expand_alpha(a); + x = pix_multiply(x, a); /* dest scaled by src alpha */ + *dest = store8888(x); + ++dest; + ++src; + } + _mm_empty(); +} + +static FASTCALL void +mmxCombineOutU (CARD32 *dest, const CARD32 *src, int width) +{ + const CARD32 *end = dest + width; + + while (dest < end) { + __m64 x, a; + x = load8888(*src); + a = load8888(*dest); + a = expand_alpha(a); + a = negate(a); + x 
= pix_multiply(x, a); + *dest = store8888(x); + ++dest; + ++src; + } + _mm_empty(); +} + +static FASTCALL void +mmxCombineOutReverseU (CARD32 *dest, const CARD32 *src, int width) +{ + const CARD32 *end = dest + width; + + while (dest < end) { + __m64 x, a; + x = load8888(*dest); + a = load8888(*src); + a = expand_alpha(a); + a = negate(a); + x = pix_multiply(x, a); + *dest = store8888(x); + ++dest; + ++src; + } + _mm_empty(); +} + +static FASTCALL void +mmxCombineAtopU (CARD32 *dest, const CARD32 *src, int width) +{ + const CARD32 *end = dest + width; + + while (dest < end) { + __m64 s, da, d, sia; + s = load8888(*src); + d = load8888(*dest); + sia = expand_alpha(s); + sia = negate(sia); + da = expand_alpha(d); + s = pix_add_mul (s, da, d, sia); + *dest = store8888(s); + ++dest; + ++src; + } + _mm_empty(); +} + +static FASTCALL void +mmxCombineAtopReverseU (CARD32 *dest, const CARD32 *src, int width) +{ + const CARD32 *end; + + end = dest + width; + + while (dest < end) { + __m64 s, dia, d, sa; + s = load8888(*src); + d = load8888(*dest); + sa = expand_alpha(s); + dia = expand_alpha(d); + dia = negate(dia); + s = pix_add_mul (s, dia, d, sa); + *dest = store8888(s); + ++dest; + ++src; + } + _mm_empty(); +} + +static FASTCALL void +mmxCombineXorU (CARD32 *dest, const CARD32 *src, int width) +{ + const CARD32 *end = dest + width; + + while (dest < end) { + __m64 s, dia, d, sia; + s = load8888(*src); + d = load8888(*dest); + sia = expand_alpha(s); + dia = expand_alpha(d); + sia = negate(sia); + dia = negate(dia); + s = pix_add_mul (s, dia, d, sia); + *dest = store8888(s); + ++dest; + ++src; + } + _mm_empty(); +} + +static FASTCALL void +mmxCombineAddU (CARD32 *dest, const CARD32 *src, int width) +{ + const CARD32 *end = dest + width; + while (dest < end) { + __m64 s, d; + s = load8888(*src); + d = load8888(*dest); + s = pix_add(s, d); + *dest = store8888(s); + ++dest; + ++src; + } + _mm_empty(); +} + +static FASTCALL void +mmxCombineSaturateU (CARD32 *dest, const 
CARD32 *src, int width) +{ + const CARD32 *end = dest + width; + while (dest < end) { + CARD32 s = *src; + CARD32 d = *dest; + __m64 ms = load8888(s); + __m64 md = load8888(d); + CARD32 sa = s >> 24; + CARD32 da = ~d >> 24; + + if (sa > da) { + __m64 msa = load8888(FbIntDiv(da, sa)<<24); + msa = expand_alpha(msa); + ms = pix_multiply(ms, msa); + } + md = pix_add(md, ms); + *dest = store8888(md); + ++src; + ++dest; + } + _mm_empty(); +} + + +static FASTCALL void +mmxCombineSrcC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width) +{ + const CARD32 *end = src + width; + while (src < end) { + __m64 a = load8888(*mask); + __m64 s = load8888(*src); + s = pix_multiply(s, a); + *dest = store8888(s); + ++src; + ++mask; + ++dest; + } + _mm_empty(); +} + +static FASTCALL void +mmxCombineOverC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width) +{ + const CARD32 *end = src + width; + while (src < end) { + __m64 a = load8888(*mask); + __m64 s = load8888(*src); + __m64 d = load8888(*dest); + __m64 sa = expand_alpha(s); + + *dest = store8888(in_over (s, sa, a, d)); + + ++src; + ++dest; + ++mask; + } + _mm_empty(); +} + +static FASTCALL void +mmxCombineOverReverseC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width) +{ + const CARD32 *end = src + width; + while (src < end) { + __m64 a = load8888(*mask); + __m64 s = load8888(*src); + __m64 d = load8888(*dest); + __m64 da = expand_alpha(d); + + *dest = store8888(over (d, da, in (s, a))); + + ++src; + ++dest; + ++mask; + } + _mm_empty(); +} + + +static FASTCALL void +mmxCombineInC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width) +{ + const CARD32 *end = src + width; + while (src < end) { + __m64 a = load8888(*mask); + __m64 s = load8888(*src); + __m64 d = load8888(*dest); + __m64 da = expand_alpha(d); + s = pix_multiply(s, a); + s = pix_multiply(s, da); + *dest = store8888(s); + ++src; + ++dest; + ++mask; + } + _mm_empty(); +} + +static FASTCALL void +mmxCombineInReverseC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width) +{ 
+ const CARD32 *end = src + width; + while (src < end) { + __m64 a = load8888(*mask); + __m64 s = load8888(*src); + __m64 d = load8888(*dest); + __m64 sa = expand_alpha(s); + a = pix_multiply(a, sa); + d = pix_multiply(d, a); + *dest = store8888(d); + ++src; + ++dest; + ++mask; + } + _mm_empty(); +} + +static FASTCALL void +mmxCombineOutC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width) +{ + const CARD32 *end = src + width; + while (src < end) { + __m64 a = load8888(*mask); + __m64 s = load8888(*src); + __m64 d = load8888(*dest); + __m64 da = expand_alpha(d); + da = negate(da); + s = pix_multiply(s, a); + s = pix_multiply(s, da); + *dest = store8888(s); + ++src; + ++dest; + ++mask; + } + _mm_empty(); +} + +static FASTCALL void +mmxCombineOutReverseC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width) +{ + const CARD32 *end = src + width; + while (src < end) { + __m64 a = load8888(*mask); + __m64 s = load8888(*src); + __m64 d = load8888(*dest); + __m64 sa = expand_alpha(s); + a = pix_multiply(a, sa); + a = negate(a); + d = pix_multiply(d, a); + *dest = store8888(d); + ++src; + ++dest; + ++mask; + } + _mm_empty(); +} + +static FASTCALL void +mmxCombineAtopC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width) +{ + const CARD32 *end = src + width; + while (src < end) { + __m64 a = load8888(*mask); + __m64 s = load8888(*src); + __m64 d = load8888(*dest); + __m64 da = expand_alpha(d); + __m64 sa = expand_alpha(s); + s = pix_multiply(s, a); + a = pix_multiply(a, sa); + a = negate(a); + d = pix_add_mul (d, a, s, da); + *dest = store8888(d); + ++src; + ++dest; + ++mask; + } + _mm_empty(); +} + +static FASTCALL void +mmxCombineAtopReverseC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width) +{ + const CARD32 *end = src + width; + while (src < end) { + __m64 a = load8888(*mask); + __m64 s = load8888(*src); + __m64 d = load8888(*dest); + __m64 da = expand_alpha(d); + __m64 sa = expand_alpha(s); + s = pix_multiply(s, a); + a = pix_multiply(a, sa); + da = negate(da); + d 
= pix_add_mul (d, a, s, da); + *dest = store8888(d); + ++src; + ++dest; + ++mask; + } + _mm_empty(); +} + +static FASTCALL void +mmxCombineXorC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width) +{ + const CARD32 *end = src + width; + while (src < end) { + __m64 a = load8888(*mask); + __m64 s = load8888(*src); + __m64 d = load8888(*dest); + __m64 da = expand_alpha(d); + __m64 sa = expand_alpha(s); + s = pix_multiply(s, a); + a = pix_multiply(a, sa); + da = negate(da); + a = negate(a); + d = pix_add_mul (d, a, s, da); + *dest = store8888(d); + ++src; + ++dest; + ++mask; + } + _mm_empty(); +} + +static FASTCALL void +mmxCombineAddC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width) +{ + const CARD32 *end = src + width; + while (src < end) { + __m64 a = load8888(*mask); + __m64 s = load8888(*src); + __m64 d = load8888(*dest); + s = pix_multiply(s, a); + d = pix_add(s, d); + *dest = store8888(d); + ++src; + ++dest; + ++mask; + } + _mm_empty(); +} + +extern FbComposeFunctions composeFunctions; + +void fbComposeSetupMMX(void) +{ + /* check if we have MMX support and initialize accordingly */ + if (fbHaveMMX()) { + composeFunctions.combineU[PIXMAN_OPERATOR_OVER] = mmxCombineOverU; + composeFunctions.combineU[PIXMAN_OPERATOR_OVER_REVERSE] = mmxCombineOverReverseU; + composeFunctions.combineU[PIXMAN_OPERATOR_IN] = mmxCombineInU; + composeFunctions.combineU[PIXMAN_OPERATOR_IN_REVERSE] = mmxCombineInReverseU; + composeFunctions.combineU[PIXMAN_OPERATOR_OUT] = mmxCombineOutU; + composeFunctions.combineU[PIXMAN_OPERATOR_OUT_REVERSE] = mmxCombineOutReverseU; + composeFunctions.combineU[PIXMAN_OPERATOR_ATOP] = mmxCombineAtopU; + composeFunctions.combineU[PIXMAN_OPERATOR_ATOP_REVERSE] = mmxCombineAtopReverseU; + composeFunctions.combineU[PIXMAN_OPERATOR_XOR] = mmxCombineXorU; + composeFunctions.combineU[PIXMAN_OPERATOR_ADD] = mmxCombineAddU; + composeFunctions.combineU[PIXMAN_OPERATOR_SATURATE] = mmxCombineSaturateU; + + composeFunctions.combineC[PIXMAN_OPERATOR_SRC] = 
mmxCombineSrcC; + composeFunctions.combineC[PIXMAN_OPERATOR_OVER] = mmxCombineOverC; + composeFunctions.combineC[PIXMAN_OPERATOR_OVER_REVERSE] = mmxCombineOverReverseC; + composeFunctions.combineC[PIXMAN_OPERATOR_IN] = mmxCombineInC; + composeFunctions.combineC[PIXMAN_OPERATOR_IN_REVERSE] = mmxCombineInReverseC; + composeFunctions.combineC[PIXMAN_OPERATOR_OUT] = mmxCombineOutC; + composeFunctions.combineC[PIXMAN_OPERATOR_OUT_REVERSE] = mmxCombineOutReverseC; + composeFunctions.combineC[PIXMAN_OPERATOR_ATOP] = mmxCombineAtopC; + composeFunctions.combineC[PIXMAN_OPERATOR_ATOP_REVERSE] = mmxCombineAtopReverseC; + composeFunctions.combineC[PIXMAN_OPERATOR_XOR] = mmxCombineXorC; + composeFunctions.combineC[PIXMAN_OPERATOR_ADD] = mmxCombineAddC; + + composeFunctions.combineMaskU = mmxCombineMaskU; + } +} + + +/* ------------------ MMX code paths called from fbpict.c ----------------------- */ + +void +fbCompositeSolid_nx8888mmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height) +{ + CARD32 src; + CARD32 *dstLine, *dst; + CARD16 w; + FbStride dstStride; + __m64 vsrc, vsrca; + + CHECKPOINT(); + + fbComposeGetSolid(pSrc, src); + + if (src >> 24 == 0) + return; + + fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1); + + vsrc = load8888 (src); + vsrca = expand_alpha (vsrc); + + while (height--) + { + dst = dstLine; + dstLine += dstStride; + w = width; + + CHECKPOINT(); + + while (w && (unsigned long)dst & 7) + { + *dst = store8888(over(vsrc, vsrca, load8888(*dst))); + + w--; + dst++; + } + + while (w >= 2) + { + __m64 vdest; + __m64 dest0, dest1; + + vdest = *(__m64 *)dst; + + dest0 = over(vsrc, vsrca, expand8888(vdest, 0)); + dest1 = over(vsrc, vsrca, expand8888(vdest, 1)); + + *(__m64 *)dst = pack8888(dest0, dest1); + + dst += 2; + w -= 2; + } + + CHECKPOINT(); + + while (w) + { + *dst = store8888(over(vsrc, 
vsrca, load8888(*dst))); + + w--; + dst++; + } + } + + _mm_empty(); +} + +void +fbCompositeSolid_nx0565mmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height) +{ + CARD32 src; + CARD16 *dstLine, *dst; + CARD16 w; + FbStride dstStride; + __m64 vsrc, vsrca; + + CHECKPOINT(); + + fbComposeGetSolid(pSrc, src); + + if (src >> 24 == 0) + return; + + fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1); + + vsrc = load8888 (src); + vsrca = expand_alpha (vsrc); + + while (height--) + { + dst = dstLine; + dstLine += dstStride; + w = width; + + CHECKPOINT(); + + while (w && (unsigned long)dst & 7) + { + ullong d = *dst; + __m64 vdest = expand565 ((__m64)d, 0); + vdest = pack565(over(vsrc, vsrca, vdest), vdest, 0); + *dst = (ullong)vdest; + + w--; + dst++; + } + + while (w >= 4) + { + __m64 vdest; + + vdest = *(__m64 *)dst; + + vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 0)), vdest, 0); + vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 1)), vdest, 1); + vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 2)), vdest, 2); + vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 3)), vdest, 3); + + *(__m64 *)dst = vdest; + + dst += 4; + w -= 4; + } + + CHECKPOINT(); + + while (w) + { + ullong d = *dst; + __m64 vdest = expand565 ((__m64)d, 0); + vdest = pack565(over(vsrc, vsrca, vdest), vdest, 0); + *dst = (ullong)vdest; + + w--; + dst++; + } + } + + _mm_empty(); +} + +void +fbCompositeSolidMask_nx8888x8888Cmmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height) +{ + CARD32 src, srca; + CARD32 *dstLine; + CARD32 *maskLine; + FbStride dstStride, maskStride; + __m64 vsrc, vsrca; + + CHECKPOINT(); + + fbComposeGetSolid(pSrc, src); + + srca = src >> 24; + if (srca == 0) 
+ return; + + fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1); + fbComposeGetStart (pMask, xMask, yMask, CARD32, maskStride, maskLine, 1); + + vsrc = load8888(src); + vsrca = expand_alpha(vsrc); + + while (height--) + { + int twidth = width; + CARD32 *p = (CARD32 *)maskLine; + CARD32 *q = (CARD32 *)dstLine; + + while (twidth && (unsigned long)q & 7) + { + CARD32 m = *(CARD32 *)p; + + if (m) + { + __m64 vdest = load8888(*q); + vdest = in_over(vsrc, vsrca, load8888(m), vdest); + *q = store8888(vdest); + } + + twidth--; + p++; + q++; + } + + while (twidth >= 2) + { + CARD32 m0, m1; + m0 = *p; + m1 = *(p + 1); + + if (m0 | m1) + { + __m64 dest0, dest1; + __m64 vdest = *(__m64 *)q; + + dest0 = in_over(vsrc, vsrca, load8888(m0), + expand8888 (vdest, 0)); + dest1 = in_over(vsrc, vsrca, load8888(m1), + expand8888 (vdest, 1)); + + *(__m64 *)q = pack8888(dest0, dest1); + } + + p += 2; + q += 2; + twidth -= 2; + } + + while (twidth) + { + CARD32 m = *(CARD32 *)p; + + if (m) + { + __m64 vdest = load8888(*q); + vdest = in_over(vsrc, vsrca, load8888(m), vdest); + *q = store8888(vdest); + } + + twidth--; + p++; + q++; + } + + dstLine += dstStride; + maskLine += maskStride; + } + + _mm_empty(); +} +/* dst = (src IN mask) OVER dst for 32-bit src and dst; only the first mask byte is read and it is used as a constant mask for every pixel. */ +void +fbCompositeSrc_8888x8x8888mmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height) +{ + CARD32 *dstLine, *dst; + CARD32 *srcLine, *src; + CARD8 *maskLine; + CARD32 mask; + __m64 vmask; + FbStride dstStride, srcStride, maskStride; + CARD16 w; + __m64 srca; + + CHECKPOINT(); + + fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1); + fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcStride, srcLine, 1); + fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1); + + mask = *maskLine << 24 | *maskLine << 16 | *maskLine << 8 | *maskLine; /* replicate the first mask byte into all four channels */ + vmask = load8888 (mask); + srca = MC(4x00ff); /* assigned but not read below; each pixel's own alpha is expanded instead */ + + 
while (height--) + { + dst = dstLine; + dstLine += dstStride; + src = srcLine; + srcLine += srcStride; + w = width; + + while (w && (unsigned long)dst & 7) /* single pixels until dst is 8-byte aligned */ + { + __m64 s = load8888 (*src); + __m64 d = load8888 (*dst); + + *dst = store8888 (in_over (s, expand_alpha (s), vmask, d)); /* apply the mask here too, exactly as the main and tail loops do */ + + w--; + dst++; + src++; + } + + while (w >= 2) /* two pixels per iteration through one 64-bit load/store */ + { + __m64 vs = *(__m64 *)src; /* source pixels come from src */ + __m64 vd = *(__m64 *)dst; /* destination pixels come from dst */ + __m64 vsrc0 = expand8888 (vs, 0); + __m64 vsrc1 = expand8888 (vs, 1); + + *(__m64 *)dst = (__m64)pack8888 ( + in_over (vsrc0, expand_alpha (vsrc0), vmask, expand8888 (vd, 0)), + in_over (vsrc1, expand_alpha (vsrc1), vmask, expand8888 (vd, 1))); + + w -= 2; + dst += 2; + src += 2; + } + + while (w) /* leftover pixel, if any */ + { + __m64 s = load8888 (*src); + __m64 d = load8888 (*dst); + + *dst = store8888 (in_over (s, expand_alpha (s), vmask, d)); + + w--; + dst++; + src++; + } + } + + _mm_empty(); +} + +void +fbCompositeSrc_x888x8x8888mmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height) +{ + CARD32 *dstLine, *dst; + CARD32 *srcLine, *src; + CARD8 *maskLine; + CARD32 mask; + __m64 vmask; + FbStride dstStride, srcStride, maskStride; + CARD16 w; + __m64 srca; + + CHECKPOINT(); + + fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1); + fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcStride, srcLine, 1); + fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1); + + mask = *maskLine << 24 | *maskLine << 16 | *maskLine << 8 | *maskLine; + vmask = load8888 (mask); + srca = MC(4x00ff); + + while (height--) + { + dst = dstLine; + dstLine += dstStride; + src = srcLine; + srcLine += srcStride; + w = width; + + while (w && (unsigned long)dst & 7) + { + __m64 s = load8888 (*src); + __m64 d = load8888 (*dst); + + *dst = store8888 (in_over (s, srca, vmask, d)); + + w--; + dst++; + src++; + } + + while (w >= 16) + { + __m64 vd0 = *(__m64 *)(dst + 0); 
+ __m64 vd1 = *(__m64 *)(dst + 2); + __m64 vd2 = *(__m64 *)(dst + 4); + __m64 vd3 = *(__m64 *)(dst + 6); + __m64 vd4 = *(__m64 *)(dst + 8); + __m64 vd5 = *(__m64 *)(dst + 10); + __m64 vd6 = *(__m64 *)(dst + 12); + __m64 vd7 = *(__m64 *)(dst + 14); + + __m64 vs0 = *(__m64 *)(src + 0); + __m64 vs1 = *(__m64 *)(src + 2); + __m64 vs2 = *(__m64 *)(src + 4); + __m64 vs3 = *(__m64 *)(src + 6); + __m64 vs4 = *(__m64 *)(src + 8); + __m64 vs5 = *(__m64 *)(src + 10); + __m64 vs6 = *(__m64 *)(src + 12); + __m64 vs7 = *(__m64 *)(src + 14); + + vd0 = (__m64)pack8888 ( + in_over (expand8888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)), + in_over (expand8888 (vs0, 1), srca, vmask, expand8888 (vd0, 1))); + + vd1 = (__m64)pack8888 ( + in_over (expand8888 (vs1, 0), srca, vmask, expand8888 (vd1, 0)), + in_over (expand8888 (vs1, 1), srca, vmask, expand8888 (vd1, 1))); + + vd2 = (__m64)pack8888 ( + in_over (expand8888 (vs2, 0), srca, vmask, expand8888 (vd2, 0)), + in_over (expand8888 (vs2, 1), srca, vmask, expand8888 (vd2, 1))); + + vd3 = (__m64)pack8888 ( + in_over (expand8888 (vs3, 0), srca, vmask, expand8888 (vd3, 0)), + in_over (expand8888 (vs3, 1), srca, vmask, expand8888 (vd3, 1))); + + vd4 = (__m64)pack8888 ( + in_over (expand8888 (vs4, 0), srca, vmask, expand8888 (vd4, 0)), + in_over (expand8888 (vs4, 1), srca, vmask, expand8888 (vd4, 1))); + + vd5 = (__m64)pack8888 ( + in_over (expand8888 (vs5, 0), srca, vmask, expand8888 (vd5, 0)), + in_over (expand8888 (vs5, 1), srca, vmask, expand8888 (vd5, 1))); + + vd6 = (__m64)pack8888 ( + in_over (expand8888 (vs6, 0), srca, vmask, expand8888 (vd6, 0)), + in_over (expand8888 (vs6, 1), srca, vmask, expand8888 (vd6, 1))); + + vd7 = (__m64)pack8888 ( + in_over (expand8888 (vs7, 0), srca, vmask, expand8888 (vd7, 0)), + in_over (expand8888 (vs7, 1), srca, vmask, expand8888 (vd7, 1))); + + *(__m64 *)(dst + 0) = vd0; + *(__m64 *)(dst + 2) = vd1; + *(__m64 *)(dst + 4) = vd2; + *(__m64 *)(dst + 6) = vd3; + *(__m64 *)(dst + 8) = vd4; + *(__m64 
*)(dst + 10) = vd5; + *(__m64 *)(dst + 12) = vd6; + *(__m64 *)(dst + 14) = vd7; + + w -= 16; + dst += 16; + src += 16; + } + + while (w) + { + __m64 s = load8888 (*src); + __m64 d = load8888 (*dst); + + *dst = store8888 (in_over (s, srca, vmask, d)); + + w--; + dst++; + src++; + } + } + + _mm_empty(); +} + +void +fbCompositeSolidMask_nx8x8888mmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height) +{ + CARD32 src, srca; + CARD32 *dstLine, *dst; + CARD8 *maskLine, *mask; + FbStride dstStride, maskStride; + CARD16 w; + __m64 vsrc, vsrca; + ullong srcsrc; + + CHECKPOINT(); + + fbComposeGetSolid(pSrc, src); + + srca = src >> 24; + if (srca == 0) + return; + + srcsrc = (unsigned long long)src << 32 | src; + + fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1); + fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1); + + vsrc = load8888 (src); + vsrca = expand_alpha (vsrc); + + while (height--) + { + dst = dstLine; + dstLine += dstStride; + mask = maskLine; + maskLine += maskStride; + w = width; + + CHECKPOINT(); + + while (w && (unsigned long)dst & 7) + { + ullong m = *mask; + + if (m) + { + __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), load8888(*dst)); + *dst = store8888(vdest); + } + + w--; + mask++; + dst++; + } + + CHECKPOINT(); + + while (w >= 2) + { + ullong m0, m1; + m0 = *mask; + m1 = *(mask + 1); + + if (srca == 0xff && (m0 & m1) == 0xff) + { + *(unsigned long long *)dst = srcsrc; + } + else if (m0 | m1) + { + __m64 vdest; + __m64 dest0, dest1; + + vdest = *(__m64 *)dst; + + dest0 = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m0), expand8888(vdest, 0)); + dest1 = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m1), expand8888(vdest, 1)); + + *(__m64 *)dst = pack8888(dest0, dest1); + } + + mask += 2; + dst += 2; + w -= 2; + } + + CHECKPOINT(); + + while (w) + { + 
ullong m = *mask; + + if (m) + { + __m64 vdest = load8888(*dst); + vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), vdest); + *dst = store8888(vdest); + } + + w--; + mask++; + dst++; + } + } + + _mm_empty(); +} + +void +fbCompositeSolidMaskSrc_nx8x8888mmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height) +{ + CARD32 src, srca; + CARD32 *dstLine, *dst; + CARD8 *maskLine, *mask; + FbStride dstStride, maskStride; + CARD16 w; + __m64 vsrc, vsrca; + ullong srcsrc; + + CHECKPOINT(); + + fbComposeGetSolid(pSrc, src); + + srca = src >> 24; + if (srca == 0) + { + if (fbSolidFillmmx (pDst->pDrawable, xDst, yDst, width, height, 0)) + return; + } + + srcsrc = (unsigned long long)src << 32 | src; + + fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1); + fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1); + + vsrc = load8888 (src); + vsrca = expand_alpha (vsrc); + + while (height--) + { + dst = dstLine; + dstLine += dstStride; + mask = maskLine; + maskLine += maskStride; + w = width; + + CHECKPOINT(); + + while (w && (unsigned long)dst & 7) + { + ullong m = *mask; + + if (m) + { + __m64 vdest = in(vsrc, expand_alpha_rev ((__m64)m)); + *dst = store8888(vdest); + } + + w--; + mask++; + dst++; + } + + CHECKPOINT(); + + while (w >= 2) + { + ullong m0, m1; + m0 = *mask; + m1 = *(mask + 1); + + if (srca == 0xff && (m0 & m1) == 0xff) + { + *(unsigned long long *)dst = srcsrc; + } + else if (m0 | m1) + { + __m64 vdest; + __m64 dest0, dest1; + + vdest = *(__m64 *)dst; + + dest0 = in(vsrc, expand_alpha_rev ((__m64)m0)); + dest1 = in(vsrc, expand_alpha_rev ((__m64)m1)); + + *(__m64 *)dst = pack8888(dest0, dest1); + } + + mask += 2; + dst += 2; + w -= 2; + } + + CHECKPOINT(); + + while (w) + { + ullong m = *mask; + + if (m) + { + __m64 vdest = load8888(*dst); + vdest = in(vsrc, expand_alpha_rev 
((__m64)m)); + *dst = store8888(vdest); + } + + w--; + mask++; + dst++; + } + } + + _mm_empty(); +} + + +void +fbCompositeSolidMask_nx8x0565mmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height) +{ + CARD32 src, srca; + CARD16 *dstLine, *dst; + CARD8 *maskLine, *mask; + FbStride dstStride, maskStride; + CARD16 w; + __m64 vsrc, vsrca; + unsigned long long srcsrcsrcsrc, src16; + + CHECKPOINT(); + + fbComposeGetSolid(pSrc, src); + + srca = src >> 24; + if (srca == 0) + return; + + fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1); + fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1); + + vsrc = load8888 (src); + vsrca = expand_alpha (vsrc); + + src16 = (ullong)pack565(vsrc, _mm_setzero_si64(), 0); + + srcsrcsrcsrc = (ullong)src16 << 48 | (ullong)src16 << 32 | + (ullong)src16 << 16 | (ullong)src16; + + while (height--) + { + dst = dstLine; + dstLine += dstStride; + mask = maskLine; + maskLine += maskStride; + w = width; + + CHECKPOINT(); + + while (w && (unsigned long)dst & 7) + { + ullong m = *mask; + + if (m) + { + ullong d = *dst; + __m64 vd = (__m64)d; + __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), expand565(vd, 0)); + *dst = (ullong)pack565(vdest, _mm_setzero_si64(), 0); + } + + w--; + mask++; + dst++; + } + + CHECKPOINT(); + + while (w >= 4) + { + ullong m0, m1, m2, m3; + m0 = *mask; + m1 = *(mask + 1); + m2 = *(mask + 2); + m3 = *(mask + 3); + + if (srca == 0xff && (m0 & m1 & m2 & m3) == 0xff) + { + *(unsigned long long *)dst = srcsrcsrcsrc; + } + else if (m0 | m1 | m2 | m3) + { + __m64 vdest; + __m64 vm0, vm1, vm2, vm3; + + vdest = *(__m64 *)dst; + + vm0 = (__m64)m0; + vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm0), expand565(vdest, 0)), vdest, 0); + vm1 = (__m64)m1; + vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm1), expand565(vdest, 
1)), vdest, 1); + vm2 = (__m64)m2; + vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm2), expand565(vdest, 2)), vdest, 2); + vm3 = (__m64)m3; + vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm3), expand565(vdest, 3)), vdest, 3); + + *(__m64 *)dst = vdest; + } + + w -= 4; + mask += 4; + dst += 4; + } + + CHECKPOINT(); + + while (w) + { + ullong m = *mask; + + if (m) + { + ullong d = *dst; + __m64 vd = (__m64)d; + __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), expand565(vd, 0)); + *dst = (ullong)pack565(vdest, _mm_setzero_si64(), 0); + } + + w--; + mask++; + dst++; + } + } + + _mm_empty(); +} + +void +fbCompositeSrc_8888RevNPx0565mmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height) +{ + CARD16 *dstLine, *dst; + CARD32 *srcLine, *src; + FbStride dstStride, srcStride; + CARD16 w; + + CHECKPOINT(); + + fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1); + fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcStride, srcLine, 1); + + assert (pSrc->pDrawable == pMask->pDrawable); + + while (height--) + { + dst = dstLine; + dstLine += dstStride; + src = srcLine; + srcLine += srcStride; + w = width; + + CHECKPOINT(); + + while (w && (unsigned long)dst & 7) + { + __m64 vsrc = load8888 (*src); + ullong d = *dst; + __m64 vdest = expand565 ((__m64)d, 0); + + vdest = pack565(over_rev_non_pre(vsrc, vdest), vdest, 0); + + *dst = (ullong)vdest; + + w--; + dst++; + src++; + } + + CHECKPOINT(); + + while (w >= 4) + { + CARD32 s0, s1, s2, s3; + unsigned char a0, a1, a2, a3; + + s0 = *src; + s1 = *(src + 1); + s2 = *(src + 2); + s3 = *(src + 3); + + a0 = (s0 >> 24); + a1 = (s1 >> 24); + a2 = (s2 >> 24); + a3 = (s3 >> 24); + + if ((a0 & a1 & a2 & a3) == 0xFF) + { + __m64 vdest; + vdest = pack565(invert_colors(load8888(s0)), _mm_setzero_si64(), 0); + vdest = pack565(invert_colors(load8888(s1)), 
vdest, 1); + vdest = pack565(invert_colors(load8888(s2)), vdest, 2); + vdest = pack565(invert_colors(load8888(s3)), vdest, 3); + + *(__m64 *)dst = vdest; + } + else if (a0 | a1 | a2 | a3) + { + __m64 vdest = *(__m64 *)dst; + + vdest = pack565(over_rev_non_pre(load8888(s0), expand565(vdest, 0)), vdest, 0); + vdest = pack565(over_rev_non_pre(load8888(s1), expand565(vdest, 1)), vdest, 1); + vdest = pack565(over_rev_non_pre(load8888(s2), expand565(vdest, 2)), vdest, 2); + vdest = pack565(over_rev_non_pre(load8888(s3), expand565(vdest, 3)), vdest, 3); + + *(__m64 *)dst = vdest; + } + + w -= 4; + dst += 4; + src += 4; + } + + CHECKPOINT(); + + while (w) + { + __m64 vsrc = load8888 (*src); + ullong d = *dst; + __m64 vdest = expand565 ((__m64)d, 0); + + vdest = pack565(over_rev_non_pre(vsrc, vdest), vdest, 0); + + *dst = (ullong)vdest; + + w--; + dst++; + src++; + } + } + + _mm_empty(); +} + +/* "8888RevNP" is GdkPixbuf's format: ABGR, non premultiplied */ + +void +fbCompositeSrc_8888RevNPx8888mmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height) +{ + CARD32 *dstLine, *dst; + CARD32 *srcLine, *src; + FbStride dstStride, srcStride; + CARD16 w; + + CHECKPOINT(); + + fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1); + fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcStride, srcLine, 1); + + assert (pSrc->pDrawable == pMask->pDrawable); + + while (height--) + { + dst = dstLine; + dstLine += dstStride; + src = srcLine; + srcLine += srcStride; + w = width; + + while (w && (unsigned long)dst & 7) + { + __m64 s = load8888 (*src); + __m64 d = load8888 (*dst); + + *dst = store8888 (over_rev_non_pre (s, d)); + + w--; + dst++; + src++; + } + + while (w >= 2) + { + ullong s0, s1; + unsigned char a0, a1; + __m64 d0, d1; + + s0 = *src; + s1 = *(src + 1); + + a0 = (s0 >> 24); + a1 = (s1 >> 24); + + if ((a0 & a1) == 0xFF) + 
{ + d0 = invert_colors(load8888(s0)); + d1 = invert_colors(load8888(s1)); + + *(__m64 *)dst = pack8888 (d0, d1); + } + else if (a0 | a1) + { + __m64 vdest = *(__m64 *)dst; + + d0 = over_rev_non_pre (load8888(s0), expand8888 (vdest, 0)); + d1 = over_rev_non_pre (load8888(s1), expand8888 (vdest, 1)); + + *(__m64 *)dst = pack8888 (d0, d1); + } + + w -= 2; + dst += 2; + src += 2; + } + + while (w) + { + __m64 s = load8888 (*src); + __m64 d = load8888 (*dst); + + *dst = store8888 (over_rev_non_pre (s, d)); + + w--; + dst++; + src++; + } + } + + _mm_empty(); +} + +void +fbCompositeSolidMask_nx8888x0565Cmmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height) +{ + CARD32 src, srca; + CARD16 *dstLine; + CARD32 *maskLine; + FbStride dstStride, maskStride; + __m64 vsrc, vsrca; + + CHECKPOINT(); + + fbComposeGetSolid(pSrc, src); + + srca = src >> 24; + if (srca == 0) + return; + + fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1); + fbComposeGetStart (pMask, xMask, yMask, CARD32, maskStride, maskLine, 1); + + vsrc = load8888 (src); + vsrca = expand_alpha (vsrc); + + while (height--) + { + int twidth = width; + CARD32 *p = (CARD32 *)maskLine; + CARD16 *q = (CARD16 *)dstLine; + + while (twidth && ((unsigned long)q & 7)) + { + CARD32 m = *(CARD32 *)p; + + if (m) + { + ullong d = *q; + __m64 vdest = expand565 ((__m64)d, 0); + vdest = pack565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0); + *q = (ullong)vdest; + } + + twidth--; + p++; + q++; + } + + while (twidth >= 4) + { + CARD32 m0, m1, m2, m3; + + m0 = *p; + m1 = *(p + 1); + m2 = *(p + 2); + m3 = *(p + 3); + + if ((m0 | m1 | m2 | m3)) + { + __m64 vdest = *(__m64 *)q; + + vdest = pack565(in_over(vsrc, vsrca, load8888(m0), expand565(vdest, 0)), vdest, 0); + vdest = pack565(in_over(vsrc, vsrca, load8888(m1), expand565(vdest, 1)), vdest, 1); + vdest = 
pack565(in_over(vsrc, vsrca, load8888(m2), expand565(vdest, 2)), vdest, 2); + vdest = pack565(in_over(vsrc, vsrca, load8888(m3), expand565(vdest, 3)), vdest, 3); + + *(__m64 *)q = vdest; + } + twidth -= 4; + p += 4; + q += 4; + } + + while (twidth) + { + CARD32 m; + + m = *(CARD32 *)p; + if (m) + { + ullong d = *q; + __m64 vdest = expand565((__m64)d, 0); + vdest = pack565 (in_over(vsrc, vsrca, load8888(m), vdest), vdest, 0); + *q = (ullong)vdest; + } + + twidth--; + p++; + q++; + } + + maskLine += maskStride; + dstLine += dstStride; + } + + _mm_empty (); +} + +void +fbCompositeSrcAdd_8000x8000mmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height) +{ + CARD8 *dstLine, *dst; + CARD8 *srcLine, *src; + FbStride dstStride, srcStride; + CARD16 w; + CARD8 s, d; + CARD16 t; + + CHECKPOINT(); + + fbComposeGetStart (pSrc, xSrc, ySrc, CARD8, srcStride, srcLine, 1); + fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 1); + + while (height--) + { + dst = dstLine; + dstLine += dstStride; + src = srcLine; + srcLine += srcStride; + w = width; + + while (w && (unsigned long)dst & 7) + { + s = *src; + d = *dst; + t = d + s; + s = t | (0 - (t >> 8)); + *dst = s; + + dst++; + src++; + w--; + } + + while (w >= 8) + { + *(__m64*)dst = _mm_adds_pu8(*(__m64*)src, *(__m64*)dst); + dst += 8; + src += 8; + w -= 8; + } + + while (w) + { + s = *src; + d = *dst; + t = d + s; + s = t | (0 - (t >> 8)); + *dst = s; + + dst++; + src++; + w--; + } + } + + _mm_empty(); +} + +void +fbCompositeSrcAdd_8888x8888mmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height) +{ + CARD32 *dstLine, *dst; + CARD32 *srcLine, *src; + FbStride dstStride, srcStride; + CARD16 w; + + CHECKPOINT(); + + 
fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcStride, srcLine, 1); + fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1); + + while (height--) + { + dst = dstLine; + dstLine += dstStride; + src = srcLine; + srcLine += srcStride; + w = width; + + while (w && (unsigned long)dst & 7) + { + *dst = _mm_cvtsi64_si32(_mm_adds_pu8(_mm_cvtsi32_si64(*src), + _mm_cvtsi32_si64(*dst))); + dst++; + src++; + w--; + } + + while (w >= 2) + { + *(ullong*)dst = (ullong) _mm_adds_pu8(*(__m64*)src, *(__m64*)dst); + dst += 2; + src += 2; + w -= 2; + } + + if (w) + { + *dst = _mm_cvtsi64_si32(_mm_adds_pu8(_mm_cvtsi32_si64(*src), + _mm_cvtsi32_si64(*dst))); + + } + } + + _mm_empty(); +} + +Bool +fbSolidFillmmx (FbPixels *pDraw, + int x, + int y, + int width, + int height, + FbBits xor) +{ + FbStride stride; + int bpp; + ullong fill; + __m64 vfill; + CARD32 byte_width; + CARD8 *byte_line; + FbBits *bits; + int xoff, yoff; + + CHECKPOINT(); + + fbGetDrawable(pDraw, bits, stride, bpp, xoff, yoff); + + if (bpp == 16 && (xor >> 16 != (xor & 0xffff))) + return FALSE; + + if (bpp != 16 && bpp != 32) + return FALSE; + + if (bpp == 16) + { + stride = stride * sizeof (FbBits) / 2; + byte_line = (CARD8 *)(((CARD16 *)bits) + stride * (y + yoff) + (x + xoff)); + byte_width = 2 * width; + stride *= 2; + } + else + { + stride = stride * sizeof (FbBits) / 4; + byte_line = (CARD8 *)(((CARD32 *)bits) + stride * (y + yoff) + (x + xoff)); + byte_width = 4 * width; + stride *= 4; + } + + fill = ((ullong)xor << 32) | xor; + vfill = (__m64)fill; + + while (height--) + { + int w; + CARD8 *d = byte_line; + byte_line += stride; + w = byte_width; + + while (w >= 2 && ((unsigned long)d & 3)) + { + *(CARD16 *)d = xor; + w -= 2; + d += 2; + } + + while (w >= 4 && ((unsigned long)d & 7)) + { + *(CARD32 *)d = xor; + + w -= 4; + d += 4; + } + + while (w >= 64) + { + *(__m64*) (d + 0) = vfill; + *(__m64*) (d + 8) = vfill; + *(__m64*) (d + 16) = vfill; + *(__m64*) (d + 24) = vfill; + *(__m64*) (d + 32) = 
vfill; + *(__m64*) (d + 40) = vfill; + *(__m64*) (d + 48) = vfill; + *(__m64*) (d + 56) = vfill; + + w -= 64; + d += 64; + } + while (w >= 4) + { + *(CARD32 *)d = xor; + + w -= 4; + d += 4; + } + if (w >= 2) + { + *(CARD16 *)d = xor; + w -= 2; + d += 2; + } + } + + _mm_empty(); + return TRUE; +} + +Bool +fbCopyAreammx (FbPixels *pSrc, + FbPixels *pDst, + int src_x, + int src_y, + int dst_x, + int dst_y, + int width, + int height) +{ + FbBits * src_bits; + FbStride src_stride; + int src_bpp; + int src_xoff; + int src_yoff; + + FbBits * dst_bits; + FbStride dst_stride; + int dst_bpp; + int dst_xoff; + int dst_yoff; + + CARD8 * src_bytes; + CARD8 * dst_bytes; + int byte_width; + + fbGetDrawable(pSrc, src_bits, src_stride, src_bpp, src_xoff, src_yoff); + fbGetDrawable(pDst, dst_bits, dst_stride, dst_bpp, dst_xoff, dst_yoff); + + if (src_bpp != 16 && src_bpp != 32) + return FALSE; + + if (dst_bpp != 16 && dst_bpp != 32) + return FALSE; + + if (src_bpp != dst_bpp) + { + return FALSE; + } + + if (src_bpp == 16) + { + src_stride = src_stride * sizeof (FbBits) / 2; + dst_stride = dst_stride * sizeof (FbBits) / 2; + src_bytes = (CARD8 *)(((CARD16 *)src_bits) + src_stride * (src_y + src_yoff) + (src_x + src_xoff)); + dst_bytes = (CARD8 *)(((CARD16 *)dst_bits) + dst_stride * (dst_y + dst_yoff) + (dst_x + dst_xoff)); + byte_width = 2 * width; + src_stride *= 2; + dst_stride *= 2; + } + else + { + src_stride = src_stride * sizeof (FbBits) / 4; + dst_stride = dst_stride * sizeof (FbBits) / 4; + src_bytes = (CARD8 *)(((CARD32 *)src_bits) + src_stride * (src_y + src_yoff) + (src_x + src_xoff)); + dst_bytes = (CARD8 *)(((CARD32 *)dst_bits) + dst_stride * (dst_y + dst_yoff) + (dst_x + dst_xoff)); + byte_width = 4 * width; + src_stride *= 4; + dst_stride *= 4; + } + + while (height--) + { + int w; + CARD8 *s = src_bytes; + CARD8 *d = dst_bytes; + src_bytes += src_stride; + dst_bytes += dst_stride; + w = byte_width; + + while (w >= 2 && ((unsigned long)d & 3)) + { + *(CARD16 *)d = 
*(CARD16 *)s; + w -= 2; + s += 2; + d += 2; + } + + while (w >= 4 && ((unsigned int)d & 7)) + { + *(CARD32 *)d = *(CARD32 *)s; + + w -= 4; + s += 4; + d += 4; + } + + while (w >= 64) + { + *(__m64 *)(d + 0) = *(__m64 *)(s + 0); + *(__m64 *)(d + 8) = *(__m64 *)(s + 8); + *(__m64 *)(d + 16) = *(__m64 *)(s + 16); + *(__m64 *)(d + 24) = *(__m64 *)(s + 24); + *(__m64 *)(d + 32) = *(__m64 *)(s + 32); + *(__m64 *)(d + 40) = *(__m64 *)(s + 40); + *(__m64 *)(d + 48) = *(__m64 *)(s + 48); + *(__m64 *)(d + 56) = *(__m64 *)(s + 56); + w -= 64; + s += 64; + d += 64; + } + while (w >= 4) + { + *(CARD32 *)d = *(CARD32 *)s; + + w -= 4; + s += 4; + d += 4; + } + if (w >= 2) + { + *(CARD16 *)d = *(CARD16 *)s; + w -= 2; + s += 2; + d += 2; + } + } + + _mm_empty(); + return TRUE; +} + +void +fbCompositeCopyAreammx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height) +{ + fbCopyAreammx (pSrc->pDrawable, + pDst->pDrawable, + xSrc, ySrc, + xDst, yDst, + width, height); +} + +#if !defined(__amd64__) && !defined(__x86_64__) + +enum CPUFeatures { + NoFeatures = 0, + MMX = 0x1, + MMX_Extensions = 0x2, + SSE = 0x6, + SSE2 = 0x8, + CMOV = 0x10 +}; + +static unsigned int detectCPUFeatures(void) { + unsigned int result; + char vendor[13]; + vendor[0] = 0; + vendor[12] = 0; + /* see p. 
118 of amd64 instruction set manual Vol3 */ + __asm__ ("push %%ebx\n" + "pushf\n" + "pop %%eax\n" + "mov %%eax, %%ebx\n" + "xor $0x00200000, %%eax\n" + "push %%eax\n" + "popf\n" + "pushf\n" + "pop %%eax\n" + "mov $0x0, %%edx\n" + "xor %%ebx, %%eax\n" + "jz skip\n" + + "mov $0x00000000, %%eax\n" + "cpuid\n" + "mov %%ebx, %1\n" + "mov %%edx, %2\n" + "mov %%ecx, %3\n" + "mov $0x00000001, %%eax\n" + "cpuid\n" + "skip:\n" + "pop %%ebx\n" + "mov %%edx, %0\n" + : "=r" (result), + "=m" (vendor[0]), + "=m" (vendor[4]), + "=m" (vendor[8]) + : + : "%eax", "%ecx", "%edx" + ); + + unsigned int features = 0; + if (result) { + /* result now contains the standard feature bits */ + if (result & (1 << 15)) + features |= CMOV; + if (result & (1 << 23)) + features |= MMX; + if (result & (1 << 25)) + features |= SSE; + if (result & (1 << 26)) + features |= SSE2; + if ((result & MMX) && !(result & SSE) && (strcmp(vendor, "AuthenticAMD") == 0)) { + /* check for AMD MMX extensions */ + + unsigned int result; + __asm__("push %%ebx\n" + "mov $0x80000000, %%eax\n" + "cpuid\n" + "xor %%edx, %%edx\n" + "cmp $0x1, %%eax\n" + "jge skip2\n" + "mov $0x80000001, %%eax\n" + "cpuid\n" + "skip2:\n" + "mov %%edx, %0\n" + "pop %%ebx\n" + : "=r" (result) + : + : "%eax", "%ecx", "%edx" + ); + if (result & (1<<22)) + features |= MMX_Extensions; + } + } + return features; +} + +Bool +fbHaveMMX (void) +{ + static Bool initialized = FALSE; + static Bool mmx_present; + + if (!initialized) + { + unsigned int features = detectCPUFeatures(); + mmx_present = (features & (MMX|MMX_Extensions)) == (MMX|MMX_Extensions); + initialized = TRUE; + } + + return mmx_present; +} +#endif /* __amd64__ */ + + +#endif /* RENDER */ diff --git a/pixman/src/fbmmx.h b/pixman/src/fbmmx.h new file mode 100644 index 00000000..d9de56a5 --- /dev/null +++ b/pixman/src/fbmmx.h @@ -0,0 +1,228 @@ +/* + * Copyright © 2004 Red Hat, Inc. 
+ * Copyright © 2005 Trolltech AS + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Red Hat not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. Red Hat makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. 
+ * + * Author: Søren Sandmann (sandmann@redhat.com) + * Lars Knoll (lars@trolltech.com) + * + * Based on work by Owen Taylor + */ +#ifdef USE_MMX + +#if !defined(__amd64__) && !defined(__x86_64__) +Bool fbHaveMMX(void); +#else +#define fbHaveMMX() TRUE +#endif + +#else +#define fbHaveMMX() FALSE +#endif + +#ifdef USE_MMX + +void fbComposeSetupMMX(void); + +void fbCompositeSolidMask_nx8888x0565Cmmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +void fbCompositeSrcAdd_8888x8888mmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +void fbCompositeSolidMask_nx8888x8888Cmmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +void fbCompositeSolidMask_nx8x8888mmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +void fbCompositeSolidMaskSrc_nx8x8888mmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +void fbCompositeSrcAdd_8000x8000mmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +void fbCompositeSrc_8888RevNPx8888mmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + 
INT16 yDst, + CARD16 width, + CARD16 height); +void fbCompositeSrc_8888RevNPx0565mmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +void fbCompositeSolid_nx8888mmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +void fbCompositeSolid_nx0565mmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +void fbCompositeSolidMask_nx8x0565mmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +void fbCompositeSrc_x888x8x8888mmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +void fbCompositeSrc_8888x8x8888mmx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +Bool fbCopyAreammx (FbPixels *pSrc, + FbPixels *pDst, + int src_x, + int src_y, + int dst_x, + int dst_y, + int width, + int height); +void fbCompositeCopyAreammx (pixman_operator_t op, + PicturePtr pSrc, + PicturePtr pMask, + PicturePtr pDst, + INT16 xSrc, + INT16 ySrc, + INT16 xMask, + INT16 yMask, + INT16 xDst, + INT16 yDst, + CARD16 width, + CARD16 height); +Bool fbSolidFillmmx (FbPixels *pDraw, + int x, + int y, + int width, + int height, + FbBits xor); + +#endif /* USE_MMX */ diff --git a/pixman/src/ic.c 
b/pixman/src/fbpict.c index 6bdaba5a..5cea6896 100644 --- a/pixman/src/ic.c +++ b/pixman/src/fbpict.c @@ -1,4 +1,6 @@ /* + * $Id: fbpict.c,v 1.1 2005-08-16 23:50:25 vektor Exp $ + * * Copyright © 2000 SuSE, Inc. * * Permission to use, copy, modify, distribute, and sell this software and its @@ -15,42 +17,22 @@ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * * Author: Keith Packard, SuSE, Inc. */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif #include "pixman-xserver-compat.h" + +#ifdef RENDER + #include "fbpict.h" +#include "fbmmx.h" -#define cvt8888to0565(s) ((((s) >> 3) & 0x001f) | \ - (((s) >> 5) & 0x07e0) | \ - (((s) >> 8) & 0xf800)) -#define cvt0565to0888(s) (((((s) << 3) & 0xf8) | (((s) >> 2) & 0x7)) | \ - ((((s) << 5) & 0xfc00) | (((s) >> 1) & 0x300)) | \ - ((((s) << 8) & 0xf80000) | (((s) << 3) & 0x70000))) - -#if IMAGE_BYTE_ORDER == MSBFirst -#define Fetch24(a) ((unsigned long) (a) & 1 ? \ - ((*(a) << 16) | *((CARD16 *) ((a)+1))) : \ - ((*((CARD16 *) (a)) << 8) | *((a)+2))) -#define Store24(a,v) ((unsigned long) (a) & 1 ? \ - ((*(a) = (CARD8) ((v) >> 16)), \ - (*((CARD16 *) ((a)+1)) = (CARD16) (v))) : \ - ((*((CARD16 *) (a)) = (CARD16) ((v) >> 8)), \ - (*((a)+2) = (CARD8) (v)))) -#else -#define Fetch24(a) ((unsigned long) (a) & 1 ? \ - ((*(a)) | (*((CARD16 *) ((a)+1)) << 8)) : \ - ((*((CARD16 *) (a))) | (*((a)+2) << 16))) -#define Store24(a,v) ((unsigned long) (a) & 1 ? 
\ - ((*(a) = (CARD8) (v)), \ - (*((CARD16 *) ((a)+1)) = (CARD16) ((v) >> 8))) : \ - ((*((CARD16 *) (a)) = (CARD16) (v)),\ - (*((a)+2) = (CARD8) ((v) >> 16)))) -#endif - static CARD32 fbOver (CARD32 x, CARD32 y) { @@ -123,6 +105,7 @@ fbIn24 (CARD32 x, CARD8 y) * mask is a 5-bit alpha value. Again, if src has alpha, this will not * work. */ + #define inOver0565(alpha, source, destval, dest) { \ CARD16 dstrb = destval & 0xf81f; CARD16 dstg = destval & 0x7e0; \ CARD32 drb = ((source&0xf81f)-dstrb)*alpha; CARD32 dg=((source & 0x7e0)-dstg)*alpha; \ @@ -136,7 +119,7 @@ fbIn24 (CARD32 x, CARD8 y) } #if IMAGE_BYTE_ORDER == LSBFirst - #define setupPackedReader(count,temp,where,workingWhere,workingVal) count=(int)where; \ + #define setupPackedReader(count,temp,where,workingWhere,workingVal) count=(long)where; \ temp=count&3; \ where-=temp; \ workingWhere=(CARD32 *)where; \ @@ -146,10 +129,10 @@ fbIn24 (CARD32 x, CARD8 y) #define readPacked(where,x,y,z) {if(!(x)) { (x)=4; y=*z++; } where=(y)&0xff; (y)>>=8; (x)--;} #define readPackedSource(where) readPacked(where,ws,workingSource,wsrc) #define readPackedDest(where) readPacked(where,wd,workingiDest,widst) - #define writePacked(what) workingoDest>>=8; workingoDest|=(what<<24); ww--; if(!ww) { ww=4; *wodst++=workingoDest; } + #define writePacked(what) workingoDest>>=8; workingoDest|=(what<<24); ww--; if(!ww) { ww=4; *wodst++=workingoDest; } #else #warning "I havn't tested fbCompositeTrans_0888xnx0888() on big endian yet!" 
- #define setupPackedReader(count,temp,where,workingWhere,workingVal) count=(int)where; \ + #define setupPackedReader(count,temp,where,workingWhere,workingVal) count=(long)where; \ temp=count&3; \ where-=temp; \ workingWhere=(CARD32 *)where; \ @@ -159,7 +142,7 @@ fbIn24 (CARD32 x, CARD8 y) #define readPacked(where,x,y,z) {if(!(x)) { (x)=4; y=*z++; } where=(y)>>24; (y)<<=8; (x)--;} #define readPackedSource(where) readPacked(where,ws,workingSource,wsrc) #define readPackedDest(where) readPacked(where,wd,workingiDest,widst) - #define writePacked(what) workingoDest<<=8; workingoDest|=what; ww--; if(!ww) { ww=4; *wodst++=workingoDest; } + #define writePacked(what) workingoDest<<=8; workingoDest|=what; ww--; if(!ww) { ww=4; *wodst++=workingoDest; } #endif /* * Naming convention: @@ -188,15 +171,15 @@ fbCompositeSolidMask_nx8x8888 (pixman_operator_t op, CARD16 w; fbComposeGetSolid(pSrc, src); - - dstMask = FbFullMask (pDst->pixels->depth); + + dstMask = FbFullMask (pDst->pDrawable->depth); srca = src >> 24; if (src == 0) return; - + fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1); fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1); - + while (height--) { dst = dstLine; @@ -247,15 +230,15 @@ fbCompositeSolidMask_nx8888x8888C (pixman_operator_t op, CARD32 m, n, o, p; fbComposeGetSolid(pSrc, src); - - dstMask = FbFullMask (pDst->pixels->depth); + + dstMask = FbFullMask (pDst->pDrawable->depth); srca = src >> 24; if (src == 0) return; - + fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1); fbComposeGetStart (pMask, xMask, yMask, CARD32, maskStride, maskLine, 1); - + while (height--) { dst = dstLine; @@ -321,7 +304,7 @@ fbCompositeSolidMask_nx8x0888 (pixman_operator_t op, CARD32 rs,gs,bs,rd,gd,bd; fbComposeGetSolid(pSrc, src); - + srca = src >> 24; srcia = 255-srca; if (src == 0) @@ -330,27 +313,27 @@ fbCompositeSolidMask_nx8x0888 (pixman_operator_t op, rs=src&0xff; gs=(src>>8)&0xff; bs=(src>>16)&0xff; - + 
fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 3); fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1); while (height--) { /* fixme: cleanup unused */ - unsigned int wt,wd; + unsigned long wt,wd; CARD32 workingiDest; CARD32 *widst; - + edst=dst = dstLine; dstLine += dstStride; mask = maskLine; maskLine += maskStride; w = width; - + #ifndef NO_MASKED_PACKED_READ setupPackedReader(wd,wt,edst,widst,workingiDest); #endif - + while (w--) { #ifndef NO_MASKED_PACKED_READ @@ -420,14 +403,15 @@ fbCompositeSolidMask_nx8x0565 (pixman_operator_t op, if (src == 0) return; - + srca8 = (src >> 24); srca5 = (srca8 >> 3); src16 = cvt8888to0565(src); - + fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1); fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1); + while (height--) { dst = dstLine; @@ -445,7 +429,7 @@ fbCompositeSolidMask_nx8x0565 (pixman_operator_t op, { if (m == 0xff) *dst++ = src16; - else + else { d = *dst; m >>= 3; @@ -455,7 +439,7 @@ fbCompositeSolidMask_nx8x0565 (pixman_operator_t op, else { d = *dst; - if (m == 0xff) + if (m == 0xff) { t = fbOver24 (src, cvt0565to0888 (d)); } @@ -531,7 +515,7 @@ fbCompositeSolidMask_nx8888x0565 (pixman_operator_t op, } else { - if (m == 0xff) + if (m == 0xff) { d = *dst; t = fbOver24 (src, cvt0565to0888 (d)); @@ -573,16 +557,16 @@ fbCompositeSolidMask_nx8888x0565C (pixman_operator_t op, CARD32 m, n, o; fbComposeGetSolid(pSrc, src); - + srca = src >> 24; if (src == 0) return; - + src16 = cvt8888to0565(src); - + fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1); fbComposeGetStart (pMask, xMask, yMask, CARD32, maskStride, maskLine, 1); - + while (height--) { dst = dstLine; @@ -641,11 +625,11 @@ fbCompositeSrc_8888x8888 (pixman_operator_t op, FbStride dstStride, srcStride; CARD8 a; CARD16 w; - + fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1); fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcStride, srcLine, 1); - - dstMask = 
FbFullMask (pDst->pixels->depth); + + dstMask = FbFullMask (pDst->pDrawable->depth); while (height--) { @@ -688,10 +672,10 @@ fbCompositeSrc_8888x0888 (pixman_operator_t op, CARD8 a; FbStride dstStride, srcStride; CARD16 w; - + fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 3); fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcStride, srcLine, 1); - + while (height--) { dst = dstLine; @@ -737,7 +721,7 @@ fbCompositeSrc_8888x0565 (pixman_operator_t op, CARD8 a; FbStride dstStride, srcStride; CARD16 w; - + fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcStride, srcLine, 1); fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1); @@ -791,7 +775,7 @@ fbCompositeSrcAdd_8000x8000 (pixman_operator_t op, CARD16 w; CARD8 s, d; CARD16 t; - + fbComposeGetStart (pSrc, xSrc, ySrc, CARD8, srcStride, srcLine, 1); fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 1); @@ -842,7 +826,7 @@ fbCompositeSrcAdd_8888x8888 (pixman_operator_t op, CARD32 s, d; CARD16 t; CARD32 m,n,o,p; - + fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcStride, srcLine, 1); fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1); @@ -897,10 +881,10 @@ fbCompositeSrcAdd_1000x1000 (pixman_operator_t op, int dstBpp, srcBpp; int dstXoff, dstYoff; int srcXoff, srcYoff; - - FbGetPixels(pSrc->pixels, srcBits, srcStride, srcBpp, srcXoff, srcYoff); - FbGetPixels(pDst->pixels, dstBits, dstStride, dstBpp, dstXoff, dstYoff); + fbGetDrawable(pSrc->pDrawable, srcBits, srcStride, srcBpp, srcXoff, srcYoff); + + fbGetDrawable(pDst->pDrawable, dstBits, dstStride, dstBpp, dstXoff, dstYoff); fbBlt (srcBits + srcStride * (ySrc + srcYoff), srcStride, @@ -948,12 +932,12 @@ fbCompositeSolidMask_nx1xn (pixman_operator_t op, if ((src & 0xff000000) != 0xff000000) { pixman_compositeGeneral (op, pSrc, pMask, pDst, - xSrc, ySrc, xMask, yMask, xDst, yDst, + xSrc, ySrc, xMask, yMask, xDst, yDst, width, height); return; } FbGetStipPixels (pMask->pixels, maskBits, maskStride, maskBpp, 
maskXoff, maskYoff); - FbGetPixels (pDst->pixels, dstBits, dstStride, dstBpp, dstXoff, dstYoff); + fbGetDrawable (pDst->pDrawable, dstBits, dstStride, dstBpp, dstXoff, dstYoff); switch (dstBpp) { case 32: @@ -1024,16 +1008,16 @@ fbCompositeTrans_0565xnx0565(pixman_operator_t op, CARD8 maskAlpha; CARD16 s_16, d_16; CARD32 s_32, d_32; - + fbComposeGetSolid (pMask, mask); maskAlpha = mask >> 27; - + if (!maskAlpha) return; if (maskAlpha == 0xff) { fbCompositeSrcSrc_nxn (PIXMAN_OPERATOR_SRC, pSrc, pMask, pDst, - xSrc, ySrc, xMask, yMask, xDst, yDst, + xSrc, ySrc, xMask, yMask, xDst, yDst, width, height); return; } @@ -1049,8 +1033,8 @@ fbCompositeTrans_0565xnx0565(pixman_operator_t op, src = srcLine; srcLine += srcStride; w = width; - - if(((int)src&1)==1) + + if(((long)src&1)==1) { s_16 = *src++; d_16 = *dst; @@ -1058,7 +1042,7 @@ fbCompositeTrans_0565xnx0565(pixman_operator_t op, w--; } isrc=(CARD32 *)src; - if(((int)dst&1)==0) + if(((long)dst&1)==0) { idst=(CARD32 *)dst; while (w>1) @@ -1126,28 +1110,28 @@ fbCompositeTrans_0888xnx0888(pixman_operator_t op, CARD16 w; FbBits mask; CARD16 maskAlpha,maskiAlpha; - + fbComposeGetSolid (pMask, mask); maskAlpha = mask >> 24; maskiAlpha= 255-maskAlpha; - + if (!maskAlpha) return; /* if (maskAlpha == 0xff) { fbCompositeSrc_0888x0888 (op, pSrc, pMask, pDst, - xSrc, ySrc, xMask, yMask, xDst, yDst, + xSrc, ySrc, xMask, yMask, xDst, yDst, width, height); return; } */ - + fbComposeGetStart (pSrc, xSrc, ySrc, CARD8, srcStride, srcLine, 3); fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 3); { - unsigned int ws,wt; + unsigned long ws,wt; CARD32 workingSource; CARD32 *wsrc, *wdst, *widst; CARD32 rs, rd, nd; @@ -1164,11 +1148,11 @@ fbCompositeTrans_0888xnx0888(pixman_operator_t op, isrc = src = srcLine; srcLine += srcStride; w = width*3; - + setupPackedReader(ws,wt,isrc,wsrc,workingSource); /* get to word aligned */ - switch(!(int)dst&3) + switch(!(long)dst&3) { case 1: readPackedSource(rs); @@ -1178,19 +1162,19 @@ 
fbCompositeTrans_0888xnx0888(pixman_operator_t op, w--; if(w==0) break; case 2: readPackedSource(rs); - rd=*dst; + rd=*dst; *dst++=alphamaskCombine24(rs, rd)>>8; w--; if(w==0) break; case 3: readPackedSource(rs); - rd=*dst; + rd=*dst; *dst++=alphamaskCombine24(rs, rd)>>8; w--; if(w==0) break; } wdst=(CARD32 *)dst; while (w>3) { - /* FIXME: write a special readPackedWord macro, which knows how to + /* FIXME: write a special readPackedWord macro, which knows how to * halfword combine */ @@ -1221,15 +1205,15 @@ fbCompositeTrans_0888xnx0888(pixman_operator_t op, { case 3: readPackedSource(rs); - rd=*dst; + rd=*dst; *dst++=alphamaskCombine24(rs, rd)>>8; case 2: readPackedSource(rs); - rd=*dst; + rd=*dst; *dst++=alphamaskCombine24(rs, rd)>>8; case 1: readPackedSource(rs); - rd=*dst; + rd=*dst; *dst++=alphamaskCombine24(rs, rd)>>8; } } @@ -1244,7 +1228,7 @@ fbCompositeTrans_0888xnx0888(pixman_operator_t op, srcLine += srcStride; w = width*3; /* get to word aligned */ - switch(!(int)src&3) + switch(!(long)src&3) { case 1: rd=alphamaskCombine24(*src++, *dst)>>8; @@ -1315,9 +1299,9 @@ fbCompositeSrcSrc_nxn (pixman_operator_t op, int dstBpp; Bool reverse = FALSE; Bool upsidedown = FALSE; - - FbGetPixels(pSrc->pixels,src,srcStride,srcBpp,srcXoff,srcYoff); - FbGetPixels(pDst->pixels,dst,dstStride,dstBpp,dstXoff,dstYoff); + + fbGetDrawable(pSrc->pDrawable,src,srcStride,srcBpp,srcXoff,srcYoff); + fbGetDrawable(pDst->pDrawable,dst,dstStride,dstBpp,dstXoff,dstYoff); fbBlt (src + (ySrc + srcYoff) * srcStride, srcStride, @@ -1354,10 +1338,12 @@ fbCompositeSolidSrc_nxn (CARD8 op, CARD16 width, CARD16 height) { - + } */ +# define mod(a,b) ((b) == 1 ? 0 : (a) >= 0 ? 
(a) % (b) : (b) - (-a) % (b)) + void pixman_composite (pixman_operator_t op, PicturePtr pSrc, @@ -1375,60 +1361,72 @@ pixman_composite (pixman_operator_t op, pixman_region16_t *region; int n; pixman_box16_t *pbox; - CompositeFunc func; - int srcRepeat = pSrc->repeat; - int maskRepeat = FALSE; - int srcAlphaMap = pSrc->alphaMap != 0; - int maskAlphaMap = FALSE; - int dstAlphaMap = pDst->alphaMap != 0; + CompositeFunc func = 0; + Bool srcRepeat = pSrc->pDrawable && pSrc->repeat == RepeatNormal; + Bool maskRepeat = FALSE; + Bool srcTransform = pSrc->transform != 0; + Bool maskTransform = FALSE; + Bool srcAlphaMap = pSrc->alphaMap != 0; + Bool maskAlphaMap = FALSE; + Bool dstAlphaMap = pDst->alphaMap != 0; int x_msk, y_msk, x_src, y_src, x_dst, y_dst; int w, h, w_this, h_this; - if (pSrc->pixels->width == 0 || - pSrc->pixels->height == 0) - { - return; +#ifdef USE_MMX + static Bool mmx_setup = FALSE; + if (!mmx_setup) { + fbComposeSetupMMX(); + mmx_setup = TRUE; } - - xDst += pDst->pixels->x; - yDst += pDst->pixels->y; - xSrc += pSrc->pixels->x; - ySrc += pSrc->pixels->y; - if (pMask) +#endif + + xDst += pDst->pDrawable->x; + yDst += pDst->pDrawable->y; + if (pSrc->pDrawable) { + xSrc += pSrc->pDrawable->x; + ySrc += pSrc->pDrawable->y; + } + + if (srcRepeat && srcTransform && + pSrc->pDrawable->width == 1 && + pSrc->pDrawable->height == 1) + srcTransform = FALSE; + + if (pMask && pMask->pDrawable) { - xMask += pMask->pixels->x; - yMask += pMask->pixels->y; - maskRepeat = pMask->repeat; + xMask += pMask->pDrawable->x; + yMask += pMask->pDrawable->y; + maskRepeat = pMask->repeat == RepeatNormal; + maskTransform = pMask->transform != 0; +#ifdef PIXMAN_CONVOLUTION + if (pMask->filter == PictFilterConvolution) + maskTransform = TRUE; +#endif + maskAlphaMap = pMask->alphaMap != 0; + + if (maskRepeat && maskTransform && + pMask->pDrawable->width == 1 && + pMask->pDrawable->height == 1) + maskTransform = FALSE; } - region = pixman_region_create(); - pixman_region_union_rect 
(region, region, xDst, yDst, width, height); - - if (!FbComputeCompositeRegion (region, - pSrc, - pMask, - pDst, - xSrc, - ySrc, - xMask, - yMask, - xDst, - yDst, - width, - height)) - return; - - func = pixman_compositeGeneral; - if (!pSrc->transform && !(pMask && pMask->transform)) - if (!maskAlphaMap && !srcAlphaMap && !dstAlphaMap) + if (pSrc->pDrawable && (!pMask || pMask->pDrawable) + && !srcTransform && !maskTransform + && !maskAlphaMap && !srcAlphaMap && !dstAlphaMap +#ifdef PIXMAN_CONVOLUTION + && (pSrc->filter != PictFilterConvolution) + && (!pMask || pMask->filter != PictFilterConvolution)) +#else + && !pMask) +#endif switch (op) { case PIXMAN_OPERATOR_OVER: if (pMask) { - if (srcRepeat && - pSrc->pixels->width == 1 && - pSrc->pixels->height == 1) + if (srcRepeat && + pSrc->pDrawable->width == 1 && + pSrc->pDrawable->height == 1) { if (PICT_FORMAT_COLOR(pSrc->format_code)) { switch (pMask->format_code) { @@ -1436,7 +1434,12 @@ pixman_composite (pixman_operator_t op, switch (pDst->format_code) { case PICT_r5g6b5: case PICT_b5g6r5: - func = fbCompositeSolidMask_nx8x0565; +#ifdef USE_MMX + if (fbHaveMMX()) + func = fbCompositeSolidMask_nx8x0565mmx; + else +#endif + func = fbCompositeSolidMask_nx8x0565; break; case PICT_r8g8b8: case PICT_b8g8r8: @@ -1446,7 +1449,12 @@ pixman_composite (pixman_operator_t op, case PICT_x8r8g8b8: case PICT_a8b8g8r8: case PICT_x8b8g8r8: - func = fbCompositeSolidMask_nx8x8888; +#ifdef USE_MMX + if (fbHaveMMX()) + func = fbCompositeSolidMask_nx8x8888mmx; + else +#endif + func = fbCompositeSolidMask_nx8x8888; break; } break; @@ -1455,14 +1463,24 @@ pixman_composite (pixman_operator_t op, switch (pDst->format_code) { case PICT_a8r8g8b8: case PICT_x8r8g8b8: - func = fbCompositeSolidMask_nx8888x8888C; +#ifdef USE_MMX + if (fbHaveMMX()) + func = fbCompositeSolidMask_nx8888x8888Cmmx; + else +#endif + func = fbCompositeSolidMask_nx8888x8888C; break; case PICT_r5g6b5: - func = fbCompositeSolidMask_nx8888x0565C; +#ifdef USE_MMX + if 
(fbHaveMMX()) + func = fbCompositeSolidMask_nx8888x0565Cmmx; + else +#endif + func = fbCompositeSolidMask_nx8888x0565C; break; } } - else + else { switch (pDst->format_code) { case PICT_r5g6b5: @@ -1476,17 +1494,27 @@ pixman_composite (pixman_operator_t op, switch (pDst->format_code) { case PICT_a8b8g8r8: case PICT_x8b8g8r8: - func = fbCompositeSolidMask_nx8888x8888C; +#ifdef USE_MMX + if (fbHaveMMX()) + func = fbCompositeSolidMask_nx8888x8888Cmmx; + else +#endif + func = fbCompositeSolidMask_nx8888x8888C; break; case PICT_b5g6r5: - func = fbCompositeSolidMask_nx8888x0565C; +#ifdef USE_MMX + if (fbHaveMMX()) + func = fbCompositeSolidMask_nx8888x0565Cmmx; + else +#endif + func = fbCompositeSolidMask_nx8888x0565C; break; } } - else + else { switch (pDst->format_code) { - case PICT_r5g6b5: + case PICT_b5g6r5: func = fbCompositeSolidMask_nx8888x0565; break; } @@ -1510,65 +1538,187 @@ pixman_composite (pixman_operator_t op, if (func != pixman_compositeGeneral) srcRepeat = FALSE; } - else if (maskRepeat && - pMask->pDrawable->width == 1 && - pMask->pDrawable->height == 1) + else /* has mask and non-repeating source */ { - switch (pSrc->format_code) { - case PICT_r5g6b5: - case PICT_b5g6r5: - if (pDst->format_code == pSrc->format_code) - func = fbCompositeTrans_0565xnx0565; - break; - case PICT_r8g8b8: - case PICT_b8g8r8: - if (pDst->format_code == pSrc->format_code) - func = fbCompositeTrans_0888xnx0888; + if (pSrc->pDrawable == pMask->pDrawable && + xSrc == xMask && ySrc == yMask && + !pMask->componentAlpha) + { + /* source == mask: non-premultiplied data */ + switch (pSrc->format_code) { + case PICT_x8b8g8r8: + switch (pMask->format_code) { + case PICT_a8r8g8b8: + case PICT_a8b8g8r8: + switch (pDst->format_code) { + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: +#ifdef USE_MMX + if (fbHaveMMX()) + func = fbCompositeSrc_8888RevNPx8888mmx; +#endif + break; + case PICT_r5g6b5: +#ifdef USE_MMX + if (fbHaveMMX()) + func = fbCompositeSrc_8888RevNPx0565mmx; +#endif + break; + } + 
break; + } + break; + case PICT_x8r8g8b8: + switch (pMask->format_code) { + case PICT_a8r8g8b8: + case PICT_a8b8g8r8: + switch (pDst->format_code) { + case PICT_a8b8g8r8: + case PICT_x8b8g8r8: +#ifdef USE_MMX + if (fbHaveMMX()) + func = fbCompositeSrc_8888RevNPx8888mmx; +#endif + break; + case PICT_r5g6b5: +#ifdef USE_MMX + if (fbHaveMMX()) + func = fbCompositeSrc_8888RevNPx0565mmx; +#endif + break; + } + break; + } + break; + } break; } + else + { + /* non-repeating source, repeating mask => translucent window */ + if (maskRepeat && + pMask->pDrawable->width == 1 && + pMask->pDrawable->height == 1) + { + switch (pSrc->format_code) { + case PICT_r5g6b5: + case PICT_b5g6r5: + if (pDst->format_code == pSrc->format_code) + func = fbCompositeTrans_0565xnx0565; + break; + case PICT_r8g8b8: + case PICT_b8g8r8: + if (pDst->format_code == pSrc->format_code) + func = fbCompositeTrans_0888xnx0888; + break; +#ifdef USE_MMX + case PICT_x8r8g8b8: + case PICT_x8b8g8r8: + if (pDst->format_code == pSrc->format_code && + pMask->format_code == PICT_a8 && fbHaveMMX()) + func = fbCompositeSrc_x888x8x8888mmx; + break; +#if 0 /* This case fails rendercheck for me */ + case PICT_a8r8g8b8: + if ((pDst->format == PICT_a8r8g8b8 || + pDst->format == PICT_x8r8g8b8) && + pMask->format == PICT_a8 && fbHaveMMX()) + func = fbCompositeSrc_8888x8x8888mmx; + break; +#endif + case PICT_a8b8g8r8: + if ((pDst->format_code == PICT_a8b8g8r8 || + pDst->format_code == PICT_x8b8g8r8) && + pMask->format_code == PICT_a8 && fbHaveMMX()) + func = fbCompositeSrc_8888x8x8888mmx; + break; +#endif + } - if (func != pixman_compositeGeneral) - maskRepeat = FALSE; + if (func != pixman_compositeGeneral) + maskRepeat = FALSE; + } + } } } - else + else /* no mask */ { - /* - * Formats without alpha bits are just Copy with Over - */ - if (pSrc->format_code == pDst->format_code && !PICT_FORMAT_A(pSrc->format_code)) + if (srcRepeat && + pSrc->pDrawable->width == 1 && + pSrc->pDrawable->height == 1) { - func = 
fbCompositeSrcSrc_nxn; - } - else switch (pSrc->format_code) { - case PICT_a8r8g8b8: - switch (pDst->format_code) { + /* no mask and repeating source */ + switch (pSrc->format_code) { case PICT_a8r8g8b8: - case PICT_x8r8g8b8: - func = fbCompositeSrc_8888x8888; - break; - case PICT_r8g8b8: - func = fbCompositeSrc_8888x0888; - break; - case PICT_r5g6b5: - func = fbCompositeSrc_8888x0565; + switch (pDst->format_code) { + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: +#ifdef USE_MMX + if (fbHaveMMX()) + { + srcRepeat = FALSE; + func = fbCompositeSolid_nx8888mmx; + } +#endif + break; + case PICT_r5g6b5: +#ifdef USE_MMX + if (fbHaveMMX()) + { + srcRepeat = FALSE; + func = fbCompositeSolid_nx0565mmx; + } +#endif + break; + } break; } - break; - case PICT_a8b8g8r8: - switch (pDst->format_code) { - case PICT_a8b8g8r8: - case PICT_x8b8g8r8: - func = fbCompositeSrc_8888x8888; - break; - case PICT_b8g8r8: - func = fbCompositeSrc_8888x0888; + } + else + { + /* + * Formats without alpha bits are just Copy with Over + */ + if (pSrc->format_code == pDst->format_code && !PICT_FORMAT_A(pSrc->format_code)) + { +#ifdef USE_MMX + if (fbHaveMMX() && + (pSrc->format_code == PICT_x8r8g8b8 || pSrc->format_code == PICT_x8b8g8r8)) + func = fbCompositeCopyAreammx; + else +#endif + func = fbCompositeSrcSrc_nxn; + } + else switch (pSrc->format_code) { + case PICT_a8r8g8b8: + switch (pDst->format_code) { + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + func = fbCompositeSrc_8888x8888; + break; + case PICT_r8g8b8: + func = fbCompositeSrc_8888x0888; + break; + case PICT_r5g6b5: + func = fbCompositeSrc_8888x0565; + break; + } break; - case PICT_b5g6r5: - func = fbCompositeSrc_8888x0565; + case PICT_a8b8g8r8: + switch (pDst->format_code) { + case PICT_a8b8g8r8: + case PICT_x8b8g8r8: + func = fbCompositeSrc_8888x8888; + break; + case PICT_b8g8r8: + func = fbCompositeSrc_8888x0888; + break; + case PICT_b5g6r5: + func = fbCompositeSrc_8888x0565; + break; + } break; } - break; } } break; @@ -1579,21 +1729,36 
@@ pixman_composite (pixman_operator_t op, case PICT_a8r8g8b8: switch (pDst->format_code) { case PICT_a8r8g8b8: - func = fbCompositeSrcAdd_8888x8888; +#ifdef USE_MMX + if (fbHaveMMX()) + func = fbCompositeSrcAdd_8888x8888mmx; + else +#endif + func = fbCompositeSrcAdd_8888x8888; break; } break; case PICT_a8b8g8r8: switch (pDst->format_code) { case PICT_a8b8g8r8: - func = fbCompositeSrcAdd_8888x8888; +#ifdef USE_MMX + if (fbHaveMMX()) + func = fbCompositeSrcAdd_8888x8888mmx; + else +#endif + func = fbCompositeSrcAdd_8888x8888; break; } break; case PICT_a8: switch (pDst->format_code) { case PICT_a8: - func = fbCompositeSrcAdd_8000x8000; +#ifdef USE_MMX + if (fbHaveMMX()) + func = fbCompositeSrcAdd_8000x8000mmx; + else +#endif + func = fbCompositeSrcAdd_8000x8000; break; } break; @@ -1608,22 +1773,73 @@ pixman_composite (pixman_operator_t op, } break; case PIXMAN_OPERATOR_SRC: - if (pMask == 0) + if (pMask) + { +#ifdef USE_MMX + if (srcRepeat && + pSrc->pDrawable->width == 1 && + pSrc->pDrawable->height == 1) + { + if (pMask->format_code == PICT_a8) + { + switch (pDst->format_code) { + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + case PICT_a8b8g8r8: + case PICT_x8b8g8r8: + if (fbHaveMMX()) + func = fbCompositeSolidMaskSrc_nx8x8888mmx; + break; + } + } + } +#endif + } + else { if (pSrc->format_code == pDst->format_code) - func = fbCompositeSrcSrc_nxn; + { +#ifdef USE_MMX + if (pSrc->pDrawable != pDst->pDrawable && + (PICT_FORMAT_BPP (pSrc->format_code) == 16 || + PICT_FORMAT_BPP (pSrc->format_code) == 32)) + func = fbCompositeCopyAreammx; + else +#endif + func = fbCompositeSrcSrc_nxn; + } } - default: - func = pixman_compositeGeneral; break; } - /* if we are transforming, we handle repeats in - * FbFetch[a]_transform - */ - if (pSrc->transform) - srcRepeat = 0; - if (pMask && pMask->transform) - maskRepeat = 0; + + if (!func) { + /* no fast path, use the general code */ + pixman_compositeGeneral(op, pSrc, pMask, pDst, xSrc, ySrc, xMask, yMask, xDst, yDst, width, height); 
+ return; + } + + /* if we are transforming, we handle repeats in IcFetch[a]_transform */ + if (srcTransform) + srcRepeat = 0; + if (maskTransform) + maskRepeat = 0; + + region = pixman_region_create(); + pixman_region_union_rect (region, region, xDst, yDst, width, height); + + if (!FbComputeCompositeRegion (region, + pSrc, + pMask, + pDst, + xSrc, + ySrc, + xMask, + yMask, + xDst, + yDst, + width, + height)) + return; n = pixman_region_num_rects (region); pbox = pixman_region_rects (region); @@ -1642,33 +1858,33 @@ pixman_composite (pixman_operator_t op, x_dst = pbox->x1; if (maskRepeat) { - y_msk = MOD (y_msk, pMask->pixels->height); - if (h_this > pMask->pixels->height - y_msk) - h_this = pMask->pixels->height - y_msk; + y_msk = mod (y_msk, pMask->pDrawable->height); + if (h_this > pMask->pDrawable->height - y_msk) + h_this = pMask->pDrawable->height - y_msk; } if (srcRepeat) { - y_src = MOD (y_src, pSrc->pixels->height); - if (h_this > pSrc->pixels->height - y_src) - h_this = pSrc->pixels->height - y_src; + y_src = mod (y_src, pSrc->pDrawable->height); + if (h_this > pSrc->pDrawable->height - y_src) + h_this = pSrc->pDrawable->height - y_src; } while (w) { w_this = w; if (maskRepeat) { - x_msk = MOD (x_msk, pMask->pixels->width); - if (w_this > pMask->pixels->width - x_msk) - w_this = pMask->pixels->width - x_msk; + x_msk = mod (x_msk, pMask->pDrawable->width); + if (w_this > pMask->pDrawable->width - x_msk) + w_this = pMask->pDrawable->width - x_msk; } if (srcRepeat) { - x_src = MOD (x_src, pSrc->pixels->width); - if (w_this > pSrc->pixels->width - x_src) - w_this = pSrc->pixels->width - x_src; + x_src = mod (x_src, pSrc->pDrawable->width); + if (w_this > pSrc->pDrawable->width - x_src) + w_this = pSrc->pDrawable->width - x_src; } (*func) (op, pSrc, pMask, pDst, - x_src, y_src, x_msk, y_msk, x_dst, y_dst, + x_src, y_src, x_msk, y_msk, x_dst, y_dst, w_this, h_this); w -= w_this; x_src += w_this; @@ -1685,3 +1901,4 @@ pixman_composite (pixman_operator_t op, 
pixman_region_destroy (region); } slim_hidden_def(pixman_composite); +#endif /* RENDER */ diff --git a/pixman/src/icint.h b/pixman/src/icint.h index 8cb37ffc..5e6367e8 100644 --- a/pixman/src/icint.h +++ b/pixman/src/icint.h @@ -1071,4 +1071,34 @@ typedef xFixed_16_16 xFixed; #endif /* _PICTURE_H_ */ + +/* Macros needed by fbpict.c */ + +#define cvt8888to0565(s) ((((s) >> 3) & 0x001f) | \ + (((s) >> 5) & 0x07e0) | \ + (((s) >> 8) & 0xf800)) +#define cvt0565to0888(s) (((((s) << 3) & 0xf8) | (((s) >> 2) & 0x7)) | \ + ((((s) << 5) & 0xfc00) | (((s) >> 1) & 0x300)) | \ + ((((s) << 8) & 0xf80000) | (((s) << 3) & 0x70000))) + +#if IMAGE_BYTE_ORDER == MSBFirst +#define Fetch24(a) ((unsigned long) (a) & 1 ? \ + ((*(a) << 16) | *((CARD16 *) ((a)+1))) : \ + ((*((CARD16 *) (a)) << 8) | *((a)+2))) +#define Store24(a,v) ((unsigned long) (a) & 1 ? \ + ((*(a) = (CARD8) ((v) >> 16)), \ + (*((CARD16 *) ((a)+1)) = (CARD16) (v))) : \ + ((*((CARD16 *) (a)) = (CARD16) ((v) >> 8)), \ + (*((a)+2) = (CARD8) (v)))) +#else +#define Fetch24(a) ((unsigned long) (a) & 1 ? \ + ((*(a)) | (*((CARD16 *) ((a)+1)) << 8)) : \ + ((*((CARD16 *) (a))) | (*((a)+2) << 16))) +#define Store24(a,v) ((unsigned long) (a) & 1 ? \ + ((*(a) = (CARD8) (v)), \ + (*((CARD16 *) ((a)+1)) = (CARD16) ((v) >> 8))) : \ + ((*((CARD16 *) (a)) = (CARD16) (v)),\ + (*((a)+2) = (CARD8) ((v) >> 16)))) +#endif + #endif /* _ICINT_H_ */ |