summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt Turner <mattst88@gmail.com>2015-10-11 14:44:46 -0700
committerMatt Turner <mattst88@gmail.com>2015-10-13 09:40:42 -0700
commit7de61d8d14e84623b6fa46506eb74f938287f536 (patch)
tree16cbac196caeed23b06878b76d35c1d28e8782db
parent90e62c086766afffd289a321c7de8ea4b5cac87d (diff)
mmx: Use MMX2 intrinsics from xmmintrin.h directly.
We had lots of hacks to handle the inability to include xmmintrin.h without compiling with -msse (lest SSE instructions be used in pixman-mmx.c). Some recent version of gcc relaxed this restriction. Change configure.ac to test that xmmintrin.h can be included and that we can use some intrinsics from it, and remove the work-around code from pixman-mmx.c. Evidently allows gcc 4.9.3 to optimize better as well: text data bss dec hex filename 657078 30848 680 688606 a81de libpixman-1.so.0.33.3 before 656710 30848 680 688238 a806e libpixman-1.so.0.33.3 after Reviewed-by: Siarhei Siamashka <siarhei.siamashka@gmail.com> Tested-by: Pekka Paalanen <pekka.paalanen@collabora.co.uk> Signed-off-by: Matt Turner <mattst88@gmail.com>
-rw-r--r--configure.ac15
-rw-r--r--pixman/pixman-mmx.c64
2 files changed, 8 insertions, 71 deletions
diff --git a/configure.ac b/configure.ac
index 424bfd3..b04cc69 100644
--- a/configure.ac
+++ b/configure.ac
@@ -347,21 +347,14 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
#error "Need GCC >= 3.4 for MMX intrinsics"
#endif
#include <mmintrin.h>
+#include <xmmintrin.h>
int main () {
__m64 v = _mm_cvtsi32_si64 (1);
__m64 w;
- /* Some versions of clang will choke on K */
- asm ("pshufw %2, %1, %0\n\t"
- : "=y" (w)
- : "y" (v), "K" (5)
- );
-
- /* Some versions of clang will choke on this */
- asm ("pmulhuw %1, %0\n\t"
- : "+y" (w)
- : "y" (v)
- );
+ /* Test some intrinsics from xmmintrin.h */
+ w = _mm_shuffle_pi16(v, 5);
+ w = _mm_mulhi_pu16(w, w);
return _mm_cvtsi64_si32 (v);
}]])], have_mmx_intrinsics=yes)
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 05c48a4..88c3a39 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -40,6 +40,9 @@
#else
#include <mmintrin.h>
#endif
+#ifdef USE_X86_MMX
+#include <xmmintrin.h>
+#endif
#include "pixman-private.h"
#include "pixman-combine32.h"
#include "pixman-inlines.h"
@@ -59,66 +62,7 @@ _mm_empty (void)
}
#endif
-#ifdef USE_X86_MMX
-# if (defined(__SUNPRO_C) || defined(_MSC_VER) || defined(_WIN64))
-# include <xmmintrin.h>
-# else
-/* We have to compile with -msse to use xmmintrin.h, but that causes SSE
- * instructions to be generated that we don't want. Just duplicate the
- * functions we want to use. */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movemask_pi8 (__m64 __A)
-{
- int ret;
-
- asm ("pmovmskb %1, %0\n\t"
- : "=r" (ret)
- : "y" (__A)
- );
-
- return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mulhi_pu16 (__m64 __A, __m64 __B)
-{
- asm ("pmulhuw %1, %0\n\t"
- : "+y" (__A)
- : "y" (__B)
- );
- return __A;
-}
-
-# ifdef __OPTIMIZE__
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_shuffle_pi16 (__m64 __A, int8_t const __N)
-{
- __m64 ret;
-
- asm ("pshufw %2, %1, %0\n\t"
- : "=y" (ret)
- : "y" (__A), "K" (__N)
- );
-
- return ret;
-}
-# else
-# define _mm_shuffle_pi16(A, N) \
- ({ \
- __m64 ret; \
- \
- asm ("pshufw %2, %1, %0\n\t" \
- : "=y" (ret) \
- : "y" (A), "K" ((const int8_t)N) \
- ); \
- \
- ret; \
- })
-# endif
-# endif
-#endif
-
-#ifndef _MSC_VER
+#ifndef _MM_SHUFFLE
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
(((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
#endif