diff options
author | Matt Turner <mattst88@gmail.com> | 2015-10-11 14:44:46 -0700 |
---|---|---|
committer | Matt Turner <mattst88@gmail.com> | 2015-10-13 09:40:42 -0700 |
commit | 7de61d8d14e84623b6fa46506eb74f938287f536 (patch) | |
tree | 16cbac196caeed23b06878b76d35c1d28e8782db | |
parent | 90e62c086766afffd289a321c7de8ea4b5cac87d (diff) |
mmx: Use MMX2 intrinsics from xmmintrin.h directly.
We had lots of hacks to handle the inability to include xmmintrin.h
without compiling with -msse (lest SSE instructions be used in
pixman-mmx.c). Some recent version of gcc relaxed this restriction.
Change configure.ac to test that xmmintrin.h can be included and that we
can use some intrinsics from it, and remove the work-around code from
pixman-mmx.c.
This evidently allows gcc 4.9.3 to optimize better as well:
   text    data     bss     dec     hex filename
 657078   30848     680  688606   a81de libpixman-1.so.0.33.3 before
 656710   30848     680  688238   a806e libpixman-1.so.0.33.3 after
Reviewed-by: Siarhei Siamashka <siarhei.siamashka@gmail.com>
Tested-by: Pekka Paalanen <pekka.paalanen@collabora.co.uk>
Signed-off-by: Matt Turner <mattst88@gmail.com>
-rw-r--r-- | configure.ac | 15 | ||||
-rw-r--r-- | pixman/pixman-mmx.c | 64 |
2 files changed, 8 insertions, 71 deletions
diff --git a/configure.ac b/configure.ac
index 424bfd3..b04cc69 100644
--- a/configure.ac
+++ b/configure.ac
@@ -347,21 +347,14 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
 #error "Need GCC >= 3.4 for MMX intrinsics"
 #endif
 #include <mmintrin.h>
+#include <xmmintrin.h>
 int main () {
     __m64 v = _mm_cvtsi32_si64 (1);
     __m64 w;

-    /* Some versions of clang will choke on K */
-    asm ("pshufw %2, %1, %0\n\t"
-        : "=y" (w)
-        : "y" (v), "K" (5)
-    );
-
-    /* Some versions of clang will choke on this */
-    asm ("pmulhuw %1, %0\n\t"
-        : "+y" (w)
-        : "y" (v)
-    );
+    /* Test some intrinsics from xmmintrin.h */
+    w = _mm_shuffle_pi16(v, 5);
+    w = _mm_mulhi_pu16(w, w);

     return _mm_cvtsi64_si32 (v);
 }]])], have_mmx_intrinsics=yes)
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 05c48a4..88c3a39 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -40,6 +40,9 @@
 #else
 #include <mmintrin.h>
 #endif
+#ifdef USE_X86_MMX
+#include <xmmintrin.h>
+#endif
 #include "pixman-private.h"
 #include "pixman-combine32.h"
 #include "pixman-inlines.h"
@@ -59,66 +62,7 @@ _mm_empty (void)
 }
 #endif

-#ifdef USE_X86_MMX
-# if (defined(__SUNPRO_C) || defined(_MSC_VER) || defined(_WIN64))
-# include <xmmintrin.h>
-# else
-/* We have to compile with -msse to use xmmintrin.h, but that causes SSE
- * instructions to be generated that we don't want. Just duplicate the
- * functions we want to use. */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movemask_pi8 (__m64 __A)
-{
-    int ret;
-
-    asm ("pmovmskb %1, %0\n\t"
-        : "=r" (ret)
-        : "y" (__A)
-    );
-
-    return ret;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mulhi_pu16 (__m64 __A, __m64 __B)
-{
-    asm ("pmulhuw %1, %0\n\t"
-        : "+y" (__A)
-        : "y" (__B)
-    );
-    return __A;
-}
-
-# ifdef __OPTIMIZE__
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_shuffle_pi16 (__m64 __A, int8_t const __N)
-{
-    __m64 ret;
-
-    asm ("pshufw %2, %1, %0\n\t"
-        : "=y" (ret)
-        : "y" (__A), "K" (__N)
-    );
-
-    return ret;
-}
-# else
-# define _mm_shuffle_pi16(A, N) \
-    ({ \
-        __m64 ret; \
-        \
-        asm ("pshufw %2, %1, %0\n\t" \
-            : "=y" (ret) \
-            : "y" (A), "K" ((const int8_t)N) \
-        ); \
-        \
-        ret; \
-    })
-# endif
-# endif
-#endif
-
-#ifndef _MSC_VER
+#ifndef _MM_SHUFFLE
 #define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
 (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
 #endif