diff options
author | Matt Turner <mattst88@gmail.com> | 2013-01-02 13:52:43 -0800 |
---|---|---|
committer | Matt Turner <mattst88@gmail.com> | 2014-03-13 19:45:52 -0700 |
commit | 437ca6d0e29bf39537cf536647ffb828f6919eab (patch) | |
tree | 6f1f8a63d46874308e4bd54b02848b93cd9bf6f1 | |
parent | d725d93b9a94ff1c5cb0fe51b050cb36366cd25b (diff) |
mmx: Don't unpack+repack when not needed
Nearest (before / after):
over_8888_8888 = L1: 225.75 L2: 230.91 M:217.17 ( 11.54%) HT:266.81 VT:212.29 R:184.76 RT: 86.19 ( 752Kops/s)
over_8888_8888 = L1: 235.79 L2: 243.24 M:225.78 ( 11.84%) HT:305.29 VT:242.82 R:210.29 RT: 99.14 ( 818Kops/s)
Bilinear (before / after):
over_8888_8888 = L1: 111.66 L2: 112.01 M:108.58 ( 5.69%) HT:118.60 VT:109.76 R: 95.89 RT: 55.55 ( 547Kops/s)
over_8888_8888 = L1: 121.62 L2: 122.41 M:118.91 ( 6.29%) HT:126.99 VT:117.31 R:101.50 RT: 57.56 ( 561Kops/s)
-rw-r--r-- | pixman/pixman-mmx.c | 35 |
1 files changed, 27 insertions, 8 deletions
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index c7fd503..57a2223 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -203,6 +203,9 @@ typedef struct
     mmxdatafield mmx_mask_3;
 #endif
     mmxdatafield mmx_full_alpha;
+#ifdef USE_LOONGSON_MMI
+    mmxdatafield mmx_full_alpha_packed;
+#endif
     mmxdatafield mmx_4x0101;
     mmxdatafield mmx_ff000000;
 } mmx_data_t;
@@ -237,6 +240,9 @@ static const mmx_data_t c =
     MMXDATA_INIT (.mmx_mask_3, 0x0000ffffffffffff),
 #endif
     MMXDATA_INIT (.mmx_full_alpha, 0x00ff000000000000),
+#ifdef USE_LOONGSON_MMI
+    MMXDATA_INIT (.mmx_full_alpha_packed, 0x00000000ff000000),
+#endif
     MMXDATA_INIT (.mmx_4x0101, 0x0101010101010101),
     MMXDATA_INIT (.mmx_ff000000, 0xff000000ff000000),
 };
@@ -512,6 +518,18 @@ is_opaque (__m64 v)
 }
 
 static force_inline pixman_bool_t
+is_opaque_packed (__m64 v)
+{
+#ifdef USE_LOONGSON_MMI
+    return is_equal (_mm_and_si64 (v, MC (full_alpha_packed)),
+                     MC (full_alpha_packed));
+#else
+    __m64 ffs = _mm_cmpeq_pi8 (v, v);
+    return (_mm_movemask_pi8 (_mm_cmpeq_pi8 (v, ffs)) & 0x08);
+#endif
+}
+
+static force_inline pixman_bool_t
 is_zero (__m64 v)
 {
     return is_equal (v, _mm_setzero_si64 ());
@@ -728,19 +746,20 @@ combine (const uint32_t *src, const uint32_t *mask)
 static force_inline __m64
 core_combine_over_u_pixel_mmx (__m64 vsrc, __m64 vdst)
 {
-    vsrc = _mm_unpacklo_pi8 (vsrc, _mm_setzero_si64 ());
-
-    if (is_opaque (vsrc))
+    if (is_opaque_packed (vsrc))
     {
         return vsrc;
     }
     else if (!is_zero (vsrc))
     {
-        return over (vsrc, expand_alpha (vsrc),
-                     _mm_unpacklo_pi8 (vdst, _mm_setzero_si64 ()));
+        vsrc = _mm_unpacklo_pi8 (vsrc, _mm_setzero_si64 ());
+
+        return pack8888 (over (vsrc, expand_alpha (vsrc),
+                               _mm_unpacklo_pi8 (vdst, _mm_setzero_si64 ())),
+                         _mm_setzero_si64 ());
     }
 
-    return _mm_unpacklo_pi8 (vdst, _mm_setzero_si64 ());
+    return vdst;
 }
 
 static void
@@ -3575,7 +3594,7 @@ scaled_nearest_scanline_mmx_8888_8888_OVER (uint32_t* pd,
         while (vx >= 0)
             vx -= src_width_fixed;
 
-        store8888 (pd, core_combine_over_u_pixel_mmx (s, d));
+        store (pd, core_combine_over_u_pixel_mmx (s, d));
         pd++;
 
         w--;
@@ -3763,7 +3782,7 @@ scaled_bilinear_scanline_mmx_8888_8888_OVER (uint32_t * dst,
         if (!is_zero (pix1))
         {
             pix2 = load (dst);
-            store8888 (dst, core_combine_over_u_pixel_mmx (pix1, pix2));
+            store (dst, core_combine_over_u_pixel_mmx (pix1, pix2));
         }
 
         w--;