diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2011-09-14 16:06:46 +0100 |
---|---|---|
committer | Søren Sandmann Pedersen <ssp@redhat.com> | 2011-09-23 10:43:16 -0400 |
commit | 2746ee8b90821f74e5d33277cb558323cfceca5b (patch) | |
tree | 01534b0b2ef6aeed75d568f5fe15fcc521e67fc9 | |
parent | 5a3b536d1a23fe35f2c63302b9cf9781dcce34c8 (diff) |
sse2: Implement a LERP_SRC combiner (llerp)
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- | pixman/pixman-sse2.c | 103 |
1 files changed, 103 insertions, 0 deletions
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 6689c53a..fee74573 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -1478,6 +1478,108 @@ sse2_combine_saturate_u (pixman_implementation_t *imp,
     }
 }
 
+/*
+ * Interpolate one pixel of dst towards src: src is multiplied by the
+ * mask's alpha (replicated to every channel by expand_alpha_1x128) and
+ * the product is combined with dst through over_1x128 using that same
+ * expanded alpha.
+ */
+static force_inline uint32_t
+core_combine_lerp_u_pixel_sse2 (uint32_t src,
+                                uint32_t mask,
+                                uint32_t dst)
+{
+    __m128i s = unpack_32_1x128 (src);
+    __m128i m = expand_alpha_1x128 (unpack_32_1x128 (mask));
+    __m128i d = unpack_32_1x128 (dst);
+
+    return pack_1x128_32 (over_1x128 (pix_multiply_1x128 (s, m), m, d));
+}
+
+/*
+ * LERP_SRC combiner, installed in imp->combine_32[PIXMAN_OP_LERP_SRC]:
+ * writes w pixels to pd, each interpolated from pd towards ps by the
+ * alpha of pm.
+ *
+ * With no mask (pm == NULL) the source is copied to the destination
+ * unchanged.  Otherwise single pixels are processed until pd is 16-byte
+ * aligned, then four pixels per iteration, then the remaining tail.
+ */
+static void
+sse2_combine_lerp_u (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     uint32_t *               pd,
+                     const uint32_t *         ps,
+                     const uint32_t *         pm,
+                     int                      w)
+{
+    uint32_t s, m, d;
+
+    __m128i xmm_src_lo, xmm_src_hi;
+    __m128i xmm_dst_lo, xmm_dst_hi;
+    __m128i xmm_mask_lo, xmm_mask_hi;
+
+    if (pm == NULL)
+    {
+        memcpy (pd, ps, 4*w);
+        return;
+    }
+
+    /* Scalar head: advance until pd can be accessed with aligned ops. */
+    while (w && (unsigned long)pd & 15)
+    {
+        s = *ps++;
+        m = *pm++;
+        d = *pd;
+
+        *pd++ = core_combine_lerp_u_pixel_sse2 (s, m, d);
+        w--;
+    }
+
+    while (w >= 4)
+    {
+        xmm_dst_hi = load_128_aligned ((__m128i*)pd);
+        xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+        xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+
+        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+        unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+
+        /* Use only the mask's alpha, exactly as the single-pixel helper
+         * does; without this the result would depend on whether a pixel
+         * went through the scalar or the vector path.
+         */
+        expand_alpha_2x128 (xmm_mask_lo, xmm_mask_hi,
+                            &xmm_mask_lo, &xmm_mask_hi);
+
+        pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+                            &xmm_mask_lo, &xmm_mask_hi,
+                            &xmm_src_lo, &xmm_src_hi);
+
+        over_2x128 (&xmm_src_lo, &xmm_src_hi,
+                    &xmm_mask_lo, &xmm_mask_hi,
+                    &xmm_dst_lo, &xmm_dst_hi);
+
+        save_128_aligned (
+            (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+        ps += 4;
+        pd += 4;
+        pm += 4;
+        w -= 4;
+    }
+
+    /* Scalar tail: fewer than four pixels remain. */
+    while (w--)
+    {
+        s = *ps++;
+        m = *pm++;
+        d = *pd;
+
+        *pd++ = core_combine_lerp_u_pixel_sse2 (s, m, d);
+    }
+}
+
 static void
 sse2_combine_src_ca (pixman_implementation_t *imp,
                      pixman_op_t              op,
@@ -5779,6 +5881,7 @@ _pixman_implementation_create_sse2 (pixman_implementation_t *fallback)
     imp->combine_32[PIXMAN_OP_ADD] = sse2_combine_add_u;
 
     imp->combine_32[PIXMAN_OP_SATURATE] = sse2_combine_saturate_u;
+    imp->combine_32[PIXMAN_OP_LERP_SRC] = sse2_combine_lerp_u;
 
     imp->combine_32_ca[PIXMAN_OP_SRC] = sse2_combine_src_ca;
     imp->combine_32_ca[PIXMAN_OP_OVER] = sse2_combine_over_ca;