summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Chris Wilson <chris@chris-wilson.co.uk>, 2011-09-14 16:06:46 +0100
committer: Søren Sandmann Pedersen <ssp@redhat.com>, 2011-09-23 10:43:16 -0400
commit: 2746ee8b90821f74e5d33277cb558323cfceca5b (patch)
tree: 01534b0b2ef6aeed75d568f5fe15fcc521e67fc9
parent: 5a3b536d1a23fe35f2c63302b9cf9781dcce34c8 (diff)
sse2: Implement a LERP_SRC combiner (llerp)
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- pixman/pixman-sse2.c 80
1 files changed, 80 insertions, 0 deletions
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 6689c53a..fee74573 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -1478,6 +1478,85 @@ sse2_combine_saturate_u (pixman_implementation_t *imp,
}
}
+/*
+ * Blend a single a8r8g8b8 pixel for the unified LERP_SRC combiner.
+ *
+ * The mask is reduced to its alpha (expand_alpha_1x128), the source is
+ * multiplied by that alpha, and the product is combined with the
+ * destination through over_1x128 using the same alpha.
+ *
+ * NOTE(review): the helpers are defined outside this hunk; presumably
+ * over_1x128 (s, a, d) yields s + d * (1 - a), which would make the result
+ * src * a + dst * (1 - a) per channel -- confirm against their definitions.
+ *
+ * Returns the blended pixel packed back into 32 bits.
+ */
+static force_inline uint32_t
+core_combine_lerp_u_pixel_sse2 (uint32_t src,
+                                uint32_t mask,
+                                uint32_t dst)
+{
+    __m128i src_wide   = unpack_32_1x128 (src);
+    __m128i mask_alpha = expand_alpha_1x128 (unpack_32_1x128 (mask));
+    __m128i dst_wide   = unpack_32_1x128 (dst);
+    __m128i masked_src = pix_multiply_1x128 (src_wide, mask_alpha);
+
+    return pack_1x128_32 (over_1x128 (masked_src, mask_alpha, dst_wide));
+}
+
+/*
+ * Unified LERP_SRC combiner over a run of w a8r8g8b8 pixels.
+ *
+ *   imp, op  - unused here; part of the combiner signature.
+ *   pd       - destination pixels, read and overwritten in place.
+ *   ps       - source pixels.
+ *   pm       - mask pixels, or NULL for "no mask".
+ *   w        - number of pixels to process.
+ *
+ * With no mask the source is copied straight to the destination.
+ * Otherwise every pixel is blended exactly as
+ * core_combine_lerp_u_pixel_sse2 does: the mask is reduced to its alpha,
+ * the source is multiplied by it and the result is combined with the
+ * destination via over_*.
+ *
+ * Pixels are handled one at a time until pd is 16-byte aligned, then four
+ * at a time, then one at a time for the remainder.
+ */
+static void
+sse2_combine_lerp_u (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     uint32_t *               pd,
+                     const uint32_t *         ps,
+                     const uint32_t *         pm,
+                     int                      w)
+{
+    uint32_t s, m, d;
+
+    __m128i xmm_src_lo, xmm_src_hi;
+    __m128i xmm_dst_lo, xmm_dst_hi;
+    __m128i xmm_mask_lo, xmm_mask_hi;
+
+    if (pm == NULL)
+    {
+        /* sizeof-based length: no signed int multiplication involved. */
+        memcpy (pd, ps, w * sizeof (uint32_t));
+        return;
+    }
+
+    /* Scalar head: advance until pd can be accessed with aligned ops. */
+    while (w && (unsigned long)pd & 15)
+    {
+        s = *ps++;
+        m = *pm++;
+        d = *pd;
+
+        *pd++ = core_combine_lerp_u_pixel_sse2 (s, m, d);
+        w--;
+    }
+
+    /* Vector body: four pixels per iteration. */
+    while (w >= 4)
+    {
+        xmm_dst_hi = load_128_aligned ((__m128i*)pd);
+        xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+        xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+
+        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+        unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+
+        /*
+         * Reduce the mask to its alpha, as the scalar helper does with
+         * expand_alpha_1x128.  Without this step the vector body would
+         * weight each channel by the matching mask channel, so the output
+         * would differ between the aligned middle of a scanline and its
+         * scalar head/tail.
+         */
+        expand_alpha_2x128 (xmm_mask_lo, xmm_mask_hi,
+                            &xmm_mask_lo, &xmm_mask_hi);
+
+        pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+                            &xmm_mask_lo, &xmm_mask_hi,
+                            &xmm_src_lo, &xmm_src_hi);
+
+        over_2x128 (&xmm_src_lo, &xmm_src_hi,
+                    &xmm_mask_lo, &xmm_mask_hi,
+                    &xmm_dst_lo, &xmm_dst_hi);
+
+        save_128_aligned (
+            (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+        ps += 4;
+        pd += 4;
+        pm += 4;
+        w -= 4;
+    }
+
+    /* Scalar tail: the remaining 0-3 pixels. */
+    while (w--)
+    {
+        s = *ps++;
+        m = *pm++;
+        d = *pd;
+
+        *pd++ = core_combine_lerp_u_pixel_sse2 (s, m, d);
+    }
+}
+
static void
sse2_combine_src_ca (pixman_implementation_t *imp,
pixman_op_t op,
@@ -5779,6 +5858,7 @@ _pixman_implementation_create_sse2 (pixman_implementation_t *fallback)
imp->combine_32[PIXMAN_OP_ADD] = sse2_combine_add_u;
imp->combine_32[PIXMAN_OP_SATURATE] = sse2_combine_saturate_u;
+ imp->combine_32[PIXMAN_OP_LERP_SRC] = sse2_combine_lerp_u;
imp->combine_32_ca[PIXMAN_OP_SRC] = sse2_combine_src_ca;
imp->combine_32_ca[PIXMAN_OP_OVER] = sse2_combine_over_ca;