summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTaekyun Kim <tkq.kim@samsung.com>2011-09-07 22:57:29 +0900
committerTaekyun Kim <tkq.kim@samsung.com>2011-09-07 22:57:29 +0900
commit35d23679d8acb4f51d7da2bd20898f3d9e12ad1d (patch)
tree2a9f649e3e73eb321e3e4c482def456a419bcc29
parent3d85f7465ee5b21938622fca13b7da7462aba801 (diff)
sse2: Bilinear scaled over_8888_8888
-rw-r--r--pixman/pixman-sse2.c107
1 files changed, 106 insertions, 1 deletions
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 0bfd26b..af897f9 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -5396,6 +5396,107 @@ FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC,
uint32_t, uint32_t, uint32_t,
NORMAL, FLAG_NONE)
+static force_inline void
+scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst,
+ const uint32_t * mask,
+ const uint32_t * src_top,
+ const uint32_t * src_bottom,
+ int32_t w,
+ int wt,
+ int wb,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t max_vx,
+ pixman_bool_t zero_src)
+{
+ BILINEAR_DECLARE_VARIABLES;
+ uint32_t pix1, pix2, pix3, pix4;
+
+ while (w && ((unsigned long)dst & 15))
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+
+ if (pix1)
+ {
+ pix2 = *dst;
+ *dst = core_combine_over_u_pixel_sse2 (pix1, pix2);
+ }
+
+ w--;
+ dst++;
+ }
+
+ while (w >= 4)
+ {
+ __m128i xmm_src;
+ __m128i xmm_src_hi, xmm_src_lo, xmm_dst_hi, xmm_dst_lo;
+ __m128i xmm_alpha_hi, xmm_alpha_lo;
+
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
+
+ xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1);
+
+ if (!is_zero (xmm_src))
+ {
+ if (is_opaque (xmm_src))
+ {
+ save_128_aligned ((__m128i *)dst, xmm_src);
+ }
+ else
+ {
+ __m128i xmm_dst = load_128_aligned ((__m128i *)dst);
+
+ unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
+ over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned ((__m128i *)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+ }
+
+ w -= 4;
+ dst += 4;
+ }
+
+ while (w)
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+
+ if (pix1)
+ {
+ pix2 = *dst;
+ *dst = core_combine_over_u_pixel_sse2 (pix1, pix2);
+ }
+
+ w--;
+ dst++;
+ }
+}
+
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_OVER,
+ scaled_bilinear_scanline_sse2_8888_8888_OVER,
+ uint32_t, uint32_t, uint32_t,
+ COVER, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_OVER,
+ scaled_bilinear_scanline_sse2_8888_8888_OVER,
+ uint32_t, uint32_t, uint32_t,
+ PAD, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_OVER,
+ scaled_bilinear_scanline_sse2_8888_8888_OVER,
+ uint32_t, uint32_t, uint32_t,
+ NONE, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_OVER,
+ scaled_bilinear_scanline_sse2_8888_8888_OVER,
+ uint32_t, uint32_t, uint32_t,
+ NORMAL, FLAG_NONE)
+
+
static const pixman_fast_path_t sse2_fast_paths[] =
{
/* PIXMAN_OP_OVER */
@@ -5501,10 +5602,14 @@ static const pixman_fast_path_t sse2_fast_paths[] =
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, sse2_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
+
{ PIXMAN_OP_NONE },
};
-
static pixman_bool_t
sse2_blt (pixman_implementation_t *imp,
uint32_t * src_bits,