summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSiarhei Siamashka <siarhei.siamashka@gmail.com>2013-10-02 00:54:30 +0000
committerSiarhei Siamashka <siarhei.siamashka@gmail.com>2013-10-14 00:26:51 +0300
commita863bbcce0924f8d4004a352c670a9f774012463 (patch)
treea67a8619e088bfc5d26d3a964302efe89a1e3529
parent8f75f638ab03078546cc89edfbec4f6801b77e5e (diff)
sse2: bilinear fast path for src_x888_8888
Running cairo-perf-trace benchmark on Intel Core2 T7300: Before: [ 0] image t-firefox-canvas-swscroll 1.989 2.008 0.43% 8/8 [ 1] image firefox-canvas-scroll 4.574 4.609 0.50% 8/8 After: [ 0] image t-firefox-canvas-swscroll 1.404 1.418 0.51% 8/8 [ 1] image firefox-canvas-scroll 4.228 4.259 0.36% 8/8
-rw-r--r--pixman/pixman-sse2.c67
1 files changed, 67 insertions, 0 deletions
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 2ab2690..a6e7808 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -5751,6 +5751,66 @@ FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC,
NORMAL, FLAG_NONE)
static force_inline void
+scaled_bilinear_scanline_sse2_x888_8888_SRC (uint32_t * dst,
+ const uint32_t * mask,
+ const uint32_t * src_top,
+ const uint32_t * src_bottom,
+ int32_t w,
+ int wt,
+ int wb,
+ pixman_fixed_t vx_,
+ pixman_fixed_t unit_x_,
+ pixman_fixed_t max_vx,
+ pixman_bool_t zero_src)
+{
+ intptr_t vx = vx_;
+ intptr_t unit_x = unit_x_;
+ BILINEAR_DECLARE_VARIABLES;
+ uint32_t pix1, pix2;
+
+ while (w && ((uintptr_t)dst & 15))
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ *dst++ = pix1 | 0xFF000000;
+ w--;
+ }
+
+ while ((w -= 4) >= 0) {
+ __m128i xmm_src;
+ BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src);
+ _mm_store_si128 ((__m128i *)dst, _mm_or_si128 (xmm_src, mask_ff000000));
+ dst += 4;
+ }
+
+ if (w & 2)
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
+ *dst++ = pix1 | 0xFF000000;
+ *dst++ = pix2 | 0xFF000000;
+ }
+
+ if (w & 1)
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ *dst = pix1 | 0xFF000000;
+ }
+}
+
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_cover_SRC,
+ scaled_bilinear_scanline_sse2_x888_8888_SRC,
+ uint32_t, uint32_t, uint32_t,
+ COVER, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_pad_SRC,
+ scaled_bilinear_scanline_sse2_x888_8888_SRC,
+ uint32_t, uint32_t, uint32_t,
+ PAD, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_normal_SRC,
+ scaled_bilinear_scanline_sse2_x888_8888_SRC,
+ uint32_t, uint32_t, uint32_t,
+ NORMAL, FLAG_NONE)
+
+static force_inline void
scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst,
const uint32_t * mask,
const uint32_t * src_top,
@@ -6247,6 +6307,13 @@ static const pixman_fast_path_t sse2_fast_paths[] =
SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, sse2_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888),
+ SIMPLE_BILINEAR_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888),
+ SIMPLE_BILINEAR_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888),
+ SIMPLE_BILINEAR_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888),
+ SIMPLE_BILINEAR_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888),
+ SIMPLE_BILINEAR_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888),
+
SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),