summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt Turner <mattst88@gmail.com>2013-01-01 21:18:09 -0800
committerMatt Turner <mattst88@gmail.com>2014-03-13 19:45:51 -0700
commit14c379391cf8045f507dd6d61f8fe9a5ec27cf56 (patch)
tree1157e9fca4a1974fce091ef61bbe80b6170ad14c
parent82d094654a46bd97d47f1f132a01ae0a74b986f3 (diff)
mmx: Add nearest over_8888_n_8888
Unscaled: over_8888_n_8888 = L1: 395.65 L2: 395.19 M:372.80 ( 19.59%) HT:257.44 VT:194.30 R:164.31 RT: 75.20 ( 882Kops/s) Before: over_8888_n_8888 = L1: 107.65 L2: 106.34 M:103.18 ( 5.44%) HT: 49.62 VT: 45.55 R: 43.48 RT: 20.38 ( 242Kops/s) After: over_8888_n_8888 = L1: 224.50 L2: 223.42 M:215.18 ( 11.30%) HT:172.70 VT:148.65 R:127.62 RT: 64.50 ( 615Kops/s)
-rw-r--r--pixman/pixman-mmx.c62
1 files changed, 62 insertions, 0 deletions
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index f9a92ce..63f4cdf 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -3555,6 +3555,59 @@ mmx_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
_mm_empty ();
}
+static force_inline void
+scaled_nearest_scanline_mmx_8888_n_8888_OVER (const uint32_t * mask,
+ uint32_t * dst,
+ const uint32_t * src,
+ int32_t w,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t src_width_fixed,
+ pixman_bool_t zero_src)
+{
+ __m64 mm_mask;
+
+ if (zero_src || (*mask >> 24) == 0)
+ return;
+
+ mm_mask = expand_alpha (load8888 (mask));
+
+ while (w)
+ {
+ uint32_t s = *(src + pixman_fixed_to_int (vx));
+ vx += unit_x;
+ while (vx >= 0)
+ vx -= src_width_fixed;
+
+ if (s)
+ {
+ __m64 ms = load8888 (&s);
+ __m64 alpha = expand_alpha (ms);
+ __m64 dest = load8888 (dst);
+
+ store8888 (dst, (in_over (ms, alpha, mm_mask, dest)));
+ }
+
+ dst++;
+ w--;
+ }
+
+ _mm_empty ();
+}
+
+FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_cover_OVER,
+ scaled_nearest_scanline_mmx_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE)
+FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_pad_OVER,
+ scaled_nearest_scanline_mmx_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE)
+FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_none_OVER,
+ scaled_nearest_scanline_mmx_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
+FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_normal_OVER,
+ scaled_nearest_scanline_mmx_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE)
+
#define BSHIFT ((1 << BILINEAR_INTERPOLATION_BITS))
#define BMSK (BSHIFT - 1)
@@ -3995,6 +4048,15 @@ static const pixman_fast_path_t mmx_fast_paths[] =
PIXMAN_STD_FAST_PATH (IN, a8, null, a8, mmx_composite_in_8_8 ),
PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, mmx_composite_in_n_8_8 ),
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_n_8888 ),
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_n_8888 ),
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_n_8888 ),
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_n_8888 ),
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_n_8888 ),
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_n_8888 ),
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_n_8888 ),
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_n_8888 ),
+
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mmx_8888_8888 ),