summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTaekyun Kim <tkq.kim@samsung.com>2011-09-07 22:51:46 +0900
committerTaekyun Kim <tkq.kim@samsung.com>2011-10-10 12:12:47 +0900
commitd67c0b883daeeaacf3f21f1ddbdcf9ecf94fac43 (patch)
tree7c385d268ee441e29562b8d5318df026be4fb003
parent741eb8462c3ff72cbf2d9acfeb1e97208a414fcd (diff)
sse2: Macros for assembling bilinear interpolation code fractions
Primitive bilinear interpolation code is reusable to implement other bilinear functions. BILINEAR_DECLARE_VARIABLES - Declare variables needed to interpolate src pixels. BILINEAR_INTERPOLATE_ONE_PIXEL - Interpolate one pixel and advance to the next pixel. BILINEAR_SKIP_ONE_PIXEL - Skip interpolation and just advance to the next pixel; this is useful for skipping pixels whose mask is zero.
-rw-r--r--pixman/pixman-sse2.c157
1 files changed, 77 insertions, 80 deletions
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 6689c53..0bfd26b 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -5287,83 +5287,53 @@ FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
scaled_nearest_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
-static void
-bilinear_interpolate_line_sse2 (uint32_t * out,
- const uint32_t * top,
- const uint32_t * bottom,
- int wt,
- int wb,
- pixman_fixed_t x,
- pixman_fixed_t ux,
- int width)
-{
- const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt);
- const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb);
- const __m128i xmm_xorc = _mm_set_epi16 (0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff);
- const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1);
- const __m128i xmm_ux = _mm_set_epi16 (ux, ux, ux, ux, ux, ux, ux, ux);
- const __m128i xmm_zero = _mm_setzero_si128 ();
- __m128i xmm_x = _mm_set_epi16 (x, x, x, x, x, x, x, x);
- uint32_t pix1, pix2, pix3, pix4;
-
- #define INTERPOLATE_ONE_PIXEL(pix) \
- do { \
- __m128i xmm_wh, xmm_lo, xmm_hi, a; \
- /* fetch 2x2 pixel block into sse2 register */ \
- uint32_t tl = top [pixman_fixed_to_int (x)]; \
- uint32_t tr = top [pixman_fixed_to_int (x) + 1]; \
- uint32_t bl = bottom [pixman_fixed_to_int (x)]; \
- uint32_t br = bottom [pixman_fixed_to_int (x) + 1]; \
- a = _mm_set_epi32 (tr, tl, br, bl); \
- x += ux; \
- /* vertical interpolation */ \
- a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpackhi_epi8 (a, xmm_zero), \
- xmm_wt), \
- _mm_mullo_epi16 (_mm_unpacklo_epi8 (a, xmm_zero), \
- xmm_wb)); \
- /* calculate horizontal weights */ \
- xmm_wh = _mm_add_epi16 (xmm_addc, \
- _mm_xor_si128 (xmm_xorc, \
- _mm_srli_epi16 (xmm_x, 8))); \
- xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
- /* horizontal interpolation */ \
- xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \
- xmm_hi = _mm_mulhi_epu16 (a, xmm_wh); \
- a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi), \
- _mm_unpackhi_epi16 (xmm_lo, xmm_hi)); \
- /* shift and pack the result */ \
- a = _mm_srli_epi32 (a, 16); \
- a = _mm_packs_epi32 (a, a); \
- a = _mm_packus_epi16 (a, a); \
- pix = _mm_cvtsi128_si32 (a); \
- } while (0)
-
- while ((width -= 4) >= 0)
- {
- INTERPOLATE_ONE_PIXEL (pix1);
- INTERPOLATE_ONE_PIXEL (pix2);
- INTERPOLATE_ONE_PIXEL (pix3);
- INTERPOLATE_ONE_PIXEL (pix4);
- *out++ = pix1;
- *out++ = pix2;
- *out++ = pix3;
- *out++ = pix4;
- }
- if (width & 2)
- {
- INTERPOLATE_ONE_PIXEL (pix1);
- INTERPOLATE_ONE_PIXEL (pix2);
- *out++ = pix1;
- *out++ = pix2;
- }
- if (width & 1)
- {
- INTERPOLATE_ONE_PIXEL (pix1);
- *out = pix1;
- }
-
- #undef INTERPOLATE_ONE_PIXEL
-}
+#define BILINEAR_DECLARE_VARIABLES /* declares the SSE2 constants and running x state used by the other BILINEAR_* macros; expects wt, wb, unit_x and vx to be visible where it is expanded */ \
+ const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); /* weight applied to the src_top pixels, copied into all eight 16-bit lanes */ \
+ const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); /* weight applied to the src_bottom pixels, copied into all eight 16-bit lanes */ \
+ const __m128i xmm_xorc = _mm_set_epi16 (0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff); /* 0xff in the low four lanes only; together with xmm_addc it maps f = (xmm_x >> 8) to 256 - f there */\
+ const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); /* +1 in the low four lanes, 0 in the high four */ \
+ const __m128i xmm_ux = _mm_set_epi16 (unit_x, unit_x, unit_x, unit_x, /* per-pixel x step, one copy per 16-bit lane */ \
+ unit_x, unit_x, unit_x, unit_x); \
+ const __m128i xmm_zero = _mm_setzero_si128 (); /* zero operand used when unpacking 8-bit channels to 16 bits */ \
+ __m128i xmm_x = _mm_set_epi16 (vx, vx, vx, vx, vx, vx, vx, vx) /* running x, narrowed to 16 bits per lane; no ';' here, the expansion site supplies it */
+
+#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) /* stores one interpolated pixel into the lvalue pix, then leaves vx and xmm_x advanced by one step; needs BILINEAR_DECLARE_VARIABLES plus src_top and src_bottom in scope */ \
+do { \
+ __m128i xmm_wh, xmm_lo, xmm_hi, a; \
+ /* fetch 2x2 pixel block into sse2 register */ \
+ uint32_t tl = src_top [pixman_fixed_to_int (vx)]; \
+ uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1]; /* note: reads the element after the integer x position in both rows */ \
+ uint32_t bl = src_bottom [pixman_fixed_to_int (vx)]; \
+ uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1]; \
+ a = _mm_set_epi32 (tr, tl, br, bl); /* top row lands in the high 64 bits, bottom row in the low 64 bits */ \
+ vx += unit_x; /* scalar position advances here; the weights below still come from the not-yet-advanced xmm_x */ \
+ /* vertical interpolation */ \
+ a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpackhi_epi8 (a, xmm_zero), \
+ xmm_wt), \
+ _mm_mullo_epi16 (_mm_unpacklo_epi8 (a, xmm_zero), \
+ xmm_wb)); /* result: left-column channels in lanes 0-3, right-column channels in lanes 4-7 */ \
+ /* calculate horizontal weights */ \
+ xmm_wh = _mm_add_epi16 (xmm_addc, \
+ _mm_xor_si128 (xmm_xorc, \
+ _mm_srli_epi16 (xmm_x, 8))); /* with f = xmm_x >> 8: lanes 0-3 get 256 - f, lanes 4-7 get f */ \
+ xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
+ /* horizontal interpolation */ \
+ xmm_lo = _mm_mullo_epi16 (a, xmm_wh); /* low 16 bits of each unsigned 16x16 product */ \
+ xmm_hi = _mm_mulhi_epu16 (a, xmm_wh); /* high 16 bits of the same products */ \
+ a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi), \
+ _mm_unpackhi_epi16 (xmm_lo, xmm_hi)); /* rebuild the 32-bit products and add the left and right contributions per channel */ \
+ /* shift and pack the result */ \
+ a = _mm_srli_epi32 (a, 16); /* drops the bits added by both weightings; presumably wt + wb == 256 - TODO confirm against callers */ \
+ a = _mm_packs_epi32 (a, a); /* 32-bit -> 16-bit lanes */ \
+ a = _mm_packus_epi16 (a, a); /* 16-bit -> 8-bit lanes with unsigned saturation */ \
+ pix = _mm_cvtsi128_si32 (a); /* low 32 bits hold the four packed 8-bit channels */ \
+} while (0)
+
+#define BILINEAR_SKIP_ONE_PIXEL() /* moves to the next pixel without fetching or interpolating anything; needs vx, unit_x, xmm_x and xmm_ux in scope */ \
+do { \
+ vx += unit_x; /* advance the scalar source position */ \
+ xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); /* apply the same step to the per-lane copy so later weights stay consistent with vx */ \
+} while(0)
static force_inline void
scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst,
@@ -5378,8 +5348,35 @@ scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
- bilinear_interpolate_line_sse2 (dst, src_top, src_bottom,
- wt, wb, vx, unit_x, w);
+ BILINEAR_DECLARE_VARIABLES;
+ uint32_t pix1, pix2, pix3, pix4;
+
+ while ((w -= 4) >= 0)
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
+ *dst++ = pix1;
+ *dst++ = pix2;
+ *dst++ = pix3;
+ *dst++ = pix4;
+ }
+
+ if (w & 2)
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
+ *dst++ = pix1;
+ *dst++ = pix2;
+ }
+
+ if (w & 1)
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ *dst = pix1;
+ }
+
}
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC,
@@ -5399,7 +5396,6 @@ FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC,
uint32_t, uint32_t, uint32_t,
NORMAL, FLAG_NONE)
-
static const pixman_fast_path_t sse2_fast_paths[] =
{
/* PIXMAN_OP_OVER */
@@ -5508,6 +5504,7 @@ static const pixman_fast_path_t sse2_fast_paths[] =
{ PIXMAN_OP_NONE },
};
+
static pixman_bool_t
sse2_blt (pixman_implementation_t *imp,
uint32_t * src_bits,