diff options
author | Oded Gabbay <oded.gabbay@gmail.com> | 2015-06-28 09:42:08 +0300 |
---|---|---|
committer | Oded Gabbay <oded.gabbay@gmail.com> | 2015-07-16 16:13:35 +0300 |
commit | c12ee95089e7d281a29a24bf56b81f5c16dec6ee (patch) | |
tree | aab0113df529f3f2860ddb73320194b2e98d7434 | |
parent | 034149537be94862b43fb09699b8c2149bfe948d (diff) |
vmx: add helper functions
This patch adds the following helper functions to enable code reuse,
hide BE/LE differences, and improve maintainability.
All of the functions were defined as static force_inline.
Names were copied from pixman-sse2.c so that converting fast paths between
sse2 and vmx will be easier from now on. For that reason, I tried to keep the
input/output of the functions as close as possible to the sse2
definitions.
The functions are:
- load_128_aligned : load 128-bit from a 16-byte aligned memory
address into a vector
- load_128_unaligned : load 128-bit from memory into a vector,
without guarantee of alignment for the
source pointer
- save_128_aligned : save 128-bit vector into a 16-byte aligned
memory address
- create_mask_16_128 : take a 16-bit value and fill a new
vector with it
- create_mask_1x32_128 : take a 32-bit pointer and fill a new
vector with the 32-bit value from that pointer
- create_mask_32_128 : take a 32-bit value and fill a new
vector with it
- unpack_32_1x128 : unpack 32-bit value into a vector
- unpacklo_128_16x8 : unpack the eight low 8-bit values of a vector
- unpackhi_128_16x8 : unpack the eight high 8-bit values of a vector
- unpacklo_128_8x16 : unpack the four low 16-bit values of a vector
- unpackhi_128_8x16 : unpack the four high 16-bit values of a vector
- unpack_128_2x128 : unpack the eight low 8-bit values of a vector
into one vector and the eight high 8-bit
values into another vector
- unpack_128_2x128_16 : unpack the four low 16-bit values of a vector
into one vector and the four high 16-bit
values into another vector
- unpack_565_to_8888 : unpack an RGB_565 vector to 8888 vector
- pack_1x128_32 : pack a vector and return the LSB 32-bit of it
- pack_2x128_128 : pack two vectors into one and return it
- negate_2x128 : xor two vectors with mask_00ff (separately)
- is_opaque : returns whether all the pixels contained in
the vector are opaque
- is_zero : returns whether the vector equals 0
- is_transparent : returns whether all the pixels
contained in the vector are transparent
- expand_pixel_8_1x128 : expand an 8-bit pixel into the lower 8 bytes of a
vector
- expand_alpha_1x128 : expand alpha from vector and return the new
vector
- expand_alpha_2x128 : expand alpha from one vector and another alpha
from a second vector
- expand_alpha_rev_2x128 : expand a reversed alpha from one vector and
another reversed alpha from a second vector
- pix_multiply_2x128 : do pix_multiply for two vectors (separately)
- over_2x128 : perform over op. on two vectors
- in_over_2x128 : perform in-over op. on two vectors
v2: removed expand_pixel_32_1x128 as it was not used by any function and
its implementation was erroneous
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Acked-by: Siarhei Siamashka <siarhei.siamashka@gmail.com>
-rw-r--r-- | pixman/pixman-vmx.c | 476 |
1 files changed, 476 insertions, 0 deletions
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c index 880a19a..39d1a06 100644 --- a/pixman/pixman-vmx.c +++ b/pixman/pixman-vmx.c @@ -30,10 +30,19 @@ #endif #include "pixman-private.h" #include "pixman-combine32.h" +#include "pixman-inlines.h" #include <altivec.h> #define AVV(x...) {x} +static vector unsigned int mask_00ff; +static vector unsigned int mask_ff000000; +static vector unsigned int mask_red; +static vector unsigned int mask_green; +static vector unsigned int mask_blue; +static vector unsigned int mask_565_fix_rb; +static vector unsigned int mask_565_fix_g; + static force_inline vector unsigned int splat_alpha (vector unsigned int pix) { @@ -233,6 +242,464 @@ do \ #define STORE_VECTOR(dest) \ vec_st ((vector unsigned int) v ## dest, 0, dest); +/* load 4 pixels from a 16-byte boundary aligned address */ +static force_inline vector unsigned int +load_128_aligned (const uint32_t* src) +{ + return *((vector unsigned int *) src); +} + +/* load 4 pixels from a unaligned address */ +static force_inline vector unsigned int +load_128_unaligned (const uint32_t* src) +{ + vector unsigned int vsrc; + DECLARE_SRC_MASK_VAR; + + COMPUTE_SHIFT_MASK (src); + LOAD_VECTOR (src); + + return vsrc; +} + +/* save 4 pixels on a 16-byte boundary aligned address */ +static force_inline void +save_128_aligned (uint32_t* data, + vector unsigned int vdata) +{ + STORE_VECTOR(data) +} + +static force_inline vector unsigned int +create_mask_16_128 (uint16_t mask) +{ + uint16_t* src; + vector unsigned short vsrc; + DECLARE_SRC_MASK_VAR; + + src = &mask; + + COMPUTE_SHIFT_MASK (src); + LOAD_VECTOR (src); + return (vector unsigned int) vec_splat(vsrc, 0); +} + +static force_inline vector unsigned int +create_mask_1x32_128 (const uint32_t *src) +{ + vector unsigned int vsrc; + DECLARE_SRC_MASK_VAR; + + COMPUTE_SHIFT_MASK (src); + LOAD_VECTOR (src); + return vec_splat(vsrc, 0); +} + +static force_inline vector unsigned int +create_mask_32_128 (uint32_t mask) +{ + return 
create_mask_1x32_128(&mask); +} + +static force_inline vector unsigned int +unpack_32_1x128 (uint32_t data) +{ + vector unsigned int vdata = {0, 0, 0, data}; + vector unsigned short lo; + + lo = (vector unsigned short) +#ifdef WORDS_BIGENDIAN + vec_mergel ((vector unsigned char) AVV(0), + (vector unsigned char) vdata); +#else + vec_mergel ((vector unsigned char) vdata, + (vector unsigned char) AVV(0)); +#endif + + return (vector unsigned int) lo; +} + +static force_inline vector unsigned int +unpacklo_128_16x8 (vector unsigned int data1, vector unsigned int data2) +{ + vector unsigned char lo; + + /* unpack to short */ + lo = (vector unsigned char) +#ifdef WORDS_BIGENDIAN + vec_mergel ((vector unsigned char) data2, + (vector unsigned char) data1); +#else + vec_mergel ((vector unsigned char) data1, + (vector unsigned char) data2); +#endif + + return (vector unsigned int) lo; +} + +static force_inline vector unsigned int +unpackhi_128_16x8 (vector unsigned int data1, vector unsigned int data2) +{ + vector unsigned char hi; + + /* unpack to short */ + hi = (vector unsigned char) +#ifdef WORDS_BIGENDIAN + vec_mergeh ((vector unsigned char) data2, + (vector unsigned char) data1); +#else + vec_mergeh ((vector unsigned char) data1, + (vector unsigned char) data2); +#endif + + return (vector unsigned int) hi; +} + +static force_inline vector unsigned int +unpacklo_128_8x16 (vector unsigned int data1, vector unsigned int data2) +{ + vector unsigned short lo; + + /* unpack to char */ + lo = (vector unsigned short) +#ifdef WORDS_BIGENDIAN + vec_mergel ((vector unsigned short) data2, + (vector unsigned short) data1); +#else + vec_mergel ((vector unsigned short) data1, + (vector unsigned short) data2); +#endif + + return (vector unsigned int) lo; +} + +static force_inline vector unsigned int +unpackhi_128_8x16 (vector unsigned int data1, vector unsigned int data2) +{ + vector unsigned short hi; + + /* unpack to char */ + hi = (vector unsigned short) +#ifdef WORDS_BIGENDIAN + 
vec_mergeh ((vector unsigned short) data2, + (vector unsigned short) data1); +#else + vec_mergeh ((vector unsigned short) data1, + (vector unsigned short) data2); +#endif + + return (vector unsigned int) hi; +} + +static force_inline void +unpack_128_2x128 (vector unsigned int data1, vector unsigned int data2, + vector unsigned int* data_lo, vector unsigned int* data_hi) +{ + *data_lo = unpacklo_128_16x8(data1, data2); + *data_hi = unpackhi_128_16x8(data1, data2); +} + +static force_inline void +unpack_128_2x128_16 (vector unsigned int data1, vector unsigned int data2, + vector unsigned int* data_lo, vector unsigned int* data_hi) +{ + *data_lo = unpacklo_128_8x16(data1, data2); + *data_hi = unpackhi_128_8x16(data1, data2); +} + +static force_inline vector unsigned int +unpack_565_to_8888 (vector unsigned int lo) +{ + vector unsigned int r, g, b, rb, t; + + r = vec_and (vec_sl(lo, create_mask_32_128(8)), mask_red); + g = vec_and (vec_sl(lo, create_mask_32_128(5)), mask_green); + b = vec_and (vec_sl(lo, create_mask_32_128(3)), mask_blue); + + rb = vec_or (r, b); + t = vec_and (rb, mask_565_fix_rb); + t = vec_sr (t, create_mask_32_128(5)); + rb = vec_or (rb, t); + + t = vec_and (g, mask_565_fix_g); + t = vec_sr (t, create_mask_32_128(6)); + g = vec_or (g, t); + + return vec_or (rb, g); +} + +static force_inline uint32_t +pack_1x128_32 (vector unsigned int data) +{ + vector unsigned char vpack; + + vpack = vec_packsu((vector unsigned short) data, + (vector unsigned short) AVV(0)); + + return vec_extract((vector unsigned int) vpack, 1); +} + +static force_inline vector unsigned int +pack_2x128_128 (vector unsigned int lo, vector unsigned int hi) +{ + vector unsigned char vpack; + + vpack = vec_packsu((vector unsigned short) hi, + (vector unsigned short) lo); + + return (vector unsigned int) vpack; +} + +static force_inline void +negate_2x128 (vector unsigned int data_lo, + vector unsigned int data_hi, + vector unsigned int* neg_lo, + vector unsigned int* neg_hi) +{ + 
*neg_lo = vec_xor (data_lo, mask_00ff); + *neg_hi = vec_xor (data_hi, mask_00ff); +} + +static force_inline int +is_opaque (vector unsigned int x) +{ + uint32_t cmp_result; + vector bool int ffs = vec_cmpeq(x, x); + + cmp_result = vec_all_eq(x, ffs); + + return (cmp_result & 0x8888) == 0x8888; +} + +static force_inline int +is_zero (vector unsigned int x) +{ + uint32_t cmp_result; + + cmp_result = vec_all_eq(x, (vector unsigned int) AVV(0)); + + return cmp_result == 0xffff; +} + +static force_inline int +is_transparent (vector unsigned int x) +{ + uint32_t cmp_result; + + cmp_result = vec_all_eq(x, (vector unsigned int) AVV(0)); + return (cmp_result & 0x8888) == 0x8888; +} + +static force_inline vector unsigned int +expand_pixel_8_1x128 (uint8_t data) +{ + vector unsigned int vdata; + + vdata = unpack_32_1x128 ((uint32_t) data); + +#ifdef WORDS_BIGENDIAN + return vec_perm (vdata, vdata, + (vector unsigned char)AVV ( + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F)); +#else + return vec_perm (vdata, vdata, + (vector unsigned char)AVV ( + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09)); +#endif +} + +static force_inline vector unsigned int +expand_alpha_1x128 (vector unsigned int data) +{ +#ifdef WORDS_BIGENDIAN + return vec_perm (data, data, + (vector unsigned char)AVV ( + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09)); +#else + return vec_perm (data, data, + (vector unsigned char)AVV ( + 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, + 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F)); +#endif +} + +static force_inline void +expand_alpha_2x128 (vector unsigned int data_lo, + vector unsigned int data_hi, + vector unsigned int* alpha_lo, + vector unsigned int* alpha_hi) +{ + + *alpha_lo = expand_alpha_1x128(data_lo); + *alpha_hi = expand_alpha_1x128(data_hi); +} + +static force_inline void +expand_alpha_rev_2x128 
(vector unsigned int data_lo, + vector unsigned int data_hi, + vector unsigned int* alpha_lo, + vector unsigned int* alpha_hi) +{ +#ifdef WORDS_BIGENDIAN + *alpha_lo = vec_perm (data_lo, data_lo, + (vector unsigned char)AVV ( + 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, + 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F)); + + *alpha_hi = vec_perm (data_hi, data_hi, + (vector unsigned char)AVV ( + 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, + 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F)); +#else + *alpha_lo = vec_perm (data_lo, data_lo, + (vector unsigned char)AVV ( + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09)); + + *alpha_hi = vec_perm (data_hi, data_hi, + (vector unsigned char)AVV ( + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, + 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09)); +#endif +} + +static force_inline void +pix_multiply_2x128 (vector unsigned int* data_lo, + vector unsigned int* data_hi, + vector unsigned int* alpha_lo, + vector unsigned int* alpha_hi, + vector unsigned int* ret_lo, + vector unsigned int* ret_hi) +{ + *ret_lo = pix_multiply(*data_lo, *alpha_lo); + *ret_hi = pix_multiply(*data_hi, *alpha_hi); +} + +static force_inline void +over_2x128 (vector unsigned int* src_lo, + vector unsigned int* src_hi, + vector unsigned int* alpha_lo, + vector unsigned int* alpha_hi, + vector unsigned int* dst_lo, + vector unsigned int* dst_hi) +{ + vector unsigned int t1, t2; + + negate_2x128 (*alpha_lo, *alpha_hi, &t1, &t2); + + pix_multiply_2x128 (dst_lo, dst_hi, &t1, &t2, dst_lo, dst_hi); + + *dst_lo = (vector unsigned int) + vec_adds ((vector unsigned char) *src_lo, + (vector unsigned char) *dst_lo); + + *dst_hi = (vector unsigned int) + vec_adds ((vector unsigned char) *src_hi, + (vector unsigned char) *dst_hi); +} + +static force_inline void +in_over_2x128 (vector unsigned int* src_lo, + vector unsigned int* src_hi, + vector unsigned int* alpha_lo, + vector unsigned int* alpha_hi, + 
vector unsigned int* mask_lo, + vector unsigned int* mask_hi, + vector unsigned int* dst_lo, + vector unsigned int* dst_hi) +{ + vector unsigned int s_lo, s_hi; + vector unsigned int a_lo, a_hi; + + pix_multiply_2x128 (src_lo, src_hi, mask_lo, mask_hi, &s_lo, &s_hi); + pix_multiply_2x128 (alpha_lo, alpha_hi, mask_lo, mask_hi, &a_lo, &a_hi); + + over_2x128 (&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi); +} + +static force_inline uint32_t +core_combine_over_u_pixel_vmx (uint32_t src, uint32_t dst) +{ + uint8_t a; + vector unsigned int vmxs; + + a = src >> 24; + + if (a == 0xff) + { + return src; + } + else if (src) + { + vmxs = unpack_32_1x128 (src); + return pack_1x128_32( + over(vmxs, expand_alpha_1x128 (vmxs), unpack_32_1x128 (dst))); + } + + return dst; +} + +static force_inline uint32_t +combine1 (const uint32_t *ps, const uint32_t *pm) +{ + uint32_t s = *ps; + + if (pm) + { + vector unsigned int ms, mm; + + mm = unpack_32_1x128 (*pm); + mm = expand_alpha_1x128 (mm); + + ms = unpack_32_1x128 (s); + ms = pix_multiply (ms, mm); + + s = pack_1x128_32 (ms); + } + + return s; +} + +static force_inline vector unsigned int +combine4 (const uint32_t* ps, const uint32_t* pm) +{ + vector unsigned int vmx_src_lo, vmx_src_hi; + vector unsigned int vmx_msk_lo, vmx_msk_hi; + vector unsigned int s; + + if (pm) + { + vmx_msk_lo = load_128_unaligned(pm); + + if (is_transparent(vmx_msk_lo)) + return (vector unsigned int) AVV(0); + } + + s = load_128_unaligned(ps); + + if (pm) + { + unpack_128_2x128(s, (vector unsigned int) AVV(0), + &vmx_src_lo, &vmx_src_hi); + + unpack_128_2x128(vmx_msk_lo, (vector unsigned int) AVV(0), + &vmx_msk_lo, &vmx_msk_hi); + + expand_alpha_2x128(vmx_msk_lo, vmx_msk_hi, &vmx_msk_lo, &vmx_msk_hi); + + pix_multiply_2x128(&vmx_src_lo, &vmx_src_hi, + &vmx_msk_lo, &vmx_msk_hi, + &vmx_src_lo, &vmx_src_hi); + + s = pack_2x128_128(vmx_src_lo, vmx_src_hi); + } + + return s; +} + static void vmx_combine_over_u_no_mask (uint32_t * dest, const uint32_t *src, @@ 
-2080,6 +2547,15 @@ _pixman_implementation_create_vmx (pixman_implementation_t *fallback) { pixman_implementation_t *imp = _pixman_implementation_create (fallback, vmx_fast_paths); + /* VMX constants */ + mask_00ff = create_mask_16_128 (0x00ff); + mask_ff000000 = create_mask_32_128 (0xff000000); + mask_red = create_mask_32_128 (0x00f80000); + mask_green = create_mask_32_128 (0x0000fc00); + mask_blue = create_mask_32_128 (0x000000f8); + mask_565_fix_rb = create_mask_32_128 (0x00e000e0); + mask_565_fix_g = create_mask_32_128 (0x0000c000); + /* Set up function pointers */ imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u; |