summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Oded Gabbay <oded.gabbay@gmail.com> 2015-06-28 09:42:08 +0300
committer Oded Gabbay <oded.gabbay@gmail.com> 2015-07-16 16:13:35 +0300
commit c12ee95089e7d281a29a24bf56b81f5c16dec6ee (patch)
tree aab0113df529f3f2860ddb73320194b2e98d7434
parent 034149537be94862b43fb09699b8c2149bfe948d (diff)
vmx: add helper functions
This patch adds the following helper functions to enable code reuse, hide BE/LE differences and improve maintainability. All of the functions are defined as static force_inline. The names were copied from pixman-sse2.c so that converting fast paths between sse2 and vmx will be easier from now on. For the same reason, I tried to keep the inputs/outputs of the functions as close as possible to the sse2 definitions.

The functions are:
- load_128_aligned : load 128 bits from a 16-byte aligned memory address into a vector
- load_128_unaligned : load 128 bits from memory into a vector, without any guarantee of alignment for the source pointer
- save_128_aligned : save a 128-bit vector into a 16-byte aligned memory address
- create_mask_16_128 : take a 16-bit value and fill a new vector with it
- create_mask_1x32_128 : take a 32-bit pointer and fill a new vector with the 32-bit value from that pointer
- create_mask_32_128 : take a 32-bit value and fill a new vector with it
- unpack_32_1x128 : unpack a 32-bit value into a vector
- unpacklo_128_16x8 : unpack the eight low 8-bit values of a vector
- unpackhi_128_16x8 : unpack the eight high 8-bit values of a vector
- unpacklo_128_8x16 : unpack the four low 16-bit values of a vector
- unpackhi_128_8x16 : unpack the four high 16-bit values of a vector
- unpack_128_2x128 : unpack the eight low 8-bit values of a vector into one vector and the eight high 8-bit values into another vector
- unpack_128_2x128_16 : unpack the four low 16-bit values of a vector into one vector and the four high 16-bit values into another vector
- unpack_565_to_8888 : unpack an RGB_565 vector to an 8888 vector
- pack_1x128_32 : pack a vector and return its least significant 32 bits
- pack_2x128_128 : pack two vectors into one and return it
- negate_2x128 : xor two vectors with mask_00ff (separately)
- is_opaque : returns whether all the pixels contained in the vector are opaque
- is_zero : returns whether the vector equals 0
- is_transparent : returns whether all the pixels contained in the vector are transparent
- expand_pixel_8_1x128 : expand an 8-bit pixel into the lower 8 bytes of a vector
- expand_alpha_1x128 : expand alpha from a vector and return the new vector
- expand_alpha_2x128 : expand alpha from one vector and another alpha from a second vector
- expand_alpha_rev_2x128 : expand a reversed alpha from one vector and another reversed alpha from a second vector
- pix_multiply_2x128 : do pix_multiply for two vectors (separately)
- over_2x128 : perform the over op. on two vectors
- in_over_2x128 : perform the in-over op. on two vectors

v2: removed expand_pixel_32_1x128, as it was not used by any function and its implementation was erroneous.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Acked-by: Siarhei Siamashka <siarhei.siamashka@gmail.com>
-rw-r--r-- pixman/pixman-vmx.c 476
1 files changed, 476 insertions, 0 deletions
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index 880a19a..39d1a06 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -30,10 +30,19 @@
#endif
#include "pixman-private.h"
#include "pixman-combine32.h"
+#include "pixman-inlines.h"
#include <altivec.h>
#define AVV(x...) {x}
+/* VMX constants. They are assigned in _pixman_implementation_create_vmx ():
+ * mask_00ff from create_mask_16_128 (0x00ff)
+ * mask_ff000000 from create_mask_32_128 (0xff000000)
+ * mask_red from create_mask_32_128 (0x00f80000)
+ * mask_green from create_mask_32_128 (0x0000fc00)
+ * mask_blue from create_mask_32_128 (0x000000f8)
+ * mask_565_fix_rb from create_mask_32_128 (0x00e000e0)
+ * mask_565_fix_g from create_mask_32_128 (0x0000c000)
+ */
+static vector unsigned int mask_00ff;
+static vector unsigned int mask_ff000000;
+static vector unsigned int mask_red;
+static vector unsigned int mask_green;
+static vector unsigned int mask_blue;
+static vector unsigned int mask_565_fix_rb;
+static vector unsigned int mask_565_fix_g;
+
static force_inline vector unsigned int
splat_alpha (vector unsigned int pix)
{
@@ -233,6 +242,464 @@ do \
#define STORE_VECTOR(dest) \
vec_st ((vector unsigned int) v ## dest, 0, dest);
+/* Load 4 pixels (128 bits) from a 16-byte boundary aligned address.
+ * src is dereferenced directly as a vector pointer, so the caller is
+ * responsible for the alignment.
+ */
+static force_inline vector unsigned int
+load_128_aligned (const uint32_t* src)
+{
+ return *((vector unsigned int *) src);
+}
+
+/* Load 4 pixels (128 bits) from an address with no alignment guarantee.
+ * The load goes through the DECLARE_SRC_MASK_VAR / COMPUTE_SHIFT_MASK /
+ * LOAD_VECTOR macros (defined elsewhere in this file); LOAD_VECTOR (src)
+ * is expected to fill in the local vsrc that is returned.
+ */
+static force_inline vector unsigned int
+load_128_unaligned (const uint32_t* src)
+{
+ vector unsigned int vsrc;
+ DECLARE_SRC_MASK_VAR;
+
+ COMPUTE_SHIFT_MASK (src);
+ LOAD_VECTOR (src);
+
+ return vsrc;
+}
+
+/* Save 4 pixels (128 bits) to a 16-byte boundary aligned address.
+ * STORE_VECTOR (data) expands to a vec_st of the local named vdata to
+ * data; the macro supplies its own trailing semicolon.
+ */
+static force_inline void
+save_128_aligned (uint32_t* data,
+ vector unsigned int vdata)
+{
+ STORE_VECTOR(data)
+}
+
+/* Take a 16-bit value and fill a new vector with it.
+ * The by-value parameter is loaded through its address with the
+ * unaligned-load macros, then 16-bit element 0 of the loaded vector is
+ * replicated with vec_splat.
+ * NOTE(review): the load macros presumably read a whole vector starting at
+ * &mask although only the first element is used - confirm against the
+ * macro definitions.
+ */
+static force_inline vector unsigned int
+create_mask_16_128 (uint16_t mask)
+{
+ uint16_t* src;
+ vector unsigned short vsrc;
+ DECLARE_SRC_MASK_VAR;
+
+ src = &mask;
+
+ COMPUTE_SHIFT_MASK (src);
+ LOAD_VECTOR (src);
+ return (vector unsigned int) vec_splat(vsrc, 0);
+}
+
+/* Take a pointer to a 32-bit value and fill a new vector with that value.
+ * The value is loaded with the unaligned-load macros and 32-bit element 0
+ * of the loaded vector is replicated with vec_splat.
+ */
+static force_inline vector unsigned int
+create_mask_1x32_128 (const uint32_t *src)
+{
+ vector unsigned int vsrc;
+ DECLARE_SRC_MASK_VAR;
+
+ COMPUTE_SHIFT_MASK (src);
+ LOAD_VECTOR (src);
+ return vec_splat(vsrc, 0);
+}
+
+/* Take a 32-bit value and fill a new vector with it, by handing the
+ * address of the by-value parameter to create_mask_1x32_128 ().
+ */
+static force_inline vector unsigned int
+create_mask_32_128 (uint32_t mask)
+{
+ return create_mask_1x32_128(&mask);
+}
+
+/* Unpack one 32-bit pixel into a vector.
+ * data is placed in the last 32-bit element of an otherwise zero vector;
+ * vec_mergel then interleaves its bytes with zero bytes, so every 8-bit
+ * channel ends up in a 16-bit lane of its own. The operand order is
+ * swapped between big- and little-endian builds.
+ */
+static force_inline vector unsigned int
+unpack_32_1x128 (uint32_t data)
+{
+ vector unsigned int vdata = {0, 0, 0, data};
+ vector unsigned short lo;
+
+ lo = (vector unsigned short)
+#ifdef WORDS_BIGENDIAN
+ vec_mergel ((vector unsigned char) AVV(0),
+ (vector unsigned char) vdata);
+#else
+ vec_mergel ((vector unsigned char) vdata,
+ (vector unsigned char) AVV(0));
+#endif
+
+ return (vector unsigned int) lo;
+}
+
+/* Unpack the eight low 8-bit values: vec_mergel interleaves them byte by
+ * byte from data1 and data2. Callers in this file pass a zero vector as
+ * data2, which widens each byte of data1 to a 16-bit lane. The operand
+ * order is swapped between big- and little-endian builds.
+ */
+static force_inline vector unsigned int
+unpacklo_128_16x8 (vector unsigned int data1, vector unsigned int data2)
+{
+ vector unsigned char lo;
+
+ /* interleave bytes, i.e. unpack to short when data2 is zero */
+ lo = (vector unsigned char)
+#ifdef WORDS_BIGENDIAN
+ vec_mergel ((vector unsigned char) data2,
+ (vector unsigned char) data1);
+#else
+ vec_mergel ((vector unsigned char) data1,
+ (vector unsigned char) data2);
+#endif
+
+ return (vector unsigned int) lo;
+}
+
+/* Unpack the eight high 8-bit values: vec_mergeh interleaves them byte by
+ * byte from data1 and data2. Callers in this file pass a zero vector as
+ * data2, which widens each byte of data1 to a 16-bit lane. The operand
+ * order is swapped between big- and little-endian builds.
+ */
+static force_inline vector unsigned int
+unpackhi_128_16x8 (vector unsigned int data1, vector unsigned int data2)
+{
+ vector unsigned char hi;
+
+ /* interleave bytes, i.e. unpack to short when data2 is zero */
+ hi = (vector unsigned char)
+#ifdef WORDS_BIGENDIAN
+ vec_mergeh ((vector unsigned char) data2,
+ (vector unsigned char) data1);
+#else
+ vec_mergeh ((vector unsigned char) data1,
+ (vector unsigned char) data2);
+#endif
+
+ return (vector unsigned int) hi;
+}
+
+/* Unpack the four low 16-bit values: vec_mergel interleaves them element
+ * by element from data1 and data2. The operand order is swapped between
+ * big- and little-endian builds.
+ */
+static force_inline vector unsigned int
+unpacklo_128_8x16 (vector unsigned int data1, vector unsigned int data2)
+{
+ vector unsigned short lo;
+
+ /* interleave 16-bit elements */
+ lo = (vector unsigned short)
+#ifdef WORDS_BIGENDIAN
+ vec_mergel ((vector unsigned short) data2,
+ (vector unsigned short) data1);
+#else
+ vec_mergel ((vector unsigned short) data1,
+ (vector unsigned short) data2);
+#endif
+
+ return (vector unsigned int) lo;
+}
+
+/* Unpack the four high 16-bit values: vec_mergeh interleaves them element
+ * by element from data1 and data2. The operand order is swapped between
+ * big- and little-endian builds.
+ */
+static force_inline vector unsigned int
+unpackhi_128_8x16 (vector unsigned int data1, vector unsigned int data2)
+{
+ vector unsigned short hi;
+
+ /* interleave 16-bit elements */
+ hi = (vector unsigned short)
+#ifdef WORDS_BIGENDIAN
+ vec_mergeh ((vector unsigned short) data2,
+ (vector unsigned short) data1);
+#else
+ vec_mergeh ((vector unsigned short) data1,
+ (vector unsigned short) data2);
+#endif
+
+ return (vector unsigned int) hi;
+}
+
+/* Unpack the eight low 8-bit values into *data_lo and the eight high 8-bit
+ * values into *data_hi, using unpacklo_128_16x8 () and
+ * unpackhi_128_16x8 (). data1 and data2 are passed by value, so an output
+ * pointer may refer to the same variable that was passed as an input.
+ */
+static force_inline void
+unpack_128_2x128 (vector unsigned int data1, vector unsigned int data2,
+ vector unsigned int* data_lo, vector unsigned int* data_hi)
+{
+ *data_lo = unpacklo_128_16x8(data1, data2);
+ *data_hi = unpackhi_128_16x8(data1, data2);
+}
+
+/* Unpack the four low 16-bit values into *data_lo and the four high 16-bit
+ * values into *data_hi, using unpacklo_128_8x16 () and
+ * unpackhi_128_8x16 ().
+ */
+static force_inline void
+unpack_128_2x128_16 (vector unsigned int data1, vector unsigned int data2,
+ vector unsigned int* data_lo, vector unsigned int* data_hi)
+{
+ *data_lo = unpacklo_128_8x16(data1, data2);
+ *data_hi = unpackhi_128_8x16(data1, data2);
+}
+
+/* Unpack an RGB_565 vector (one pixel per 32-bit element) to 8888.
+ * Each channel is shifted up to its 8-bit position and isolated with
+ * mask_red / mask_green / mask_blue. The top bits of each channel are then
+ * copied into the low bits left empty by the shift (3 bits for red and
+ * blue, 2 bits for green). No bits above bit 23 are set in the result.
+ */
+static force_inline vector unsigned int
+unpack_565_to_8888 (vector unsigned int lo)
+{
+ vector unsigned int r, g, b, rb, t;
+
+ /* move each channel to its 8888 position */
+ r = vec_and (vec_sl(lo, create_mask_32_128(8)), mask_red);
+ g = vec_and (vec_sl(lo, create_mask_32_128(5)), mask_green);
+ b = vec_and (vec_sl(lo, create_mask_32_128(3)), mask_blue);
+
+ /* replicate the top 3 bits of red and blue into their low 3 bits */
+ rb = vec_or (r, b);
+ t = vec_and (rb, mask_565_fix_rb);
+ t = vec_sr (t, create_mask_32_128(5));
+ rb = vec_or (rb, t);
+
+ /* replicate the top 2 bits of green into its low 2 bits */
+ t = vec_and (g, mask_565_fix_g);
+ t = vec_sr (t, create_mask_32_128(6));
+ g = vec_or (g, t);
+
+ return vec_or (rb, g);
+}
+
+/* Pack a vector of 16-bit channels down to bytes and return one 32-bit
+ * pixel. vec_packsu packs data against a zero vector with unsigned
+ * saturation; 32-bit element 1 of the packed vector is returned.
+ */
+static force_inline uint32_t
+pack_1x128_32 (vector unsigned int data)
+{
+ vector unsigned char vpack;
+
+ vpack = vec_packsu((vector unsigned short) data,
+ (vector unsigned short) AVV(0));
+
+ return vec_extract((vector unsigned int) vpack, 1);
+}
+
+/* Pack two vectors of 16-bit channels into one vector of bytes with
+ * unsigned saturation. Note the operand order: hi is the first argument
+ * to vec_packsu and lo the second.
+ */
+static force_inline vector unsigned int
+pack_2x128_128 (vector unsigned int lo, vector unsigned int hi)
+{
+ vector unsigned char vpack;
+
+ vpack = vec_packsu((vector unsigned short) hi,
+ (vector unsigned short) lo);
+
+ return (vector unsigned int) vpack;
+}
+
+/* XOR two vectors with mask_00ff (separately), storing the results in
+ * *neg_lo and *neg_hi. For a 16-bit lane holding an 8-bit value x this
+ * yields 255 - x.
+ */
+static force_inline void
+negate_2x128 (vector unsigned int data_lo,
+ vector unsigned int data_hi,
+ vector unsigned int* neg_lo,
+ vector unsigned int* neg_hi)
+{
+ *neg_lo = vec_xor (data_lo, mask_00ff);
+ *neg_hi = vec_xor (data_hi, mask_00ff);
+}
+
+/* Returns non-zero when all four pixels in x are opaque, i.e. the top
+ * (alpha) byte of every 32-bit element is 0xff.
+ *
+ * vec_all_eq () yields a single 0/1 truth value rather than a per-byte
+ * bitmask, so it is returned directly; testing it against 0x8888 could
+ * never succeed. Only the alpha byte takes part in the comparison,
+ * selected with mask_ff000000, so the colour channels do not matter.
+ */
+static force_inline int
+is_opaque (vector unsigned int x)
+{
+ return vec_all_eq (vec_and (x, mask_ff000000), mask_ff000000);
+}
+
+/* Returns non-zero when every bit of x is zero.
+ *
+ * vec_all_eq () yields a single 0/1 truth value rather than a per-byte
+ * bitmask, so it is returned directly; comparing it with 0xffff could
+ * never succeed.
+ */
+static force_inline int
+is_zero (vector unsigned int x)
+{
+ return vec_all_eq (x, (vector unsigned int) AVV(0));
+}
+
+/* Returns non-zero when all four pixels in x are transparent, i.e. the
+ * top (alpha) byte of every 32-bit element is zero.
+ *
+ * vec_all_eq () yields a single 0/1 truth value rather than a per-byte
+ * bitmask, so it is returned directly; testing it against 0x8888 could
+ * never succeed. Only the alpha byte takes part in the comparison,
+ * selected with mask_ff000000, so the colour channels do not matter.
+ */
+static force_inline int
+is_transparent (vector unsigned int x)
+{
+ return vec_all_eq (vec_and (x, mask_ff000000),
+ (vector unsigned int) AVV(0));
+}
+
+/* Expand an 8-bit pixel across a vector.
+ * data is first unpacked with unpack_32_1x128 (). The permute then keeps
+ * bytes 0-7 as they are and fills bytes 8-15 with four copies of a single
+ * 16-bit lane: bytes 0x0E-0x0F on big-endian builds, bytes 0x08-0x09 on
+ * little-endian builds.
+ */
+static force_inline vector unsigned int
+expand_pixel_8_1x128 (uint8_t data)
+{
+ vector unsigned int vdata;
+
+ vdata = unpack_32_1x128 ((uint32_t) data);
+
+#ifdef WORDS_BIGENDIAN
+ return vec_perm (vdata, vdata,
+ (vector unsigned char)AVV (
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));
+#else
+ return vec_perm (vdata, vdata,
+ (vector unsigned char)AVV (
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));
+#endif
+}
+
+/* Expand alpha from an unpacked vector and return the new vector.
+ * Each 8-byte half of the result holds four copies of one 16-bit lane of
+ * the same half of data: the lane at bytes 0-1 / 8-9 on big-endian
+ * builds, the lane at bytes 6-7 / 0x0E-0x0F on little-endian builds.
+ */
+static force_inline vector unsigned int
+expand_alpha_1x128 (vector unsigned int data)
+{
+#ifdef WORDS_BIGENDIAN
+ return vec_perm (data, data,
+ (vector unsigned char)AVV (
+ 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
+ 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));
+#else
+ return vec_perm (data, data,
+ (vector unsigned char)AVV (
+ 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07,
+ 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));
+#endif
+}
+
+/* Expand alpha from data_lo into *alpha_lo and from data_hi into
+ * *alpha_hi, using expand_alpha_1x128 () on each. The inputs are passed
+ * by value, so an output pointer may refer to the same variable that was
+ * passed as an input.
+ */
+static force_inline void
+expand_alpha_2x128 (vector unsigned int data_lo,
+ vector unsigned int data_hi,
+ vector unsigned int* alpha_lo,
+ vector unsigned int* alpha_hi)
+{
+
+ *alpha_lo = expand_alpha_1x128(data_lo);
+ *alpha_hi = expand_alpha_1x128(data_hi);
+}
+
+/* Expand a reversed alpha from data_lo into *alpha_lo and from data_hi
+ * into *alpha_hi.
+ * Each 8-byte half of a result holds four copies of one 16-bit lane of
+ * the same half of its input: the lane at bytes 6-7 / 0x0E-0x0F on
+ * big-endian builds, the lane at bytes 0-1 / 8-9 on little-endian builds.
+ * That is the opposite lane from the one expand_alpha_1x128 () selects.
+ */
+static force_inline void
+expand_alpha_rev_2x128 (vector unsigned int data_lo,
+ vector unsigned int data_hi,
+ vector unsigned int* alpha_lo,
+ vector unsigned int* alpha_hi)
+{
+#ifdef WORDS_BIGENDIAN
+ *alpha_lo = vec_perm (data_lo, data_lo,
+ (vector unsigned char)AVV (
+ 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07,
+ 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));
+
+ *alpha_hi = vec_perm (data_hi, data_hi,
+ (vector unsigned char)AVV (
+ 0x06, 0x07, 0x06, 0x07, 0x06, 0x07, 0x06, 0x07,
+ 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F, 0x0E, 0x0F));
+#else
+ *alpha_lo = vec_perm (data_lo, data_lo,
+ (vector unsigned char)AVV (
+ 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
+ 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));
+
+ *alpha_hi = vec_perm (data_hi, data_hi,
+ (vector unsigned char)AVV (
+ 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01,
+ 0x08, 0x09, 0x08, 0x09, 0x08, 0x09, 0x08, 0x09));
+#endif
+}
+
+/* Do pix_multiply () for two vector pairs (separately):
+ * *ret_lo = pix_multiply (*data_lo, *alpha_lo)
+ * *ret_hi = pix_multiply (*data_hi, *alpha_hi)
+ * Callers in this file pass the same variables for data_lo/ret_lo and
+ * data_hi/ret_hi; each input pair is read before its result is written.
+ */
+static force_inline void
+pix_multiply_2x128 (vector unsigned int* data_lo,
+ vector unsigned int* data_hi,
+ vector unsigned int* alpha_lo,
+ vector unsigned int* alpha_hi,
+ vector unsigned int* ret_lo,
+ vector unsigned int* ret_hi)
+{
+ *ret_lo = pix_multiply(*data_lo, *alpha_lo);
+ *ret_hi = pix_multiply(*data_hi, *alpha_hi);
+}
+
+/* Perform the over op. on two vectors, updating *dst_lo / *dst_hi in
+ * place:
+ * dst = src + pix_multiply (dst, negated alpha)
+ * where the alpha is negated with negate_2x128 () and the addition is a
+ * saturating unsigned byte add (vec_adds on vector unsigned char).
+ * src and alpha are only read.
+ */
+static force_inline void
+over_2x128 (vector unsigned int* src_lo,
+ vector unsigned int* src_hi,
+ vector unsigned int* alpha_lo,
+ vector unsigned int* alpha_hi,
+ vector unsigned int* dst_lo,
+ vector unsigned int* dst_hi)
+{
+ vector unsigned int t1, t2;
+
+ /* t1/t2 = negated alpha */
+ negate_2x128 (*alpha_lo, *alpha_hi, &t1, &t2);
+
+ /* dst = dst * negated alpha */
+ pix_multiply_2x128 (dst_lo, dst_hi, &t1, &t2, dst_lo, dst_hi);
+
+ /* dst = src + dst, saturating per byte */
+ *dst_lo = (vector unsigned int)
+ vec_adds ((vector unsigned char) *src_lo,
+ (vector unsigned char) *dst_lo);
+
+ *dst_hi = (vector unsigned int)
+ vec_adds ((vector unsigned char) *src_hi,
+ (vector unsigned char) *dst_hi);
+}
+
+/* Perform the in-over op. on two vectors, updating *dst_lo / *dst_hi in
+ * place. Both the source and its alpha are first multiplied by the mask
+ * into local temporaries, then over_2x128 () is applied with those
+ * products. src, alpha and mask are only read.
+ */
+static force_inline void
+in_over_2x128 (vector unsigned int* src_lo,
+ vector unsigned int* src_hi,
+ vector unsigned int* alpha_lo,
+ vector unsigned int* alpha_hi,
+ vector unsigned int* mask_lo,
+ vector unsigned int* mask_hi,
+ vector unsigned int* dst_lo,
+ vector unsigned int* dst_hi)
+{
+ vector unsigned int s_lo, s_hi;
+ vector unsigned int a_lo, a_hi;
+
+ /* s = src * mask, a = alpha * mask */
+ pix_multiply_2x128 (src_lo, src_hi, mask_lo, mask_hi, &s_lo, &s_hi);
+ pix_multiply_2x128 (alpha_lo, alpha_hi, mask_lo, mask_hi, &a_lo, &a_hi);
+
+ over_2x128 (&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi);
+}
+
+/* Combine a single source pixel over a single destination pixel.
+ * The alpha is taken from the top 8 bits of src:
+ * - alpha == 0xff: src is returned unchanged;
+ * - otherwise, src != 0: both pixels are unpacked, combined with over ()
+ * using the expanded source alpha, and packed back to 32 bits;
+ * - src == 0: dst is returned unchanged.
+ */
+static force_inline uint32_t
+core_combine_over_u_pixel_vmx (uint32_t src, uint32_t dst)
+{
+ uint8_t a;
+ vector unsigned int vmxs;
+
+ a = src >> 24;
+
+ if (a == 0xff)
+ {
+ return src;
+ }
+ else if (src)
+ {
+ vmxs = unpack_32_1x128 (src);
+ return pack_1x128_32(
+ over(vmxs, expand_alpha_1x128 (vmxs), unpack_32_1x128 (dst)));
+ }
+
+ return dst;
+}
+
+/* Fetch one source pixel, applying the mask when one is given.
+ * With pm == NULL the pixel at *ps is returned as is. Otherwise the pixel
+ * is unpacked, multiplied with pix_multiply () by the expanded alpha of
+ * the mask pixel at *pm, and packed back to 32 bits.
+ */
+static force_inline uint32_t
+combine1 (const uint32_t *ps, const uint32_t *pm)
+{
+ uint32_t s = *ps;
+
+ if (pm)
+ {
+ vector unsigned int ms, mm;
+
+ /* mm = expanded alpha of the mask pixel */
+ mm = unpack_32_1x128 (*pm);
+ mm = expand_alpha_1x128 (mm);
+
+ /* s = s * mask alpha */
+ ms = unpack_32_1x128 (s);
+ ms = pix_multiply (ms, mm);
+
+ s = pack_1x128_32 (ms);
+ }
+
+ return s;
+}
+
+/* Fetch four source pixels, applying the mask when one is given.
+ * Neither ps nor pm needs to be aligned (both go through
+ * load_128_unaligned ()).
+ * With pm == NULL the four pixels at ps are returned as is. Otherwise a
+ * zero vector is returned early, before the source is even loaded, when
+ * is_transparent () reports the mask as transparent; else each source
+ * pixel is multiplied by the expanded alpha of its mask pixel.
+ * vmx_msk_lo is assigned and used only on the pm != NULL paths.
+ */
+static force_inline vector unsigned int
+combine4 (const uint32_t* ps, const uint32_t* pm)
+{
+ vector unsigned int vmx_src_lo, vmx_src_hi;
+ vector unsigned int vmx_msk_lo, vmx_msk_hi;
+ vector unsigned int s;
+
+ if (pm)
+ {
+ vmx_msk_lo = load_128_unaligned(pm);
+
+ if (is_transparent(vmx_msk_lo))
+ return (vector unsigned int) AVV(0);
+ }
+
+ s = load_128_unaligned(ps);
+
+ if (pm)
+ {
+ /* widen source and mask to 16 bits per channel */
+ unpack_128_2x128(s, (vector unsigned int) AVV(0),
+ &vmx_src_lo, &vmx_src_hi);
+
+ unpack_128_2x128(vmx_msk_lo, (vector unsigned int) AVV(0),
+ &vmx_msk_lo, &vmx_msk_hi);
+
+ expand_alpha_2x128(vmx_msk_lo, vmx_msk_hi, &vmx_msk_lo, &vmx_msk_hi);
+
+ /* src = src * mask alpha */
+ pix_multiply_2x128(&vmx_src_lo, &vmx_src_hi,
+ &vmx_msk_lo, &vmx_msk_hi,
+ &vmx_src_lo, &vmx_src_hi);
+
+ s = pack_2x128_128(vmx_src_lo, vmx_src_hi);
+ }
+
+ return s;
+}
+
static void
vmx_combine_over_u_no_mask (uint32_t * dest,
const uint32_t *src,
@@ -2080,6 +2547,15 @@ _pixman_implementation_create_vmx (pixman_implementation_t *fallback)
{
pixman_implementation_t *imp = _pixman_implementation_create (fallback, vmx_fast_paths);
+ /* VMX constants */
+ mask_00ff = create_mask_16_128 (0x00ff);
+ mask_ff000000 = create_mask_32_128 (0xff000000);
+ mask_red = create_mask_32_128 (0x00f80000);
+ mask_green = create_mask_32_128 (0x0000fc00);
+ mask_blue = create_mask_32_128 (0x000000f8);
+ mask_565_fix_rb = create_mask_32_128 (0x00e000e0);
+ mask_565_fix_g = create_mask_32_128 (0x0000c000);
+
/* Set up function pointers */
imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u;