summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSiarhei Siamashka <siarhei.siamashka@gmail.com>2013-09-28 04:51:21 +0300
committerSiarhei Siamashka <siarhei.siamashka@gmail.com>2013-09-28 05:13:43 +0300
commit72d5d54c9a99350f730dbddf95bb65f3a7e3fb2d (patch)
tree6a4a93c561f9eb9b5d1e8791ec9fbbe544f22552
parentd887e91040c2a93c2cb64b014311a8683be26dff (diff)
vmx: there is no need to handle unaligned destination anymorevmx-fix-unaligned-writes
So the redundant variables, memory reads/writes and reshuffles can be safely removed. For example, this makes the inner loop of 'vmx_combine_add_u_no_mask' function much more simple. Before: 7a20:7d a8 48 ce lvx v13,r8,r9 7a24:7d 80 48 ce lvx v12,r0,r9 7a28:7d 28 50 ce lvx v9,r8,r10 7a2c:7c 20 50 ce lvx v1,r0,r10 7a30:39 4a 00 10 addi r10,r10,16 7a34:10 0d 62 eb vperm v0,v13,v12,v11 7a38:10 21 4a 2b vperm v1,v1,v9,v8 7a3c:11 2c 6a eb vperm v9,v12,v13,v11 7a40:10 21 4a 00 vaddubs v1,v1,v9 7a44:11 a1 02 ab vperm v13,v1,v0,v10 7a48:10 00 0a ab vperm v0,v0,v1,v10 7a4c:7d a8 49 ce stvx v13,r8,r9 7a50:7c 00 49 ce stvx v0,r0,r9 7a54:39 29 00 10 addi r9,r9,16 7a58:42 00 ff c8 bdnz+ 7a20 <.vmx_combine_add_u_no_mask+0x120> After: 76c0:7c 00 48 ce lvx v0,r0,r9 76c4:7d a8 48 ce lvx v13,r8,r9 76c8:39 29 00 10 addi r9,r9,16 76cc:7c 20 50 ce lvx v1,r0,r10 76d0:10 00 6b 2b vperm v0,v0,v13,v12 76d4:10 00 0a 00 vaddubs v0,v0,v1 76d8:7c 00 51 ce stvx v0,r0,r10 76dc:39 4a 00 10 addi r10,r10,16 76e0:42 00 ff e0 bdnz+ 76c0 <.vmx_combine_add_u_no_mask+0x120>
-rw-r--r--pixman/pixman-vmx.c117
1 files changed, 36 insertions, 81 deletions
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index 130d78e..c33631c 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -134,15 +134,11 @@ over (vector unsigned int src,
source ## _mask = vec_lvsl (0, source);
#define COMPUTE_SHIFT_MASKS(dest, source) \
- dest ## _mask = vec_lvsl (0, dest); \
- source ## _mask = vec_lvsl (0, source); \
- store_mask = vec_lvsr (0, dest);
+ source ## _mask = vec_lvsl (0, source);
#define COMPUTE_SHIFT_MASKC(dest, source, mask) \
mask ## _mask = vec_lvsl (0, mask); \
- dest ## _mask = vec_lvsl (0, dest); \
- source ## _mask = vec_lvsl (0, source); \
- store_mask = vec_lvsr (0, dest);
+ source ## _mask = vec_lvsl (0, source);
/* notice you have to declare temp vars...
* Note: tmp3 and tmp4 must remain untouched!
@@ -151,23 +147,17 @@ over (vector unsigned int src,
#define LOAD_VECTORS(dest, source) \
tmp1 = (typeof(tmp1))vec_ld (0, source); \
tmp2 = (typeof(tmp2))vec_ld (15, source); \
- tmp3 = (typeof(tmp3))vec_ld (0, dest); \
v ## source = (typeof(v ## source)) \
vec_perm (tmp1, tmp2, source ## _mask); \
- tmp4 = (typeof(tmp4))vec_ld (15, dest); \
- v ## dest = (typeof(v ## dest)) \
- vec_perm (tmp3, tmp4, dest ## _mask);
+ v ## dest = (typeof(v ## dest))vec_ld (0, dest);
#define LOAD_VECTORSC(dest, source, mask) \
tmp1 = (typeof(tmp1))vec_ld (0, source); \
tmp2 = (typeof(tmp2))vec_ld (15, source); \
- tmp3 = (typeof(tmp3))vec_ld (0, dest); \
v ## source = (typeof(v ## source)) \
vec_perm (tmp1, tmp2, source ## _mask); \
- tmp4 = (typeof(tmp4))vec_ld (15, dest); \
tmp1 = (typeof(tmp1))vec_ld (0, mask); \
- v ## dest = (typeof(v ## dest)) \
- vec_perm (tmp3, tmp4, dest ## _mask); \
+ v ## dest = (typeof(v ## dest))vec_ld (0, dest); \
tmp2 = (typeof(tmp2))vec_ld (15, mask); \
v ## mask = (typeof(v ## mask)) \
vec_perm (tmp1, tmp2, mask ## _mask);
@@ -178,11 +168,7 @@ over (vector unsigned int src,
splat_alpha (v ## mask));
#define STORE_VECTOR(dest) \
- edges = vec_perm (tmp4, tmp3, dest ## _mask); \
- tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \
- tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \
- vec_st ((vector unsigned int) tmp3, 15, dest); \
- vec_st ((vector unsigned int) tmp1, 0, dest);
+ vec_st ((vector unsigned int) v ## dest, 0, dest);
static void
vmx_combine_over_u_no_mask (uint32_t * dest,
@@ -191,8 +177,7 @@ vmx_combine_over_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -242,8 +227,7 @@ vmx_combine_over_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -314,8 +298,7 @@ vmx_combine_over_reverse_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -363,8 +346,7 @@ vmx_combine_over_reverse_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -432,8 +414,7 @@ vmx_combine_in_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -478,8 +459,7 @@ vmx_combine_in_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -544,8 +524,7 @@ vmx_combine_in_reverse_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -592,8 +571,7 @@ vmx_combine_in_reverse_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -660,8 +638,7 @@ vmx_combine_out_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -708,8 +685,7 @@ vmx_combine_out_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -774,8 +750,7 @@ vmx_combine_out_reverse_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -823,8 +798,7 @@ vmx_combine_out_reverse_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -891,8 +865,7 @@ vmx_combine_atop_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -944,8 +917,7 @@ vmx_combine_atop_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1021,8 +993,7 @@ vmx_combine_atop_reverse_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1074,8 +1045,7 @@ vmx_combine_atop_reverse_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1151,8 +1121,7 @@ vmx_combine_xor_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1204,8 +1173,7 @@ vmx_combine_xor_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1281,8 +1249,7 @@ vmx_combine_add_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1328,8 +1295,7 @@ vmx_combine_add_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1397,8 +1363,7 @@ vmx_combine_src_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1448,8 +1413,7 @@ vmx_combine_over_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1507,8 +1471,7 @@ vmx_combine_over_reverse_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1564,8 +1527,7 @@ vmx_combine_in_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1619,8 +1581,7 @@ vmx_combine_in_reverse_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1675,8 +1636,7 @@ vmx_combine_out_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1733,8 +1693,7 @@ vmx_combine_out_reverse_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1791,8 +1750,7 @@ vmx_combine_atop_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask, vsrca;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1858,8 +1816,7 @@ vmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1922,8 +1879,7 @@ vmx_combine_xor_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1986,8 +1942,7 @@ vmx_combine_add_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
while (width && ((uintptr_t)dest & 15))
{