summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt Turner <mattst88@gmail.com>2012-04-17 11:28:33 -0400
committerMatt Turner <mattst88@gmail.com>2012-04-27 13:41:47 -0400
commitfacceb4a1fbba476ad98e76d15868bf7eecd3a30 (patch)
treef545a7d58348f6cfaa6164e8af705cf6eaddc6c9
parent6d29b7d7557ccb657054e867f4e27f4aa89cb25e (diff)
mmx: Use Loongson pinsrh instruction in pack_565
The pinsrh instruction is analogous to MMX EXT's pinsrw, except like other Loongson vector instructions it cannot access the general purpose registers. In the cases of other Loongson vector instructions, this is a headache, but it is actually a good thing here. Since the instruction is different from MMX, I've named the intrinsic loongson_insert_pi16. text data bss dec hex filename 25976 1952 0 27928 6d18 .libs/libpixman_loongson_mmi_la-pixman-mmx.o 25336 1952 0 27288 6a98 .libs/libpixman_loongson_mmi_la-pixman-mmx.o -and: 181 +and: 147 -dsll: 143 +dsll: 95 -dsrl: 87 +dsrl: 135 -ldc1: 523 +ldc1: 462 -lw: 767 +lw: 721 +pinsrh: 35
-rw-r--r--pixman/loongson-mmintrin.h11
-rw-r--r--pixman/pixman-mmx.c14
2 files changed, 25 insertions, 0 deletions
diff --git a/pixman/loongson-mmintrin.h b/pixman/loongson-mmintrin.h
index 44d30f51..1c74ed8a 100644
--- a/pixman/loongson-mmintrin.h
+++ b/pixman/loongson-mmintrin.h
@@ -216,3 +216,14 @@ _mm_xor_si64 (__m64 __m1, __m64 __m2)
);
return ret;
}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+loongson_insert_pi16 (__m64 __m1, __m64 __m2, int64_t __pos)
+{
+ __m64 ret;
+ asm("pinsrh_%3 %0, %1, %2\n\t"
+ : "=f" (ret)
+ : "f" (__m1), "f" (__m2), "i" (__pos)
+ );
+ return ret;
+}
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index f40fa716..7aa40195 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -182,10 +182,12 @@ typedef struct
mmxdatafield mmx_565_r;
mmxdatafield mmx_565_g;
mmxdatafield mmx_565_b;
+#ifndef USE_LOONGSON_MMI
mmxdatafield mmx_mask_0;
mmxdatafield mmx_mask_1;
mmxdatafield mmx_mask_2;
mmxdatafield mmx_mask_3;
+#endif
mmxdatafield mmx_full_alpha;
mmxdatafield mmx_4x0101;
} mmx_data_t;
@@ -207,10 +209,12 @@ static const mmx_data_t c =
MMXDATA_INIT (.mmx_565_r, 0x000000f800000000),
MMXDATA_INIT (.mmx_565_g, 0x0000000000fc0000),
MMXDATA_INIT (.mmx_565_b, 0x00000000000000f8),
+#ifndef USE_LOONGSON_MMI
MMXDATA_INIT (.mmx_mask_0, 0xffffffffffff0000),
MMXDATA_INIT (.mmx_mask_1, 0xffffffff0000ffff),
MMXDATA_INIT (.mmx_mask_2, 0xffff0000ffffffff),
MMXDATA_INIT (.mmx_mask_3, 0x0000ffffffffffff),
+#endif
MMXDATA_INIT (.mmx_full_alpha, 0x00ff000000000000),
MMXDATA_INIT (.mmx_4x0101, 0x0101010101010101),
};
@@ -528,6 +532,15 @@ pack_565 (__m64 pixel, __m64 target, int pos)
g = _mm_and_si64 (p, MC (565_g));
b = _mm_and_si64 (p, MC (565_b));
+#ifdef USE_LOONGSON_MMI
+ r = shift (r, -(32 - 8));
+ g = shift (g, -(16 - 3));
+ b = shift (b, -(0 + 3));
+
+ p = _mm_or_si64 (r, g);
+ p = _mm_or_si64 (p, b);
+ return loongson_insert_pi16 (t, p, pos);
+#else
r = shift (r, -(32 - 8) + pos * 16);
g = shift (g, -(16 - 3) + pos * 16);
b = shift (b, -(0 + 3) + pos * 16);
@@ -545,6 +558,7 @@ pack_565 (__m64 pixel, __m64 target, int pos)
p = _mm_or_si64 (g, p);
return _mm_or_si64 (b, p);
+#endif
}
#ifndef _MSC_VER