From a432bdce6637aa96060b9f1e25aae51c6fb95670 Mon Sep 17 00:00:00 2001 From: Nemanja Lukic Date: Mon, 12 Nov 2012 22:48:53 +0100 Subject: MIPS: DSPr2: Added more fast-paths for OVER operation: Performance numbers before/after on MIPS-74kc @ 1GHz: lowlevel-blt-bench results Referent (before): over_n_0565 = L1: 14.48 L2: 21.36 M: 17.57 ( 23.30%) HT: 6.95 VT: 6.44 R: 6.39 RT: 2.16 ( 22Kops/s) over_n_8888 = L1: 92.60 L2: 86.13 M: 24.41 ( 64.74%) HT: 8.94 VT: 8.06 R: 8.00 RT: 2.53 ( 25Kops/s) Optimized: over_n_0565 = L1: 27.65 L2: 189.22 M: 58.19 ( 77.12%) HT: 52.80 VT: 49.88 R: 47.53 RT: 23.67 ( 72Kops/s) over_n_8888 = L1: 235.99 L2: 230.86 M: 29.09 ( 77.11%) HT: 27.95 VT: 27.24 R: 26.58 RT: 18.10 ( 67Kops/s) --- pixman/pixman-mips-dspr2-asm.S | 134 +++++++++++++++++++++++++++++++++++++++++ pixman/pixman-mips-dspr2.c | 9 ++- pixman/pixman-mips-dspr2.h | 36 +++++++++++ 3 files changed, 178 insertions(+), 1 deletion(-) diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S index 02adb6d..791244a 100644 --- a/pixman/pixman-mips-dspr2-asm.S +++ b/pixman/pixman-mips-dspr2-asm.S @@ -1342,6 +1342,140 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips) END(pixman_composite_over_8888_8888_asm_mips) +LEAF_MIPS_DSPR2(pixman_composite_over_n_0565_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (32bit constant) + * a2 - w + */ + + beqz a2, 5f + nop + + not t0, a1 + srl t0, t0, 24 + bgtz t0, 1f + nop + CONVERT_1x8888_TO_1x0565 a1, t1, t2, t3 +0: + sh t1, 0(a0) + addiu a2, a2, -1 + bgtz a2, 0b + addiu a0, a0, 2 + j ra + nop + +1: + SAVE_REGS_ON_STACK 0, s0, s1, s2 + li t4, 0x00ff00ff + li t5, 0xf800f800 + li t6, 0x07e007e0 + li t7, 0x001F001F + addiu t1, a2, -1 + beqz t1, 3f + nop +2: + lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ + lhu t2, 2(a0) /* t2 = destination (r5g6b5) */ + + CONVERT_2x0565_TO_2x8888 t1, t2, t3, t8, t6, t7, t9, s0, s1, s2 + MIPS_2xUN8x4_MUL_2xUN8 t3, t8, t0, t0, t1, t2, t4, t9, s0, s1, s2, t3, t8 + addu_s.qb t1, t1, a1 + addu_s.qb t2, t2, a1 + CONVERT_2x8888_TO_2x0565 t1, t2, t3, t8, t5, t6, t7, s0, s1 + + sh t3, 0(a0) + sh t8, 2(a0) + + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 2b + addiu a0, a0, 4 +3: + beqz a2, 4f + nop + + lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ + + CONVERT_1x0565_TO_1x8888 t1, t2, s0, s1 + MIPS_UN8x4_MUL_UN8 t2, t0, t1, t4, s0, s1, s2 + addu_s.qb t1, t1, a1 + CONVERT_1x8888_TO_1x0565 t1, t2, s0, s1 + + sh t2, 0(a0) + +4: + RESTORE_REGS_FROM_STACK 0, s0, s1, s2 +5: + j ra + nop + +END(pixman_composite_over_n_0565_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (32bit constant) + * a2 - w + */ + + beqz a2, 5f + nop + + not t0, a1 + srl t0, t0, 24 + bgtz t0, 1f + nop +0: + sw a1, 0(a0) + addiu a2, a2, -1 + bgtz a2, 0b + addiu a0, a0, 4 + j ra + nop + +1: + SAVE_REGS_ON_STACK 0, s0, s1, s2 + li t4, 0x00ff00ff + addiu t1, a2, -1 + beqz t1, 3f + nop +2: + lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ + lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ + + MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t0, t7, t8, t4, t9, s0, s1, s2, t2, t3 + + addu_s.qb t7, t7, a1 + addu_s.qb t8, t8, a1 + + sw t7, 0(a0) + sw t8, 4(a0) + + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 2b + addiu a0, a0, 8 +3: + beqz a2, 4f + nop + + lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ + + MIPS_UN8x4_MUL_UN8 t1, t0, t3, t4, t5, t6, t7 + + addu_s.qb t3, t3, a1 + + sw t3, 0(a0) + +4: + RESTORE_REGS_FROM_STACK 0, s0, s1, s2 +5: + j ra + nop + +END(pixman_composite_over_n_8888_asm_mips) + LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm_mips) /* * a0 - dst (a8) diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c index 44565e7..afe4ec6 100644 --- a/pixman/pixman-mips-dspr2.c +++ b/pixman/pixman-mips-dspr2.c @@ -81,6 +81,11 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_0565_n_0565, PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, add_8888_n_8888, uint32_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_0565, + uint16_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_8888, + uint32_t, 1) + PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8_8_8, uint8_t, 1, uint8_t, 1, uint8_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8888_8_8888, uint32_t, 1, @@ -278,7 +283,9 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] = PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, mips_composite_over_n_8_8888), PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, mips_composite_over_n_8_0565), PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, mips_composite_over_n_8_0565), - + PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, mips_composite_over_n_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, mips_composite_over_n_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, mips_composite_over_n_8888), PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, mips_composite_over_8888_n_8888), PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, mips_composite_over_8888_n_8888), PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, mips_composite_over_8888_n_0565), diff --git a/pixman/pixman-mips-dspr2.h b/pixman/pixman-mips-dspr2.h index bddcfd8..186a501 100644 --- a/pixman/pixman-mips-dspr2.h +++ b/pixman/pixman-mips-dspr2.h @@ -85,6 +85,42 @@ mips_composite_##name (pixman_implementation_t *imp, \ } \ } +/****************************************************************/ + +#define PIXMAN_MIPS_BIND_FAST_PATH_N_DST(flags, name, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_mips (dst_type *dst, \ + uint32_t src, \ + int32_t w); \ + \ +static void \ +mips_composite_##name (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type *dst_line, *dst; \ + int32_t dst_stride; \ + uint32_t src; \ + \ + src = _pixman_image_get_solid ( \ + imp, src_image, dest_image->bits.format); \ + \ + if ((flags & SKIP_ZERO_SRC) && src == 0) \ + return; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + \ + while (height--) \ + { \ + dst = dst_line; \ + dst_line += dst_stride; \ + \ + pixman_composite_##name##_asm_mips (dst, src, width); \ + } \ +} + /*******************************************************************/ #define PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST(flags, name, \ -- cgit v1.2.3