diff options
author | Nemanja Lukic <nemanja.lukic@rt-rk.com> | 2013-01-22 02:59:44 +0100 |
---|---|---|
committer | Nemanja Lukic <nemanja.lukic@rt-rk.com> | 2013-01-22 03:10:31 +0100 |
commit | a67b0e24d7eaba3b9525eeb8bf357ded95cc6b7c (patch) | |
tree | 9f06595b6698fa35ad5e3bca3cb3896e881083ae | |
parent | 35cc965514ca6e665c18411fcf66db826d559c2a (diff) |
MIPS: DSPr2: Added more fast-paths for OUT_REVERSE operation:
- out_reverse_8_0565
- out_reverse_8_8888
Performance numbers before/after on MIPS-74kc @ 1GHz:
lowlevel-blt-bench results
Reference (before):
out_reverse_8_0565 = L1: 14.29 L2: 13.58 M: 12.14 ( 24.16%) HT: 9.23 VT: 9.12 R: 8.84 RT: 4.75 ( 36Kops/s)
out_reverse_8_8888 = L1: 27.46 L2: 23.24 M: 17.41 ( 57.73%) HT: 12.61 VT: 12.47 R: 11.79 RT: 5.86 ( 41Kops/s)
Optimized:
out_reverse_8_0565 = L1: 28.24 L2: 25.64 M: 20.63 ( 41.05%) HT: 16.69 VT: 16.14 R: 15.50 RT: 8.69 ( 52Kops/s)
out_reverse_8_8888 = L1: 52.78 L2: 41.44 M: 23.50 ( 77.94%) HT: 18.79 VT: 18.16 R: 16.90 RT: 9.11 ( 53Kops/s)
-rw-r--r-- | pixman/pixman-mips-dspr2-asm.S | 110 | ||||
-rw-r--r-- | pixman/pixman-mips-dspr2.c | 8 |
2 files changed, 118 insertions, 0 deletions
diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S index ba22e62..64ef660 100644 --- a/pixman/pixman-mips-dspr2-asm.S +++ b/pixman/pixman-mips-dspr2-asm.S @@ -2100,6 +2100,116 @@ LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_asm_mips) END(pixman_composite_add_8888_8888_asm_mips) +LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_0565_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (a8) + * a2 - w + */ + + beqz a2, 4f + nop + + SAVE_REGS_ON_STACK 0, s0, s1, s2, s3 + li t2, 0xf800f800 + li t3, 0x07e007e0 + li t4, 0x001F001F + li t5, 0x00ff00ff + + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + lbu t0, 0(a1) /* t0 = source (a8) */ + lbu t1, 1(a1) /* t1 = source (a8) */ + lhu t6, 0(a0) /* t6 = destination (r5g6b5) */ + lhu t7, 2(a0) /* t7 = destination (r5g6b5) */ + addiu a1, a1, 2 + + not t0, t0 + not t1, t1 + andi t0, 0xff /* t0 = neg source1 */ + andi t1, 0xff /* t1 = neg source2 */ + CONVERT_2x0565_TO_2x8888 t6, t7, t8, t9, t3, t4, s0, s1, s2, s3 + MIPS_2xUN8x4_MUL_2xUN8 t8, t9, t0, t1, t6, t7, t5, s0, s1, s2, s3, t8, t9 + CONVERT_2x8888_TO_2x0565 t6, t7, t8, t9, t2, t3, t4, s0, s1 + + sh t8, 0(a0) + sh t9, 2(a0) + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 1b + addiu a0, a0, 4 +2: + beqz a2, 3f + nop + lbu t0, 0(a1) /* t0 = source (a8) */ + lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ + + not t0, t0 + andi t0, 0xff /* t0 = neg source */ + CONVERT_1x0565_TO_1x8888 t1, t2, t3, t4 + MIPS_UN8x4_MUL_UN8 t2, t0, t1, t5, t3, t4, t6 + CONVERT_1x8888_TO_1x0565 t1, t2, t3, t4 + + sh t2, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3 +4: + j ra + nop + +END(pixman_composite_out_reverse_8_0565_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (a8) + * a2 - w + */ + + beqz a2, 3f + nop + li t4, 0x00ff00ff + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + lbu t0, 0(a1) /* t0 = source (a8) */ + lbu t1, 1(a1) /* t1 = source (a8) */ + lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ + lw 
t3, 4(a0) /* t3 = destination (a8r8g8b8) */ + addiu a1, a1, 2 + not t0, t0 + not t1, t1 + andi t0, 0xff /* t0 = neg source */ + andi t1, 0xff /* t1 = neg source */ + + MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t5, t6, t4, t7, t8, t9, t2, t3, t0 + + sw t5, 0(a0) + sw t6, 4(a0) + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + lbu t0, 0(a1) /* t0 = source (a8) */ + lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ + not t0, t0 + andi t0, 0xff /* t0 = neg source */ + + MIPS_UN8x4_MUL_UN8 t1, t0, t2, t4, t3, t5, t6 + + sw t2, 0(a0) +3: + j ra + nop + +END(pixman_composite_out_reverse_8_8888_asm_mips) + LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips) /* * a0 - dst (r5g6b5) diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c index 11f1254..a7e6f8a 100644 --- a/pixman/pixman-mips-dspr2.c +++ b/pixman/pixman-mips-dspr2.c @@ -54,6 +54,10 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8_8, uint8_t, 1, uint8_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8888_8888, uint32_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, out_reverse_8_0565, + uint8_t, 1, uint16_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, out_reverse_8_8888, + uint8_t, 1, uint32_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (0, src_n_8_8888, uint8_t, 1, uint32_t, 1) @@ -324,6 +328,10 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] = PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, mips_composite_add_8_8), PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, mips_composite_add_8888_8888), PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, mips_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, mips_composite_out_reverse_8_0565), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, b5g6r5, mips_composite_out_reverse_8_0565), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, a8r8g8b8, mips_composite_out_reverse_8_8888), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, a8b8g8r8, 
mips_composite_out_reverse_8_8888), PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_8_0565), PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_8_0565), |