diff options
author | Nemanja Lukic <nemanja.lukic@rt-rk.com> | 2013-04-15 19:32:57 +0200 |
---|---|---|
committer | Søren Sandmann Pedersen <ssp@redhat.com> | 2013-04-30 15:38:43 -0400 |
commit | 5858f09d264ef762ddcf7ede324bfce9f5991d29 (patch) | |
tree | 21532165cc0716c40778149650d25038577fc63f | |
parent | 311d55b6d8e1ac3acaa12d1d7c3eefdcfdc70718 (diff) |
MIPS: DSPr2: Added src_0565_8888 nearest neighbor fast path.
Performance numbers before/after on MIPS-74kc @ 1GHz:
lowlevel-blt-bench results
Reference (before):
src_0565_8888 = L1: 20.70 L2: 19.22 M: 12.50 ( 49.79%) HT: 10.45 VT: 10.18 R: 9.99 RT: 5.31 ( 31Kops/s)
Optimized:
src_0565_8888 = L1: 62.98 L2: 53.44 M: 23.07 ( 91.87%) HT: 19.85 VT: 19.15 R: 17.70 RT: 9.68 ( 43Kops/s)
-rw-r--r-- | pixman/pixman-mips-dspr2-asm.S | 59 | ||||
-rw-r--r-- | pixman/pixman-mips-dspr2-asm.h | 21 | ||||
-rw-r--r-- | pixman/pixman-mips-dspr2.c | 10 |
3 files changed, 79 insertions, 11 deletions
diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S index 3996756..b94e66f 100644 --- a/pixman/pixman-mips-dspr2-asm.S +++ b/pixman/pixman-mips-dspr2-asm.S @@ -3196,6 +3196,65 @@ LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips) END(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips) +LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (r5g6b5) + * a2 - w + * a3 - vx + * 16(sp) - unit_x + */ + + SAVE_REGS_ON_STACK 0, v0 + beqz a2, 3f + nop + + lw v0, 16(sp) /* v0 = unit_x */ + addiu t1, a2, -1 + beqz t1, 2f + nop + + li t4, 0x07e007e0 + li t5, 0x001F001F +1: + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 1 /* t0 = t0 * 2 ((r5g6b5)) */ + addu t0, a1, t0 + lhu t0, 0(t0) /* t0 = source ((r5g6b5)) */ + addu a3, a3, v0 /* a3 = vx + unit_x */ + sra t1, a3, 16 /* t1 = vx >> 16 */ + sll t1, t1, 1 /* t1 = t1 * 2 ((r5g6b5)) */ + addu t1, a1, t1 + lhu t1, 0(t1) /* t1 = source ((r5g6b5)) */ + addu a3, a3, v0 /* a3 = vx + unit_x */ + addiu a2, a2, -2 + + CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9 + + sw t2, 0(a0) + sw t3, 4(a0) + + addiu t2, a2, -1 + bgtz t2, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 1 /* t0 = t0 * 2 ((r5g6b5)) */ + addu t0, a1, t0 + lhu t0, 0(t0) /* t0 = source ((r5g6b5)) */ + + CONVERT_1x0565_TO_1x8888 t0, t1, t2, t3 + + sw t1, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, v0 + j ra + nop + +END(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips) + LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips) /* * a0 - dst (r5g6b5) diff --git a/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman-mips-dspr2-asm.h index bc458b6..cab122d 100644 --- a/pixman/pixman-mips-dspr2-asm.h +++ b/pixman/pixman-mips-dspr2-asm.h @@ -354,17 +354,16 @@ LEAF_MIPS32R2(symbol) \ out1_565, out2_565, \ maskR, maskG, maskB, \ scratch1, scratch2 - precrq.ph.w \scratch1, \in2_8888, \in1_8888 - 
precr_sra.ph.w \in2_8888, \in1_8888, 0 - shll.ph \scratch1, \scratch1, 8 - srl \in2_8888, \in2_8888, 3 - and \scratch2, \in2_8888, \maskB - and \scratch1, \scratch1, \maskR - srl \in2_8888, \in2_8888, 2 - and \out2_565, \in2_8888, \maskG - or \out2_565, \out2_565, \scratch2 - or \out1_565, \out2_565, \scratch1 - srl \out2_565, \out1_565, 16 + precr.qb.ph \scratch1, \in2_8888, \in1_8888 + precrq.qb.ph \in2_8888, \in2_8888, \in1_8888 + and \out1_565, \scratch1, \maskR + shrl.ph \scratch1, \scratch1, 3 + shll.ph \in2_8888, \in2_8888, 3 + and \scratch1, \scratch1, \maskB + or \out1_565, \out1_565, \scratch1 + and \in2_8888, \in2_8888, \maskG + or \out1_565, \out1_565, \in2_8888 + srl \out2_565, \out1_565, 16 .endm /* diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c index c227feb..1949921 100644 --- a/pixman/pixman-mips-dspr2.c +++ b/pixman/pixman-mips-dspr2.c @@ -125,6 +125,8 @@ PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_8888, OVER, uint32_t, uint32_t) PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_0565, OVER, uint32_t, uint16_t) +PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (0565_8888, SRC, + uint16_t, uint32_t) PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_8888, SRC, uint32_t, uint32_t) @@ -370,6 +372,14 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] = PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_0565), PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_0565), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, mips_0565_8888), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8888), + /* Note: NONE repeat is not supported yet */ + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, mips_0565_8888), + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, mips_0565_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, mips_0565_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, mips_0565_8888), + 
PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_8_0565), PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_8_0565), |