diff options
-rw-r--r-- | pixman/pixman-mips-dspr2-asm.S | 252 | ||||
-rw-r--r-- | pixman/pixman-mips-dspr2-asm.h | 22 | ||||
-rw-r--r-- | pixman/pixman-mips-dspr2.c | 10 |
3 files changed, 284 insertions, 0 deletions
diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index 3a6b26a..614c628 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -1209,6 +1209,258 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips)
 
 END(pixman_composite_over_8888_8888_asm_mips)
 
+LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm_mips)
+/* Masked ADD on a8 data: dst = saturate (dst + src * mask / 255), 4 pixels per pass, then 1 at a time.
+ * a0 - dst (a8)   read, written and advanced
+ * a1 - src (a8)   read and advanced
+ * a2 - mask (a8)  read and advanced
+ * a3 - w          pixel count, counted down to 0
+ */
+
+    SAVE_REGS_ON_STACK 0, v0, v1         /* v0 = group counter, v1 = src scratch below */
+    li              t9, 0x00ff00ff       /* selects the low byte of each halfword */
+    beqz            a3, 3f               /* w == 0: nothing to do */
+     nop
+
+    srl             v0, a3, 2            /* v0 = how many multiples of 4 dst pixels */
+    beqz            v0, 1f               /* branch if less than 4 src pixels */
+     nop
+
+0:
+    beqz            v0, 1f               /* no full group of 4 left: go to the 1-pixel tail */
+     addiu          v0, v0, -1           /* presumably the branch delay slot (.set noreorder is not visible here) */
+    lbu             t0, 0(a2)            /* t0..t3 = mask[0..3]; byte loads, so no alignment is required */
+    lbu             t1, 1(a2)
+    lbu             t2, 2(a2)
+    lbu             t3, 3(a2)
+    lbu             t4, 0(a0)            /* t4..t7 = dst[0..3] */
+    lbu             t5, 1(a0)
+    lbu             t6, 2(a0)
+    lbu             t7, 3(a0)
+
+    addiu           a2, a2, 4            /* mask += 4 */
+
+    precr_sra.ph.w  t1, t0, 0            /* t1 = m1:m0 as two halfwords */
+    precr_sra.ph.w  t3, t2, 0            /* t3 = m3:m2 */
+    precr_sra.ph.w  t5, t4, 0            /* t5 = d1:d0 */
+    precr_sra.ph.w  t7, t6, 0            /* t7 = d3:d2 */
+
+    precr.qb.ph     t0, t3, t1           /* t0 = m3 m2 m1 m0 packed as four bytes */
+    precr.qb.ph     t1, t7, t5           /* t1 = d3 d2 d1 d0 packed as four bytes */
+
+    lbu             t4, 0(a1)            /* t4, v1, t7, t8 = src[0..3] (t4/t7 are free again) */
+    lbu             v1, 1(a1)
+    lbu             t7, 2(a1)
+    lbu             t8, 3(a1)
+
+    addiu           a1, a1, 4            /* src += 4 */
+
+    precr_sra.ph.w  v1, t4, 0            /* v1 = s1:s0 as two halfwords */
+    precr_sra.ph.w  t8, t7, 0            /* t8 = s3:s2 as two halfwords */
+
+    muleu_s.ph.qbl  t2, t0, t8           /* t2 = m3*s3 : m2*s2 (16-bit products) */
+    muleu_s.ph.qbr  t3, t0, v1           /* t3 = m1*s1 : m0*s0 */
+    shra_r.ph       t4, t2, 8            /* rounded divide by 255 of each product x: t = (x + 0x80) >> 8 ... */
+    shra_r.ph       t5, t3, 8
+    and             t4, t4, t9           /* ... drop the sign bits the arithmetic shift brought in ... */
+    and             t5, t5, t9
+    addq.ph         t2, t2, t4           /* ... x + t ... */
+    addq.ph         t3, t3, t5
+    shra_r.ph       t2, t2, 8            /* ... (x + t + 0x80) >> 8; the result is the low byte of each halfword */
+    shra_r.ph       t3, t3, 8
+    precr.qb.ph     t0, t2, t3           /* t0 = four src*mask results packed as bytes, pixel 0 lowest */
+
+    addu_s.qb       t2, t0, t1           /* per-byte unsigned saturating add with dst */
+
+    sb              t2, 0(a0)            /* store the 4 result bytes, lowest byte first */
+    srl             t2, t2, 8
+    sb              t2, 1(a0)
+    srl             t2, t2, 8
+    sb              t2, 2(a0)
+    srl             t2, t2, 8
+    sb              t2, 3(a0)
+    addiu           a3, a3, -4           /* w -= 4 */
+    b               0b
+     addiu          a0, a0, 4            /* dst += 4; placed after the branch, presumably its delay slot */
+
+1:
+    beqz            a3, 3f               /* no leftover pixels */
+     nop
+2:
+    lbu             t8, 0(a1)            /* one pixel per pass: t8 = src, t0 = mask, t1 = dst */
+    lbu             t0, 0(a2)
+    lbu             t1, 0(a0)
+    addiu           a1, a1, 1
+    addiu           a2, a2, 1
+
+    mul             t2, t0, t8           /* x = mask * src */
+    shra_r.ph       t3, t2, 8            /* same rounded divide by 255 as in the 4-pixel loop */
+    andi            t3, t3, 0xff
+    addq.ph         t2, t2, t3
+    shra_r.ph       t2, t2, 8
+    andi            t2, t2, 0xff         /* keep only the 8-bit result */
+
+    addu_s.qb       t2, t2, t1           /* saturating add with dst */
+    sb              t2, 0(a0)
+    addiu           a3, a3, -1
+    bnez            a3, 2b               /* loop while pixels remain */
+     addiu          a0, a0, 1            /* dst += 1; placed after the branch, presumably its delay slot */
+
+3:
+    RESTORE_REGS_FROM_STACK 0, v0, v1    /* mirrors the SAVE_REGS_ON_STACK at entry */
+    j               ra
+     nop
+
+END(pixman_composite_add_8_8_8_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8_asm_mips)
+/* Masked ADD of a constant onto a8 data: dst = saturate (dst + (src >> 24) * mask / 255).
+ * a0 - dst (a8)              read, written and advanced
+ * a1 - src (32bit constant)  only bits 31..24 are used
+ * a2 - mask (a8)             read and advanced
+ * a3 - w                     pixel count, counted down to 0
+ */
+
+    SAVE_REGS_ON_STACK 0, v0             /* v0 = group counter below */
+    li              t9, 0x00ff00ff       /* selects the low byte of each halfword */
+    beqz            a3, 3f               /* w == 0: nothing to do */
+     nop
+
+    srl             v0, a3, 2            /* v0 = how many multiples of 4 dst pixels */
+    beqz            v0, 1f               /* branch if less than 4 src pixels */
+     nop
+
+    srl             t8, a1, 24           /* t8 = bits 31..24 of the constant */
+    replv.ph        t8, t8               /* replicate it into both halfwords for the paired multiplies */
+
+0:
+    beqz            v0, 1f               /* no full group of 4 left: go to the 1-pixel tail */
+     addiu          v0, v0, -1           /* presumably the branch delay slot (.set noreorder is not visible here) */
+    lbu             t0, 0(a2)            /* t0..t3 = mask[0..3]; byte loads, so no alignment is required */
+    lbu             t1, 1(a2)
+    lbu             t2, 2(a2)
+    lbu             t3, 3(a2)
+    lbu             t4, 0(a0)            /* t4..t7 = dst[0..3] */
+    lbu             t5, 1(a0)
+    lbu             t6, 2(a0)
+    lbu             t7, 3(a0)
+
+    addiu           a2, a2, 4            /* mask += 4 */
+
+    precr_sra.ph.w  t1, t0, 0            /* t1 = m1:m0 as two halfwords */
+    precr_sra.ph.w  t3, t2, 0            /* t3 = m3:m2 */
+    precr_sra.ph.w  t5, t4, 0            /* t5 = d1:d0 */
+    precr_sra.ph.w  t7, t6, 0            /* t7 = d3:d2 */
+
+    precr.qb.ph     t0, t3, t1           /* t0 = m3 m2 m1 m0 packed as four bytes */
+    precr.qb.ph     t1, t7, t5           /* t1 = d3 d2 d1 d0 packed as four bytes */
+
+    muleu_s.ph.qbl  t2, t0, t8           /* t2 = m3*s : m2*s (16-bit products, s = replicated constant byte) */
+    muleu_s.ph.qbr  t3, t0, t8           /* t3 = m1*s : m0*s */
+    shra_r.ph       t4, t2, 8            /* rounded divide by 255 of each product x: t = (x + 0x80) >> 8 ... */
+    shra_r.ph       t5, t3, 8
+    and             t4, t4, t9           /* ... drop the sign bits the arithmetic shift brought in ... */
+    and             t5, t5, t9
+    addq.ph         t2, t2, t4           /* ... x + t ... */
+    addq.ph         t3, t3, t5
+    shra_r.ph       t2, t2, 8            /* ... (x + t + 0x80) >> 8; the result is the low byte of each halfword */
+    shra_r.ph       t3, t3, 8
+    precr.qb.ph     t0, t2, t3           /* t0 = four results packed as bytes, pixel 0 lowest */
+
+    addu_s.qb       t2, t0, t1           /* per-byte unsigned saturating add with dst */
+
+    sb              t2, 0(a0)            /* store the 4 result bytes, lowest byte first */
+    srl             t2, t2, 8
+    sb              t2, 1(a0)
+    srl             t2, t2, 8
+    sb              t2, 2(a0)
+    srl             t2, t2, 8
+    sb              t2, 3(a0)
+    addiu           a3, a3, -4           /* w -= 4 */
+    b               0b
+     addiu          a0, a0, 4            /* dst += 4; placed after the branch, presumably its delay slot */
+
+1:
+    beqz            a3, 3f               /* no leftover pixels */
+     nop
+    srl             t8, a1, 24           /* reload the plain byte: t8 is either unset (w < 4) or holds the replicated pair */
+2:
+    lbu             t0, 0(a2)            /* one pixel per pass: t0 = mask, t1 = dst */
+    lbu             t1, 0(a0)
+    addiu           a2, a2, 1
+
+    mul             t2, t0, t8           /* x = mask * constant byte */
+    shra_r.ph       t3, t2, 8            /* same rounded divide by 255 as in the 4-pixel loop */
+    andi            t3, t3, 0xff
+    addq.ph         t2, t2, t3
+    shra_r.ph       t2, t2, 8
+    andi            t2, t2, 0xff         /* keep only the 8-bit result */
+
+    addu_s.qb       t2, t2, t1           /* saturating add with dst */
+    sb              t2, 0(a0)
+    addiu           a3, a3, -1
+    bnez            a3, 2b               /* loop while pixels remain */
+     addiu          a0, a0, 1            /* dst += 1; placed after the branch, presumably its delay slot */
+
+3:
+    RESTORE_REGS_FROM_STACK 0, v0        /* mirrors the SAVE_REGS_ON_STACK at entry */
+    j               ra
+     nop
+
+END(pixman_composite_add_n_8_8_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8888_asm_mips)
+/* Masked ADD of a constant onto 32-bit pixels, 2 pixels per pass plus an optional single-pixel tail.
+ * a0 - dst (a8r8g8b8)        read and written with word accesses
+ * a1 - src (32bit constant)  passed unchanged to the multiply-add macros
+ * a2 - mask (a8)             one byte per pixel
+ * a3 - w                     pixel count
+ */
+
+    SAVE_REGS_ON_STACK 0, s0, s1, s2     /* s0-s2 are handed to the 2-pixel macro as scratch */
+    li              t4, 0x00ff00ff       /* passed to both macros as their maskLSR argument */
+    beqz            a3, 3f               /* w == 0: nothing to do */
+     nop
+    addiu           t1, a3, -1
+    beqz            t1, 2f               /* w == 1: skip the 2-pixel loop */
+     nop
+1:
+                    /* a1 = source (32bit constant) */
+    lbu             t0, 0(a2)            /* t0 = mask (a8) */
+    lbu             t1, 1(a2)            /* t1 = mask (a8) */
+    lw              t2, 0(a0)            /* t2 = destination (a8r8g8b8) */
+    lw              t3, 4(a0)            /* t3 = destination (a8r8g8b8) */
+    addiu           a2, a2, 2            /* mask += 2 */
+
+    MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 a1, a1, \
+                                       t0, t1, \
+                                       t2, t3, \
+                                       t5, t6, \
+                                       t4, t7, t8, t9, s0, s1, s2 /* outputs in t5/t6; t4 = maskLSR; t7..s2 = scratch1..6 */
+
+    sw              t5, 0(a0)            /* write back both result pixels */
+    sw              t6, 4(a0)
+    addiu           a3, a3, -2           /* w -= 2 */
+    addiu           t1, a3, -1
+    bgtz            t1, 1b               /* repeat while at least 2 pixels remain */
+     addiu          a0, a0, 8            /* dst += 2 pixels; placed after the branch, presumably its delay slot */
+2:
+    beqz            a3, 3f               /* even w: no tail pixel */
+     nop
+                    /* a1 = source (32bit constant) */
+    lbu             t0, 0(a2)            /* t0 = mask (a8) */
+    lw              t1, 0(a0)            /* t1 = destination (a8r8g8b8) */
+
+    MIPS_UN8x4_MUL_UN8_ADD_UN8x4 a1, t0, t1, t2, t4, t3, t5, t6 /* macro defined elsewhere; t2 is stored below, so presumably the output */
+
+    sw              t2, 0(a0)            /* last pixel; pointers are not advanced because nothing follows */
+3:
+    RESTORE_REGS_FROM_STACK 0, s0, s1, s2 /* mirrors the SAVE_REGS_ON_STACK at entry */
+    j               ra
+     nop
+
+END(pixman_composite_add_n_8_8888_asm_mips)
+
 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
 /*
  * a0 - *dst
diff --git a/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman-mips-dspr2-asm.h
index 7327dc6..b330c0f 100644
--- a/pixman/pixman-mips-dspr2-asm.h
+++ b/pixman/pixman-mips-dspr2-asm.h
@@ -600,6 +600,28 @@ LEAF_MIPS32R2(symbol) \
     addu_s.qb \out_8888, \out_8888, \d_8888
 .endm
 
+.macro MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 s1_8888,  \
+                                          s2_8888,  \
+                                          m1_8,     \
+                                          m2_8,     \
+                                          d1_8888,  \
+                                          d2_8888,  \
+                                          out1_8888, \
+                                          out2_8888, \
+                                          maskLSR,  \
+                                          scratch1, scratch2, scratch3, \
+                                          scratch4, scratch5, scratch6
+    MIPS_2xUN8x4_MUL_2xUN8 \s1_8888, \s2_8888, \
+                           \m1_8, \m2_8, \
+                           \out1_8888, \out2_8888, \
+                           \maskLSR, \
+                           \scratch1, \scratch2, \scratch3, \
+                           \scratch4, \scratch5, \scratch6 /* two-pixel multiply, defined elsewhere; results expected in out1/out2 */
+
+    addu_s.qb \out1_8888, \out1_8888, \d1_8888 /* out1 = per-byte saturating add of out1 and d1 */
+    addu_s.qb \out2_8888, \out2_8888, \d2_8888 /* out2 = per-byte saturating add of out2 and d2 */
+.endm
+
 .macro BILINEAR_INTERPOLATE_SINGLE_PIXEL tl, tr, bl, br,         \
                                          scratch1, scratch2,     \
                                          alpha, red, green, blue \
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index e80bbb6..30d2a85 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -59,6 +59,10 @@ PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888,
                                        uint8_t, 1, uint32_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565,
                                        uint8_t, 1, uint16_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, add_n_8_8,
+                                       uint8_t, 1, uint8_t, 1) /* presumably mask type/count then dst type/count; confirm against the macro definition */
+PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, add_n_8_8888,
+                                       uint8_t, 1, uint32_t, 1) /* byte mask, 32-bit dst, matching the asm routine's a8 / a8r8g8b8 operands */
 
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_8888_n_8888,
                                       uint32_t, 1, uint32_t, 1)
@@ -67,6 +71,8 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_8888_n_0565,
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_0565_n_0565,
                                       uint16_t, 1, uint16_t, 1)
 
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8_8_8, uint8_t, 1,
+                                         uint8_t, 1, uint8_t, 1) /* src, mask and dst are all byte-sized, as in the asm routine */
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8_8888, uint32_t, 1,
                                          uint8_t, 1, uint32_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8_0565, uint32_t, 1,
@@ -271,6 +277,10 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     x8r8g8b8, mips_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     a8b8g8r8, mips_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     x8b8g8r8, mips_composite_over_8888_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8,       mips_composite_add_n_8_8), /* presumably (op, src, mask, dst, function) */
+    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8r8g8b8, mips_composite_add_n_8_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8b8g8r8, mips_composite_add_n_8_8888), /* same routine as the a8r8g8b8 entry; NOTE(review): confirm the solid colour arrives in dst channel order */
+    PIXMAN_STD_FAST_PATH (ADD,  a8,       a8,       a8,       mips_composite_add_8_8_8),
 
     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8888),
     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8888), |