diff options
author | Ben Avison <bavison@riscosopen.org> | 2014-09-05 21:31:46 +0100 |
---|---|---|
committer | Ben Avison <bavison@riscosopen.org> | 2015-10-15 15:13:20 +0100 |
commit | eb89724b9ea7ce8773c4bdc344137fb07c9a7d35 (patch) | |
tree | b3fbd2faddbdb291bf8971ceff0fe90b2dc68ae4 | |
parent | 0391b69b8cb60a484cd74740a37eab2ea7386b8b (diff) |
armv6: Add fetcher for a8 bilinear-interpolation scaled images
This is constrained to support X increments in the positive X direction only.
It also doesn't attempt to support any form of image repeat.
Here are some affine-bench results for a variety of horizontal and vertical
scaling factors.
Before:
x increment 0.5 0.75 1.0 1.5 2.0
y increment
0.5 6.2 6.2 6.2 6.1 6.0
0.75 6.2 6.1 6.1 6.0 5.9
1.0 6.2 6.1 n/a 5.9 5.8
1.5 6.1 6.0 5.9 5.8 5.6
2.0 6.1 6.0 5.9 5.7 5.5
After:
x increment 0.5 0.75 1.0 1.5 2.0
y increment
0.5 22.2 21.2 19.7 21.0 20.4
0.75 19.4 18.3 16.7 18.2 17.4
1.0 24.7 22.3 n/a 22.1 20.4
1.5 14.2 13.0 11.5 12.9 12.1
2.0 12.0 10.9 9.5 10.8 10.0
Improvement:
x increment 0.5 0.75 1.0 1.5 2.0
y increment
0.5 +256.4% +242.8% +219.6% +246.6% +241.3%
0.75 +212.9% +197.8% +173.7% +203.4% +195.1%
1.0 +300.2% +265.9% n/a +273.4% +251.2%
1.5 +131.8% +115.6% +93.1% +123.2% +114.0%
2.0 +97.7% +82.9% +62.8% +91.0% +82.9%
-rw-r--r-- | pixman/pixman-arm-common.h | 19 | ||||
-rw-r--r-- | pixman/pixman-arm-simd-asm-scaled.S | 15 | ||||
-rw-r--r-- | pixman/pixman-arm-simd.c | 2 |
3 files changed, 36 insertions, 0 deletions
diff --git a/pixman/pixman-arm-common.h b/pixman/pixman-arm-common.h index 9205b23..cccdac7 100644 --- a/pixman/pixman-arm-common.h +++ b/pixman/pixman-arm-common.h @@ -678,6 +678,25 @@ cputype##_convert_adjacent_r5g6b5 (const void *void_source, \ b |= b >> 5; \ *rag = 0xff0000 | g; \ *rrb = (r << 16) | b; \ +} \ + \ +static inline void \ +cputype##_convert_adjacent_a8 (const void *void_source, \ + int x, \ + uint32_t *lag, \ + uint32_t *rag, \ + uint32_t *lrb, \ + uint32_t *rrb) \ +{ \ + const uint8_t *source = void_source; \ + uint32_t left = source[pixman_fixed_to_int (x)]; \ + uint32_t right; \ + if (pixman_fixed_fraction (x) != 0) \ + right = source[pixman_fixed_to_int (x) + 1]; \ + *lag = left << 16; \ + *rag = right << 16; \ + *lrb = 0; \ + *rrb = 0; \ } #define PIXMAN_ARM_BIND_GET_SCANLINE_BILINEAR_SCALED_COMMON(cputype, name, type) \ diff --git a/pixman/pixman-arm-simd-asm-scaled.S b/pixman/pixman-arm-simd-asm-scaled.S index ca86bac..24c1a27 100644 --- a/pixman/pixman-arm-simd-asm-scaled.S +++ b/pixman/pixman-arm-simd-asm-scaled.S @@ -233,6 +233,21 @@ generate_bilinear_scaled_cover_functions 32, x8r8g8b8, 3, 3, 3, 3, 3, 3, 3, 3, generate_bilinear_scaled_cover_functions 16, r5g6b5, 2, 2, 3, 3, 3, 3, 3, 3, nop_macro, convert_0565_08080808 +.macro init_8_08080808 + @ Ideally we'd also skip all subsequent processing of the red/blue + @ components since they remain 0 throughout. Also, there may be + @ some scope for simplifying the alpha component processing. + @ However, this approach is still an improvement for now. 
+ mov RB_IN0, #0 + mov RB_IN1, #0 +.endm + +.macro convert_8_08080808 in_ag, rb + mov \in_ag, \in_ag, lsl #16 +.endm + +generate_bilinear_scaled_cover_functions 8, a8, 2, 2, 3, 3, 3, 3, 3, 3, init_8_08080808, convert_8_08080808 + /******************************************************************************/ .macro pass2_1pixel_internal t0, t1, b0, b1, tmp, mul, d diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c index 39e9afa..aec1b77 100644 --- a/pixman/pixman-arm-simd.c +++ b/pixman/pixman-arm-simd.c @@ -178,6 +178,7 @@ PIXMAN_ARM_DECLARE_BILINEAR_SCALED_SUPPORT(armv6) PIXMAN_ARM_BIND_GET_SCANLINE_BILINEAR_SCALED_COVER(armv6, a8r8g8b8, uint32_t) PIXMAN_ARM_BIND_GET_SCANLINE_BILINEAR_SCALED_COVER(armv6, x8r8g8b8, uint32_t) PIXMAN_ARM_BIND_GET_SCANLINE_BILINEAR_SCALED_COVER(armv6, r5g6b5, uint16_t) +PIXMAN_ARM_BIND_GET_SCANLINE_BILINEAR_SCALED_COVER(armv6, a8, uint8_t) #define NEAREST_SCALED_COVER_USES_SRC_BUFFER(op, src_format, dst_format) \ (PIXMAN_OP_##op != PIXMAN_OP_SRC || \ @@ -538,6 +539,7 @@ static const pixman_iter_info_t arm_simd_iters[] = PIXMAN_ARM_UNTRANSFORMED_COVER_FETCHER (armv6, a8), PIXMAN_ARM_NEAREST_SCALED_COVER_FETCHER (armv6, a8), + PIXMAN_ARM_BILINEAR_SCALED_FETCHER (armv6, a8), { PIXMAN_null }, }; |