summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ben Avison <bavison@riscosopen.org> 2014-09-05 21:31:46 +0100
committer Ben Avison <bavison@riscosopen.org> 2015-10-15 15:13:20 +0100
commit eb89724b9ea7ce8773c4bdc344137fb07c9a7d35 (patch)
tree b3fbd2faddbdb291bf8971ceff0fe90b2dc68ae4
parent 0391b69b8cb60a484cd74740a37eab2ea7386b8b (diff)
armv6: Add fetcher for a8 bilinear-interpolation scaled images
This is constrained to support X increments in the positive X direction only.
It also doesn't attempt to support any form of image repeat.

Here are some affine-bench results for a variety of horizontal and vertical
scaling factors.

Before:

                 x increment
y increment      0.5       0.75      1.0       1.5       2.0
    0.5          6.2       6.2       6.2       6.1       6.0
    0.75         6.2       6.1       6.1       6.0       5.9
    1.0          6.2       6.1                 5.9       5.8
    1.5          6.1       6.0       5.9       5.8       5.6
    2.0          6.1       6.0       5.9       5.7       5.5

After:

                 x increment
y increment      0.5       0.75      1.0       1.5       2.0
    0.5          22.2      21.2      19.7      21.0      20.4
    0.75         19.4      18.3      16.7      18.2      17.4
    1.0          24.7      22.3                22.1      20.4
    1.5          14.2      13.0      11.5      12.9      12.1
    2.0          12.0      10.9      9.5       10.8      10.0

Improvement:

                 x increment
y increment      0.5       0.75      1.0       1.5       2.0
    0.5          +256.4%   +242.8%   +219.6%   +246.6%   +241.3%
    0.75         +212.9%   +197.8%   +173.7%   +203.4%   +195.1%
    1.0          +300.2%   +265.9%             +273.4%   +251.2%
    1.5          +131.8%   +115.6%   +93.1%    +123.2%   +114.0%
    2.0          +97.7%    +82.9%    +62.8%    +91.0%    +82.9%
-rw-r--r-- pixman/pixman-arm-common.h 19
-rw-r--r-- pixman/pixman-arm-simd-asm-scaled.S 15
-rw-r--r-- pixman/pixman-arm-simd.c 2
3 files changed, 36 insertions, 0 deletions
diff --git a/pixman/pixman-arm-common.h b/pixman/pixman-arm-common.h
index 9205b23..cccdac7 100644
--- a/pixman/pixman-arm-common.h
+++ b/pixman/pixman-arm-common.h
@@ -678,6 +678,25 @@ cputype##_convert_adjacent_r5g6b5 (const void *void_source, \
b |= b >> 5; \
*rag = 0xff0000 | g; \
*rrb = (r << 16) | b; \
+} \
+ \
+static inline void \
+cputype##_convert_adjacent_a8 (const void *void_source, \
+ int x, \
+ uint32_t *lag, \
+ uint32_t *rag, \
+ uint32_t *lrb, \
+ uint32_t *rrb) \
+{ \
+ const uint8_t *source = void_source; \
+ uint32_t left = source[pixman_fixed_to_int (x)]; \
+ /* Only read source[x + 1] when x has a fractional part; otherwise reuse left so right is never indeterminate. */ \
+ uint32_t right = pixman_fixed_fraction (x) != 0 ? \
+ source[pixman_fixed_to_int (x) + 1] : left; \
+ *lag = left << 16; /* each 8-bit sample is returned shifted up by 16 bits */ \
+ *rag = right << 16; \
+ *lrb = 0; /* both red/blue outputs are always zero for this format */ \
+ *rrb = 0; \
 }
#define PIXMAN_ARM_BIND_GET_SCANLINE_BILINEAR_SCALED_COMMON(cputype, name, type) \
diff --git a/pixman/pixman-arm-simd-asm-scaled.S b/pixman/pixman-arm-simd-asm-scaled.S
index ca86bac..24c1a27 100644
--- a/pixman/pixman-arm-simd-asm-scaled.S
+++ b/pixman/pixman-arm-simd-asm-scaled.S
@@ -233,6 +233,21 @@ generate_bilinear_scaled_cover_functions 32, x8r8g8b8, 3, 3, 3, 3, 3, 3, 3, 3,
generate_bilinear_scaled_cover_functions 16, r5g6b5, 2, 2, 3, 3, 3, 3, 3, 3, nop_macro, convert_0565_08080808
+.macro init_8_08080808
+ @ Ideally we'd also skip all subsequent processing of the red/blue
+ @ components since they remain 0 throughout. Also, there may be
+ @ some scope for simplifying the alpha component processing.
+ @ However, this approach is still an improvement for now.
+ mov RB_IN0, #0 @ clear both red/blue input registers
+ mov RB_IN1, #0
+.endm
+
+.macro convert_8_08080808 in_ag, rb
+ mov \in_ag, \in_ag, lsl #16 @ shift in_ag left by 16 bits in place; the rb argument is unused
+.endm
+
+generate_bilinear_scaled_cover_functions 8, a8, 2, 2, 3, 3, 3, 3, 3, 3, init_8_08080808, convert_8_08080808 @ a8 variant, using the init/convert macros defined just above
+
/******************************************************************************/
.macro pass2_1pixel_internal t0, t1, b0, b1, tmp, mul, d
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index 39e9afa..aec1b77 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -178,6 +178,7 @@ PIXMAN_ARM_DECLARE_BILINEAR_SCALED_SUPPORT(armv6)
PIXMAN_ARM_BIND_GET_SCANLINE_BILINEAR_SCALED_COVER(armv6, a8r8g8b8, uint32_t)
PIXMAN_ARM_BIND_GET_SCANLINE_BILINEAR_SCALED_COVER(armv6, x8r8g8b8, uint32_t)
PIXMAN_ARM_BIND_GET_SCANLINE_BILINEAR_SCALED_COVER(armv6, r5g6b5, uint16_t)
+PIXMAN_ARM_BIND_GET_SCANLINE_BILINEAR_SCALED_COVER(armv6, a8, uint8_t)
#define NEAREST_SCALED_COVER_USES_SRC_BUFFER(op, src_format, dst_format) \
(PIXMAN_OP_##op != PIXMAN_OP_SRC || \
@@ -538,6 +539,7 @@ static const pixman_iter_info_t arm_simd_iters[] =
PIXMAN_ARM_UNTRANSFORMED_COVER_FETCHER (armv6, a8),
PIXMAN_ARM_NEAREST_SCALED_COVER_FETCHER (armv6, a8),
+ PIXMAN_ARM_BILINEAR_SCALED_FETCHER (armv6, a8),
{ PIXMAN_null },
};