diff options
author | Ben Avison <bavison@riscosopen.org> | 2014-07-30 17:26:03 +0100 |
---|---|---|
committer | Ben Avison <bavison@riscosopen.org> | 2015-10-15 15:04:06 +0100 |
commit | 7d54a7a32df50371dd8c39969590e1af3d80e121 (patch) | |
tree | 9edb71438852cd02f9d90db6de2dd7c289a5b113 | |
parent | c9bc7dc3ee7a02aac17eefe1448f8958ed896388 (diff) |
armv6: Add four more nearest-scaled-cover fast paths
These complete the set of fast paths for which pixman-fast-path.c currently
provides versions that get selected in preference to the armv6-optimised
scanline fetchers/combiners/writeback routines.
Because generation of these fast paths is macroised, the patch required
to add them is fairly simple.
lowlevel-blt-bench -n over_8888_8888:
Before After
Mean StdDev Mean StdDev Confidence Change
L1 13.8 0.0 26.5 0.2 100.0% +91.7%
L2 9.4 0.2 22.9 0.4 100.0% +142.6%
M 8.6 0.0 23.8 0.0 100.0% +176.1%
HT 7.4 0.0 14.1 0.1 100.0% +91.2%
VT 7.3 0.0 13.4 0.1 100.0% +84.1%
R 7.0 0.0 13.0 0.1 100.0% +85.9%
RT 4.5 0.1 6.2 0.1 100.0% +36.6%
affine-bench * 0 0 1 over a8r8g8b8 a8r8g8b8:
Before After
Mean StdDev Mean StdDev Confidence Change
0.5 9.4 0.0 28.0 0.0 100.0% +197.4%
0.75 9.0 0.0 26.1 0.0 100.0% +190.2%
1.0 8.6 0.0 24.4 0.0 100.0% +184.6%
1.5 7.9 0.0 21.7 0.0 100.0% +173.4%
2.0 7.3 0.0 19.6 0.0 100.0% +166.6%
lowlevel-blt-bench -n src_x888_8888:
Before After
Mean StdDev Mean StdDev Confidence Change
L1 108.6 2.0 66.3 0.9 100.0% -39.0%
L2 32.4 1.5 44.3 2.1 100.0% +36.8%
M 27.5 0.1 62.0 0.1 100.0% +125.6%
HT 20.3 0.1 28.7 0.2 100.0% +41.2%
VT 19.9 0.1 26.7 0.1 100.0% +34.4%
R 18.6 0.1 25.3 0.2 100.0% +36.3%
RT 8.7 0.1 9.8 0.2 100.0% +12.6%
affine-bench * 0 0 1 src x8r8g8b8 a8r8g8b8:
Before After
Mean StdDev Mean StdDev Confidence Change
0.5 45.2 0.0 97.2 0.1 100.0% +115.1%
0.75 35.9 0.1 76.7 0.1 100.0% +113.9%
1.0 29.6 0.1 61.1 0.1 100.0% +106.4%
1.5 21.4 0.0 52.7 0.1 100.0% +145.9%
2.0 16.7 0.0 43.0 0.1 100.0% +156.9%
lowlevel-blt-bench -n src_8888_0565:
Before After
Mean StdDev Mean StdDev Confidence Change
L1 57.2 0.7 43.1 0.4 100.0% -24.7%
L2 23.0 1.0 32.8 1.0 100.0% +42.5%
M 24.8 0.0 42.2 0.0 100.0% +70.0%
HT 18.0 0.1 22.1 0.1 100.0% +22.5%
VT 17.1 0.1 21.0 0.1 100.0% +22.5%
R 16.5 0.1 20.0 0.1 100.0% +21.4%
RT 8.3 0.2 8.4 0.1 95.0% +1.0% (insignificant)
affine-bench * 0 0 1 src a8r8g8b8 r5g6b5:
Before After
Mean StdDev Mean StdDev Confidence Change
0.5 34.9 0.0 55.3 0.0 100.0% +58.7%
0.75 29.3 0.0 49.1 0.0 100.0% +67.4%
1.0 24.8 0.0 42.6 0.1 100.0% +71.6%
1.5 19.0 0.0 38.2 0.1 100.0% +100.7%
2.0 15.4 0.0 31.8 0.0 100.0% +107.1%
lowlevel-blt-bench -n over_8888_0565:
Before After
Mean StdDev Mean StdDev Confidence Change
L1 9.8 0.0 15.3 0.1 100.0% +56.6%
L2 7.4 0.0 14.3 0.2 100.0% +91.7%
M 7.5 0.0 15.4 0.0 100.0% +106.0%
HT 6.5 0.0 10.1 0.0 100.0% +54.5%
VT 6.4 0.0 9.9 0.0 100.0% +54.6%
R 6.2 0.0 9.5 0.0 100.0% +52.1%
RT 4.2 0.0 4.6 0.1 100.0% +9.8%
affine-bench * 0 0 1 over a8r8g8b8 r5g6b5:
Before After
Mean StdDev Mean StdDev Confidence Change
0.5 8.0 0.0 17.3 0.0 100.0% +116.1%
0.75 7.8 0.0 16.5 0.0 100.0% +112.9%
1.0 7.5 0.0 15.7 0.0 100.0% +110.5%
1.5 7.0 0.0 14.8 0.0 100.0% +112.8%
2.0 6.5 0.0 13.7 0.0 100.0% +111.4%
-rw-r--r-- | pixman/pixman-arm-simd.c | 17 |
1 files changed, 17 insertions, 0 deletions
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c index 5f62ed3..cc946ec 100644 --- a/pixman/pixman-arm-simd.c +++ b/pixman/pixman-arm-simd.c @@ -238,7 +238,11 @@ pixman_get_scanline_r5g6b5_nearest_scaled_cover_r5g6b5_asm_armv6(uint32_t const uint16_t *source); BIND_NEAREST_SCALED_COVER_FAST_PATH_SRC_DST (armv6, src_8888_8888, SRC, src, uint32_t, uint32_t, a8r8g8b8, a8r8g8b8) +BIND_NEAREST_SCALED_COVER_FAST_PATH_SRC_DST (armv6, src_x888_8888, SRC, src, uint32_t, uint32_t, x8r8g8b8, a8r8g8b8) +BIND_NEAREST_SCALED_COVER_FAST_PATH_SRC_DST (armv6, src_8888_0565, SRC, src, uint32_t, uint16_t, a8r8g8b8, r5g6b5) BIND_NEAREST_SCALED_COVER_FAST_PATH_SRC_DST (armv6, src_0565_0565, SRC, src, uint16_t, uint16_t, r5g6b5, r5g6b5) +BIND_NEAREST_SCALED_COVER_FAST_PATH_SRC_DST (armv6, over_8888_8888, OVER, over, uint32_t, uint32_t, a8r8g8b8, a8r8g8b8) +BIND_NEAREST_SCALED_COVER_FAST_PATH_SRC_DST (armv6, over_8888_0565, OVER, over, uint32_t, uint16_t, a8r8g8b8, r5g6b5) void pixman_composite_src_n_8888_asm_armv6 (int32_t w, @@ -457,12 +461,25 @@ static const pixman_fast_path_t arm_simd_fast_paths[] = PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, r5g6b5, r5g6b5, src_0565_0565), PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, b5g6r5, b5g6r5, src_0565_0565), + PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, a8r8g8b8, r5g6b5, src_8888_0565), + PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, x8r8g8b8, r5g6b5, src_8888_0565), + PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, a8b8g8r8, b5g6r5, src_8888_0565), + PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, x8b8g8r8, b5g6r5, src_8888_0565), + PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, OVER, a8r8g8b8, r5g6b5, over_8888_0565), + PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, OVER, a8b8g8r8, b5g6r5, over_8888_0565), + PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, a8r8g8b8, a8r8g8b8, 
src_8888_8888), PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, a8r8g8b8, x8r8g8b8, src_8888_8888), PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, x8r8g8b8, x8r8g8b8, src_8888_8888), PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, a8b8g8r8, a8b8g8r8, src_8888_8888), PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, a8b8g8r8, x8b8g8r8, src_8888_8888), PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, x8b8g8r8, x8b8g8r8, src_8888_8888), + PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, x8r8g8b8, a8r8g8b8, src_x888_8888), + PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, x8b8g8r8, a8b8g8r8, src_x888_8888), + PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, OVER, a8r8g8b8, a8r8g8b8, over_8888_8888), + PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, OVER, a8r8g8b8, x8r8g8b8, over_8888_8888), + PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, OVER, a8b8g8r8, a8b8g8r8, over_8888_8888), + PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, OVER, a8b8g8r8, x8b8g8r8, over_8888_8888), SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565), SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565), |