summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Ben Avison <bavison@riscosopen.org> 2014-07-30 17:26:03 +0100
committer Ben Avison <bavison@riscosopen.org> 2015-10-15 15:04:06 +0100
commit 7d54a7a32df50371dd8c39969590e1af3d80e121 (patch)
tree 9edb71438852cd02f9d90db6de2dd7c289a5b113
parent c9bc7dc3ee7a02aac17eefe1448f8958ed896388 (diff)
armv6: Add four more nearest-scaled-cover fast paths
These complete the set of fast paths where currently pixman-fast-path.c
provides versions that get selected in preference to the armv6-optimised
scanline fetchers/combiners/writeback routines. Because generation of these
fast paths is macroised, the patch required to add them is fairly simple.

lowlevel-blt-bench -n over_8888_8888:

      Before           After
      Mean    StdDev   Mean    StdDev   Confidence   Change
L1    13.8    0.0      26.5    0.2      100.0%       +91.7%
L2    9.4     0.2      22.9    0.4      100.0%       +142.6%
M     8.6     0.0      23.8    0.0      100.0%       +176.1%
HT    7.4     0.0      14.1    0.1      100.0%       +91.2%
VT    7.3     0.0      13.4    0.1      100.0%       +84.1%
R     7.0     0.0      13.0    0.1      100.0%       +85.9%
RT    4.5     0.1      6.2     0.1      100.0%       +36.6%

affine-bench * 0 0 1 over a8r8g8b8 a8r8g8b8:

      Before           After
      Mean    StdDev   Mean    StdDev   Confidence   Change
0.5   9.4     0.0      28.0    0.0      100.0%       +197.4%
0.75  9.0     0.0      26.1    0.0      100.0%       +190.2%
1.0   8.6     0.0      24.4    0.0      100.0%       +184.6%
1.5   7.9     0.0      21.7    0.0      100.0%       +173.4%
2.0   7.3     0.0      19.6    0.0      100.0%       +166.6%

lowlevel-blt-bench -n src_x888_8888:

      Before           After
      Mean    StdDev   Mean    StdDev   Confidence   Change
L1    108.6   2.0      66.3    0.9      100.0%       -39.0%
L2    32.4    1.5      44.3    2.1      100.0%       +36.8%
M     27.5    0.1      62.0    0.1      100.0%       +125.6%
HT    20.3    0.1      28.7    0.2      100.0%       +41.2%
VT    19.9    0.1      26.7    0.1      100.0%       +34.4%
R     18.6    0.1      25.3    0.2      100.0%       +36.3%
RT    8.7     0.1      9.8     0.2      100.0%       +12.6%

affine-bench * 0 0 1 src x8r8g8b8 a8r8g8b8:

      Before           After
      Mean    StdDev   Mean    StdDev   Confidence   Change
0.5   45.2    0.0      97.2    0.1      100.0%       +115.1%
0.75  35.9    0.1      76.7    0.1      100.0%       +113.9%
1.0   29.6    0.1      61.1    0.1      100.0%       +106.4%
1.5   21.4    0.0      52.7    0.1      100.0%       +145.9%
2.0   16.7    0.0      43.0    0.1      100.0%       +156.9%

lowlevel-blt-bench -n src_8888_0565:

      Before           After
      Mean    StdDev   Mean    StdDev   Confidence   Change
L1    57.2    0.7      43.1    0.4      100.0%       -24.7%
L2    23.0    1.0      32.8    1.0      100.0%       +42.5%
M     24.8    0.0      42.2    0.0      100.0%       +70.0%
HT    18.0    0.1      22.1    0.1      100.0%       +22.5%
VT    17.1    0.1      21.0    0.1      100.0%       +22.5%
R     16.5    0.1      20.0    0.1      100.0%       +21.4%
RT    8.3     0.2      8.4     0.1      95.0%        +1.0% (insignificant)

affine-bench * 0 0 1 src a8r8g8b8 r5g6b5:

      Before           After
      Mean    StdDev   Mean    StdDev   Confidence   Change
0.5   34.9    0.0      55.3    0.0      100.0%       +58.7%
0.75  29.3    0.0      49.1    0.0      100.0%       +67.4%
1.0   24.8    0.0      42.6    0.1      100.0%       +71.6%
1.5   19.0    0.0      38.2    0.1      100.0%       +100.7%
2.0   15.4    0.0      31.8    0.0      100.0%       +107.1%

lowlevel-blt-bench -n over_8888_0565:

      Before           After
      Mean    StdDev   Mean    StdDev   Confidence   Change
L1    9.8     0.0      15.3    0.1      100.0%       +56.6%
L2    7.4     0.0      14.3    0.2      100.0%       +91.7%
M     7.5     0.0      15.4    0.0      100.0%       +106.0%
HT    6.5     0.0      10.1    0.0      100.0%       +54.5%
VT    6.4     0.0      9.9     0.0      100.0%       +54.6%
R     6.2     0.0      9.5     0.0      100.0%       +52.1%
RT    4.2     0.0      4.6     0.1      100.0%       +9.8%

affine-bench * 0 0 1 over a8r8g8b8 r5g6b5:

      Before           After
      Mean    StdDev   Mean    StdDev   Confidence   Change
0.5   8.0     0.0      17.3    0.0      100.0%       +116.1%
0.75  7.8     0.0      16.5    0.0      100.0%       +112.9%
1.0   7.5     0.0      15.7    0.0      100.0%       +110.5%
1.5   7.0     0.0      14.8    0.0      100.0%       +112.8%
2.0   6.5     0.0      13.7    0.0      100.0%       +111.4%
-rw-r--r-- pixman/pixman-arm-simd.c 17
1 file changed, 17 insertions, 0 deletions
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index 5f62ed3..cc946ec 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -238,7 +238,11 @@ pixman_get_scanline_r5g6b5_nearest_scaled_cover_r5g6b5_asm_armv6(uint32_t
const uint16_t *source);
BIND_NEAREST_SCALED_COVER_FAST_PATH_SRC_DST (armv6, src_8888_8888, SRC, src, uint32_t, uint32_t, a8r8g8b8, a8r8g8b8)
+BIND_NEAREST_SCALED_COVER_FAST_PATH_SRC_DST (armv6, src_x888_8888, SRC, src, uint32_t, uint32_t, x8r8g8b8, a8r8g8b8)
+BIND_NEAREST_SCALED_COVER_FAST_PATH_SRC_DST (armv6, src_8888_0565, SRC, src, uint32_t, uint16_t, a8r8g8b8, r5g6b5)
BIND_NEAREST_SCALED_COVER_FAST_PATH_SRC_DST (armv6, src_0565_0565, SRC, src, uint16_t, uint16_t, r5g6b5, r5g6b5)
+BIND_NEAREST_SCALED_COVER_FAST_PATH_SRC_DST (armv6, over_8888_8888, OVER, over, uint32_t, uint32_t, a8r8g8b8, a8r8g8b8)
+BIND_NEAREST_SCALED_COVER_FAST_PATH_SRC_DST (armv6, over_8888_0565, OVER, over, uint32_t, uint16_t, a8r8g8b8, r5g6b5)
void
pixman_composite_src_n_8888_asm_armv6 (int32_t w,
@@ -457,12 +461,25 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, r5g6b5, r5g6b5, src_0565_0565),
PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, b5g6r5, b5g6r5, src_0565_0565),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, a8r8g8b8, r5g6b5, src_8888_0565),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, x8r8g8b8, r5g6b5, src_8888_0565),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, a8b8g8r8, b5g6r5, src_8888_0565),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, x8b8g8r8, b5g6r5, src_8888_0565),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, OVER, a8r8g8b8, r5g6b5, over_8888_0565),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, OVER, a8b8g8r8, b5g6r5, over_8888_0565),
+
PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, a8r8g8b8, a8r8g8b8, src_8888_8888),
PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, a8r8g8b8, x8r8g8b8, src_8888_8888),
PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, x8r8g8b8, x8r8g8b8, src_8888_8888),
PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, a8b8g8r8, a8b8g8r8, src_8888_8888),
PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, a8b8g8r8, x8b8g8r8, src_8888_8888),
PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, x8b8g8r8, x8b8g8r8, src_8888_8888),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, x8r8g8b8, a8r8g8b8, src_x888_8888),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, SRC, x8b8g8r8, a8b8g8r8, src_x888_8888),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, OVER, a8r8g8b8, a8r8g8b8, over_8888_8888),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, OVER, a8r8g8b8, x8r8g8b8, over_8888_8888),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, OVER, a8b8g8r8, a8b8g8r8, over_8888_8888),
+ PIXMAN_ARM_NEAREST_SCALED_COVER_SRC_DST_FAST_PATH (armv6, OVER, a8b8g8r8, x8b8g8r8, over_8888_8888),
SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565),
SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565),