summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Ben Avison <bavison@riscosopen.org> 2015-08-20 20:53:11 +0100
committer: Ben Avison <bavison@riscosopen.org> 2015-10-15 13:52:06 +0100
commit: 35a8b723b6d7c92b00e7e27270311ff37111e1e9 (patch)
tree: 134997d363a65297bc0ca4883b6f16e55eb77740
parent: 7d4db711b3a80b70bbda4fd2497e110ecd897b95 (diff)
armv7: Add optimised scanline writeback for r5g6b5
lowlevel-blt-bench results for an example operation, src_1555_0565:

          Before           After
          Mean   StdDev    Mean   StdDev   Confidence   Change
    L1    85.8   2.12      114.0  1.65     100.00%      +32.9%
    L2    83.7   0.96      106.0  1.01     100.00%      +26.7%
    M     76.4   0.66      94.8   0.98     100.00%      +24.0%
    HT    39.8   0.37      38.9   0.29     100.00%      -2.3%
    VT    37.0   0.36      34.1   0.24     100.00%      -7.7%
    R     33.9   0.37      30.3   0.24     100.00%      -10.5%
    RT    14.7   0.20      11.5   0.11     100.00%      -21.7%
-rw-r--r-- pixman/pixman-arm-neon-asm.S | 10
-rw-r--r-- pixman/pixman-arm-neon.c | 10
2 files changed, 20 insertions, 0 deletions
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index f48f773..46567cb 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -461,6 +461,16 @@ generate_composite_function \
pixman_composite_src_8888_0565_process_pixblock_tail, \
pixman_composite_src_8888_0565_process_pixblock_tail_head
+generate_composite_function_single_scanline \
+ pixman_write_back_r5g6b5_asm_neon, 32, 0, 16, /* NOTE(review): numeric args are presumably src, mask, dst bpp - confirm against the macro definition */ \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_8888_0565_process_pixblock_head, /* same src_8888_0565 pixblock macros as the preceding generate_composite_function invocation */ \
+ pixman_composite_src_8888_0565_process_pixblock_tail, \
+ pixman_composite_src_8888_0565_process_pixblock_tail_head
+
/******************************************************************************/
.macro pixman_composite_src_0565_8888_process_pixblock_head
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index d4675f1..9f3faa4 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -168,6 +168,15 @@ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_8888, AD
PIXMAN_ARM_BIND_GET_SCANLINE (neon, r5g6b5)
PIXMAN_ARM_BIND_GET_SCANLINE (neon, a1r5g5b5)
+PIXMAN_ARM_BIND_WRITE_BACK (neon, r5g6b5)
+
+static uint32_t *
+fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask)
+{ /* No-op fetch: reads no pixel data, only steps iter->bits and returns iter->buffer; mask is unused. */
+ iter->bits += iter->stride; /* advance by one stride (presumably to the next scanline - confirm against iterator setup) */
+ return iter->buffer; /* buffer contents are handed back untouched */
+}
+
void
pixman_composite_src_n_8_asm_neon (int32_t w,
int32_t h,
@@ -444,6 +453,7 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
static const pixman_iter_info_t arm_neon_iters[] =
{
PIXMAN_ARM_UNTRANSFORMED_COVER_FETCHER (neon, r5g6b5),
+ PIXMAN_ARM_WRITEBACK (neon, r5g6b5),
PIXMAN_ARM_UNTRANSFORMED_COVER_FETCHER (neon, a1r5g5b5),