diff options
author | Ben Avison <bavison@riscosopen.org> | 2015-09-07 14:40:48 +0300 |
---|---|---|
committer | Pekka Paalanen <pekka.paalanen@collabora.co.uk> | 2015-09-17 14:40:39 +0300 |
commit | 9eb6889b15a180cc94aad8ac97189af5b3a68b96 (patch) | |
tree | 939a8ae495caf50da3e4d60cf608e0aaec5ff3e3 | |
parent | 4c71f595e3393be5b922df37d50d71dd83f4f979 (diff) |
armv6: Add over_n_8888 fast path (disabled)
This new fast path is initially disabled by putting the entries in the
lookup table after the sentinel. The compiler cannot tell the new code
is not used, so it cannot eliminate the code. Also the lookup table size
will include the new fast path. When the follow-up patch then enables
the new fast path, the binary layout (alignments, size, etc.) will stay
the same compared to the disabled case.
Keeping the binary layout identical is important for benchmarking on
Raspberry Pi 1. The addresses at which functions are loaded will have a
significant impact on benchmark results, causing unexpected performance
changes. Keeping all function addresses the same across the patch
enabling a new fast path improves the reliability of benchmarks.
Benchmark results are included in the patch enabling this fast path.
[Pekka: disabled the fast path, commit message]
Signed-off-by: Pekka Paalanen <pekka.paalanen@collabora.co.uk>
-rw-r--r-- | pixman/pixman-arm-simd-asm.S | 41 | ||||
-rw-r--r-- | pixman/pixman-arm-simd.c | 7 |
2 files changed, 48 insertions, 0 deletions
diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S index 7b0727b..a74a0a8 100644 --- a/pixman/pixman-arm-simd-asm.S +++ b/pixman/pixman-arm-simd-asm.S @@ -1136,3 +1136,44 @@ generate_composite_function \ in_reverse_8888_8888_process_tail /******************************************************************************/ + +.macro over_n_8888_init + ldr SRC, [sp, #ARGS_STACK_OFFSET] + /* Hold loop invariant in MASK */ + ldr MASK, =0x00800080 + /* Hold multiplier for destination in STRIDE_M */ + mov STRIDE_M, #255 + sub STRIDE_M, STRIDE_M, SRC, lsr #24 + /* Set GE[3:0] to 0101 so SEL instructions do what we want */ + uadd8 SCRATCH, MASK, MASK +.endm + +.macro over_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload + pixld , numbytes, firstreg, DST, 0 +.endm + +.macro over_n_8888_1pixel dst + mul_8888_8 WK&dst, STRIDE_M, SCRATCH, MASK + uqadd8 WK&dst, WK&dst, SRC +.endm + +.macro over_n_8888_process_tail cond, numbytes, firstreg + .set PROCESS_REG, firstreg + .rept numbytes / 4 + over_n_8888_1pixel %(PROCESS_REG) + .set PROCESS_REG, PROCESS_REG+1 + .endr + pixst , numbytes, firstreg, DST +.endm + +generate_composite_function \ + pixman_composite_over_n_8888_asm_armv6, 0, 0, 32 \ + FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE \ + 2, /* prefetch distance */ \ + over_n_8888_init, \ + nop_macro, /* newline */ \ + nop_macro, /* cleanup */ \ + over_n_8888_process_head, \ + over_n_8888_process_tail + +/******************************************************************************/ diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c index f40ff36..62c0f41 100644 --- a/pixman/pixman-arm-simd.c +++ b/pixman/pixman-arm-simd.c @@ -51,6 +51,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888, PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, in_reverse_8888_8888, uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, armv6, over_n_8888, + uint32_t, 1) PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, armv6, over_reverse_n_8888, uint32_t, 1) @@ -271,6 +273,11 @@ static const pixman_fast_path_t arm_simd_fast_paths[] = SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888), { PIXMAN_OP_NONE }, + + PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, armv6_composite_over_n_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, armv6_composite_over_n_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, a8b8g8r8, armv6_composite_over_n_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, x8b8g8r8, armv6_composite_over_n_8888), }; pixman_implementation_t * |