summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Avison <bavison@riscosopen.org>2014-04-09 16:25:31 +0300
committerSøren Sandmann <ssp@redhat.com>2014-04-21 20:34:26 -0400
commit68d2f7b486a9ccc877a2214f7f5ef562e2846581 (patch)
tree1f263b9aab82347f15baabd3ce45f972a3370106
parent4ad769cbec47ca0df43dc586f689b1968bbc942f (diff)
ARMv6: Add fast path flag to force no preload of destination buffer
-rw-r--r--pixman/pixman-arm-simd-asm.h14
1 files changed, 13 insertions, 1 deletions
diff --git a/pixman/pixman-arm-simd-asm.h b/pixman/pixman-arm-simd-asm.h
index 1bb8b45..0baf87a 100644
--- a/pixman/pixman-arm-simd-asm.h
+++ b/pixman/pixman-arm-simd-asm.h
@@ -78,6 +78,8 @@
.set FLAG_PROCESS_PRESERVES_SCRATCH, 64
.set FLAG_PROCESS_PRESERVES_WK0, 0
.set FLAG_PROCESS_CORRUPTS_WK0, 128 /* if possible, use the specified register(s) instead so WK0 can hold number of leading pixels */
+.set FLAG_PRELOAD_DST, 0
+.set FLAG_NO_PRELOAD_DST, 256
/*
* Number of bytes by which to adjust preload offset of destination
@@ -445,7 +447,7 @@
preload_middle src_bpp, SRC, 0
preload_middle mask_bpp, MASK, 0
.endif
- .if (dst_r_bpp > 0) && ((SUBBLOCK % 2) == 0)
+ .if (dst_r_bpp > 0) && ((SUBBLOCK % 2) == 0) && (((flags) & FLAG_NO_PRELOAD_DST) == 0)
/* Because we know that writes are 16-byte aligned, it's relatively easy to ensure that
* destination prefetches are 32-byte aligned. It's also the easiest channel to offset
* preloads for, to achieve staggered prefetches for multiple channels, because there are
@@ -480,7 +482,9 @@
.endif
preload_trailing src_bpp, src_bpp_shift, SRC
preload_trailing mask_bpp, mask_bpp_shift, MASK
+ .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
preload_trailing dst_r_bpp, dst_bpp_shift, DST
+ .endif
add X, X, #(prefetch_distance+2)*pix_per_block - 128/dst_w_bpp
/* The remainder of the line is handled identically to the medium case */
medium_case_inner_loop_and_trailing_pixels process_head, process_tail,, exit_label, unaligned_src, unaligned_mask
@@ -779,7 +783,9 @@ fname:
newline
preload_leading_step1 src_bpp, WK1, SRC
preload_leading_step1 mask_bpp, WK2, MASK
+ .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
preload_leading_step1 dst_r_bpp, WK3, DST
+ .endif
ands WK0, DST, #15
beq 154f
@@ -787,7 +793,9 @@ fname:
preload_leading_step2 src_bpp, src_bpp_shift, WK1, SRC
preload_leading_step2 mask_bpp, mask_bpp_shift, WK2, MASK
+ .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
preload_leading_step2 dst_r_bpp, dst_bpp_shift, WK3, DST
+ .endif
leading_15bytes process_head, process_tail
@@ -827,7 +835,9 @@ fname:
newline
preload_line 0, src_bpp, src_bpp_shift, SRC /* in: X, corrupts: WK0-WK1 */
preload_line 0, mask_bpp, mask_bpp_shift, MASK
+ .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
preload_line 0, dst_r_bpp, dst_bpp_shift, DST
+ .endif
sub X, X, #128/dst_w_bpp /* simplifies inner loop termination */
ands WK0, DST, #15
@@ -856,7 +866,9 @@ fname:
newline
preload_line 1, src_bpp, src_bpp_shift, SRC /* in: X, corrupts: WK0-WK1 */
preload_line 1, mask_bpp, mask_bpp_shift, MASK
+ .if ((flags) & FLAG_NO_PRELOAD_DST) == 0
preload_line 1, dst_r_bpp, dst_bpp_shift, DST
+ .endif
.if dst_w_bpp == 8
tst DST, #3