summary refs log tree commit diff
diff options
context:
space:
mode:
author Siarhei Siamashka <siarhei.siamashka@nokia.com> 2011-03-16 16:33:41 +0200
committer Siarhei Siamashka <siarhei.siamashka@nokia.com> 2011-04-11 10:48:10 +0300
commit 0f7be9f72ef6bfe2555b7f2cc29297c4f4762740 (patch)
tree 7717544750dab4434a957fa0bdcbe03d1ddedf3e
parent 9638af95832563040d6bd861cf4c20ab632058df (diff)
ARM: support for software pipelining in bilinear macros
Now it's possible to override the main loop of the bilinear scaling code with an optimized pipelined implementation.
-rw-r--r-- pixman/pixman-arm-neon-asm.S 31
1 files changed, 28 insertions, 3 deletions
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index a331f4d..e235511 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -2617,12 +2617,32 @@ fname:
bilinear_store_&dst_fmt 4, q2, q3
.endm
+.macro bilinear_interpolate_four_pixels_head src_fmt, dst_fmt /* head step for a four-pixel group; selects a per-format implementation at assembly time */
+.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt /* symbol defined => a specialized variant exists for this src_fmt/dst_fmt pair */
+ bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_head
+.else /* no specialized variant: expand the generic four-pixel macro instead */
+ bilinear_interpolate_four_pixels src_fmt, dst_fmt
+.endif
+.endm
+
+.macro bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt /* tail step for a four-pixel group; only specialized variants have one */
+.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt /* symbol defined => a specialized variant exists for this src_fmt/dst_fmt pair */
+ bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail
+.endif /* there is no .else branch: without a specialized variant this macro expands to nothing */
+.endm
+
+.macro bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt /* combined tail+head step for a four-pixel group; selects a per-format implementation at assembly time */
+.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt /* symbol defined => a specialized variant exists for this src_fmt/dst_fmt pair */
+ bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail_head
+.else /* no specialized variant: expand the generic four-pixel macro instead */
+ bilinear_interpolate_four_pixels src_fmt, dst_fmt
+.endif
+.endm
+
/*
* Main template macro for generating NEON optimized bilinear scanline
* functions.
*
- * TODO: use software pipelining in order to improve performance
- *
* Bilinear scanline scaler macro template uses the following arguments:
* fname - name of the function to generate
* src_fmt - source color format (8888 or 0565)
@@ -2692,10 +2712,15 @@ pixman_asm_function fname
subs WIDTH, WIDTH, #4
blt 1f
mov PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift)
+ bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
+ subs WIDTH, WIDTH, #4
+ blt 5f
0:
- bilinear_interpolate_four_pixels src_fmt, dst_fmt
+ bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
subs WIDTH, WIDTH, #4
bge 0b
+5:
+ bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
1:
/* handle the remaining trailing pixels */