diff options
author | Siarhei Siamashka <siarhei.siamashka@nokia.com> | 2011-03-16 16:33:41 +0200 |
---|---|---|
committer | Siarhei Siamashka <siarhei.siamashka@nokia.com> | 2011-04-11 10:48:10 +0300 |
commit | 0f7be9f72ef6bfe2555b7f2cc29297c4f4762740 (patch) | |
tree | 7717544750dab4434a957fa0bdcbe03d1ddedf3e | |
parent | 9638af95832563040d6bd861cf4c20ab632058df (diff) |
ARM: support for software pipelining in bilinear macros
Now it's possible to override the main loop of the bilinear scaling code
with an optimized pipelined implementation.
-rw-r--r-- | pixman/pixman-arm-neon-asm.S | 31 |
1 files changed, 28 insertions, 3 deletions
```diff
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index a331f4d..e235511 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -2617,12 +2617,32 @@ fname:
     bilinear_store_&dst_fmt 4, q2, q3
 .endm
 
+.macro bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
+.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
+    bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_head
+.else
+    bilinear_interpolate_four_pixels src_fmt, dst_fmt
+.endif
+.endm
+
+.macro bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
+.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
+    bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail
+.endif
+.endm
+
+.macro bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
+.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
+    bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail_head
+.else
+    bilinear_interpolate_four_pixels src_fmt, dst_fmt
+.endif
+.endm
+
 /*
  * Main template macro for generating NEON optimized bilinear scanline
  * functions.
  *
- * TODO: use software pipelining in order to improve performance
- *
  * Bilinear scanline scaler macro template uses the following arguments:
  * fname - name of the function to generate
  * src_fmt - source color format (8888 or 0565)
@@ -2692,10 +2712,15 @@ pixman_asm_function fname
     subs WIDTH, WIDTH, #4
     blt 1f
     mov PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift)
+    bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
+    subs WIDTH, WIDTH, #4
+    blt 5f
 0:
-    bilinear_interpolate_four_pixels src_fmt, dst_fmt
+    bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
     subs WIDTH, WIDTH, #4
     bge 0b
+5:
+    bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
 1:
     /* handle the remaining trailing pixels */
```