diff options
author | Siarhei Siamashka <siarhei.siamashka@nokia.com> | 2011-03-06 16:17:12 +0200 |
---|---|---|
committer | Siarhei Siamashka <siarhei.siamashka@nokia.com> | 2011-03-12 21:23:54 +0200 |
commit | bb3d1b67fd0f42ae00af811c624ea1c44541034d (patch) | |
tree | 6e23d0ac415692e6f6ef86ef874ca535428d96a0 | |
parent | 84e361c8e357e26f299213fbeefe64c73447b116 (diff) |
ARM: use prefetch in nearest scaled 'src_0565_0565'
Benchmark on ARM Cortex-A8 r1p3 @500MHz, 32-bit LPDDR @166MHz:
Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
before: op=1, src=10020565, dst=10020565, speed=75.02 MPix/s
after: op=1, src=10020565, dst=10020565, speed=73.63 MPix/s
Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
before: op=1, src=10020565, dst=10020565, speed=176.12 MPix/s
after: op=1, src=10020565, dst=10020565, speed=267.50 MPix/s
-rw-r--r-- | pixman/pixman-arm-simd-asm.S | 27 |
1 files changed, 25 insertions, 2 deletions
diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
index 7567700..dd1366d 100644
--- a/pixman/pixman-arm-simd-asm.S
+++ b/pixman/pixman-arm-simd-asm.S
@@ -348,6 +348,7 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
     TMP1 .req r4
     TMP2 .req r5
     VXMASK .req r6
+    PF_OFFS .req r7
 
     ldr UNIT_X, [sp]
     push {r4, r5, r6, r7}
@@ -366,12 +367,33 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
     strh TMP2, [DST], #2
 .endm
 
+    /*
+     * stop prefetch before reaching the end of scanline (a good behaving
+     * value selected based on some benchmarks with short scanlines)
+     */
+    #define PREFETCH_BRAKING_DISTANCE 32
+
     /* now do the scaling */
     and TMP1, VXMASK, VX, lsr #15
     add VX, VX, UNIT_X
-    subs W, #4
+    subs W, #(8 + PREFETCH_BRAKING_DISTANCE)
+    blt 2f
+    /* set prefetch distance to 80 pixels ahead */
+    add PF_OFFS, VX, UNIT_X, lsl #6
+    add PF_OFFS, PF_OFFS, UNIT_X, lsl #4
+1: /* main loop, process 8 pixels per iteration with prefetch */
+    subs W, W, #8
+    add PF_OFFS, UNIT_X, lsl #3
+    scale_2_pixels
+    scale_2_pixels
+    scale_2_pixels
+    scale_2_pixels
+    pld [SRC, PF_OFFS, lsr #15]
+    bge 1b
+2:
+    subs W, #(4 - 8 - PREFETCH_BRAKING_DISTANCE)
     blt 2f
-1: /* main loop, process 4 pixels per iteration */
+1: /* process the remaining pixels */
     scale_2_pixels
     scale_2_pixels
     subs W, W, #4
@@ -394,6 +416,7 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
     .unreq TMP1
     .unreq TMP2
     .unreq VXMASK
+    .unreq PF_OFFS
     /* return */
     pop {r4, r5, r6, r7}
     bx lr