From 1cd916f3a5ebeb943f66eecf0b8ce99af0b95d11 Mon Sep 17 00:00:00 2001 From: Taekyun Kim Date: Fri, 23 Sep 2011 00:03:22 +0900 Subject: ARM: NEON: Replace old bilinear scanline generator with new template Bilinear scanline functions in pixman-arm-neon-asm-bilinear.S can be replaced with the new template just by wrapping the existing macros. --- pixman/pixman-arm-neon-asm-bilinear.S | 484 ++++++++++++++++++++-------------- 1 file changed, 292 insertions(+), 192 deletions(-) diff --git a/pixman/pixman-arm-neon-asm-bilinear.S b/pixman/pixman-arm-neon-asm-bilinear.S index 784e5df..25bcb24 100644 --- a/pixman/pixman-arm-neon-asm-bilinear.S +++ b/pixman/pixman-arm-neon-asm-bilinear.S @@ -582,198 +582,6 @@ fname: bilinear_store_&dst_fmt 4, q2, q3 .endm -.macro generate_bilinear_scanline_func_src_dst \ - fname, src_fmt, dst_fmt, op, \ - bpp_shift, prefetch_distance - -pixman_asm_function fname - OUT .req r0 - TOP .req r1 - BOTTOM .req r2 - WT .req r3 - WB .req r4 - X .req r5 - UX .req r6 - WIDTH .req ip - TMP1 .req r3 - TMP2 .req r4 - PF_OFFS .req r7 - TMP3 .req r8 - TMP4 .req r9 - STRIDE .req r2 - - mov ip, sp - push {r4, r5, r6, r7, r8, r9} - mov PF_OFFS, #prefetch_distance - ldmia ip, {WB, X, UX, WIDTH} - mul PF_OFFS, PF_OFFS, UX - - .set prefetch_offset, prefetch_distance - - sub STRIDE, BOTTOM, TOP - .unreq BOTTOM - - cmp WIDTH, #0 - ble 3f - - vdup.u16 q12, X - vdup.u16 q13, UX - vdup.u8 d28, WT - vdup.u8 d29, WB - vadd.u16 d25, d25, d26 - vadd.u16 q13, q13, q13 - vshr.u16 q15, q12, #8 - vadd.u16 q12, q12, q13 - - subs WIDTH, WIDTH, #4 - blt 1f - mov PF_OFFS, PF_OFFS, asr #(16 - bpp_shift) -0: - bilinear_interpolate_four_pixels src_fmt, x, dst_fmt, op - subs WIDTH, WIDTH, #4 - bge 0b -1: - tst WIDTH, #2 - beq 2f - bilinear_interpolate_two_pixels src_fmt, x, dst_fmt, op -2: - tst WIDTH, #1 - beq 3f - bilinear_interpolate_last_pixel src_fmt, x, dst_fmt, op -3: - pop {r4, r5, r6, r7, r8, r9} - bx lr - - .unreq OUT - .unreq TOP - .unreq WT - .unreq WB - .unreq X - .unreq UX - 
.unreq WIDTH - .unreq TMP1 - .unreq TMP2 - .unreq PF_OFFS - .unreq TMP3 - .unreq TMP4 - .unreq STRIDE -.endfunc - -.endm - -.macro generate_bilinear_scanline_func_src_a8_dst \ - fname, src_fmt, dst_fmt, op, \ - bpp_shift, prefetch_distance - -pixman_asm_function fname - OUT .req r0 - MASK .req r1 - TOP .req r2 - BOTTOM .req r3 - WT .req r4 - WB .req r5 - X .req r6 - UX .req r7 - WIDTH .req ip - TMP1 .req r4 - TMP2 .req r5 - PF_OFFS .req r8 - TMP3 .req r9 - TMP4 .req r10 - STRIDE .req r3 - - mov ip, sp - push {r4, r5, r6, r7, r8, r9, r10, ip} - mov PF_OFFS, #prefetch_distance - ldmia ip, {WT, WB, X, UX, WIDTH} - mul PF_OFFS, PF_OFFS, UX - - .set prefetch_offset, prefetch_distance - - sub STRIDE, BOTTOM, TOP - .unreq BOTTOM - - cmp WIDTH, #0 - ble 3f - - vdup.u16 q12, X - vdup.u16 q13, UX - vdup.u8 d28, WT - vdup.u8 d29, WB - vadd.u16 d25, d25, d26 - vadd.u16 q13, q13, q13 - vshr.u16 q15, q12, #8 - vadd.u16 q12, q12, q13 - - subs WIDTH, WIDTH, #4 - blt 1f - mov PF_OFFS, PF_OFFS, asr #(16 - bpp_shift) -0: - bilinear_interpolate_four_pixels src_fmt, 8, dst_fmt, op - subs WIDTH, WIDTH, #4 - bge 0b -1: - tst WIDTH, #2 - beq 2f - bilinear_interpolate_two_pixels src_fmt, 8, dst_fmt, op -2: - tst WIDTH, #1 - beq 3f - bilinear_interpolate_last_pixel src_fmt, 8, dst_fmt, op -3: - pop {r4, r5, r6, r7, r8, r9, r10, ip} - bx lr - - .unreq OUT - .unreq TOP - .unreq WT - .unreq WB - .unreq X - .unreq UX - .unreq WIDTH - .unreq MASK - .unreq TMP1 - .unreq TMP2 - .unreq PF_OFFS - .unreq TMP3 - .unreq TMP4 - .unreq STRIDE -.endfunc - -.endm - -generate_bilinear_scanline_func_src_dst \ - pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_neon, \ - 8888, 8888, over, 2, 28 - -generate_bilinear_scanline_func_src_dst \ - pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_neon, \ - 8888, 8888, add, 2, 28 - -generate_bilinear_scanline_func_src_a8_dst \ - pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_neon, \ - 8888, 8888, src, 2, 28 - -generate_bilinear_scanline_func_src_a8_dst \ - 
pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_neon, \ - 8888, 0565, src, 2, 28 - -generate_bilinear_scanline_func_src_a8_dst \ - pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_neon, \ - 0565, 8888, src, 1, 28 - -generate_bilinear_scanline_func_src_a8_dst \ - pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_neon, \ - 0565, 0565, src, 1, 28 - -generate_bilinear_scanline_func_src_a8_dst \ - pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_neon, \ - 8888, 8888, over, 2, 28 - -generate_bilinear_scanline_func_src_a8_dst \ - pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_neon, \ - 8888, 8888, add, 2, 28 - .set BILINEAR_FLAG_USE_MASK, 1 .set BILINEAR_FLAG_USE_ALL_NEON_REGS, 2 @@ -855,6 +663,8 @@ pixman_asm_function fname TMP4 .req r10 STRIDE .req r3 + .set prefetch_offset, prefetch_distance + mov ip, sp push {r4, r5, r6, r7, r8, r9, r10, ip} mov PF_OFFS, #prefetch_distance @@ -968,3 +778,293 @@ pixman_asm_function fname .endfunc .endm + +/* src_8888_8_8888 */ +.macro bilinear_src_8888_8_8888_process_last_pixel + bilinear_interpolate_last_pixel 8888, 8, 8888, src +.endm + +.macro bilinear_src_8888_8_8888_process_two_pixels + bilinear_interpolate_two_pixels 8888, 8, 8888, src +.endm + +.macro bilinear_src_8888_8_8888_process_four_pixels + bilinear_interpolate_four_pixels 8888, 8, 8888, src +.endm + +.macro bilinear_src_8888_8_8888_process_pixblock_head + bilinear_src_8888_8_8888_process_four_pixels +.endm + +.macro bilinear_src_8888_8_8888_process_pixblock_tail +.endm + +.macro bilinear_src_8888_8_8888_process_pixblock_tail_head + bilinear_src_8888_8_8888_process_pixblock_tail + bilinear_src_8888_8_8888_process_pixblock_head +.endm + +/* src_8888_8_0565 */ +.macro bilinear_src_8888_8_0565_process_last_pixel + bilinear_interpolate_last_pixel 8888, 8, 0565, src +.endm + +.macro bilinear_src_8888_8_0565_process_two_pixels + bilinear_interpolate_two_pixels 8888, 8, 0565, src +.endm + +.macro bilinear_src_8888_8_0565_process_four_pixels + 
bilinear_interpolate_four_pixels 8888, 8, 0565, src +.endm + +.macro bilinear_src_8888_8_0565_process_pixblock_head + bilinear_src_8888_8_0565_process_four_pixels +.endm + +.macro bilinear_src_8888_8_0565_process_pixblock_tail +.endm + +.macro bilinear_src_8888_8_0565_process_pixblock_tail_head + bilinear_src_8888_8_0565_process_pixblock_tail + bilinear_src_8888_8_0565_process_pixblock_head +.endm + +/* src_0565_8_x888 */ +.macro bilinear_src_0565_8_x888_process_last_pixel + bilinear_interpolate_last_pixel 0565, 8, 8888, src +.endm + +.macro bilinear_src_0565_8_x888_process_two_pixels + bilinear_interpolate_two_pixels 0565, 8, 8888, src +.endm + +.macro bilinear_src_0565_8_x888_process_four_pixels + bilinear_interpolate_four_pixels 0565, 8, 8888, src +.endm + +.macro bilinear_src_0565_8_x888_process_pixblock_head + bilinear_src_0565_8_x888_process_four_pixels +.endm + +.macro bilinear_src_0565_8_x888_process_pixblock_tail +.endm + +.macro bilinear_src_0565_8_x888_process_pixblock_tail_head + bilinear_src_0565_8_x888_process_pixblock_tail + bilinear_src_0565_8_x888_process_pixblock_head +.endm + +/* src_0565_8_0565 */ +.macro bilinear_src_0565_8_0565_process_last_pixel + bilinear_interpolate_last_pixel 0565, 8, 0565, src +.endm + +.macro bilinear_src_0565_8_0565_process_two_pixels + bilinear_interpolate_two_pixels 0565, 8, 0565, src +.endm + +.macro bilinear_src_0565_8_0565_process_four_pixels + bilinear_interpolate_four_pixels 0565, 8, 0565, src +.endm + +.macro bilinear_src_0565_8_0565_process_pixblock_head + bilinear_src_0565_8_0565_process_four_pixels +.endm + +.macro bilinear_src_0565_8_0565_process_pixblock_tail +.endm + +.macro bilinear_src_0565_8_0565_process_pixblock_tail_head + bilinear_src_0565_8_0565_process_pixblock_tail + bilinear_src_0565_8_0565_process_pixblock_head +.endm + +/* over_8888_8888 */ +.macro bilinear_over_8888_8888_process_last_pixel + bilinear_interpolate_last_pixel 8888, x, 8888, over +.endm + +.macro 
bilinear_over_8888_8888_process_two_pixels + bilinear_interpolate_two_pixels 8888, x, 8888, over +.endm + +.macro bilinear_over_8888_8888_process_four_pixels + bilinear_interpolate_four_pixels 8888, x, 8888, over +.endm + +.macro bilinear_over_8888_8888_process_pixblock_head + bilinear_over_8888_8888_process_four_pixels +.endm + +.macro bilinear_over_8888_8888_process_pixblock_tail +.endm + +.macro bilinear_over_8888_8888_process_pixblock_tail_head + bilinear_over_8888_8888_process_pixblock_tail + bilinear_over_8888_8888_process_pixblock_head +.endm + +/* over_8888_8_8888 */ +.macro bilinear_over_8888_8_8888_process_last_pixel + bilinear_interpolate_last_pixel 8888, 8, 8888, over +.endm + +.macro bilinear_over_8888_8_8888_process_two_pixels + bilinear_interpolate_two_pixels 8888, 8, 8888, over +.endm + +.macro bilinear_over_8888_8_8888_process_four_pixels + bilinear_interpolate_four_pixels 8888, 8, 8888, over +.endm + +.macro bilinear_over_8888_8_8888_process_pixblock_head + bilinear_over_8888_8_8888_process_four_pixels +.endm + +.macro bilinear_over_8888_8_8888_process_pixblock_tail +.endm + +.macro bilinear_over_8888_8_8888_process_pixblock_tail_head + bilinear_over_8888_8_8888_process_pixblock_tail + bilinear_over_8888_8_8888_process_pixblock_head +.endm + +/* add_8888_8888 */ +.macro bilinear_add_8888_8888_process_last_pixel + bilinear_interpolate_last_pixel 8888, x, 8888, add +.endm + +.macro bilinear_add_8888_8888_process_two_pixels + bilinear_interpolate_two_pixels 8888, x, 8888, add +.endm + +.macro bilinear_add_8888_8888_process_four_pixels + bilinear_interpolate_four_pixels 8888, x, 8888, add +.endm + +.macro bilinear_add_8888_8888_process_pixblock_head + bilinear_add_8888_8888_process_four_pixels +.endm + +.macro bilinear_add_8888_8888_process_pixblock_tail +.endm + +.macro bilinear_add_8888_8888_process_pixblock_tail_head + bilinear_add_8888_8888_process_pixblock_tail + bilinear_add_8888_8888_process_pixblock_head +.endm + +/* add_8888_8_8888 */ +.macro 
bilinear_add_8888_8_8888_process_last_pixel + bilinear_interpolate_last_pixel 8888, 8, 8888, add +.endm + +.macro bilinear_add_8888_8_8888_process_two_pixels + bilinear_interpolate_two_pixels 8888, 8, 8888, add +.endm + +.macro bilinear_add_8888_8_8888_process_four_pixels + bilinear_interpolate_four_pixels 8888, 8, 8888, add +.endm + +.macro bilinear_add_8888_8_8888_process_pixblock_head + bilinear_add_8888_8_8888_process_four_pixels +.endm + +.macro bilinear_add_8888_8_8888_process_pixblock_tail +.endm + +.macro bilinear_add_8888_8_8888_process_pixblock_tail_head + bilinear_add_8888_8_8888_process_pixblock_tail + bilinear_add_8888_8_8888_process_pixblock_head +.endm + + +/* Bilinear scanline functions */ +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_neon, \ + 8888, 8888, 2, 2, \ + bilinear_src_8888_8_8888_process_last_pixel, \ + bilinear_src_8888_8_8888_process_two_pixels, \ + bilinear_src_8888_8_8888_process_four_pixels, \ + bilinear_src_8888_8_8888_process_pixblock_head, \ + bilinear_src_8888_8_8888_process_pixblock_tail, \ + bilinear_src_8888_8_8888_process_pixblock_tail_head, \ + 4, 28, BILINEAR_FLAG_USE_MASK + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_neon, \ + 8888, 0565, 2, 1, \ + bilinear_src_8888_8_0565_process_last_pixel, \ + bilinear_src_8888_8_0565_process_two_pixels, \ + bilinear_src_8888_8_0565_process_four_pixels, \ + bilinear_src_8888_8_0565_process_pixblock_head, \ + bilinear_src_8888_8_0565_process_pixblock_tail, \ + bilinear_src_8888_8_0565_process_pixblock_tail_head, \ + 4, 28, BILINEAR_FLAG_USE_MASK + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_neon, \ + 0565, 8888, 1, 2, \ + bilinear_src_0565_8_x888_process_last_pixel, \ + bilinear_src_0565_8_x888_process_two_pixels, \ + bilinear_src_0565_8_x888_process_four_pixels, \ + bilinear_src_0565_8_x888_process_pixblock_head, \ + 
bilinear_src_0565_8_x888_process_pixblock_tail, \ + bilinear_src_0565_8_x888_process_pixblock_tail_head, \ + 4, 28, BILINEAR_FLAG_USE_MASK + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_neon, \ + 0565, 0565, 1, 1, \ + bilinear_src_0565_8_0565_process_last_pixel, \ + bilinear_src_0565_8_0565_process_two_pixels, \ + bilinear_src_0565_8_0565_process_four_pixels, \ + bilinear_src_0565_8_0565_process_pixblock_head, \ + bilinear_src_0565_8_0565_process_pixblock_tail, \ + bilinear_src_0565_8_0565_process_pixblock_tail_head, \ + 4, 28, BILINEAR_FLAG_USE_MASK + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_neon, \ + 8888, 8888, 2, 2, \ + bilinear_over_8888_8888_process_last_pixel, \ + bilinear_over_8888_8888_process_two_pixels, \ + bilinear_over_8888_8888_process_four_pixels, \ + bilinear_over_8888_8888_process_pixblock_head, \ + bilinear_over_8888_8888_process_pixblock_tail, \ + bilinear_over_8888_8888_process_pixblock_tail_head, \ + 4, 28, 0 + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_neon, \ + 8888, 8888, 2, 2, \ + bilinear_over_8888_8_8888_process_last_pixel, \ + bilinear_over_8888_8_8888_process_two_pixels, \ + bilinear_over_8888_8_8888_process_four_pixels, \ + bilinear_over_8888_8_8888_process_pixblock_head, \ + bilinear_over_8888_8_8888_process_pixblock_tail, \ + bilinear_over_8888_8_8888_process_pixblock_tail_head, \ + 4, 28, BILINEAR_FLAG_USE_MASK + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_neon, \ + 8888, 8888, 2, 2, \ + bilinear_add_8888_8888_process_last_pixel, \ + bilinear_add_8888_8888_process_two_pixels, \ + bilinear_add_8888_8888_process_four_pixels, \ + bilinear_add_8888_8888_process_pixblock_head, \ + bilinear_add_8888_8888_process_pixblock_tail, \ + bilinear_add_8888_8888_process_pixblock_tail_head, \ + 4, 28, 0 + +generate_bilinear_scanline_func \ + 
pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_neon, \ + 8888, 8888, 2, 2, \ + bilinear_add_8888_8_8888_process_last_pixel, \ + bilinear_add_8888_8_8888_process_two_pixels, \ + bilinear_add_8888_8_8888_process_four_pixels, \ + bilinear_add_8888_8_8888_process_pixblock_head, \ + bilinear_add_8888_8_8888_process_pixblock_tail, \ + bilinear_add_8888_8_8888_process_pixblock_tail_head, \ + 4, 28, BILINEAR_FLAG_USE_MASK -- cgit v1.2.3