Diffstat (limited to 'pixman/pixman-arm-simd-asm.S')
-rw-r--r--  pixman/pixman-arm-simd-asm.S  470
1 file changed, 236 insertions(+), 234 deletions(-)
diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
index a74a0a8..34d38f1 100644
--- a/pixman/pixman-arm-simd-asm.S
+++ b/pixman/pixman-arm-simd-asm.S
@@ -40,6 +40,8 @@
#include "pixman-arm-asm.h"
#include "pixman-arm-simd-asm.h"
+ pixman_syntax_unified
+
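pixman_syntax_unified is presumably a helper macro from pixman-arm-asm.h that switches the assembler to ARM unified syntax (UAL); the rest of the patch adjusts the macro bodies to the standard GNU as argument notation that goes with it. A bare reference such as WK&reg becomes WK\()\reg, where \reg substitutes the macro argument and \() marks where the argument name ends so it can be concatenated with surrounding text, and conditional mnemonics like orr&cond become orr\()\cond for the same reason. Suffix reordering later in the patch, such as ldmnedb becoming ldmdbne, follows the UAL convention of writing the condition code after the addressing-mode suffix.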
/* A head macro should do all processing which results in an output of up to
* 16 bytes, as far as the final load instruction. The corresponding tail macro
* should complete the processing of the up-to-16 bytes. The calling macro will
@@ -57,7 +59,7 @@
.endm
.macro blit_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
- pixld cond, numbytes, firstreg, SRC, unaligned_src
+ pixld \cond, \numbytes, \firstreg, SRC, \unaligned_src
.endm
.macro blit_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment
@@ -65,8 +67,8 @@
WK5 .req STRIDE_S
WK6 .req MASK
WK7 .req STRIDE_M
-110: pixld , 16, 0, SRC, unaligned_src
- pixld , 16, 4, SRC, unaligned_src
+110: pixld , 16, 0, SRC, \unaligned_src
+ pixld , 16, 4, SRC, \unaligned_src
pld [SRC, SCRATCH]
pixst , 16, 0, DST
pixst , 16, 4, DST
@@ -122,7 +124,7 @@ generate_composite_function \
.macro src_n_0565_init
ldrh SRC, [sp, #ARGS_STACK_OFFSET]
- orr SRC, SRC, lsl #16
+ orr SRC, SRC, SRC, lsl #16
mov STRIDE_S, SRC
mov MASK, SRC
mov STRIDE_M, SRC
@@ -130,8 +132,8 @@ generate_composite_function \
.macro src_n_8_init
ldrb SRC, [sp, #ARGS_STACK_OFFSET]
- orr SRC, SRC, lsl #8
- orr SRC, SRC, lsl #16
+ orr SRC, SRC, SRC, lsl #8
+ orr SRC, SRC, SRC, lsl #16
mov STRIDE_S, SRC
mov MASK, SRC
mov STRIDE_M, SRC
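The two solid-colour init macros above just replicate the source value across a full 32-bit word (and then across several registers) so that word-wide stores write multiple pixels per iteration; the explicit third register is needed because unified syntax does not accept the old shorthand that omitted the shifted operand. A rough C equivalent, with invented function names, purely for illustration:

    #include <stdint.h>

    /* Replicate a 16-bit r5g6b5 colour into both halves of a word. */
    static uint32_t replicate_0565(uint16_t c)
    {
        return (uint32_t)c | ((uint32_t)c << 16);
    }

    /* Replicate an 8-bit value into all four bytes of a word. */
    static uint32_t replicate_8(uint8_t c)
    {
        uint32_t x = c | ((uint32_t)c << 8);
        return x | (x << 16);
    }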
@@ -142,7 +144,7 @@ generate_composite_function \
WK5 .req STRIDE_S
WK6 .req MASK
WK7 .req STRIDE_M
- pixst cond, numbytes, 4, DST
+ pixst \cond, \numbytes, 4, DST
.unreq WK4
.unreq WK5
.unreq WK6
@@ -182,20 +184,20 @@ generate_composite_function \
/******************************************************************************/
.macro src_x888_8888_pixel, cond, reg
- orr&cond WK&reg, WK&reg, #0xFF000000
+ orr\()\cond WK\()\reg, WK\()\reg, #0xFF000000
.endm
.macro pixman_composite_src_x888_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
- pixld cond, numbytes, firstreg, SRC, unaligned_src
+ pixld \cond, \numbytes, \firstreg, SRC, \unaligned_src
.endm
.macro pixman_composite_src_x888_8888_process_tail cond, numbytes, firstreg
- src_x888_8888_pixel cond, %(firstreg+0)
- .if numbytes >= 8
- src_x888_8888_pixel cond, %(firstreg+1)
- .if numbytes == 16
- src_x888_8888_pixel cond, %(firstreg+2)
- src_x888_8888_pixel cond, %(firstreg+3)
+ src_x888_8888_pixel \cond, %(\firstreg+0)
+ .if \numbytes >= 8
+ src_x888_8888_pixel \cond, %(\firstreg+1)
+ .if \numbytes == 16
+ src_x888_8888_pixel \cond, %(\firstreg+2)
+ src_x888_8888_pixel \cond, %(\firstreg+3)
.endif
.endif
.endm
@@ -222,73 +224,73 @@ generate_composite_function \
.endm
.macro src_0565_8888_2pixels, reg1, reg2
- and SCRATCH, WK&reg1, MASK @ 00000GGGGGG0000000000gggggg00000
- bic WK&reg2, WK&reg1, MASK @ RRRRR000000BBBBBrrrrr000000bbbbb
- orr SCRATCH, SCRATCH, SCRATCH, lsr #6 @ 00000GGGGGGGGGGGG0000ggggggggggg
- mov WK&reg1, WK&reg2, lsl #16 @ rrrrr000000bbbbb0000000000000000
- mov SCRATCH, SCRATCH, ror #19 @ GGGG0000ggggggggggg00000GGGGGGGG
- bic WK&reg2, WK&reg2, WK&reg1, lsr #16 @ RRRRR000000BBBBB0000000000000000
- orr WK&reg1, WK&reg1, WK&reg1, lsr #5 @ rrrrrrrrrr0bbbbbbbbbb00000000000
- orr WK&reg2, WK&reg2, WK&reg2, lsr #5 @ RRRRRRRRRR0BBBBBBBBBB00000000000
- pkhtb WK&reg1, WK&reg1, WK&reg1, asr #5 @ rrrrrrrr--------bbbbbbbb--------
- sel WK&reg1, WK&reg1, SCRATCH @ rrrrrrrrggggggggbbbbbbbb--------
- mov SCRATCH, SCRATCH, ror #16 @ ggg00000GGGGGGGGGGGG0000gggggggg
- pkhtb WK&reg2, WK&reg2, WK&reg2, asr #5 @ RRRRRRRR--------BBBBBBBB--------
- sel WK&reg2, WK&reg2, SCRATCH @ RRRRRRRRGGGGGGGGBBBBBBBB--------
- orr WK&reg1, STRIDE_M, WK&reg1, lsr #8 @ 11111111rrrrrrrrggggggggbbbbbbbb
- orr WK&reg2, STRIDE_M, WK&reg2, lsr #8 @ 11111111RRRRRRRRGGGGGGGGBBBBBBBB
+ and SCRATCH, WK\()\reg1, MASK @ 00000GGGGGG0000000000gggggg00000
+ bic WK\()\reg2, WK\()\reg1, MASK @ RRRRR000000BBBBBrrrrr000000bbbbb
+ orr SCRATCH, SCRATCH, SCRATCH, lsr #6 @ 00000GGGGGGGGGGGG0000ggggggggggg
+ mov WK\()\reg1, WK\()\reg2, lsl #16 @ rrrrr000000bbbbb0000000000000000
+ mov SCRATCH, SCRATCH, ror #19 @ GGGG0000ggggggggggg00000GGGGGGGG
+ bic WK\()\reg2, WK\()\reg2, WK\()\reg1, lsr #16 @ RRRRR000000BBBBB0000000000000000
+ orr WK\()\reg1, WK\()\reg1, WK\()\reg1, lsr #5 @ rrrrrrrrrr0bbbbbbbbbb00000000000
+ orr WK\()\reg2, WK\()\reg2, WK\()\reg2, lsr #5 @ RRRRRRRRRR0BBBBBBBBBB00000000000
+ pkhtb WK\()\reg1, WK\()\reg1, WK\()\reg1, asr #5 @ rrrrrrrr--------bbbbbbbb--------
+ sel WK\()\reg1, WK\()\reg1, SCRATCH @ rrrrrrrrggggggggbbbbbbbb--------
+ mov SCRATCH, SCRATCH, ror #16 @ ggg00000GGGGGGGGGGGG0000gggggggg
+ pkhtb WK\()\reg2, WK\()\reg2, WK\()\reg2, asr #5 @ RRRRRRRR--------BBBBBBBB--------
+ sel WK\()\reg2, WK\()\reg2, SCRATCH @ RRRRRRRRGGGGGGGGBBBBBBBB--------
+ orr WK\()\reg1, STRIDE_M, WK\()\reg1, lsr #8 @ 11111111rrrrrrrrggggggggbbbbbbbb
+ orr WK\()\reg2, STRIDE_M, WK\()\reg2, lsr #8 @ 11111111RRRRRRRRGGGGGGGGBBBBBBBB
.endm
/* This version doesn't need STRIDE_M, but is one instruction longer.
It would however be preferable for an XRGB target, since we could knock off the last 2 instructions, but is that a common case?
- and SCRATCH, WK&reg1, MASK @ 00000GGGGGG0000000000gggggg00000
- bic WK&reg1, WK&reg1, MASK @ RRRRR000000BBBBBrrrrr000000bbbbb
- orr SCRATCH, SCRATCH, SCRATCH, lsr #6 @ 00000GGGGGGGGGGGG0000ggggggggggg
- mov WK&reg2, WK&reg1, lsr #16 @ 0000000000000000RRRRR000000BBBBB
- mov SCRATCH, SCRATCH, ror #27 @ GGGGGGGGGGGG0000ggggggggggg00000
- bic WK&reg1, WK&reg1, WK&reg2, lsl #16 @ 0000000000000000rrrrr000000bbbbb
- mov WK&reg2, WK&reg2, lsl #3 @ 0000000000000RRRRR000000BBBBB000
- mov WK&reg1, WK&reg1, lsl #3 @ 0000000000000rrrrr000000bbbbb000
- orr WK&reg2, WK&reg2, WK&reg2, lsr #5 @ 0000000000000RRRRRRRRRR0BBBBBBBB
- orr WK&reg1, WK&reg1, WK&reg1, lsr #5 @ 0000000000000rrrrrrrrrr0bbbbbbbb
- pkhbt WK&reg2, WK&reg2, WK&reg2, lsl #5 @ --------RRRRRRRR--------BBBBBBBB
- pkhbt WK&reg1, WK&reg1, WK&reg1, lsl #5 @ --------rrrrrrrr--------bbbbbbbb
- sel WK&reg2, SCRATCH, WK&reg2 @ --------RRRRRRRRGGGGGGGGBBBBBBBB
- sel WK&reg1, SCRATCH, WK&reg1 @ --------rrrrrrrrggggggggbbbbbbbb
- orr WK&reg2, WK&reg2, #0xFF000000 @ 11111111RRRRRRRRGGGGGGGGBBBBBBBB
- orr WK&reg1, WK&reg1, #0xFF000000 @ 11111111rrrrrrrrggggggggbbbbbbbb
+ and SCRATCH, WK\()\reg1, MASK @ 00000GGGGGG0000000000gggggg00000
+ bic WK\()\reg1, WK\()\reg1, MASK @ RRRRR000000BBBBBrrrrr000000bbbbb
+ orr SCRATCH, SCRATCH, SCRATCH, lsr #6 @ 00000GGGGGGGGGGGG0000ggggggggggg
+ mov WK\()\reg2, WK\()\reg1, lsr #16 @ 0000000000000000RRRRR000000BBBBB
+ mov SCRATCH, SCRATCH, ror #27 @ GGGGGGGGGGGG0000ggggggggggg00000
+ bic WK\()\reg1, WK\()\reg1, WK\()\reg2, lsl #16 @ 0000000000000000rrrrr000000bbbbb
+ mov WK\()\reg2, WK\()\reg2, lsl #3 @ 0000000000000RRRRR000000BBBBB000
+ mov WK\()\reg1, WK\()\reg1, lsl #3 @ 0000000000000rrrrr000000bbbbb000
+ orr WK\()\reg2, WK\()\reg2, WK\()\reg2, lsr #5 @ 0000000000000RRRRRRRRRR0BBBBBBBB
+ orr WK\()\reg1, WK\()\reg1, WK\()\reg1, lsr #5 @ 0000000000000rrrrrrrrrr0bbbbbbbb
+ pkhbt WK\()\reg2, WK\()\reg2, WK\()\reg2, lsl #5 @ --------RRRRRRRR--------BBBBBBBB
+ pkhbt WK\()\reg1, WK\()\reg1, WK\()\reg1, lsl #5 @ --------rrrrrrrr--------bbbbbbbb
+ sel WK\()\reg2, SCRATCH, WK\()\reg2 @ --------RRRRRRRRGGGGGGGGBBBBBBBB
+ sel WK\()\reg1, SCRATCH, WK\()\reg1 @ --------rrrrrrrrggggggggbbbbbbbb
+ orr WK\()\reg2, WK\()\reg2, #0xFF000000 @ 11111111RRRRRRRRGGGGGGGGBBBBBBBB
+ orr WK\()\reg1, WK\()\reg1, #0xFF000000 @ 11111111rrrrrrrrggggggggbbbbbbbb
*/
.macro src_0565_8888_1pixel, reg
- bic SCRATCH, WK&reg, MASK @ 0000000000000000rrrrr000000bbbbb
- and WK&reg, WK&reg, MASK @ 000000000000000000000gggggg00000
- mov SCRATCH, SCRATCH, lsl #3 @ 0000000000000rrrrr000000bbbbb000
- mov WK&reg, WK&reg, lsl #5 @ 0000000000000000gggggg0000000000
- orr SCRATCH, SCRATCH, SCRATCH, lsr #5 @ 0000000000000rrrrrrrrrr0bbbbbbbb
- orr WK&reg, WK&reg, WK&reg, lsr #6 @ 000000000000000gggggggggggg00000
- pkhbt SCRATCH, SCRATCH, SCRATCH, lsl #5 @ --------rrrrrrrr--------bbbbbbbb
- sel WK&reg, WK&reg, SCRATCH @ --------rrrrrrrrggggggggbbbbbbbb
- orr WK&reg, WK&reg, #0xFF000000 @ 11111111rrrrrrrrggggggggbbbbbbbb
+ bic SCRATCH, WK\()\reg, MASK @ 0000000000000000rrrrr000000bbbbb
+ and WK\()\reg, WK\()\reg, MASK @ 000000000000000000000gggggg00000
+ mov SCRATCH, SCRATCH, lsl #3 @ 0000000000000rrrrr000000bbbbb000
+ mov WK\()\reg, WK\()\reg, lsl #5 @ 0000000000000000gggggg0000000000
+ orr SCRATCH, SCRATCH, SCRATCH, lsr #5 @ 0000000000000rrrrrrrrrr0bbbbbbbb
+ orr WK\()\reg, WK\()\reg, WK\()\reg, lsr #6 @ 000000000000000gggggggggggg00000
+ pkhbt SCRATCH, SCRATCH, SCRATCH, lsl #5 @ --------rrrrrrrr--------bbbbbbbb
+ sel WK\()\reg, WK\()\reg, SCRATCH @ --------rrrrrrrrggggggggbbbbbbbb
+ orr WK\()\reg, WK\()\reg, #0xFF000000 @ 11111111rrrrrrrrggggggggbbbbbbbb
.endm
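The bit-pattern comments above track the same computation a scalar implementation would perform: widen each 5- or 6-bit channel to 8 bits by replicating its top bits into the freed low bits, then force the alpha byte to 0xFF. A hedged C sketch of one pixel (function name invented, not part of the patch):

    #include <stdint.h>

    /* Expand one r5g6b5 pixel to a8r8g8b8; channels are widened by bit
     * replication, which is what the shift-and-orr sequences above compute. */
    static uint32_t expand_0565_to_8888(uint16_t s)
    {
        uint32_t r = (s >> 11) & 0x1f;
        uint32_t g = (s >> 5)  & 0x3f;
        uint32_t b =  s        & 0x1f;

        r = (r << 3) | (r >> 2);   /* 5 -> 8 bits */
        g = (g << 2) | (g >> 4);   /* 6 -> 8 bits */
        b = (b << 3) | (b >> 2);   /* 5 -> 8 bits */

        return 0xff000000u | (r << 16) | (g << 8) | b;
    }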
.macro src_0565_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
- .if numbytes == 16
- pixldst ld,, 8, firstreg, %(firstreg+2),,, SRC, unaligned_src
- .elseif numbytes == 8
- pixld , 4, firstreg, SRC, unaligned_src
- .elseif numbytes == 4
- pixld , 2, firstreg, SRC, unaligned_src
+ .if \numbytes == 16
+ pixldst ld,, 8, \firstreg, %(\firstreg+2),,, SRC, \unaligned_src
+ .elseif \numbytes == 8
+ pixld , 4, \firstreg, SRC, \unaligned_src
+ .elseif \numbytes == 4
+ pixld , 2, \firstreg, SRC, \unaligned_src
.endif
.endm
.macro src_0565_8888_process_tail cond, numbytes, firstreg
- .if numbytes == 16
- src_0565_8888_2pixels firstreg, %(firstreg+1)
- src_0565_8888_2pixels %(firstreg+2), %(firstreg+3)
- .elseif numbytes == 8
- src_0565_8888_2pixels firstreg, %(firstreg+1)
+ .if \numbytes == 16
+ src_0565_8888_2pixels \firstreg, %(\firstreg+1)
+ src_0565_8888_2pixels %(\firstreg+2), %(\firstreg+3)
+ .elseif \numbytes == 8
+ src_0565_8888_2pixels \firstreg, %(\firstreg+1)
.else
- src_0565_8888_1pixel firstreg
+ src_0565_8888_1pixel \firstreg
.endif
.endm
@@ -311,23 +313,23 @@ generate_composite_function \
.endm
.macro src_x888_0565_1pixel s, d
- and WK&d, MASK, WK&s, lsr #3 @ 00000000000rrrrr00000000000bbbbb
- and STRIDE_S, WK&s, #0xFC00 @ 0000000000000000gggggg0000000000
- orr WK&d, WK&d, WK&d, lsr #5 @ 00000000000-----rrrrr000000bbbbb
- orr WK&d, WK&d, STRIDE_S, lsr #5 @ 00000000000-----rrrrrggggggbbbbb
+ and WK\()\d, MASK, WK\()\s, lsr #3 @ 00000000000rrrrr00000000000bbbbb
+ and STRIDE_S, WK\()\s, #0xFC00 @ 0000000000000000gggggg0000000000
+ orr WK\()\d, WK\()\d, WK\()\d, lsr #5 @ 00000000000-----rrrrr000000bbbbb
+ orr WK\()\d, WK\()\d, STRIDE_S, lsr #5 @ 00000000000-----rrrrrggggggbbbbb
/* Top 16 bits are discarded during the following STRH */
.endm
.macro src_x888_0565_2pixels slo, shi, d, tmp
- and SCRATCH, WK&shi, #0xFC00 @ 0000000000000000GGGGGG0000000000
- and WK&tmp, MASK, WK&shi, lsr #3 @ 00000000000RRRRR00000000000BBBBB
- and WK&shi, MASK, WK&slo, lsr #3 @ 00000000000rrrrr00000000000bbbbb
- orr WK&tmp, WK&tmp, WK&tmp, lsr #5 @ 00000000000-----RRRRR000000BBBBB
- orr WK&tmp, WK&tmp, SCRATCH, lsr #5 @ 00000000000-----RRRRRGGGGGGBBBBB
- and SCRATCH, WK&slo, #0xFC00 @ 0000000000000000gggggg0000000000
- orr WK&shi, WK&shi, WK&shi, lsr #5 @ 00000000000-----rrrrr000000bbbbb
- orr WK&shi, WK&shi, SCRATCH, lsr #5 @ 00000000000-----rrrrrggggggbbbbb
- pkhbt WK&d, WK&shi, WK&tmp, lsl #16 @ RRRRRGGGGGGBBBBBrrrrrggggggbbbbb
+ and SCRATCH, WK\()\shi, #0xFC00 @ 0000000000000000GGGGGG0000000000
+ and WK\()\tmp, MASK, WK\()\shi, lsr #3 @ 00000000000RRRRR00000000000BBBBB
+ and WK\()\shi, MASK, WK\()\slo, lsr #3 @ 00000000000rrrrr00000000000bbbbb
+ orr WK\()\tmp, WK\()\tmp, WK\()\tmp, lsr #5 @ 00000000000-----RRRRR000000BBBBB
+ orr WK\()\tmp, WK\()\tmp, SCRATCH, lsr #5 @ 00000000000-----RRRRRGGGGGGBBBBB
+ and SCRATCH, WK\()\slo, #0xFC00 @ 0000000000000000gggggg0000000000
+ orr WK\()\shi, WK\()\shi, WK\()\shi, lsr #5 @ 00000000000-----rrrrr000000bbbbb
+ orr WK\()\shi, WK\()\shi, SCRATCH, lsr #5 @ 00000000000-----rrrrrggggggbbbbb
+ pkhbt WK\()\d, WK\()\shi, WK\()\tmp, lsl #16 @ RRRRRGGGGGGBBBBBrrrrrggggggbbbbb
.endm
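Going the other way, the x888-to-0565 macros simply truncate each channel to its top 5 or 6 bits and pack the result into a halfword; the mask/shift/orr choreography above exists only to do this for two pixels at once with few registers. Roughly, per pixel, in C (a sketch with an invented name):

    #include <stdint.h>

    /* Pack one x8r8g8b8 pixel down to r5g6b5 by truncating each channel. */
    static uint16_t pack_8888_to_0565(uint32_t s)
    {
        return (uint16_t)(((s >> 8) & 0xf800) |   /* top 5 bits of red   */
                          ((s >> 5) & 0x07e0) |   /* top 6 bits of green */
                          ((s >> 3) & 0x001f));   /* top 5 bits of blue  */
    }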
.macro src_x888_0565_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
@@ -335,33 +337,33 @@ generate_composite_function \
WK5 .req STRIDE_M
WK6 .req WK3
WK7 .req ORIG_W
- .if numbytes == 16
+ .if \numbytes == 16
pixld , 16, 4, SRC, 0
src_x888_0565_2pixels 4, 5, 0, 0
pixld , 8, 4, SRC, 0
src_x888_0565_2pixels 6, 7, 1, 1
pixld , 8, 6, SRC, 0
.else
- pixld , numbytes*2, 4, SRC, 0
+ pixld , \numbytes*2, 4, SRC, 0
.endif
.endm
.macro src_x888_0565_process_tail cond, numbytes, firstreg
- .if numbytes == 16
+ .if \numbytes == 16
src_x888_0565_2pixels 4, 5, 2, 2
src_x888_0565_2pixels 6, 7, 3, 4
- .elseif numbytes == 8
+ .elseif \numbytes == 8
src_x888_0565_2pixels 4, 5, 1, 1
src_x888_0565_2pixels 6, 7, 2, 2
- .elseif numbytes == 4
+ .elseif \numbytes == 4
src_x888_0565_2pixels 4, 5, 1, 1
.else
src_x888_0565_1pixel 4, 1
.endif
- .if numbytes == 16
- pixst , numbytes, 0, DST
+ .if \numbytes == 16
+ pixst , \numbytes, 0, DST
.else
- pixst , numbytes, 1, DST
+ pixst , \numbytes, 1, DST
.endif
.unreq WK4
.unreq WK5
@@ -382,37 +384,37 @@ generate_composite_function \
/******************************************************************************/
.macro add_8_8_8pixels cond, dst1, dst2
- uqadd8&cond WK&dst1, WK&dst1, MASK
- uqadd8&cond WK&dst2, WK&dst2, STRIDE_M
+ uqadd8\()\cond WK\()\dst1, WK\()\dst1, MASK
+ uqadd8\()\cond WK\()\dst2, WK\()\dst2, STRIDE_M
.endm
.macro add_8_8_4pixels cond, dst
- uqadd8&cond WK&dst, WK&dst, MASK
+ uqadd8\()\cond WK\()\dst, WK\()\dst, MASK
.endm
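UQADD8 adds the four byte lanes of two words with unsigned saturation, which is the entirety of the ADD operator for a8 data. A minimal C model of that instruction, for reference rather than as part of the patch:

    #include <stdint.h>

    /* Byte-lane add with unsigned saturation, as performed by UQADD8. */
    static uint32_t uqadd8_model(uint32_t a, uint32_t b)
    {
        uint32_t r = 0;
        for (int shift = 0; shift < 32; shift += 8) {
            uint32_t sum = ((a >> shift) & 0xff) + ((b >> shift) & 0xff);
            if (sum > 0xff)
                sum = 0xff;
            r |= sum << shift;
        }
        return r;
    }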
.macro add_8_8_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
WK4 .req MASK
WK5 .req STRIDE_M
- .if numbytes == 16
- pixld cond, 8, 4, SRC, unaligned_src
- pixld cond, 16, firstreg, DST, 0
- add_8_8_8pixels cond, firstreg, %(firstreg+1)
- pixld cond, 8, 4, SRC, unaligned_src
+ .if \numbytes == 16
+ pixld \cond, 8, 4, SRC, \unaligned_src
+ pixld \cond, 16, \firstreg, DST, 0
+ add_8_8_8pixels \cond, \firstreg, %(\firstreg+1)
+ pixld \cond, 8, 4, SRC, \unaligned_src
.else
- pixld cond, numbytes, 4, SRC, unaligned_src
- pixld cond, numbytes, firstreg, DST, 0
+ pixld \cond, \numbytes, 4, SRC, \unaligned_src
+ pixld \cond, \numbytes, \firstreg, DST, 0
.endif
.unreq WK4
.unreq WK5
.endm
.macro add_8_8_process_tail cond, numbytes, firstreg
- .if numbytes == 16
- add_8_8_8pixels cond, %(firstreg+2), %(firstreg+3)
- .elseif numbytes == 8
- add_8_8_8pixels cond, firstreg, %(firstreg+1)
+ .if \numbytes == 16
+ add_8_8_8pixels \cond, %(\firstreg+2), %(\firstreg+3)
+ .elseif \numbytes == 8
+ add_8_8_8pixels \cond, \firstreg, %(\firstreg+1)
.else
- add_8_8_4pixels cond, firstreg
+ add_8_8_4pixels \cond, \firstreg
.endif
.endm
@@ -441,8 +443,8 @@ generate_composite_function \
WK5 .req STRIDE_S
WK6 .req STRIDE_M
WK7 .req ORIG_W
- pixld , numbytes, %(4+firstreg), SRC, unaligned_src
- pixld , numbytes, firstreg, DST, 0
+ pixld , \numbytes, %(4+\firstreg), SRC, \unaligned_src
+ pixld , \numbytes, \firstreg, DST, 0
.unreq WK4
.unreq WK5
.unreq WK6
@@ -451,44 +453,44 @@ generate_composite_function \
.macro over_8888_8888_check_transparent numbytes, reg0, reg1, reg2, reg3
/* Since these colours are premultiplied by alpha, only 0 indicates transparent (any other colour with 0 in the alpha byte is luminous) */
- teq WK&reg0, #0
- .if numbytes > 4
- teqeq WK&reg1, #0
- .if numbytes > 8
- teqeq WK&reg2, #0
- teqeq WK&reg3, #0
+ teq WK\()\reg0, #0
+ .if \numbytes > 4
+ teqeq WK\()\reg1, #0
+ .if \numbytes > 8
+ teqeq WK\()\reg2, #0
+ teqeq WK\()\reg3, #0
.endif
.endif
.endm
.macro over_8888_8888_prepare next
- mov WK&next, WK&next, lsr #24
+ mov WK\()\next, WK\()\next, lsr #24
.endm
.macro over_8888_8888_1pixel src, dst, offset, next
/* src = destination component multiplier */
- rsb WK&src, WK&src, #255
+ rsb WK\()\src, WK\()\src, #255
/* Split even/odd bytes of dst into SCRATCH/dst */
- uxtb16 SCRATCH, WK&dst
- uxtb16 WK&dst, WK&dst, ror #8
+ uxtb16 SCRATCH, WK\()\dst
+ uxtb16 WK\()\dst, WK\()\dst, ror #8
/* Multiply through, adding 0.5 to the upper byte of result for rounding */
- mla SCRATCH, SCRATCH, WK&src, MASK
- mla WK&dst, WK&dst, WK&src, MASK
+ mla SCRATCH, SCRATCH, WK\()\src, MASK
+ mla WK\()\dst, WK\()\dst, WK\()\src, MASK
/* Where we would have had a stall between the result of the first MLA and the shifter input,
* reload the complete source pixel */
- ldr WK&src, [SRC, #offset]
+ ldr WK\()\src, [SRC, #\offset]
/* Multiply by 257/256 to approximate 256/255 */
uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
/* In this stall, start processing the next pixel */
- .if offset < -4
- mov WK&next, WK&next, lsr #24
+ .if \offset < -4
+ mov WK\()\next, WK\()\next, lsr #24
.endif
- uxtab16 WK&dst, WK&dst, WK&dst, ror #8
+ uxtab16 WK\()\dst, WK\()\dst, WK\()\dst, ror #8
/* Recombine even/odd bytes of multiplied destination */
mov SCRATCH, SCRATCH, ror #8
- sel WK&dst, SCRATCH, WK&dst
+ sel WK\()\dst, SCRATCH, WK\()\dst
/* Saturated add of source to multiplied destination */
- uqadd8 WK&dst, WK&dst, WK&src
+ uqadd8 WK\()\dst, WK\()\dst, WK\()\src
.endm
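The pixel routine above is the usual premultiplied OVER: each destination channel is scaled by 255 minus the source alpha, then the source is added with per-byte saturation. The MLA against MASK (preloaded elsewhere with a rounding constant of 0x80 per byte lane, not visible in this hunk) followed by UXTAB16 ... ROR #8 is the multiply-by-257/256 trick that approximates division by 255. A hedged C sketch of the same arithmetic, with invented helper names:

    #include <stdint.h>

    /* Approximate (x * a) / 255 with rounding: t = x*a + 0x80; (t + (t>>8)) >> 8. */
    static uint32_t mul_div_255(uint32_t x, uint32_t a)
    {
        uint32_t t = x * a + 0x80;
        return (t + (t >> 8)) >> 8;
    }

    /* Premultiplied OVER for one a8r8g8b8 pixel:
     * dst = src + dst * (255 - src.a) / 255, with a saturating add like UQADD8. */
    static uint32_t over_1pixel(uint32_t src, uint32_t dst)
    {
        uint32_t ia = 255 - (src >> 24);
        uint32_t result = 0;
        for (int shift = 0; shift < 32; shift += 8) {
            uint32_t c = ((src >> shift) & 0xff) +
                         mul_div_255((dst >> shift) & 0xff, ia);
            if (c > 0xff)
                c = 0xff;
            result |= c << shift;
        }
        return result;
    }

The check_transparent shortcut works because, with premultiplied alpha, an all-zero word is the only fully transparent value, so whole blocks of transparent source pixels skip the arithmetic entirely.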
.macro over_8888_8888_process_tail cond, numbytes, firstreg
@@ -496,17 +498,17 @@ generate_composite_function \
WK5 .req STRIDE_S
WK6 .req STRIDE_M
WK7 .req ORIG_W
- over_8888_8888_check_transparent numbytes, %(4+firstreg), %(5+firstreg), %(6+firstreg), %(7+firstreg)
+ over_8888_8888_check_transparent \numbytes, %(4+\firstreg), %(5+\firstreg), %(6+\firstreg), %(7+\firstreg)
beq 10f
- over_8888_8888_prepare %(4+firstreg)
- .set PROCESS_REG, firstreg
- .set PROCESS_OFF, -numbytes
- .rept numbytes / 4
+ over_8888_8888_prepare %(4+\firstreg)
+ .set PROCESS_REG, \firstreg
+ .set PROCESS_OFF, -\numbytes
+ .rept \numbytes / 4
over_8888_8888_1pixel %(4+PROCESS_REG), %(0+PROCESS_REG), PROCESS_OFF, %(5+PROCESS_REG)
.set PROCESS_REG, PROCESS_REG+1
.set PROCESS_OFF, PROCESS_OFF+4
.endr
- pixst , numbytes, firstreg, DST
+ pixst , \numbytes, \firstreg, DST
10:
.unreq WK4
.unreq WK5
@@ -536,16 +538,16 @@ generate_composite_function \
*/
.macro mul_8888_8 word, byte, tmp, half
/* Split even/odd bytes of word apart */
- uxtb16 tmp, word
- uxtb16 word, word, ror #8
+ uxtb16 \tmp, \word
+ uxtb16 \word, \word, ror #8
/* Multiply bytes together with rounding, then by 257/256 */
- mla tmp, tmp, byte, half
- mla word, word, byte, half /* 1 stall follows */
- uxtab16 tmp, tmp, tmp, ror #8 /* 1 stall follows */
- uxtab16 word, word, word, ror #8
+ mla \tmp, \tmp, \byte, \half
+ mla \word, \word, \byte, \half /* 1 stall follows */
+ uxtab16 \tmp, \tmp, \tmp, ror #8 /* 1 stall follows */
+ uxtab16 \word, \word, \word, ror #8
/* Recombine bytes */
- mov tmp, tmp, ror #8
- sel word, tmp, word
+ mov \tmp, \tmp, ror #8
+ sel \word, \tmp, \word
.endm
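mul_8888_8 scales all four byte lanes of a word by a single 8-bit factor using the same rounding-and-257/256 pattern; roughly, in C (a sketch, not the library's API):

    #include <stdint.h>

    /* Multiply each byte lane of word by byte/255, with rounding. */
    static uint32_t mul_8888_8_model(uint32_t word, uint32_t byte)
    {
        uint32_t result = 0;
        for (int shift = 0; shift < 32; shift += 8) {
            uint32_t t = ((word >> shift) & 0xff) * byte + 0x80;
            result |= (((t + (t >> 8)) >> 8) & 0xff) << shift;
        }
        return result;
    }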
/******************************************************************************/
@@ -567,8 +569,8 @@ generate_composite_function \
WK5 .req STRIDE_D
WK6 .req STRIDE_S
WK7 .req ORIG_W
- pixld , numbytes, %(4+(firstreg%2)), SRC, unaligned_src
- pixld , numbytes, firstreg, DST, 0
+ pixld , \numbytes, %(4+(\firstreg%2)), SRC, \unaligned_src
+ pixld , \numbytes, \firstreg, DST, 0
.unreq WK4
.unreq WK5
.unreq WK6
@@ -576,10 +578,10 @@ generate_composite_function \
.endm
.macro over_8888_n_8888_1pixel src, dst
- mul_8888_8 WK&src, MASK, SCRATCH, STRIDE_M
- sub WK7, WK6, WK&src, lsr #24
- mul_8888_8 WK&dst, WK7, SCRATCH, STRIDE_M
- uqadd8 WK&dst, WK&dst, WK&src
+ mul_8888_8 WK\()\src, MASK, SCRATCH, STRIDE_M
+ sub WK7, WK6, WK\()\src, lsr #24
+ mul_8888_8 WK\()\dst, WK7, SCRATCH, STRIDE_M
+ uqadd8 WK\()\dst, WK\()\dst, WK\()\src
.endm
.macro over_8888_n_8888_process_tail cond, numbytes, firstreg
@@ -587,12 +589,12 @@ generate_composite_function \
WK5 .req STRIDE_D
WK6 .req STRIDE_S
WK7 .req ORIG_W
- over_8888_8888_check_transparent numbytes, %(4+(firstreg%2)), %(5+(firstreg%2)), %(6+firstreg), %(7+firstreg)
+ over_8888_8888_check_transparent \numbytes, %(4+(\firstreg%2)), %(5+(\firstreg%2)), %(6+\firstreg), %(7+\firstreg)
beq 10f
mov WK6, #255
- .set PROCESS_REG, firstreg
- .rept numbytes / 4
- .if numbytes == 16 && PROCESS_REG == 2
+ .set PROCESS_REG, \firstreg
+ .rept \numbytes / 4
+ .if \numbytes == 16 && PROCESS_REG == 2
/* We're using WK6 and WK7 as temporaries, so half way through
* 4 pixels, reload the second two source pixels but this time
* into WK4 and WK5 */
@@ -601,7 +603,7 @@ generate_composite_function \
over_8888_n_8888_1pixel %(4+(PROCESS_REG%2)), %(PROCESS_REG)
.set PROCESS_REG, PROCESS_REG+1
.endr
- pixst , numbytes, firstreg, DST
+ pixst , \numbytes, \firstreg, DST
10:
.unreq WK4
.unreq WK5
@@ -642,13 +644,13 @@ generate_composite_function \
.macro over_n_8_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
WK4 .req STRIDE_M
- pixld , numbytes/4, 4, MASK, unaligned_mask
- pixld , numbytes, firstreg, DST, 0
+ pixld , \numbytes/4, 4, MASK, \unaligned_mask
+ pixld , \numbytes, \firstreg, DST, 0
.unreq WK4
.endm
.macro over_n_8_8888_1pixel src, dst
- uxtb Y, WK4, ror #src*8
+ uxtb Y, WK4, ror #\src*8
/* Trailing part of multiplication of source */
mla SCRATCH, STRIDE_S, Y, STRIDE_D
mla Y, SRC, Y, STRIDE_D
@@ -659,20 +661,20 @@ generate_composite_function \
sub ORIG_W, ORIG_W, Y, lsr #24
sel Y, SCRATCH, Y
/* Then multiply the destination */
- mul_8888_8 WK&dst, ORIG_W, SCRATCH, STRIDE_D
- uqadd8 WK&dst, WK&dst, Y
+ mul_8888_8 WK\()\dst, ORIG_W, SCRATCH, STRIDE_D
+ uqadd8 WK\()\dst, WK\()\dst, Y
.endm
.macro over_n_8_8888_process_tail cond, numbytes, firstreg
WK4 .req STRIDE_M
teq WK4, #0
beq 10f
- .set PROCESS_REG, firstreg
- .rept numbytes / 4
- over_n_8_8888_1pixel %(PROCESS_REG-firstreg), %(PROCESS_REG)
+ .set PROCESS_REG, \firstreg
+ .rept \numbytes / 4
+ over_n_8_8888_1pixel %(PROCESS_REG-\firstreg), %(PROCESS_REG)
.set PROCESS_REG, PROCESS_REG+1
.endr
- pixst , numbytes, firstreg, DST
+ pixst , \numbytes, \firstreg, DST
10:
.unreq WK4
.endm
@@ -705,14 +707,14 @@ generate_composite_function \
.endm
.macro over_reverse_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
- pixld , numbytes, firstreg, DST, 0
+ pixld , \numbytes, \firstreg, DST, 0
.endm
.macro over_reverse_n_8888_1pixel d, is_only
- teq WK&d, #0
+ teq WK\()\d, #0
beq 8f /* replace with source */
- bics ORIG_W, STRIDE_D, WK&d, lsr #24
- .if is_only == 1
+ bics ORIG_W, STRIDE_D, WK\()\d, lsr #24
+ .if \is_only == 1
beq 49f /* skip store */
.else
beq 9f /* write same value back */
@@ -723,36 +725,36 @@ generate_composite_function \
uxtab16 ORIG_W, ORIG_W, ORIG_W, ror #8
mov SCRATCH, SCRATCH, ror #8
sel ORIG_W, SCRATCH, ORIG_W
- uqadd8 WK&d, WK&d, ORIG_W
+ uqadd8 WK\()\d, WK\()\d, ORIG_W
b 9f
-8: mov WK&d, SRC
+8: mov WK\()\d, SRC
9:
.endm
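This implements OVER_REVERSE with a solid source: the existing destination stays on top and the source only fills whatever translucency the destination leaves, i.e. per pixel dst = dst + src * (255 - dst.a) / 255. The early branches handle the cheap cases (a fully transparent destination just takes the source; a fully opaque one is left alone). A hedged C sketch of the general case, assuming STRIDE_D holds 0xFF as the BICS above suggests:

    #include <stdint.h>

    /* OVER_REVERSE with a solid, premultiplied source pixel. */
    static uint32_t over_reverse_1pixel(uint32_t src, uint32_t dst)
    {
        if (dst == 0)
            return src;                      /* fully transparent destination */
        uint32_t ida = 255 - (dst >> 24);    /* inverse destination alpha */
        if (ida == 0)
            return dst;                      /* fully opaque destination */
        uint32_t result = 0;
        for (int shift = 0; shift < 32; shift += 8) {
            uint32_t t = ((src >> shift) & 0xff) * ida + 0x80;
            uint32_t c = ((dst >> shift) & 0xff) + ((t + (t >> 8)) >> 8);
            if (c > 0xff)
                c = 0xff;
            result |= c << shift;
        }
        return result;
    }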
.macro over_reverse_n_8888_tail numbytes, reg1, reg2, reg3, reg4
- .if numbytes == 4
- over_reverse_n_8888_1pixel reg1, 1
+ .if \numbytes == 4
+ over_reverse_n_8888_1pixel \reg1, 1
.else
- and SCRATCH, WK&reg1, WK&reg2
- .if numbytes == 16
- and SCRATCH, SCRATCH, WK&reg3
- and SCRATCH, SCRATCH, WK&reg4
+ and SCRATCH, WK\()\reg1, WK\()\reg2
+ .if \numbytes == 16
+ and SCRATCH, SCRATCH, WK\()\reg3
+ and SCRATCH, SCRATCH, WK\()\reg4
.endif
mvns SCRATCH, SCRATCH, asr #24
beq 49f /* skip store if all opaque */
- over_reverse_n_8888_1pixel reg1, 0
- over_reverse_n_8888_1pixel reg2, 0
- .if numbytes == 16
- over_reverse_n_8888_1pixel reg3, 0
- over_reverse_n_8888_1pixel reg4, 0
+ over_reverse_n_8888_1pixel \reg1, 0
+ over_reverse_n_8888_1pixel \reg2, 0
+ .if \numbytes == 16
+ over_reverse_n_8888_1pixel \reg3, 0
+ over_reverse_n_8888_1pixel \reg4, 0
.endif
.endif
- pixst , numbytes, reg1, DST
+ pixst , \numbytes, \reg1, DST
49:
.endm
.macro over_reverse_n_8888_process_tail cond, numbytes, firstreg
- over_reverse_n_8888_tail numbytes, firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3)
+ over_reverse_n_8888_tail \numbytes, \firstreg, %(\firstreg+1), %(\firstreg+2), %(\firstreg+3)
.endm
generate_composite_function \
@@ -794,20 +796,20 @@ generate_composite_function \
.macro over_white_8888_8888_ca_combine m, d
uxtb16 TMP1, TMP0 /* rb_notmask */
- uxtb16 TMP2, d /* rb_dest; 1 stall follows */
+ uxtb16 TMP2, \d /* rb_dest; 1 stall follows */
smlatt TMP3, TMP2, TMP1, HALF /* red */
smlabb TMP2, TMP2, TMP1, HALF /* blue */
uxtb16 TMP0, TMP0, ror #8 /* ag_notmask */
- uxtb16 TMP1, d, ror #8 /* ag_dest; 1 stall follows */
- smlatt d, TMP1, TMP0, HALF /* alpha */
+ uxtb16 TMP1, \d, ror #8 /* ag_dest; 1 stall follows */
+ smlatt \d, TMP1, TMP0, HALF /* alpha */
smlabb TMP1, TMP1, TMP0, HALF /* green */
pkhbt TMP0, TMP2, TMP3, lsl #16 /* rb; 1 stall follows */
- pkhbt TMP1, TMP1, d, lsl #16 /* ag */
+ pkhbt TMP1, TMP1, \d, lsl #16 /* ag */
uxtab16 TMP0, TMP0, TMP0, ror #8
uxtab16 TMP1, TMP1, TMP1, ror #8
mov TMP0, TMP0, ror #8
- sel d, TMP0, TMP1
- uqadd8 d, d, m /* d is a late result */
+ sel \d, TMP0, TMP1
+ uqadd8 \d, \d, \m /* d is a late result */
.endm
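With a white source and a component-alpha mask, OVER reduces to result = m + dst * (255 - m) / 255 per channel; the combine macro above works from the complement of the mask (TMP0 holds the "notmask") and finishes with the same saturating add. Sketched in C, with an invented name and assuming a8r8g8b8 packing:

    #include <stdint.h>

    /* OVER with solid white source and per-component mask m:
     * each channel becomes m + dst*(255 - m)/255, saturated. */
    static uint32_t over_white_ca_1pixel(uint32_t m, uint32_t dst)
    {
        uint32_t result = 0;
        for (int shift = 0; shift < 32; shift += 8) {
            uint32_t mc = (m   >> shift) & 0xff;
            uint32_t dc = (dst >> shift) & 0xff;
            uint32_t t  = dc * (255 - mc) + 0x80;
            uint32_t c  = mc + ((t + (t >> 8)) >> 8);
            if (c > 0xff)
                c = 0xff;
            result |= c << shift;
        }
        return result;
    }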
.macro over_white_8888_8888_ca_1pixel_head
@@ -853,10 +855,10 @@ generate_composite_function \
.endm
.macro over_white_8888_8888_ca_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
- .if numbytes == 4
+ .if \numbytes == 4
over_white_8888_8888_ca_1pixel_head
.else
- .if numbytes == 16
+ .if \numbytes == 16
over_white_8888_8888_ca_2pixels_head
over_white_8888_8888_ca_2pixels_tail
.endif
@@ -865,7 +867,7 @@ generate_composite_function \
.endm
.macro over_white_8888_8888_ca_process_tail cond, numbytes, firstreg
- .if numbytes == 4
+ .if \numbytes == 4
over_white_8888_8888_ca_1pixel_tail
.else
over_white_8888_8888_ca_2pixels_tail
@@ -1004,7 +1006,7 @@ generate_composite_function \
.endm
.macro over_n_8888_8888_ca_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
- .rept (numbytes / 4) - 1
+ .rept (\numbytes / 4) - 1
over_n_8888_8888_ca_1pixel_head
over_n_8888_8888_ca_1pixel_tail
.endr
@@ -1020,7 +1022,7 @@ pixman_asm_function pixman_composite_over_n_8888_8888_ca_asm_armv6
cmp ip, #-1
beq pixman_composite_over_white_8888_8888_ca_asm_armv6
/* else drop through... */
- .endfunc
+pixman_end_asm_function
generate_composite_function \
pixman_composite_over_n_8888_8888_ca_asm_armv6_helper, 0, 32, 32 \
FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH | FLAG_PROCESS_CORRUPTS_WK0 \
@@ -1045,84 +1047,84 @@ generate_composite_function \
.macro in_reverse_8888_8888_head numbytes, reg1, reg2, reg3
ldrb ORIG_W, [SRC], #4
- .if numbytes >= 8
- ldrb WK&reg1, [SRC], #4
- .if numbytes == 16
- ldrb WK&reg2, [SRC], #4
- ldrb WK&reg3, [SRC], #4
+ .if \numbytes >= 8
+ ldrb WK\()\reg1, [SRC], #4
+ .if \numbytes == 16
+ ldrb WK\()\reg2, [SRC], #4
+ ldrb WK\()\reg3, [SRC], #4
.endif
.endif
- add DST, DST, #numbytes
+ add DST, DST, #\numbytes
.endm
.macro in_reverse_8888_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
- in_reverse_8888_8888_head numbytes, firstreg, %(firstreg+1), %(firstreg+2)
+ in_reverse_8888_8888_head \numbytes, \firstreg, %(\firstreg+1), %(\firstreg+2)
.endm
.macro in_reverse_8888_8888_1pixel s, d, offset, is_only
- .if is_only != 1
- movs s, ORIG_W
- .if offset != 0
- ldrb ORIG_W, [SRC, #offset]
+ .if \is_only != 1
+ movs \s, ORIG_W
+ .if \offset != 0
+ ldrb ORIG_W, [SRC, #\offset]
.endif
beq 01f
teq STRIDE_M, #0xFF
beq 02f
.endif
- uxtb16 SCRATCH, d /* rb_dest */
- uxtb16 d, d, ror #8 /* ag_dest */
- mla SCRATCH, SCRATCH, s, MASK
- mla d, d, s, MASK
+ uxtb16 SCRATCH, \d /* rb_dest */
+ uxtb16 \d, \d, ror #8 /* ag_dest */
+ mla SCRATCH, SCRATCH, \s, MASK
+ mla \d, \d, \s, MASK
uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
- uxtab16 d, d, d, ror #8
+ uxtab16 \d, \d, \d, ror #8
mov SCRATCH, SCRATCH, ror #8
- sel d, SCRATCH, d
+ sel \d, SCRATCH, \d
b 02f
- .if offset == 0
+ .if \offset == 0
48: /* Last mov d,#0 of the set - used as part of shortcut for
* source values all 0 */
.endif
-01: mov d, #0
+01: mov \d, #0
02:
.endm
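in_reverse multiplies each destination channel by the source pixel's alpha (the IN_REVERSE operator), short-circuiting the two common extremes: zero source alpha clears the destination pixel and 0xFF leaves it untouched. Roughly, in C (a sketch; the tail macro below additionally batches these tests across two or four pixels at once):

    #include <stdint.h>

    /* IN_REVERSE for one pixel: scale every destination channel by the
     * source alpha, i.e. dst = dst * src.a / 255, with rounding. */
    static uint32_t in_reverse_1pixel(uint32_t src_alpha, uint32_t dst)
    {
        if (src_alpha == 0)
            return 0;
        if (src_alpha == 0xff)
            return dst;
        uint32_t result = 0;
        for (int shift = 0; shift < 32; shift += 8) {
            uint32_t t = ((dst >> shift) & 0xff) * src_alpha + 0x80;
            result |= (((t + (t >> 8)) >> 8) & 0xff) << shift;
        }
        return result;
    }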
.macro in_reverse_8888_8888_tail numbytes, reg1, reg2, reg3, reg4
- .if numbytes == 4
+ .if \numbytes == 4
teq ORIG_W, ORIG_W, asr #32
- ldrne WK&reg1, [DST, #-4]
- .elseif numbytes == 8
- teq ORIG_W, WK&reg1
+ ldrne WK\()\reg1, [DST, #-4]
+ .elseif \numbytes == 8
+ teq ORIG_W, WK\()\reg1
teqeq ORIG_W, ORIG_W, asr #32 /* all 0 or all -1? */
- ldmnedb DST, {WK&reg1-WK&reg2}
+ ldmdbne DST, {WK\()\reg1-WK\()\reg2}
.else
- teq ORIG_W, WK&reg1
- teqeq ORIG_W, WK&reg2
- teqeq ORIG_W, WK&reg3
+ teq ORIG_W, WK\()\reg1
+ teqeq ORIG_W, WK\()\reg2
+ teqeq ORIG_W, WK\()\reg3
teqeq ORIG_W, ORIG_W, asr #32 /* all 0 or all -1? */
- ldmnedb DST, {WK&reg1-WK&reg4}
+ ldmdbne DST, {WK\()\reg1-WK\()\reg4}
.endif
cmnne DST, #0 /* clear C if NE */
bcs 49f /* no writes to dest if source all -1 */
beq 48f /* set dest to all 0 if source all 0 */
- .if numbytes == 4
- in_reverse_8888_8888_1pixel ORIG_W, WK&reg1, 0, 1
- str WK&reg1, [DST, #-4]
- .elseif numbytes == 8
- in_reverse_8888_8888_1pixel STRIDE_M, WK&reg1, -4, 0
- in_reverse_8888_8888_1pixel STRIDE_M, WK&reg2, 0, 0
- stmdb DST, {WK&reg1-WK&reg2}
+ .if \numbytes == 4
+ in_reverse_8888_8888_1pixel ORIG_W, WK\()\reg1, 0, 1
+ str WK\()\reg1, [DST, #-4]
+ .elseif \numbytes == 8
+ in_reverse_8888_8888_1pixel STRIDE_M, WK\()\reg1, -4, 0
+ in_reverse_8888_8888_1pixel STRIDE_M, WK\()\reg2, 0, 0
+ stmdb DST, {WK\()\reg1-WK\()\reg2}
.else
- in_reverse_8888_8888_1pixel STRIDE_M, WK&reg1, -12, 0
- in_reverse_8888_8888_1pixel STRIDE_M, WK&reg2, -8, 0
- in_reverse_8888_8888_1pixel STRIDE_M, WK&reg3, -4, 0
- in_reverse_8888_8888_1pixel STRIDE_M, WK&reg4, 0, 0
- stmdb DST, {WK&reg1-WK&reg4}
+ in_reverse_8888_8888_1pixel STRIDE_M, WK\()\reg1, -12, 0
+ in_reverse_8888_8888_1pixel STRIDE_M, WK\()\reg2, -8, 0
+ in_reverse_8888_8888_1pixel STRIDE_M, WK\()\reg3, -4, 0
+ in_reverse_8888_8888_1pixel STRIDE_M, WK\()\reg4, 0, 0
+ stmdb DST, {WK\()\reg1-WK\()\reg4}
.endif
49:
.endm
.macro in_reverse_8888_8888_process_tail cond, numbytes, firstreg
- in_reverse_8888_8888_tail numbytes, firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3)
+ in_reverse_8888_8888_tail \numbytes, \firstreg, %(\firstreg+1), %(\firstreg+2), %(\firstreg+3)
.endm
generate_composite_function \
@@ -1149,21 +1151,21 @@ generate_composite_function \
.endm
.macro over_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
- pixld , numbytes, firstreg, DST, 0
+ pixld , \numbytes, \firstreg, DST, 0
.endm
.macro over_n_8888_1pixel dst
- mul_8888_8 WK&dst, STRIDE_M, SCRATCH, MASK
- uqadd8 WK&dst, WK&dst, SRC
+ mul_8888_8 WK\()\dst, STRIDE_M, SCRATCH, MASK
+ uqadd8 WK\()\dst, WK\()\dst, SRC
.endm
.macro over_n_8888_process_tail cond, numbytes, firstreg
- .set PROCESS_REG, firstreg
- .rept numbytes / 4
+ .set PROCESS_REG, \firstreg
+ .rept \numbytes / 4
over_n_8888_1pixel %(PROCESS_REG)
.set PROCESS_REG, PROCESS_REG+1
.endr
- pixst , numbytes, firstreg, DST
+ pixst , \numbytes, \firstreg, DST
.endm
generate_composite_function \