diff options
-rw-r--r-- | meson.build | 7 | ||||
-rw-r--r-- | pixman/pixman-arm-asm.h | 6 | ||||
-rw-r--r-- | pixman/pixman-arm-neon-asm-bilinear.S | 2 | ||||
-rw-r--r-- | pixman/pixman-arm-neon-asm.S | 70 | ||||
-rw-r--r-- | pixman/pixman-arm-neon-asm.h | 24 | ||||
-rw-r--r-- | pixman/pixman-arm-simd-asm-scaled.S | 19 | ||||
-rw-r--r-- | pixman/pixman-arm-simd-asm.S | 17 | ||||
-rw-r--r-- | pixman/pixman-arm-simd-asm.h | 34 |
8 files changed, 74 insertions, 105 deletions
diff --git a/meson.build b/meson.build index 4337f93..438e6cf 100644 --- a/meson.build +++ b/meson.build @@ -252,6 +252,13 @@ if cc.compiles(''' config.set('ASM_HAVE_FUNC_DIRECTIVE', 1) endif +if cc.compiles(''' + __asm__ ( + ".syntax unified\n" + );''', + name : 'test for ASM .syntax unified directive') + config.set('ASM_HAVE_SYNTAX_UNIFIED', 1) +endif if cc.links(''' #include <stdint.h> diff --git a/pixman/pixman-arm-asm.h b/pixman/pixman-arm-asm.h index 8253906..edf8e82 100644 --- a/pixman/pixman-arm-asm.h +++ b/pixman/pixman-arm-asm.h @@ -50,6 +50,12 @@ #endif .endm +.macro pixman_syntax_unified +#ifdef ASM_HAVE_SYNTAX_UNIFIED + .syntax unified +#endif +.endm + .macro pixman_end_asm_function #ifdef ASM_HAVE_FUNC_DIRECTIVE .endfunc diff --git a/pixman/pixman-arm-neon-asm-bilinear.S b/pixman/pixman-arm-neon-asm-bilinear.S index ce4d5f8..6bd2736 100644 --- a/pixman/pixman-arm-neon-asm-bilinear.S +++ b/pixman/pixman-arm-neon-asm-bilinear.S @@ -68,6 +68,8 @@ #include "pixman-arm-asm.h" #include "pixman-arm-neon-asm.h" +pixman_syntax_unified + /* * Bilinear macros from pixman-arm-neon-asm.S */ diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S index 7025eba..0e09257 100644 --- a/pixman/pixman-arm-neon-asm.S +++ b/pixman/pixman-arm-neon-asm.S @@ -34,12 +34,6 @@ * - pixman_composite_over_n_8_0565_asm_neon */ -#ifdef __clang__ -#define ldrgeb ldrbge -#define subges subsge -#define subpls subspl -#endif - /* Prevent the stack from becoming executable for no reason... */ #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits @@ -59,6 +53,8 @@ #include "pixman-arm-asm.h" #include "pixman-arm-neon-asm.h" + pixman_syntax_unified + /* Global configuration options and preferences */ /* @@ -287,12 +283,12 @@ PF subge, PF_X, PF_X, ORIG_W vrshr.u16 q3, q11, #8 vrshr.u16 q15, q12, #8 - PF subges, PF_CTL, PF_CTL, #0x10 + PF subsge, PF_CTL, PF_CTL, #0x10 vsri.u16 q14, q9, #11 - PF ldrgeb, DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + PF ldrbge, DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! vraddhn.u16 d20, q10, q13 vraddhn.u16 d23, q11, q3 - PF ldrgeb, DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + PF ldrbge, DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! vraddhn.u16 d22, q12, q15 vst1.16 {d28, d29}, [DST_W, :128]! .endm @@ -451,9 +447,9 @@ generate_composite_function \ vshll.u8 q8, d1, #8 vst1.16 {d28, d29}, [DST_W, :128]! PF subge, PF_X, PF_X, ORIG_W - PF subges, PF_CTL, PF_CTL, #0x10 + PF subsge, PF_CTL, PF_CTL, #0x10 vshll.u8 q14, d2, #8 - PF ldrgeb, DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + PF ldrbge, DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! vshll.u8 q9, d0, #8 .endm @@ -525,10 +521,10 @@ generate_composite_function \ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] PF subge, PF_X, PF_X, ORIG_W - PF subges, PF_CTL, PF_CTL, #0x10 + PF subsge, PF_CTL, PF_CTL, #0x10 vqadd.u8 q14, q0, q2 - PF ldrgeb, DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! - PF ldrgeb, DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + PF ldrbge, DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + PF ldrbge, DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! vqadd.u8 q15, q1, q3 .endm @@ -557,10 +553,10 @@ generate_composite_function \ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] PF subge, PF_X, PF_X, ORIG_W - PF subges, PF_CTL, PF_CTL, #0x10 + PF subsge, PF_CTL, PF_CTL, #0x10 vqadd.u8 q14, q0, q2 - PF ldrgeb, DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! - PF ldrgeb, DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + PF ldrbge, DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + PF ldrbge, DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! vqadd.u8 q15, q1, q3 .endm @@ -631,9 +627,9 @@ generate_composite_function_single_scanline \ vmull.u8 q8, d22, d4 PF subsge, PF_CTL, PF_CTL, #0x10 vmull.u8 q9, d22, d5 - PF ldrgeb, DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + PF ldrbge, DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! vmull.u8 q10, d22, d6 - PF ldrgeb, DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + PF ldrbge, DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! vmull.u8 q11, d22, d7 .endm @@ -683,11 +679,11 @@ generate_composite_function_single_scanline \ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! PF subge, PF_X, PF_X, ORIG_W vmull.u8 q8, d22, d4 - PF subges, PF_CTL, PF_CTL, #0x10 + PF subsge, PF_CTL, PF_CTL, #0x10 vmull.u8 q9, d22, d5 - PF ldrgeb, DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + PF ldrbge, DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! vmull.u8 q10, d22, d6 - PF ldrgeb, DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + PF ldrbge, DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! vmull.u8 q11, d22, d7 .endm @@ -759,9 +755,9 @@ generate_composite_function_single_scanline \ vmull.u8 q9, d24, d5 PF subge, PF_X, PF_X, ORIG_W vmull.u8 q10, d24, d6 - PF subges, PF_CTL, PF_CTL, #0x10 + PF subsge, PF_CTL, PF_CTL, #0x10 vmull.u8 q11, d24, d7 - PF ldrgeb, DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + PF ldrbge, DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! .endm @@ -810,10 +806,10 @@ generate_composite_function \ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! PF subge, PF_X, PF_X, ORIG_W vmull.u8 q8, d22, d4 - PF subges, PF_CTL, PF_CTL, #0x10 + PF subsge, PF_CTL, PF_CTL, #0x10 vmull.u8 q9, d22, d5 vmull.u8 q10, d22, d6 - PF ldrgeb, DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + PF ldrbge, DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! vmull.u8 q11, d22, d7 .endm @@ -1265,9 +1261,9 @@ generate_composite_function \ vmull.u8 q9, d24, d1 PF subge, PF_X, PF_X, ORIG_W vmull.u8 q10, d24, d2 - PF subges, PF_CTL, PF_CTL, #0x10 + PF subsge, PF_CTL, PF_CTL, #0x10 vmull.u8 q11, d24, d3 - PF ldrgeb, DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! + PF ldrbge, DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! vrsra.u16 q8, q8, #8 vrsra.u16 q9, q9, #8 @@ -1334,9 +1330,9 @@ generate_composite_function \ vmull.u8 q1, d25, d16 PF subge, PF_X, PF_X, ORIG_W vmull.u8 q2, d26, d16 - PF subges, PF_CTL, PF_CTL, #0x10 + PF subsge, PF_CTL, PF_CTL, #0x10 vmull.u8 q3, d27, d16 - PF ldrgeb, DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! + PF ldrbge, DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! vrsra.u16 q0, q0, #8 vrsra.u16 q1, q1, #8 @@ -1430,11 +1426,11 @@ generate_composite_function \ vmull.u8 q7, d24, d9 PF subge, PF_X, PF_X, ORIG_W vmull.u8 q8, d24, d10 - PF subges, PF_CTL, PF_CTL, #0x10 + PF subsge, PF_CTL, PF_CTL, #0x10 vmull.u8 q9, d24, d11 - PF ldrgeb, DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + PF ldrbge, DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! vqadd.u8 q14, q0, q14 - PF ldrgeb, DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! + PF ldrbge, DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! vqadd.u8 q15, q1, q15 vrshr.u16 q10, q6, #8 vrshr.u16 q11, q7, #8 @@ -2444,8 +2440,8 @@ generate_composite_function \ PF cmp, PF_X, ORIG_W PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] PF subge, PF_X, PF_X, ORIG_W - PF subges, PF_CTL, PF_CTL, #0x10 - PF ldrgeb, DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + PF subsge, PF_CTL, PF_CTL, #0x10 + PF ldrbge, DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! .endm generate_composite_function \ @@ -2501,8 +2497,8 @@ generate_composite_function \ PF cmp, PF_X, ORIG_W PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] PF subge, PF_X, PF_X, ORIG_W - PF subges, PF_CTL, PF_CTL, #0x10 - PF ldrgeb, DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + PF subsge, PF_CTL, PF_CTL, #0x10 + PF ldrbge, DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! .endm generate_composite_function \ diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h index e85526d..06318d9 100644 --- a/pixman/pixman-arm-neon-asm.h +++ b/pixman/pixman-arm-neon-asm.h @@ -213,24 +213,24 @@ .if \elem_size == 16 mov TMP1, VX, asr #16 adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED +5: subspl VX, VX, SRC_WIDTH_FIXED bpl 5b add TMP1, \mem_operand, TMP1, asl #1 mov TMP2, VX, asr #16 adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED +5: subspl VX, VX, SRC_WIDTH_FIXED bpl 5b add TMP2, \mem_operand, TMP2, asl #1 vld1.16 {d\()\reg1\()[0]}, [TMP1, :16] mov TMP1, VX, asr #16 adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED +5: subspl VX, VX, SRC_WIDTH_FIXED bpl 5b add TMP1, \mem_operand, TMP1, asl #1 vld1.16 {d\()\reg1\()[1]}, [TMP2, :16] mov TMP2, VX, asr #16 adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED +5: subspl VX, VX, SRC_WIDTH_FIXED bpl 5b add TMP2, \mem_operand, TMP2, asl #1 vld1.16 {d\()\reg1\()[2]}, [TMP1, :16] @@ -238,12 +238,12 @@ .elseif \elem_size == 32 mov TMP1, VX, asr #16 adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED +5: subspl VX, VX, SRC_WIDTH_FIXED bpl 5b add TMP1, \mem_operand, TMP1, asl #2 mov TMP2, VX, asr #16 adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED +5: subspl VX, VX, SRC_WIDTH_FIXED bpl 5b add TMP2, \mem_operand, TMP2, asl #2 vld1.32 {d\()\reg1\()[0]}, [TMP1, :32] @@ -281,14 +281,14 @@ .if \elem_size == 16 mov TMP1, VX, asr #16 adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED +5: subspl VX, VX, SRC_WIDTH_FIXED bpl 5b add TMP1, \mem_operand, TMP1, asl #1 vld1.16 {d\()\reg1\()[\idx]}, [TMP1, :16] .elseif \elem_size == 32 mov TMP1, VX, asr #16 adds VX, VX, UNIT_X -5: subpls VX, VX, SRC_WIDTH_FIXED +5: subspl VX, VX, SRC_WIDTH_FIXED bpl 5b add TMP1, \mem_operand, TMP1, asl #2 vld1.32 {d\()\reg1\()[\idx]}, [TMP1, :32] @@ -420,15 +420,15 @@ PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift] .endif PF subge, PF_X, PF_X, ORIG_W - PF subges, PF_CTL, PF_CTL, #0x10 + PF subsge, PF_CTL, PF_CTL, #0x10 .if src_bpp_shift >= 0 - PF ldrgeb, DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + PF ldrbge, DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! .endif .if dst_r_bpp != 0 - PF ldrgeb, DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + PF ldrbge, DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! .endif .if mask_bpp_shift >= 0 - PF ldrgeb, DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! + PF ldrbge, DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! .endif .endif .endm diff --git a/pixman/pixman-arm-simd-asm-scaled.S b/pixman/pixman-arm-simd-asm-scaled.S index 25445b8..cc62c81 100644 --- a/pixman/pixman-arm-simd-asm-scaled.S +++ b/pixman/pixman-arm-simd-asm-scaled.S @@ -25,10 +25,6 @@ * */ -#ifdef __clang__ -#define subpls subspl -#endif - /* Prevent the stack from becoming executable */ #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits @@ -43,6 +39,8 @@ #include "pixman-arm-asm.h" + pixman_syntax_unified + /* * Note: This code is only using armv5te instructions (not even armv6), * but is scheduled for ARM Cortex-A8 pipeline. So it might need to @@ -89,21 +87,21 @@ pixman_asm_function \fname and TMP2, VXMASK, VX, asr #(16 - \bpp_shift) adds VX, VX, UNIT_X str\()\t TMP1, [DST], #(1 << \bpp_shift) -9: subpls VX, VX, SRC_WIDTH_FIXED +9: subspl VX, VX, SRC_WIDTH_FIXED bpl 9b ldr\()\t TMP2, [SRC, TMP2] and TMP1, VXMASK, VX, asr #(16 - \bpp_shift) adds VX, VX, UNIT_X str\()\t TMP2, [DST], #(1 << \bpp_shift) -9: subpls VX, VX, SRC_WIDTH_FIXED +9: subspl VX, VX, SRC_WIDTH_FIXED bpl 9b .endm /* now do the scaling */ and TMP1, VXMASK, VX, asr #(16 - \bpp_shift) adds VX, VX, UNIT_X -9: subpls VX, VX, SRC_WIDTH_FIXED +9: subspl VX, VX, SRC_WIDTH_FIXED bpl 9b subs W, W, #(8 + \prefetch_braking_distance) blt 2f @@ -112,7 +110,7 @@ pixman_asm_function \fname mla PF_OFFS, UNIT_X, PF_OFFS, VX 1: /* main loop, process 8 pixels per iteration with prefetch */ pld [SRC, PF_OFFS, asr #(16 - \bpp_shift)] - add PF_OFFS, UNIT_X, lsl #3 + add PF_OFFS, PF_OFFS, UNIT_X, lsl #3 scale_2_pixels scale_2_pixels scale_2_pixels @@ -133,13 +131,8 @@ pixman_asm_function \fname scale_2_pixels 2: tst W, #1 -#ifdef __clang__ ldr\()\t\()ne TMP1, [SRC, TMP1] str\()\t\()ne TMP1, [DST] -#else - ldrne\()\t TMP1, [SRC, TMP1] - strne\()\t TMP1, [DST] -#endif /* cleanup helper macro */ .purgem scale_2_pixels .unreq DST diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S index 0c93ef4..34d38f1 100644 --- a/pixman/pixman-arm-simd-asm.S +++ b/pixman/pixman-arm-simd-asm.S @@ -25,11 +25,6 @@ * */ -#ifdef __clang__ -#define adceqs adcseq -#define ldmnedb ldmdbne -#endif - /* Prevent the stack from becoming executable */ #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits @@ -45,6 +40,8 @@ #include "pixman-arm-asm.h" #include "pixman-arm-simd-asm.h" + pixman_syntax_unified + /* A head macro should do all processing which results in an output of up to * 16 bytes, as far as the final load instruction. The corresponding tail macro * should complete the processing of the up-to-16 bytes. The calling macro will @@ -127,7 +124,7 @@ generate_composite_function \ .macro src_n_0565_init ldrh SRC, [sp, #ARGS_STACK_OFFSET] - orr SRC, SRC, lsl #16 + orr SRC, SRC, SRC, lsl #16 mov STRIDE_S, SRC mov MASK, SRC mov STRIDE_M, SRC @@ -135,8 +132,8 @@ generate_composite_function \ .macro src_n_8_init ldrb SRC, [sp, #ARGS_STACK_OFFSET] - orr SRC, SRC, lsl #8 - orr SRC, SRC, lsl #16 + orr SRC, SRC, SRC, lsl #8 + orr SRC, SRC, SRC, lsl #16 mov STRIDE_S, SRC mov MASK, SRC mov STRIDE_M, SRC @@ -1098,13 +1095,13 @@ generate_composite_function \ .elseif \numbytes == 8 teq ORIG_W, WK\()\reg1 teqeq ORIG_W, ORIG_W, asr #32 /* all 0 or all -1? */ - ldmnedb DST, {WK\()\reg1-WK\()\reg2} + ldmdbne DST, {WK\()\reg1-WK\()\reg2} .else teq ORIG_W, WK\()\reg1 teqeq ORIG_W, WK\()\reg2 teqeq ORIG_W, WK\()\reg3 teqeq ORIG_W, ORIG_W, asr #32 /* all 0 or all -1? */ - ldmnedb DST, {WK\()\reg1-WK\()\reg4} + ldmdbne DST, {WK\()\reg1-WK\()\reg4} .endif cmnne DST, #0 /* clear C if NE */ bcs 49f /* no writes to dest if source all -1 */ diff --git a/pixman/pixman-arm-simd-asm.h b/pixman/pixman-arm-simd-asm.h index 3e78e8a..5ec19e0 100644 --- a/pixman/pixman-arm-simd-asm.h +++ b/pixman/pixman-arm-simd-asm.h @@ -119,37 +119,21 @@ \op\()r\()\cond WK\()\reg2, [\base], #4 \op\()r\()\cond WK\()\reg3, [\base], #4 .else -#ifdef __clang__ \op\()mia\()\cond \base!, {WK\()\reg0,WK\()\reg1,WK\()\reg2,WK\()\reg3} -#else - \op\()m\()\cond\()ia \base!, {WK\()\reg0,WK\()\reg1,WK\()\reg2,WK\()\reg3} -#endif .endif .elseif \numbytes == 8 .if \unaligned == 1 \op\()r\()\cond WK\()\reg0, [\base], #4 \op\()r\()\cond WK\()\reg1, [\base], #4 .else -#ifdef __clang__ \op\()mia\()\cond \base!, {WK\()\reg0,WK\()\reg1} -#else - \op\()m\()\cond\()ia \base!, {WK\()\reg0,WK\()\reg1} -#endif .endif .elseif \numbytes == 4 \op\()r\()\cond WK\()\reg0, [\base], #4 .elseif \numbytes == 2 -#ifdef __clang__ \op\()rh\()\cond WK\()\reg0, [\base], #2 -#else - \op\()r\()\cond\()h WK\()\reg0, [\base], #2 -#endif .elseif \numbytes == 1 -#ifdef __clang__ \op\()rb\()\cond WK\()\reg0, [\base], #1 -#else - \op\()r\()\cond\()b WK\()\reg0, [\base], #1 -#endif .else .error "unsupported size: \numbytes" .endif @@ -157,31 +141,15 @@ .macro pixst_baseupdated cond, numbytes, reg0, reg1, reg2, reg3, base .if \numbytes == 16 -#ifdef __clang__ stm\()\cond\()db \base, {WK\()\reg0,WK\()\reg1,WK\()\reg2,WK\()\reg3} -#else - stmdb\()\cond \base, {WK\()\reg0,WK\()\reg1,WK\()\reg2,WK\()\reg3} -#endif .elseif \numbytes == 8 -#ifdef __clang__ stmdb\()\cond \base, {WK\()\reg0,WK\()\reg1} -#else - stm\()\cond\()db \base, {WK\()\reg0,WK\()\reg1} -#endif .elseif \numbytes == 4 str\()\cond WK\()\reg0, [\base, #-4] .elseif \numbytes == 2 -#ifdef __clang__ strh\()\cond WK\()\reg0, [\base, #-2] -#else - str\()\cond\()h WK\()\reg0, [\base, #-2] -#endif .elseif \numbytes == 1 -#ifdef __clang__ strb\()\cond WK\()\reg0, [\base, #-1] -#else - str\()\cond\()b WK\()\reg0, [\base, #-1] -#endif .else .error "unsupported size: \numbytes" .endif @@ -291,7 +259,7 @@ /* If exactly one fetch per block, then we need either 0, 1 or 2 extra preloads */ PF mov, SCRATCH, \base, lsl #32-5 PF adds, SCRATCH, SCRATCH, X, lsl #32-5+\bpp_shift - PF adceqs, SCRATCH, SCRATCH, #0 + PF adcseq, SCRATCH, SCRATCH, #0 /* The instruction above has two effects: ensures Z is only * set if C was clear (so Z indicates that both shifted quantities * were 0), and clears C if Z was set (so C indicates that the sum |