diff options
author | Guo Ren <ren_guo@c-sky.com> | 2018-09-05 14:25:18 +0800 |
---|---|---|
committer | Guo Ren <ren_guo@c-sky.com> | 2018-10-26 00:54:24 +0800 |
commit | c5af58b769113c4045209973052db3e3a543ee43 (patch) | |
tree | cd31dd49aa07e63af65217f8f04d165fe328a312 /arch/csky/abiv1 | |
parent | 9d056df0924edbb0a30c85a1c1d3153c1229ec47 (diff) |
csky: Library functions
This patch adds string optimize codes and some auxiliary codes.
Signed-off-by: Chen Linfei <linfei_chen@c-sky.com>
Signed-off-by: Mao Han <han_mao@c-sky.com>
Signed-off-by: Guo Ren <ren_guo@c-sky.com>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Diffstat (limited to 'arch/csky/abiv1')
-rw-r--r-- | arch/csky/abiv1/bswapdi.c | 12 | ||||
-rw-r--r-- | arch/csky/abiv1/bswapsi.c | 12 | ||||
-rw-r--r-- | arch/csky/abiv1/inc/abi/string.h | 13 | ||||
-rw-r--r-- | arch/csky/abiv1/memcpy.S | 347 | ||||
-rw-r--r-- | arch/csky/abiv1/memset.c | 37 | ||||
-rw-r--r-- | arch/csky/abiv1/strksyms.c | 7 |
6 files changed, 428 insertions, 0 deletions
diff --git a/arch/csky/abiv1/bswapdi.c b/arch/csky/abiv1/bswapdi.c new file mode 100644 index 000000000000..f50a1d6e337a --- /dev/null +++ b/arch/csky/abiv1/bswapdi.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. + +#include <linux/export.h> +#include <linux/compiler.h> +#include <uapi/linux/swab.h> + +unsigned long long notrace __bswapdi2(unsigned long long u) +{ + return ___constant_swab64(u); +} +EXPORT_SYMBOL(__bswapdi2); diff --git a/arch/csky/abiv1/bswapsi.c b/arch/csky/abiv1/bswapsi.c new file mode 100644 index 000000000000..0f79182e8a5b --- /dev/null +++ b/arch/csky/abiv1/bswapsi.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. + +#include <linux/export.h> +#include <linux/compiler.h> +#include <uapi/linux/swab.h> + +unsigned int notrace __bswapsi2(unsigned int u) +{ + return ___constant_swab32(u); +} +EXPORT_SYMBOL(__bswapsi2); diff --git a/arch/csky/abiv1/inc/abi/string.h b/arch/csky/abiv1/inc/abi/string.h new file mode 100644 index 000000000000..5abe80be044d --- /dev/null +++ b/arch/csky/abiv1/inc/abi/string.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. + +#ifndef __ABI_CSKY_STRING_H +#define __ABI_CSKY_STRING_H + +#define __HAVE_ARCH_MEMCPY +extern void *memcpy(void *, const void *, __kernel_size_t); + +#define __HAVE_ARCH_MEMSET +extern void *memset(void *, int, __kernel_size_t); + +#endif /* __ABI_CSKY_STRING_H */ diff --git a/arch/csky/abiv1/memcpy.S b/arch/csky/abiv1/memcpy.S new file mode 100644 index 000000000000..5078eb5169fa --- /dev/null +++ b/arch/csky/abiv1/memcpy.S @@ -0,0 +1,347 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. + +#include <linux/linkage.h> + +.macro GET_FRONT_BITS rx y +#ifdef __cskyLE__ + lsri \rx, \y +#else + lsli \rx, \y +#endif +.endm + +.macro GET_AFTER_BITS rx y +#ifdef __cskyLE__ + lsli \rx, \y +#else + lsri \rx, \y +#endif +.endm + +/* void *memcpy(void *dest, const void *src, size_t n); */ +ENTRY(memcpy) + mov r7, r2 + cmplti r4, 4 + bt .L_copy_by_byte + mov r6, r2 + andi r6, 3 + cmpnei r6, 0 + jbt .L_dest_not_aligned + mov r6, r3 + andi r6, 3 + cmpnei r6, 0 + jbt .L_dest_aligned_but_src_not_aligned +.L0: + cmplti r4, 16 + jbt .L_aligned_and_len_less_16bytes + subi sp, 8 + stw r8, (sp, 0) +.L_aligned_and_len_larger_16bytes: + ldw r1, (r3, 0) + ldw r5, (r3, 4) + ldw r8, (r3, 8) + stw r1, (r7, 0) + ldw r1, (r3, 12) + stw r5, (r7, 4) + stw r8, (r7, 8) + stw r1, (r7, 12) + subi r4, 16 + addi r3, 16 + addi r7, 16 + cmplti r4, 16 + jbf .L_aligned_and_len_larger_16bytes + ldw r8, (sp, 0) + addi sp, 8 + cmpnei r4, 0 + jbf .L_return + +.L_aligned_and_len_less_16bytes: + cmplti r4, 4 + bt .L_copy_by_byte +.L1: + ldw r1, (r3, 0) + stw r1, (r7, 0) + subi r4, 4 + addi r3, 4 + addi r7, 4 + cmplti r4, 4 + jbf .L1 + br .L_copy_by_byte + +.L_return: + rts + +.L_copy_by_byte: /* len less than 4 bytes */ + cmpnei r4, 0 + jbf .L_return +.L4: + ldb r1, (r3, 0) + stb r1, (r7, 0) + addi r3, 1 + addi r7, 1 + decne r4 + jbt .L4 + rts + +/* + * If dest is not aligned, just copying some bytes makes the dest align. + * Afther that, we judge whether the src is aligned. + */ +.L_dest_not_aligned: + mov r5, r3 + rsub r5, r5, r7 + abs r5, r5 + cmplt r5, r4 + bt .L_copy_by_byte + mov r5, r7 + sub r5, r3 + cmphs r5, r4 + bf .L_copy_by_byte + mov r5, r6 +.L5: + ldb r1, (r3, 0) /* makes the dest align. */ + stb r1, (r7, 0) + addi r5, 1 + subi r4, 1 + addi r3, 1 + addi r7, 1 + cmpnei r5, 4 + jbt .L5 + cmplti r4, 4 + jbt .L_copy_by_byte + mov r6, r3 /* judge whether the src is aligned. */ + andi r6, 3 + cmpnei r6, 0 + jbf .L0 + +/* Judge the number of misaligned, 1, 2, 3? */ +.L_dest_aligned_but_src_not_aligned: + mov r5, r3 + rsub r5, r5, r7 + abs r5, r5 + cmplt r5, r4 + bt .L_copy_by_byte + bclri r3, 0 + bclri r3, 1 + ldw r1, (r3, 0) + addi r3, 4 + cmpnei r6, 2 + bf .L_dest_aligned_but_src_not_aligned_2bytes + cmpnei r6, 3 + bf .L_dest_aligned_but_src_not_aligned_3bytes + +.L_dest_aligned_but_src_not_aligned_1byte: + mov r5, r7 + sub r5, r3 + cmphs r5, r4 + bf .L_copy_by_byte + cmplti r4, 16 + bf .L11 +.L10: /* If the len is less than 16 bytes */ + GET_FRONT_BITS r1 8 + mov r5, r1 + ldw r6, (r3, 0) + mov r1, r6 + GET_AFTER_BITS r6 24 + or r5, r6 + stw r5, (r7, 0) + subi r4, 4 + addi r3, 4 + addi r7, 4 + cmplti r4, 4 + bf .L10 + subi r3, 3 + br .L_copy_by_byte +.L11: + subi sp, 16 + stw r8, (sp, 0) + stw r9, (sp, 4) + stw r10, (sp, 8) + stw r11, (sp, 12) +.L12: + ldw r5, (r3, 0) + ldw r11, (r3, 4) + ldw r8, (r3, 8) + ldw r9, (r3, 12) + + GET_FRONT_BITS r1 8 /* little or big endian? */ + mov r10, r5 + GET_AFTER_BITS r5 24 + or r5, r1 + + GET_FRONT_BITS r10 8 + mov r1, r11 + GET_AFTER_BITS r11 24 + or r11, r10 + + GET_FRONT_BITS r1 8 + mov r10, r8 + GET_AFTER_BITS r8 24 + or r8, r1 + + GET_FRONT_BITS r10 8 + mov r1, r9 + GET_AFTER_BITS r9 24 + or r9, r10 + + stw r5, (r7, 0) + stw r11, (r7, 4) + stw r8, (r7, 8) + stw r9, (r7, 12) + subi r4, 16 + addi r3, 16 + addi r7, 16 + cmplti r4, 16 + jbf .L12 + ldw r8, (sp, 0) + ldw r9, (sp, 4) + ldw r10, (sp, 8) + ldw r11, (sp, 12) + addi sp , 16 + cmplti r4, 4 + bf .L10 + subi r3, 3 + br .L_copy_by_byte + +.L_dest_aligned_but_src_not_aligned_2bytes: + cmplti r4, 16 + bf .L21 +.L20: + GET_FRONT_BITS r1 16 + mov r5, r1 + ldw r6, (r3, 0) + mov r1, r6 + GET_AFTER_BITS r6 16 + or r5, r6 + stw r5, (r7, 0) + subi r4, 4 + addi r3, 4 + addi r7, 4 + cmplti r4, 4 + bf .L20 + subi r3, 2 + br .L_copy_by_byte + rts + +.L21: /* n > 16 */ + subi sp, 16 + stw r8, (sp, 0) + stw r9, (sp, 4) + stw r10, (sp, 8) + stw r11, (sp, 12) + +.L22: + ldw r5, (r3, 0) + ldw r11, (r3, 4) + ldw r8, (r3, 8) + ldw r9, (r3, 12) + + GET_FRONT_BITS r1 16 + mov r10, r5 + GET_AFTER_BITS r5 16 + or r5, r1 + + GET_FRONT_BITS r10 16 + mov r1, r11 + GET_AFTER_BITS r11 16 + or r11, r10 + + GET_FRONT_BITS r1 16 + mov r10, r8 + GET_AFTER_BITS r8 16 + or r8, r1 + + GET_FRONT_BITS r10 16 + mov r1, r9 + GET_AFTER_BITS r9 16 + or r9, r10 + + stw r5, (r7, 0) + stw r11, (r7, 4) + stw r8, (r7, 8) + stw r9, (r7, 12) + subi r4, 16 + addi r3, 16 + addi r7, 16 + cmplti r4, 16 + jbf .L22 + ldw r8, (sp, 0) + ldw r9, (sp, 4) + ldw r10, (sp, 8) + ldw r11, (sp, 12) + addi sp, 16 + cmplti r4, 4 + bf .L20 + subi r3, 2 + br .L_copy_by_byte + + +.L_dest_aligned_but_src_not_aligned_3bytes: + cmplti r4, 16 + bf .L31 +.L30: + GET_FRONT_BITS r1 24 + mov r5, r1 + ldw r6, (r3, 0) + mov r1, r6 + GET_AFTER_BITS r6 8 + or r5, r6 + stw r5, (r7, 0) + subi r4, 4 + addi r3, 4 + addi r7, 4 + cmplti r4, 4 + bf .L30 + subi r3, 1 + br .L_copy_by_byte +.L31: + subi sp, 16 + stw r8, (sp, 0) + stw r9, (sp, 4) + stw r10, (sp, 8) + stw r11, (sp, 12) +.L32: + ldw r5, (r3, 0) + ldw r11, (r3, 4) + ldw r8, (r3, 8) + ldw r9, (r3, 12) + + GET_FRONT_BITS r1 24 + mov r10, r5 + GET_AFTER_BITS r5 8 + or r5, r1 + + GET_FRONT_BITS r10 24 + mov r1, r11 + GET_AFTER_BITS r11 8 + or r11, r10 + + GET_FRONT_BITS r1 24 + mov r10, r8 + GET_AFTER_BITS r8 8 + or r8, r1 + + GET_FRONT_BITS r10 24 + mov r1, r9 + GET_AFTER_BITS r9 8 + or r9, r10 + + stw r5, (r7, 0) + stw r11, (r7, 4) + stw r8, (r7, 8) + stw r9, (r7, 12) + subi r4, 16 + addi r3, 16 + addi r7, 16 + cmplti r4, 16 + jbf .L32 + ldw r8, (sp, 0) + ldw r9, (sp, 4) + ldw r10, (sp, 8) + ldw r11, (sp, 12) + addi sp, 16 + cmplti r4, 4 + bf .L30 + subi r3, 1 + br .L_copy_by_byte diff --git a/arch/csky/abiv1/memset.c b/arch/csky/abiv1/memset.c new file mode 100644 index 000000000000..b4aa75b99c5d --- /dev/null +++ b/arch/csky/abiv1/memset.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. + +#include <linux/types.h> + +void *memset(void *dest, int c, size_t l) +{ + char *d = dest; + int ch = c & 0xff; + int tmp = (ch | ch << 8 | ch << 16 | ch << 24); + + while (((uintptr_t)d & 0x3) && l--) + *d++ = ch; + + while (l >= 16) { + *(((u32 *)d)) = tmp; + *(((u32 *)d)+1) = tmp; + *(((u32 *)d)+2) = tmp; + *(((u32 *)d)+3) = tmp; + l -= 16; + d += 16; + } + + while (l > 3) { + *(((u32 *)d)) = tmp; + l -= 4; + d += 4; + } + + while (l) { + *d = ch; + l--; + d++; + } + + return dest; +} diff --git a/arch/csky/abiv1/strksyms.c b/arch/csky/abiv1/strksyms.c new file mode 100644 index 000000000000..436995c9b75c --- /dev/null +++ b/arch/csky/abiv1/strksyms.c @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. + +#include <linux/module.h> + +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memset); |