From c5af58b769113c4045209973052db3e3a543ee43 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Wed, 5 Sep 2018 14:25:18 +0800 Subject: csky: Library functions This patch adds string optimize codes and some auxiliary codes. Signed-off-by: Chen Linfei Signed-off-by: Mao Han Signed-off-by: Guo Ren Reviewed-by: Arnd Bergmann --- arch/csky/abiv1/bswapdi.c | 12 ++ arch/csky/abiv1/bswapsi.c | 12 ++ arch/csky/abiv1/inc/abi/string.h | 13 ++ arch/csky/abiv1/memcpy.S | 347 +++++++++++++++++++++++++++++++++++++++ arch/csky/abiv1/memset.c | 37 +++++ arch/csky/abiv1/strksyms.c | 7 + 6 files changed, 428 insertions(+) create mode 100644 arch/csky/abiv1/bswapdi.c create mode 100644 arch/csky/abiv1/bswapsi.c create mode 100644 arch/csky/abiv1/inc/abi/string.h create mode 100644 arch/csky/abiv1/memcpy.S create mode 100644 arch/csky/abiv1/memset.c create mode 100644 arch/csky/abiv1/strksyms.c (limited to 'arch/csky/abiv1') diff --git a/arch/csky/abiv1/bswapdi.c b/arch/csky/abiv1/bswapdi.c new file mode 100644 index 000000000000..f50a1d6e337a --- /dev/null +++ b/arch/csky/abiv1/bswapdi.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. + +#include +#include +#include + +unsigned long long notrace __bswapdi2(unsigned long long u) +{ + return ___constant_swab64(u); +} +EXPORT_SYMBOL(__bswapdi2); diff --git a/arch/csky/abiv1/bswapsi.c b/arch/csky/abiv1/bswapsi.c new file mode 100644 index 000000000000..0f79182e8a5b --- /dev/null +++ b/arch/csky/abiv1/bswapsi.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. + +#include +#include +#include + +unsigned int notrace __bswapsi2(unsigned int u) +{ + return ___constant_swab32(u); +} +EXPORT_SYMBOL(__bswapsi2); diff --git a/arch/csky/abiv1/inc/abi/string.h b/arch/csky/abiv1/inc/abi/string.h new file mode 100644 index 000000000000..5abe80be044d --- /dev/null +++ b/arch/csky/abiv1/inc/abi/string.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. + +#ifndef __ABI_CSKY_STRING_H +#define __ABI_CSKY_STRING_H + +#define __HAVE_ARCH_MEMCPY +extern void *memcpy(void *, const void *, __kernel_size_t); + +#define __HAVE_ARCH_MEMSET +extern void *memset(void *, int, __kernel_size_t); + +#endif /* __ABI_CSKY_STRING_H */ diff --git a/arch/csky/abiv1/memcpy.S b/arch/csky/abiv1/memcpy.S new file mode 100644 index 000000000000..5078eb5169fa --- /dev/null +++ b/arch/csky/abiv1/memcpy.S @@ -0,0 +1,347 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. + +#include + +.macro GET_FRONT_BITS rx y +#ifdef __cskyLE__ + lsri \rx, \y +#else + lsli \rx, \y +#endif +.endm + +.macro GET_AFTER_BITS rx y +#ifdef __cskyLE__ + lsli \rx, \y +#else + lsri \rx, \y +#endif +.endm + +/* void *memcpy(void *dest, const void *src, size_t n); */ +ENTRY(memcpy) + mov r7, r2 + cmplti r4, 4 + bt .L_copy_by_byte + mov r6, r2 + andi r6, 3 + cmpnei r6, 0 + jbt .L_dest_not_aligned + mov r6, r3 + andi r6, 3 + cmpnei r6, 0 + jbt .L_dest_aligned_but_src_not_aligned +.L0: + cmplti r4, 16 + jbt .L_aligned_and_len_less_16bytes + subi sp, 8 + stw r8, (sp, 0) +.L_aligned_and_len_larger_16bytes: + ldw r1, (r3, 0) + ldw r5, (r3, 4) + ldw r8, (r3, 8) + stw r1, (r7, 0) + ldw r1, (r3, 12) + stw r5, (r7, 4) + stw r8, (r7, 8) + stw r1, (r7, 12) + subi r4, 16 + addi r3, 16 + addi r7, 16 + cmplti r4, 16 + jbf .L_aligned_and_len_larger_16bytes + ldw r8, (sp, 0) + addi sp, 8 + cmpnei r4, 0 + jbf .L_return + +.L_aligned_and_len_less_16bytes: + cmplti r4, 4 + bt .L_copy_by_byte +.L1: + ldw r1, (r3, 0) + stw r1, (r7, 0) + subi r4, 4 + addi r3, 4 + addi r7, 4 + cmplti r4, 4 + jbf .L1 + br .L_copy_by_byte + +.L_return: + rts + +.L_copy_by_byte: /* len less than 4 bytes */ + cmpnei r4, 0 + jbf .L_return +.L4: + ldb r1, (r3, 0) + stb r1, (r7, 0) + addi r3, 1 + addi r7, 1 + decne r4 + jbt .L4 + rts + +/* + * If dest is not aligned, just copying some bytes makes the dest align. + * Afther that, we judge whether the src is aligned. + */ +.L_dest_not_aligned: + mov r5, r3 + rsub r5, r5, r7 + abs r5, r5 + cmplt r5, r4 + bt .L_copy_by_byte + mov r5, r7 + sub r5, r3 + cmphs r5, r4 + bf .L_copy_by_byte + mov r5, r6 +.L5: + ldb r1, (r3, 0) /* makes the dest align. */ + stb r1, (r7, 0) + addi r5, 1 + subi r4, 1 + addi r3, 1 + addi r7, 1 + cmpnei r5, 4 + jbt .L5 + cmplti r4, 4 + jbt .L_copy_by_byte + mov r6, r3 /* judge whether the src is aligned. */ + andi r6, 3 + cmpnei r6, 0 + jbf .L0 + +/* Judge the number of misaligned, 1, 2, 3? */ +.L_dest_aligned_but_src_not_aligned: + mov r5, r3 + rsub r5, r5, r7 + abs r5, r5 + cmplt r5, r4 + bt .L_copy_by_byte + bclri r3, 0 + bclri r3, 1 + ldw r1, (r3, 0) + addi r3, 4 + cmpnei r6, 2 + bf .L_dest_aligned_but_src_not_aligned_2bytes + cmpnei r6, 3 + bf .L_dest_aligned_but_src_not_aligned_3bytes + +.L_dest_aligned_but_src_not_aligned_1byte: + mov r5, r7 + sub r5, r3 + cmphs r5, r4 + bf .L_copy_by_byte + cmplti r4, 16 + bf .L11 +.L10: /* If the len is less than 16 bytes */ + GET_FRONT_BITS r1 8 + mov r5, r1 + ldw r6, (r3, 0) + mov r1, r6 + GET_AFTER_BITS r6 24 + or r5, r6 + stw r5, (r7, 0) + subi r4, 4 + addi r3, 4 + addi r7, 4 + cmplti r4, 4 + bf .L10 + subi r3, 3 + br .L_copy_by_byte +.L11: + subi sp, 16 + stw r8, (sp, 0) + stw r9, (sp, 4) + stw r10, (sp, 8) + stw r11, (sp, 12) +.L12: + ldw r5, (r3, 0) + ldw r11, (r3, 4) + ldw r8, (r3, 8) + ldw r9, (r3, 12) + + GET_FRONT_BITS r1 8 /* little or big endian? */ + mov r10, r5 + GET_AFTER_BITS r5 24 + or r5, r1 + + GET_FRONT_BITS r10 8 + mov r1, r11 + GET_AFTER_BITS r11 24 + or r11, r10 + + GET_FRONT_BITS r1 8 + mov r10, r8 + GET_AFTER_BITS r8 24 + or r8, r1 + + GET_FRONT_BITS r10 8 + mov r1, r9 + GET_AFTER_BITS r9 24 + or r9, r10 + + stw r5, (r7, 0) + stw r11, (r7, 4) + stw r8, (r7, 8) + stw r9, (r7, 12) + subi r4, 16 + addi r3, 16 + addi r7, 16 + cmplti r4, 16 + jbf .L12 + ldw r8, (sp, 0) + ldw r9, (sp, 4) + ldw r10, (sp, 8) + ldw r11, (sp, 12) + addi sp , 16 + cmplti r4, 4 + bf .L10 + subi r3, 3 + br .L_copy_by_byte + +.L_dest_aligned_but_src_not_aligned_2bytes: + cmplti r4, 16 + bf .L21 +.L20: + GET_FRONT_BITS r1 16 + mov r5, r1 + ldw r6, (r3, 0) + mov r1, r6 + GET_AFTER_BITS r6 16 + or r5, r6 + stw r5, (r7, 0) + subi r4, 4 + addi r3, 4 + addi r7, 4 + cmplti r4, 4 + bf .L20 + subi r3, 2 + br .L_copy_by_byte + rts + +.L21: /* n > 16 */ + subi sp, 16 + stw r8, (sp, 0) + stw r9, (sp, 4) + stw r10, (sp, 8) + stw r11, (sp, 12) + +.L22: + ldw r5, (r3, 0) + ldw r11, (r3, 4) + ldw r8, (r3, 8) + ldw r9, (r3, 12) + + GET_FRONT_BITS r1 16 + mov r10, r5 + GET_AFTER_BITS r5 16 + or r5, r1 + + GET_FRONT_BITS r10 16 + mov r1, r11 + GET_AFTER_BITS r11 16 + or r11, r10 + + GET_FRONT_BITS r1 16 + mov r10, r8 + GET_AFTER_BITS r8 16 + or r8, r1 + + GET_FRONT_BITS r10 16 + mov r1, r9 + GET_AFTER_BITS r9 16 + or r9, r10 + + stw r5, (r7, 0) + stw r11, (r7, 4) + stw r8, (r7, 8) + stw r9, (r7, 12) + subi r4, 16 + addi r3, 16 + addi r7, 16 + cmplti r4, 16 + jbf .L22 + ldw r8, (sp, 0) + ldw r9, (sp, 4) + ldw r10, (sp, 8) + ldw r11, (sp, 12) + addi sp, 16 + cmplti r4, 4 + bf .L20 + subi r3, 2 + br .L_copy_by_byte + + +.L_dest_aligned_but_src_not_aligned_3bytes: + cmplti r4, 16 + bf .L31 +.L30: + GET_FRONT_BITS r1 24 + mov r5, r1 + ldw r6, (r3, 0) + mov r1, r6 + GET_AFTER_BITS r6 8 + or r5, r6 + stw r5, (r7, 0) + subi r4, 4 + addi r3, 4 + addi r7, 4 + cmplti r4, 4 + bf .L30 + subi r3, 1 + br .L_copy_by_byte +.L31: + subi sp, 16 + stw r8, (sp, 0) + stw r9, (sp, 4) + stw r10, (sp, 8) + stw r11, (sp, 12) +.L32: + ldw r5, (r3, 0) + ldw r11, (r3, 4) + ldw r8, (r3, 8) + ldw r9, (r3, 12) + + GET_FRONT_BITS r1 24 + mov r10, r5 + GET_AFTER_BITS r5 8 + or r5, r1 + + GET_FRONT_BITS r10 24 + mov r1, r11 + GET_AFTER_BITS r11 8 + or r11, r10 + + GET_FRONT_BITS r1 24 + mov r10, r8 + GET_AFTER_BITS r8 8 + or r8, r1 + + GET_FRONT_BITS r10 24 + mov r1, r9 + GET_AFTER_BITS r9 8 + or r9, r10 + + stw r5, (r7, 0) + stw r11, (r7, 4) + stw r8, (r7, 8) + stw r9, (r7, 12) + subi r4, 16 + addi r3, 16 + addi r7, 16 + cmplti r4, 16 + jbf .L32 + ldw r8, (sp, 0) + ldw r9, (sp, 4) + ldw r10, (sp, 8) + ldw r11, (sp, 12) + addi sp, 16 + cmplti r4, 4 + bf .L30 + subi r3, 1 + br .L_copy_by_byte diff --git a/arch/csky/abiv1/memset.c b/arch/csky/abiv1/memset.c new file mode 100644 index 000000000000..b4aa75b99c5d --- /dev/null +++ b/arch/csky/abiv1/memset.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. + +#include + +void *memset(void *dest, int c, size_t l) +{ + char *d = dest; + int ch = c & 0xff; + int tmp = (ch | ch << 8 | ch << 16 | ch << 24); + + while (((uintptr_t)d & 0x3) && l--) + *d++ = ch; + + while (l >= 16) { + *(((u32 *)d)) = tmp; + *(((u32 *)d)+1) = tmp; + *(((u32 *)d)+2) = tmp; + *(((u32 *)d)+3) = tmp; + l -= 16; + d += 16; + } + + while (l > 3) { + *(((u32 *)d)) = tmp; + l -= 4; + d += 4; + } + + while (l) { + *d = ch; + l--; + d++; + } + + return dest; +} diff --git a/arch/csky/abiv1/strksyms.c b/arch/csky/abiv1/strksyms.c new file mode 100644 index 000000000000..436995c9b75c --- /dev/null +++ b/arch/csky/abiv1/strksyms.c @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. + +#include + +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memset); -- cgit v1.2.3