diff options
author | Christophe Leroy <christophe.leroy@c-s.fr> | 2017-08-23 16:54:36 +0200 |
---|---|---|
committer | Michael Ellerman <mpe@ellerman.id.au> | 2017-09-01 16:42:46 +1000 |
commit | 7bf6057b962016eee57bc76295f80a26f90172f7 (patch) | |
tree | f4b5f7cebcbe57cb9e983d5c117667239c89f1f0 | |
parent | c0622167e3d4195d50b925bfabd8966589635e75 (diff) |
powerpc/32: optimise memset()
There is no need to extend the set value to an int when the length
is lower than 4 as in that case we only do byte stores.
We can therefore immediately branch to the part handling it.
By separating it from the normal case, we are able to eliminate
a few actions on the destination pointer.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-rw-r--r-- | arch/powerpc/lib/copy_32.S | 21 |
1 files changed, 14 insertions, 7 deletions
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index a3ffeac69eca..05aaee20590f 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -91,17 +91,17 @@ EXPORT_SYMBOL(memset16) * replaced by a nop once cache is active. This is done in machine_init() */ _GLOBAL(memset) + cmplwi 0,r5,4 + blt 7f + rlwimi r4,r4,8,16,23 rlwimi r4,r4,16,0,15 - addi r6,r3,-4 - cmplwi 0,r5,4 - blt 7f - stwu r4,4(r6) + stw r4,0(r3) beqlr - andi. r0,r6,3 + andi. r0,r3,3 add r5,r0,r5 - subf r6,r0,r6 + subf r6,r0,r3 cmplwi 0,r4,0 bne 2f /* Use normal procedure if r4 is not zero */ _GLOBAL(memset_nocache_branch) @@ -132,13 +132,20 @@ _GLOBAL(memset_nocache_branch) 1: stwu r4,4(r6) bdnz 1b 6: andi. r5,r5,3 -7: cmpwi 0,r5,0 beqlr mtctr r5 addi r6,r6,3 8: stbu r4,1(r6) bdnz 8b blr + +7: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r3,-1 +9: stbu r4,1(r6) + bdnz 9b + blr EXPORT_SYMBOL(memset) /* |