summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristophe Leroy <christophe.leroy@c-s.fr>2017-08-23 16:54:36 +0200
committerMichael Ellerman <mpe@ellerman.id.au>2017-09-01 16:42:46 +1000
commit7bf6057b962016eee57bc76295f80a26f90172f7 (patch)
treef4b5f7cebcbe57cb9e983d5c117667239c89f1f0
parentc0622167e3d4195d50b925bfabd8966589635e75 (diff)
powerpc/32: optimise memset()
There is no need to extend the set value to an int when the length is lower than 4 as in that case we only do byte stores. We can therefore immediately branch to the part handling it. By separating it from the normal case, we are able to eliminate a few actions on the destination pointer. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-rw-r--r--arch/powerpc/lib/copy_32.S21
1 files changed, 14 insertions, 7 deletions
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index a3ffeac69eca..05aaee20590f 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -91,17 +91,17 @@ EXPORT_SYMBOL(memset16)
* replaced by a nop once cache is active. This is done in machine_init()
*/
_GLOBAL(memset)
+ cmplwi 0,r5,4
+ blt 7f
+
rlwimi r4,r4,8,16,23
rlwimi r4,r4,16,0,15
- addi r6,r3,-4
- cmplwi 0,r5,4
- blt 7f
- stwu r4,4(r6)
+ stw r4,0(r3)
beqlr
- andi. r0,r6,3
+ andi. r0,r3,3
add r5,r0,r5
- subf r6,r0,r6
+ subf r6,r0,r3
cmplwi 0,r4,0
bne 2f /* Use normal procedure if r4 is not zero */
_GLOBAL(memset_nocache_branch)
@@ -132,13 +132,20 @@ _GLOBAL(memset_nocache_branch)
1: stwu r4,4(r6)
bdnz 1b
6: andi. r5,r5,3
-7: cmpwi 0,r5,0
beqlr
mtctr r5
addi r6,r6,3
8: stbu r4,1(r6)
bdnz 8b
blr
+
+7: cmpwi 0,r5,0
+ beqlr
+ mtctr r5
+ addi r6,r3,-1
+9: stbu r4,1(r6)
+ bdnz 9b
+ blr
EXPORT_SYMBOL(memset)
/*