Merge branch 'for-linus' of master.kernel.org:/home/rmk/linux-2.6-arm

* 'for-linus' of master.kernel.org:/home/rmk/linux-2.6-arm: (241 commits) [ARM] 5171/1: ep93xx: fix compilation of modules using clocks [ARM] 5133/2: at91sam9g20 defconfig file [ARM] 5130/4: Support for the at91sam9g20 [ARM] 5160/1: IOP3XX: gpio/gpiolib support [ARM] at91: Fix NAND FLASH timings for at91sam9x evaluation kits. [ARM] 5084/1: zylonite: Register AC97 device [ARM] 5085/2: PXA: Move AC97 over to the new central device declaration model [ARM] 5120/1: pxa: correct platform driver names for PXA25x and PXA27x UDC drivers [ARM] 5147/1: pxaficp_ir: drop pxa_gpio_mode calls, as pin setting [ARM] 5145/1: PXA2xx: provide api to control IrDA pins state [ARM] 5144/1: pxaficp_ir: cleanup includes [ARM] pxa: remove pxa_set_cken() [ARM] pxa: allow clk aliases [ARM] Feroceon: don't disable BPU on boot [ARM] Orion: LED support for HP mv2120 [ARM] Orion: add RD88F5181L-FXO support [ARM] Orion: add RD88F5181L-GE support [ARM] Orion: add Netgear WNR854T support [ARM] s3c2410_defconfig: update for current build [ARM] Acer n30: Minor style and indentation fixes. ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2008-07-14 16:06:58 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-07-14 16:06:58 -0700
commit: 85082fd7cbe3173198aac0eb5e85ab1edcc6352c (patch)
tree: edbc09b7945994f78668d218fa02e991c3b3b365 /arch/arm/lib
parent: 666484f0250db2e016948d63b3ef33e202e3b8d0 (diff)
parent: 53ffe3b440aa85af6fc4eda09b2d44bcdd312d4d (diff)
4 files changed, 95 insertions, 21 deletions
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index cab355c0c1f7..139cce646055 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -13,14 +13,6 @@
  */
 
 /*
- * This can be used to enable code to cacheline align the source pointer.
- * Experiments on tested architectures (StrongARM and XScale) didn't show
- * this a worthwhile thing to do.  That might be different in the future.
- */
-//#define CALGN(code...)	code
-#define CALGN(code...)
-
-/*
  * Theory of operation
  * -------------------
  *
@@ -82,7 +74,7 @@
 		stmfd	sp!, {r5 - r8}
 		blt	5f
 
-	CALGN(	ands	ip, r1, #31		)
+	CALGN(	ands	ip, r0, #31		)
 	CALGN(	rsb	r3, ip, #32		)
 	CALGN(	sbcnes	r4, r3, r2		)  @ C is always set here
 	CALGN(	bcs	2f			)
@@ -168,7 +160,7 @@
 		subs	r2, r2, #28
 		blt	14f
 
-	CALGN(	ands	ip, r1, #31		)
+	CALGN(	ands	ip, r0, #31		)
 	CALGN(	rsb	ip, ip, #32		)
 	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
 	CALGN(	subcc	r2, r2, ip		)
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index ef7fddc14ac9..2e301b7bd8f1 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -13,14 +13,6 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-/*
- * This can be used to enable code to cacheline align the source pointer.
- * Experiments on tested architectures (StrongARM and XScale) didn't show
- * this a worthwhile thing to do.  That might be different in the future.
- */
-//#define CALGN(code...)        code
-#define CALGN(code...)
-
 		.text
 
 /*
@@ -55,11 +47,12 @@ ENTRY(memmove)
 		stmfd	sp!, {r5 - r8}
 		blt	5f
 
-	CALGN(	ands	ip, r1, #31		)
+	CALGN(	ands	ip, r0, #31		)
 	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, ip		)  @ C is set here
+	CALGN(	rsb	ip, ip, #32		)
 	CALGN(	add	pc, r4, ip		)
 
 	PLD(	pld	[r1, #-4]		)
@@ -138,8 +131,7 @@ ENTRY(memmove)
 		subs	r2, r2, #28
 		blt	14f
 
-	CALGN(	ands	ip, r1, #31		)
-	CALGN(	rsb	ip, ip, #32		)
+	CALGN(	ands	ip, r0, #31		)
 	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
 	CALGN(	subcc	r2, r2, ip		)
 	CALGN(	bcc	15f			)
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 95b110b07a89..b477d4ac88ef 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -39,6 +39,9 @@ ENTRY(memset)
 	mov	r3, r1
 	cmp	r2, #16
 	blt	4f
+
+#if ! CALGN(1)+0
+
 /*
  * We need an extra register for this loop - save the return address and
  * use the LR
@@ -64,6 +67,49 @@ ENTRY(memset)
 	stmneia	r0!, {r1, r3, ip, lr}
 	ldr	lr, [sp], #4
 
+#else
+
+/*
+ * This version aligns the destination pointer in order to write
+ * whole cache lines at once.
+ */
+
+	stmfd	sp!, {r4-r7, lr}
+	mov	r4, r1
+	mov	r5, r1
+	mov	r6, r1
+	mov	r7, r1
+	mov	ip, r1
+	mov	lr, r1
+
+	cmp	r2, #96
+	tstgt	r0, #31
+	ble	3f
+
+	and	ip, r0, #31
+	rsb	ip, ip, #32
+	sub	r2, r2, ip
+	movs	ip, ip, lsl #(32 - 4)
+	stmcsia	r0!, {r4, r5, r6, r7}
+	stmmiia	r0!, {r4, r5}
+	tst	ip, #(1 << 30)
+	mov	ip, r1
+	strne	r1, [r0], #4
+
+3:	subs	r2, r2, #64
+	stmgeia	r0!, {r1, r3-r7, ip, lr}
+	stmgeia	r0!, {r1, r3-r7, ip, lr}
+	bgt	3b
+	ldmeqfd	sp!, {r4-r7, pc}
+
+	tst	r2, #32
+	stmneia	r0!, {r1, r3-r7, ip, lr}
+	tst	r2, #16
+	stmneia	r0!, {r4-r7}
+	ldmfd	sp!, {r4-r7, lr}
+
+#endif
+
 4:	tst	r2, #8
 	stmneia	r0!, {r1, r3}
 	tst	r2, #4
diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S
index abf2508e8221..b8f79d80ee9b 100644
--- a/arch/arm/lib/memzero.S
+++ b/arch/arm/lib/memzero.S
@@ -39,6 +39,9 @@ ENTRY(__memzero)
  */
 	cmp	r1, #16			@ 1 we can skip this chunk if we
 	blt	4f			@ 1 have < 16 bytes
+
+#if ! CALGN(1)+0
+
 /*
  * We need an extra register for this loop - save the return address and
  * use the LR
@@ -64,6 +67,47 @@ ENTRY(__memzero)
 	stmneia	r0!, {r2, r3, ip, lr}	@ 4
 	ldr	lr, [sp], #4		@ 1
 
+#else
+
+/*
+ * This version aligns the destination pointer in order to write
+ * whole cache lines at once.
+ */
+
+	stmfd	sp!, {r4-r7, lr}
+	mov	r4, r2
+	mov	r5, r2
+	mov	r6, r2
+	mov	r7, r2
+	mov	ip, r2
+	mov	lr, r2
+
+	cmp	r1, #96
+	andgts	ip, r0, #31
+	ble	3f
+
+	rsb	ip, ip, #32
+	sub	r1, r1, ip
+	movs	ip, ip, lsl #(32 - 4)
+	stmcsia	r0!, {r4, r5, r6, r7}
+	stmmiia	r0!, {r4, r5}
+	movs	ip, ip, lsl #2
+	strcs	r2, [r0], #4
+
+3:	subs	r1, r1, #64
+	stmgeia	r0!, {r2-r7, ip, lr}
+	stmgeia	r0!, {r2-r7, ip, lr}
+	bgt	3b
+	ldmeqfd	sp!, {r4-r7, pc}
+
+	tst	r1, #32
+	stmneia	r0!, {r2-r7, ip, lr}
+	tst	r1, #16
+	stmneia	r0!, {r4-r7}
+	ldmfd	sp!, {r4-r7, lr}
+
+#endif
+
 4:	tst	r1, #8			@ 1 8 bytes or more?
 	stmneia	r0!, {r2, r3}		@ 2
 	tst	r1, #4			@ 1 4 bytes or more?
author	Linus Torvalds <torvalds@linux-foundation.org>	2008-07-14 16:06:58 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-07-14 16:06:58 -0700
commit	85082fd7cbe3173198aac0eb5e85ab1edcc6352c (patch)
tree	edbc09b7945994f78668d218fa02e991c3b3b365 /arch/arm/lib
parent	666484f0250db2e016948d63b3ef33e202e3b8d0 (diff)
parent	53ffe3b440aa85af6fc4eda09b2d44bcdd312d4d (diff)