[PATCH] x86_64: Remove optimization for B stepping AMD K8

B stepping parts were the first shipping Opterons. memcpy/memset/copy_page/clear_page had special optimized versions for them. These are really old and in the minority now, and the difference from the generic versions (using rep microcode) is not that big anyway. So just remove them. TODO: figure out optimized versions for Intel Netburst based EM64T. Signed-off-by: Andi Kleen <ak@suse.de> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
author: Andi Kleen <ak@suse.de> 2005-11-05 17:25:54 +0100
committer: Linus Torvalds <torvalds@g5.osdl.org> 2005-11-14 19:55:17 -0800
commit: a5b250a428aabc619ace872f8220a7d0b8f7d557 (patch)
tree: 11cabf07982ae37f94bc929f9a605cbbd20e35ab /arch/x86_64/lib/copy_page.S
parent: a6f5deb2be4c82f24fefadcbf7e448f540c05ae6 (diff)
1 files changed, 0 insertions, 87 deletions
diff --git a/arch/x86_64/lib/copy_page.S b/arch/x86_64/lib/copy_page.S
index dd3aa47b6bf5..621a19769406 100644
--- a/arch/x86_64/lib/copy_page.S
+++ b/arch/x86_64/lib/copy_page.S
@@ -8,94 +8,7 @@
 	.globl copy_page
 	.p2align 4
 copy_page:
-	subq	$3*8,%rsp
-	movq	%rbx,(%rsp)
-	movq	%r12,1*8(%rsp)
-	movq	%r13,2*8(%rsp)
-			
-	movl	$(4096/64)-5,%ecx
-	.p2align 4
-.Loop64:	
-  	dec     %rcx
-
-	movq        (%rsi), %rax
-	movq      8 (%rsi), %rbx
-	movq     16 (%rsi), %rdx
-	movq     24 (%rsi), %r8
-	movq     32 (%rsi), %r9
-	movq     40 (%rsi), %r10
-	movq     48 (%rsi), %r11
-	movq     56 (%rsi), %r12
-
-	prefetcht0 5*64(%rsi)
-
-	movq     %rax,    (%rdi)
-	movq     %rbx,  8 (%rdi)
-	movq     %rdx, 16 (%rdi)
-	movq     %r8,  24 (%rdi)
-	movq     %r9,  32 (%rdi)
-	movq     %r10, 40 (%rdi)
-	movq     %r11, 48 (%rdi)
-	movq     %r12, 56 (%rdi)
-
-	leaq    64 (%rsi), %rsi
-	leaq    64 (%rdi), %rdi
-
-	jnz     .Loop64
-
-	movl	$5,%ecx
-	.p2align 4
-.Loop2:	
-	decl   %ecx
-
-	movq        (%rsi), %rax
-	movq      8 (%rsi), %rbx
-	movq     16 (%rsi), %rdx
-	movq     24 (%rsi), %r8
-	movq     32 (%rsi), %r9
-	movq     40 (%rsi), %r10
-	movq     48 (%rsi), %r11
-	movq     56 (%rsi), %r12
-
-	movq     %rax,    (%rdi)
-	movq     %rbx,  8 (%rdi)
-	movq     %rdx, 16 (%rdi)
-	movq     %r8,  24 (%rdi)
-	movq     %r9,  32 (%rdi)
-	movq     %r10, 40 (%rdi)
-	movq     %r11, 48 (%rdi)
-	movq     %r12, 56 (%rdi)
-	
-	leaq	64(%rdi),%rdi			
-	leaq	64(%rsi),%rsi			
-	
-	jnz	.Loop2		
-	
-	movq	(%rsp),%rbx
-	movq	1*8(%rsp),%r12
-	movq	2*8(%rsp),%r13
-	addq	$3*8,%rsp
-	ret
-	
-	/* C stepping K8 run faster using the string copy instructions.
-	   It is also a lot simpler. Use this when possible */
-
-#include <asm/cpufeature.h>		
-		
-	.section .altinstructions,"a"
-	.align 8
-	.quad  copy_page
-	.quad  copy_page_c
-	.byte  X86_FEATURE_K8_C
-	.byte  copy_page_c_end-copy_page_c
-	.byte  copy_page_c_end-copy_page_c
-	.previous
-
-	.section .altinstr_replacement,"ax"
-copy_page_c:
 	movl $4096/8,%ecx
 	rep 
 	movsq 
 	ret
-copy_page_c_end:
-	.previous
author	Andi Kleen <ak@suse.de>	2005-11-05 17:25:54 +0100
committer	Linus Torvalds <torvalds@g5.osdl.org>	2005-11-14 19:55:17 -0800
commit	a5b250a428aabc619ace872f8220a7d0b8f7d557 (patch)
tree	11cabf07982ae37f94bc929f9a605cbbd20e35ab /arch/x86_64/lib/copy_page.S
parent	a6f5deb2be4c82f24fefadcbf7e448f540c05ae6 (diff)