copy_page.s

来自「底层驱动开发」· S 代码 · 共 102 行

S
102
字号
/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */

/*
 * copy_page - copy one COPY_PAGE_SIZE-byte page.
 *
 * Syntax:   GNU as, AT&T operand order (op src, dst). The file is run
 *           through the C preprocessor (#include / #define are used).
 *
 * In:       %rdi = destination, %rsi = source
 * Out:      no return value is set up (%rax holds leftover copy data)
 * Clobbers: %rax, %rcx, %rdx, %r8-%r11, flags; %rsi and %rdi are left
 *           pointing one page past their entry values
 * Saved:    %rbx, %r12 (used as copy temporaries, spilled and restored)
 * Stack:    2*8 bytes below the return address
 *
 * Don't use streaming store because it's better when the target
 * ends up in cache.
 *
 * Could vary the prefetch distance based on SMP/UP.
 */

#define COPY_PAGE_SIZE		4096	/* bytes copied per call */
#define COPY_CHUNK		64	/* bytes moved per loop iteration */
#define COPY_PREFETCH_CHUNKS	5	/* prefetch distance, in chunks */

	.globl	copy_page
	.p2align 4
copy_page:
	/* %rbx and %r12 serve as copy temporaries below; spill them first. */
	subq	$2*8, %rsp
	movq	%rbx, (%rsp)
	movq	%r12, 1*8(%rsp)

	/*
	 * Main loop: every chunk except the last COPY_PREFETCH_CHUNKS ones.
	 * Each pass prefetches the chunk COPY_PREFETCH_CHUNKS ahead of the
	 * one being copied; stopping early keeps the furthest prefetch on
	 * the last chunk of the source page instead of beyond it.
	 *
	 * %ecx = chunks remaining in this loop. The decrement sits at the
	 * top; the movq/leaq/prefetcht0 that follow leave the flags alone,
	 * so the jnz at the bottom still tests the result of the decl.
	 */
	movl	$(COPY_PAGE_SIZE/COPY_CHUNK)-COPY_PREFETCH_CHUNKS, %ecx
	.p2align 4
.Lcopy_prefetch:
	decl	%ecx

	movq	  (%rsi), %rax
	movq	 8(%rsi), %rbx
	movq	16(%rsi), %rdx
	movq	24(%rsi), %r8
	movq	32(%rsi), %r9
	movq	40(%rsi), %r10
	movq	48(%rsi), %r11
	movq	56(%rsi), %r12

	prefetcht0 COPY_PREFETCH_CHUNKS*COPY_CHUNK(%rsi)

	movq	%rax,   (%rdi)
	movq	%rbx,  8(%rdi)
	movq	%rdx, 16(%rdi)
	movq	%r8,  24(%rdi)
	movq	%r9,  32(%rdi)
	movq	%r10, 40(%rdi)
	movq	%r11, 48(%rdi)
	movq	%r12, 56(%rdi)

	/* leaq rather than addq: the pointers advance without touching ZF. */
	leaq	COPY_CHUNK(%rsi), %rsi
	leaq	COPY_CHUNK(%rdi), %rdi
	jnz	.Lcopy_prefetch

	/*
	 * Tail loop: the final COPY_PREFETCH_CHUNKS chunks, copied the same
	 * way but without issuing any further prefetch.
	 */
	movl	$COPY_PREFETCH_CHUNKS, %ecx
	.p2align 4
.Lcopy_tail:
	decl	%ecx

	movq	  (%rsi), %rax
	movq	 8(%rsi), %rbx
	movq	16(%rsi), %rdx
	movq	24(%rsi), %r8
	movq	32(%rsi), %r9
	movq	40(%rsi), %r10
	movq	48(%rsi), %r11
	movq	56(%rsi), %r12

	movq	%rax,   (%rdi)
	movq	%rbx,  8(%rdi)
	movq	%rdx, 16(%rdi)
	movq	%r8,  24(%rdi)
	movq	%r9,  32(%rdi)
	movq	%r10, 40(%rdi)
	movq	%r11, 48(%rdi)
	movq	%r12, 56(%rdi)

	leaq	COPY_CHUNK(%rdi), %rdi
	leaq	COPY_CHUNK(%rsi), %rsi
	jnz	.Lcopy_tail

	/* Restore the spilled registers and release the frame. */
	movq	(%rsp), %rbx
	movq	1*8(%rsp), %r12
	addq	$2*8, %rsp
	ret

	/*
	 * C stepping K8 run faster using the string copy instructions.
	 * It is also a lot simpler. Use this when possible.
	 */
#include <asm/cpufeature.h>

	/*
	 * One record in .altinstructions: two quads (copy_page, copy_page_c),
	 * the X86_FEATURE_K8_C value, and two bytes that both hold the size
	 * of the replacement code.
	 * NOTE(review): the record layout is defined by whatever consumes
	 * .altinstructions, which is not visible here - presumably original
	 * address, replacement address, CPU feature, original length and
	 * replacement length; confirm against that definition.
	 */
	.section .altinstructions,"a"
	.align 8
	.quad	copy_page
	.quad	copy_page_c
	.byte	X86_FEATURE_K8_C
	.byte	copy_page_c_end-copy_page_c
	.byte	copy_page_c_end-copy_page_c
	.previous

	/*
	 * Replacement body: copy the page as COPY_PAGE_SIZE/8 quadwords with
	 * a single string move. Uses the same %rdi/%rsi inputs, touches only
	 * %rcx, %rsi and %rdi, and returns on its own.
	 */
	.section .altinstr_replacement,"ax"
copy_page_c:
	movl	$COPY_PAGE_SIZE/8, %ecx
	rep movsq
	ret
copy_page_c_end:
	.previous

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?