⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 copy_user.s

📁 Linux内核源代码 为压缩文件 是<<Linux内核>>一书中的源代码
💻 S
📖 第 1 页 / 共 2 页
字号:
/*
 *
 * Optimized version of the copy_user() routine.
 * It is used to copy data across the kernel/user boundary.
 *
 * The source and destination are always on opposite sides of
 * the boundary. When reading from user space we must catch
 * faults on loads. When writing to user space we must catch
 * errors on stores. Note that because of the nature of the copy
 * we don't need to worry about overlapping regions.
 *
 *
 * Inputs:
 *	in0	address of destination buffer (aliased to `dst' below)
 *	in1	address of source buffer (aliased to `src' below)
 *	in2	number of bytes to copy (aliased to `len' below)
 *
 * Outputs:
 *	ret0	0 in case of success. The number of bytes NOT copied in
 *		case of error.
 *
 * Copyright (C) 2000 Hewlett-Packard Co
 * Copyright (C) 2000 Stephane Eranian <eranian@hpl.hp.com>
 *
 * Fixme:
 *	- handle the case where we have more than 16 bytes and the
 *	  alignments are different.
 *	- more benchmarking
 *	- fix extraneous stop bit introduced by the EX() macro.
 */

#include <asm/asmmacro.h>

//
// EX(y, x...): emit instruction x at local label 99 and, unless DEBUG is
// defined, record it in the __ex_table section. Each table entry is two
// data4 words: the gp-relative address of label 99 and the distance from
// label 99 to the handler label y.
//
// The label comes first because our store instruction contains a comma
// and confuses the preprocessor otherwise.
//
// DEBUG is explicitly undefined right here, so the table-emitting variant
// is the one that is normally compiled.
//
#undef DEBUG
#ifdef DEBUG
#define EX(y,x...)				\
99:	x
#else
#define EX(y,x...)				\
	.section __ex_table,"a";		\
	data4 @gprel(99f);			\
	data4 y-99f;				\
	.previous;				\
99:	x
#endif

//
// Tuneable parameters
//
#define COPY_BREAK	16	// we do byte copy below (must be >=16)
#define PIPE_DEPTH	4	// pipe depth

// EPI names the last stage predicate of the software pipeline.
#define EPI		p[PIPE_DEPTH-1] // PASTE(p,16+PIPE_DEPTH-1)

//
// arguments
//
#define dst		in0
#define src		in1
#define len		in2

//
// local registers
//
#define t1		r2	// rshift in bytes
#define t2		r3	// lshift in bytes
#define rshift		r14	// right shift in bits
#define lshift		r15	// left shift in bits
#define word1		r16
#define word2		r17
#define cnt		r18
#define len2		r19
#define saved_lc	r20
#define saved_pr	r21
#define tmp		r22
#define val		r23
#define src1		r24
#define dst1		r25
#define src2		r26
#define dst2		r27
#define len1		r28
#define enddst		r29
#define endsrc		r30
#define saved_pfs	r31

	.text
	.psr	abi64
	.psr	lsb

//
// __copy_user: entry point. Saves ar.pfs, ar.lc and the predicates,
// returns immediately with ret0=0 for a zero length, and then either
// falls through to the byte-by-byte loop (len <= COPY_BREAK) or branches
// to long_copy_user (len > COPY_BREAK).
// GLOBAL_ENTRY() and UNW() are not defined in this file; presumably they
// come from <asm/asmmacro.h> -- TODO confirm.
//
GLOBAL_ENTRY(__copy_user)
	UNW(.prologue)
	UNW(.save ar.pfs, saved_pfs)
	// 3 inputs, no outputs; the local and rotating counts are both
	// 2*PIPE_DEPTH rounded up to a multiple of 8.
	alloc saved_pfs=ar.pfs,3,((2*PIPE_DEPTH+7)&~7),0,((2*PIPE_DEPTH+7)&~7)

	// Rotating register and rotating predicate names used by the
	// pipelined loops.
	.rotr val1[PIPE_DEPTH],val2[PIPE_DEPTH]
	.rotp p[PIPE_DEPTH]

	adds len2=-1,len	// br.ctop is repeat/until
	mov ret0=r0		// default return value: 0 bytes left uncopied

	;;			// RAW of cfm when len=0
	cmp.eq p8,p0=r0,len	// check for zero length
	UNW(.save ar.lc, saved_lc)
	mov saved_lc=ar.lc	// preserve ar.lc (slow)
(p8)	br.ret.spnt.few rp	// empty memcpy()
	;;
	add enddst=dst,len	// first byte after end of destination
	add endsrc=src,len	// first byte after end of source
	UNW(.save pr, saved_pr)
	mov saved_pr=pr		// preserve predicates

	UNW(.body)

	mov dst1=dst		// copy because of rotation
	mov ar.ec=PIPE_DEPTH
	mov pr.rot=1<<16	// p16=true all others are false

	mov src1=src		// copy because of rotation
	mov ar.lc=len2		// initialize lc for small count
	cmp.lt p10,p7=COPY_BREAK,len	// if len > COPY_BREAK then long copy

	xor tmp=src,dst		// same alignment test prepare
(p10)	br.cond.dptk.few long_copy_user
	;;			// RAW pr.rot/p16 ?
// Short copy path: software-pipelined byte-by-byte loop, followed by the
// common epilogue and the entry of the differently-aligned long copy.
	//
	// Now we do the byte by byte loop with software pipeline
	//
	// p7 is necessarily false by now
	// NOTE(review): p7 is written as the complement of p10 by the
	// "cmp.lt p10,p7=COPY_BREAK,len" that precedes this loop, and this
	// path is reached when the (p10) branch is not taken, so p7 looks
	// true rather than false here -- confirm what the line above means.
	//
1:
	EX(failure_in_pipe1,(p16) ld1 val1[0]=[src1],1)
	EX(failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
	br.ctop.dptk.few 1b
	;;
	mov ar.lc=saved_lc
	// The mask has bits 16..63 set: only the rotating predicates are
	// restored from saved_pr.
	mov pr=saved_pr,0xffffffffffff0000
	mov ar.pfs=saved_pfs		// restore ar.ec
	br.ret.sptk.few rp	// end of short memcpy

	//
	// Not 8-byte aligned
	//
diff_align_copy_user:
	// At this point we know we have more than 16 bytes to copy
	// and also that src and dest do _not_ have the same alignment.
	and src2=0x7,src1				// src offset
	and dst2=0x7,dst1				// dst offset
	;;
	// The basic idea is that we copy byte-by-byte at the head so
	// that we can reach 8-byte alignment for both src1 and dst1.
	// Then copy the body using software pipelined 8-byte copy,
	// shifting the two back-to-back words right and left, then copy
	// the tail by copying byte-by-byte.
	//
	// Fault handling. If the byte-by-byte at the head fails on the
	// load, then restart and finish the pipeline by copying zeros
	// to the dst1. Then copy zeros for the rest of dst1.
	// If 8-byte software pipeline fails on the load, do the same as
	// failure_in3 does. If the byte-by-byte at the tail fails, it is
	// handled simply by failure_in_pipe1.
	//
	// The case p14 represents the source has more bytes in the
	// first word (by the shifted part), whereas the p15 needs to
	// copy some bytes from the 2nd word of the source that has the
	// tail of the 1st of the destination.
	//

	//
	// Optimization. If dst1 is 8-byte aligned (not rarely), we don't need
	// to copy the head to dst1, to start 8-byte copy software pipeline.
	// We know src1 is not 8-byte aligned in this case.
// Head handling for the differently-aligned copy: derive the shift
// amounts, copy the leading bytes one at a time until dst1 is 8-byte
// aligned, then set up the 8-byte software-pipelined loop.
	//
	cmp.eq p14,p15=r0,dst2		// p14: dst1 already 8-byte aligned
(p15)	br.cond.spnt.few 1f
	;;
	// dst1 aligned: no head bytes to copy, only the shift amounts and
	// the remaining length are needed.
	sub t1=8,src2
	mov t2=src2
	;;
	shl rshift=t2,3
	sub len1=len,t1					// set len1
	;;
	sub lshift=64,rshift
	;;
	br.cond.spnt.few word_copy_user
	;;
1:
	// dst1 not aligned: p14 when src offset <= dst offset, p15 otherwise.
	cmp.leu	p14,p15=src2,dst2
	sub t1=dst2,src2
	;;
	.pred.rel "mutex", p14, p15
(p14)	sub word1=8,src2				// (8 - src offset)
(p15)	sub t1=r0,t1					// absolute value
(p15)	sub word1=8,dst2				// (8 - dst offset)
	;;
	// For the case p14, we don't need to copy the shifted part to
	// the 1st word of destination.
	// (After the p14 adjustment below, word1 equals 8 - dst2 in both
	// cases, i.e. the number of head bytes copied by loop 2.)
	sub t2=8,t1
(p14)	sub word1=word1,t1
	;;
	sub len1=len,word1				// resulting len
(p15)	shl rshift=t1,3					// in bits
(p14)	shl rshift=t2,3
	;;
(p14)	sub len1=len1,t1
	adds cnt=-1,word1				// loop count is iterations-1
	;;
	sub lshift=64,rshift
	mov ar.ec=PIPE_DEPTH
	mov pr.rot=1<<16	// p16=true all others are false
	mov ar.lc=cnt
	;;
	// Byte-by-byte copy of the head.
2:
	EX(failure_in_pipe2,(p16) ld1 val1[0]=[src1],1)
	;;
	EX(failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
	br.ctop.dptk.few 2b
	;;
	clrrrb			// reset register rotation before the next loop
	;;
word_copy_user:
	cmp.gtu p9,p0=16,len1
(p9)	br.cond.spnt.few 4f		// if (16 > len1) skip 8-byte copy
	;;
	shr.u cnt=len1,3		// number of 64-bit words
	;;
	adds cnt=-1,cnt
	;;
	.pred.rel "mutex", p14, p15
(p14)	sub src1=src1,t2
(p15)	sub src1=src1,t1
	//
	// Now both src1 and dst1 point to an 8-byte aligned address. And
	// we have more than 8 bytes to copy.
	//
	mov ar.lc=cnt
	mov ar.ec=PIPE_DEPTH
	mov pr.rot=1<<16	// p16=true all others are false
	;;
3:
	//
	// The pipeline consists of 3 stages:
	// 1 (p16):	Load a word from src1
	// 2 (EPI_1):	Shift right pair, saving to tmp
	// 3 (EPI):	Store tmp to dst1
	//
	// To make it simple, use at least 2 (p16) loops to set up val1[n]
	// because we need 2 back-to-back val1[] to get tmp.
	// Note that this implies EPI_2 must be p18 or greater.
	// NOTE(review): no EPI_2 is defined in the visible code; this
	// presumably means EPI_1 (p[PIPE_DEPTH-2]) -- TODO confirm.
// 8-byte shifted copy for differently-aligned buffers: helper macros, the
// per-shift dispatch, the post-loop pointer fix-up and the setup of the
// byte-by-byte tail loop.
	//
	// EPI_1:		predicate of the stage just before EPI.
	// SWITCH(pred, shift):	set pred when the rshift register equals shift.
	// CASE(pred, shift):	if pred, branch to copy_user_bit<shift>.
	// BODY(rshift):	define label copy_user_bit<rshift> and one pipelined
	//			loop: store tmp, build the next tmp with shrp from two
	//			consecutive loaded words, load the next word. When the
	//			loop ends it branches to .diff_align_do_tail.
	//			The macro parameter is named rshift and therefore hides
	//			the rshift register alias inside BODY, so shrp gets the
	//			literal shift count.
	//
#define EPI_1		p[PIPE_DEPTH-2]
#define SWITCH(pred, shift)	cmp.eq pred,p0=shift,rshift
#define CASE(pred, shift)	\
	(pred)	br.cond.spnt.few copy_user_bit##shift
#define BODY(rshift)							\
copy_user_bit##rshift:							\
1:									\
	EX(failure_out,(EPI) st8 [dst1]=tmp,8);				\
(EPI_1) shrp tmp=val1[PIPE_DEPTH-3],val1[PIPE_DEPTH-2],rshift;		\
	EX(failure_in2,(p16) ld8 val1[0]=[src1],8);			\
	br.ctop.dptk.few 1b;						\
	;;								\
	br.cond.spnt.few .diff_align_do_tail

	//
	// Since the instruction 'shrp' requires a fixed 128-bit value
	// specifying the bits to shift, we need to provide 7 cases
	// below.
	//
	SWITCH(p6, 8)
	SWITCH(p7, 16)
	SWITCH(p8, 24)
	SWITCH(p9, 32)
	SWITCH(p10, 40)
	SWITCH(p11, 48)
	SWITCH(p12, 56)
	;;
	CASE(p6, 8)
	CASE(p7, 16)
	CASE(p8, 24)
	CASE(p9, 32)
	CASE(p10, 40)
	CASE(p11, 48)
	CASE(p12, 56)
	;;
	BODY(8)
	BODY(16)
	BODY(24)
	BODY(32)
	BODY(40)
	BODY(48)
	BODY(56)
	;;
.diff_align_do_tail:
	// Adjust src1/dst1 after the word loop, before the byte-wise tail.
	.pred.rel "mutex", p14, p15
(p14)	sub src1=src1,t1
(p14)	adds dst1=-8,dst1
(p15)	sub dst1=dst1,t1
	;;
4:
	// Tail correction.
	//
	// The problem with this pipelined loop is that the last word is not
	// loaded and thus part of the last word written is not correct.
	// To fix that, we simply copy the tail byte by byte.

	sub len1=endsrc,src1,1	// endsrc - src1 - 1: remaining bytes minus one
	clrrrb
	;;
	mov ar.ec=PIPE_DEPTH
	mov pr.rot=1<<16	// p16=true all others are false
	mov ar.lc=len1
	;;
5:

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -