📄 big_copy.s

📁 操作系统SunOS 4.1.3版本的源码
💻 S
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*	@(#)big_copy.s 1.1 92/07/30 SMI	*/

	.seg	"text"
	.align	4

/*
 *	Copyright (c) 1987 by Sun Microsystems, Inc.
 */

/*
 * SPARC assembly, run through the C preprocessor before assembly.
 * "!" starts an assembler comment.  Every branch, call, jmp, ret and
 * retl is followed by a delay-slot instruction that executes before the
 * transfer takes effect; a ",a" suffix annuls the delay slot when a
 * conditional branch is NOT taken.  Do not reorder instructions around
 * branches without accounting for this.
 *
 * Everything between "#ifdef never" and the matching "#endif" is
 * compiled out; only ovbcopy is assembled from this part of the file.
 */

#include <machine/asm_linkage.h>
#include <sys/errno.h>

#ifdef never
#include <sys/param.h>
#include <sun4/mmu.h>
#include "assym.s"

/*
 * Copy a block of storage, returning an error code if `from' or
 * `to' takes a kernel pagefault which cannot be resolved.
 * Returns EIO on pagefault error, 0 if all ok
 *
 * int
 * kcopy(from, to, count)
 *	caddr_t from, to;
 *	u_int count;
 *
 * Implementation: puts the address of copyerr in %o3 and joins bcopy
 * at do_copy, which stores that value into u.u_lofault for the
 * duration of the copy.
 */
	ENTRY(kcopy)
	sethi	%hi(copyerr), %o3	! copyerr is lofault value
	b	do_copy			! common code
	or	%o3, %lo(copyerr), %o3	! delay slot, finish building address

/*
 * We got here because of a fault during kcopy.
 * Runs in the register window created at do_copy, so %l6 still holds
 * the u.u_lofault value saved there.
 */
copyerr:
	st	%l6, [_u+U_LOFAULT]	! restore old u.u_lofault
	ret
	restore	%g0, EIO, %o0		! return (EIO)

/*
 * Copy a block of storage - must not overlap ( from + len <= to).
 * Registers: l6 - saved u.u_lofault
 *
 * bcopy(from, to, count)
 *	caddr_t from, to;
 *	u_int count;
 *
 * Strategy, as implemented below:
 *   - count < 12: plain byte loop (bytecp).
 *   - from and to agree in their low three address bits: byte-copy up
 *     to a double-word boundary, then ldd/std (aldoubcp), using the
 *     unrolled blkcopy loop when count >= 512.
 *   - from and to agree in their low two address bits: byte-copy up to
 *     a word boundary, then ld/st (alwordcp).
 *   - otherwise: aligned word loads and stores with shift-and-merge
 *     between them.
 * All paths leave through cpdone, which writes %l6 back to u.u_lofault
 * and returns 0.
 *
 * NOTE(review): the count tests use signed branches (bl, bge, bg), so a
 * count with the top bit set is treated as negative; confirm callers
 * never pass such a value.
 */
	ENTRY(bcopy)
	mov	0, %o3			! no lofault value
do_copy:
	save	%sp, -SA(MINFRAME), %sp	! get another window
	ld	[_u+U_LOFAULT], %l6	! save u.u_lofault
	cmp	%i2, 12			! for small counts
	bl	bytecp			!   just copy bytes
	st	%i3, [_u+U_LOFAULT]	! install new vector
	!
	! Probe to see if word access is allowed (i.e. not VME_D16)
	! This assumes that the source and destination will not
	! change to VME_D16 during the bcopy. This will get a data
	! fault with be_vmeserr set if unsuccessful. Trap will
	! then return to bcopy_vme16. This is gross but fast.
	!
	.global _bcopy_probe, _ebcopy_probe
_bcopy_probe:
	andn	%i0, 3, %l0		! align source
	ld	[%l0], %g0		! probe source
	andn	%i1, 3, %l0		! align dest
	ld	[%l0], %g0		! probe dest
_ebcopy_probe:
	!
	! use aligned transfers where possible
	!
	xor	%i0, %i1, %o4		! xor from and to address
	btst	7, %o4			! if lower three bits zero
	bz	aldoubcp		! can align on double boundary
	btst	3, %o4			! if lower two bits zero
	bz	alwordcp		! can align on word boundary
	btst	3, %i0			! delay slot, from address unaligned?
	!
	! use aligned reads and writes where possible
	! this differs from wordcp in that it copes
	! with odd alignment between source and destination
	! using word reads and writes with the proper shifts
	! in between to align transfers to and from memory
	! i0 - src address, i1 - dest address, i2 - count
	! i3, i4 - tmps used for generating complete word
	! i5 (word to write)
	! l0 size in bits of upper part of source word (US)
	! l1 size in bits of lower part of source word (LS = 32 - US)
	! l2 size in bits of upper part of destination word (UD)
	! l3 size in bits of lower part of destination word (LD = 32 - UD)
	! l4 number of bytes leftover after aligned transfers complete
	! l5 the number 32
	!
	mov	32, %l5			! load an oft-needed constant
	bz	align_dst_only		! tests the btst 3, %i0 done above
	btst	3, %i1			! is destination address aligned?
	clr	%i4			! clear registers used in either case
	bz	align_src_only
	clr	%l0
	!
	! both source and destination addresses are unaligned
	!
1:					! align source
	ldub	[%i0], %i3		! read a byte from source address
	add	%i0, 1, %i0		! increment source address
	or	%i4, %i3, %i4		! or in with previous bytes (if any)
	btst	3, %i0			! is source aligned?
	add	%l0, 8, %l0		! increment size of upper source (US)
	bnz,a	1b
	sll	%i4, 8, %i4		! make room for next byte

	sub	%l5, %l0, %l1		! generate shift left count (LS)
	sll	%i4, %l1, %i4		! prepare to get rest
	ld	[%i0], %i3		! read a word
	add	%i0, 4, %i0		! increment source address
	srl	%i3, %l0, %i5		! upper src bits into lower dst bits
	or	%i4, %i5, %i5		! merge
	mov	24, %l3			! align destination
1:	srl	%i5, %l3, %i4		! prepare to write a single byte
	stb	%i4, [%i1]		! write a byte
	add	%i1, 1, %i1		! increment destination address
	sub	%i2, 1, %i2		! decrement count
	btst	3, %i1			! is destination aligned?
	bnz,a	1b
	sub	%l3, 8, %l3		! delay slot, decrement shift count (LD)
	sub	%l5, %l3, %l2		! generate shift left count (UD)
	sll	%i5, %l2, %i5		! move leftover into upper bytes
	cmp	%l2, %l0		! cmp # req'd to fill dst w old src left
	bg	more_needed		! need more to fill than we have
	nop

	sll	%i3, %l1, %i3		! clear upper used byte(s)
	srl	%i3, %l1, %i3
	! get the odd bytes between alignments
	sub	%l0, %l2, %l0		! regenerate shift count
	sub	%l5, %l0, %l1		! generate new shift left count (LS)
	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
	srl	%i3, %l0, %i4
	or	%i5, %i4, %i5
	st	%i5, [%i1]		! write a word
	subcc	%i2, 4, %i2		! decrement count
	bz	unalign_out
	add	%i1, 4, %i1		! increment destination address

	b	2f
	sll	%i3, %l1, %i5		! get leftover into upper bits
more_needed:
	sll	%i3, %l0, %i3		! save remaining byte(s)
	srl	%i3, %l0, %i3
	sub	%l2, %l0, %l1		! regenerate shift count
	sub	%l5, %l1, %l0		! generate new shift left count
	sll	%i3, %l1, %i4		! move to fill empty space
	b	3f
	or	%i5, %i4, %i5		! merge to complete word
	!
	! the source address is aligned and destination is not
	!
align_dst_only:
	ld	[%i0], %i4		! read a word
	add	%i0, 4, %i0		! increment source address
	mov	24, %l0			! initial shift alignment count
1:	srl	%i4, %l0, %i3		! prepare to write a single byte
	stb	%i3, [%i1]		! write a byte
	add	%i1, 1, %i1		! increment destination address
	sub	%i2, 1, %i2		! decrement count
	btst	3, %i1			! is destination aligned?
	bnz,a	1b
	sub	%l0, 8, %l0		! delay slot, decrement shift count
xfer:	sub	%l5, %l0, %l1		! generate shift left count
	sll	%i4, %l1, %i5		! get leftover
3:	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
2:	ld	[%i0], %i3		! read a source word
	add	%i0, 4, %i0		! increment source address
	srl	%i3, %l0, %i4		! upper src bits into lower dst bits
	or	%i5, %i4, %i5		! merge with upper dest bits (leftover)
	st	%i5, [%i1]		! write a destination word
	subcc	%i2, 4, %i2		! decrement count
	bz	unalign_out		! check if done
	add	%i1, 4, %i1		! increment destination address
	b	2b			! loop
	sll	%i3, %l1, %i5		! get leftover
unalign_out:
	tst	%l4			! any bytes leftover?
	bz	cpdone
	.empty				! allow next instruction in delay slot
1:	sub	%l0, 8, %l0		! decrement shift
	srl	%i3, %l0, %i4		! upper src byte into lower dst byte
	stb	%i4, [%i1]		! write a byte
	subcc	%l4, 1, %l4		! decrement count
	bz	cpdone			! done?
	add	%i1, 1, %i1		! increment destination
	tst	%l0			! any more previously read bytes
	bnz	1b			! we have leftover bytes
	mov	%l4, %i2		! delay slot, mv cnt where bytecp wants
	b,a	bytecp			! let bcopy do the rest
	!
	! the destination address is aligned and the source is not
	!
align_src_only:
	ldub	[%i0], %i3		! read a byte from source address
	add	%i0, 1, %i0		! increment source address
	or	%i4, %i3, %i4		! or in with previous bytes (if any)
	btst	3, %i0			! is source aligned?
	add	%l0, 8, %l0		! increment shift count (US)
	bnz,a	align_src_only
	sll	%i4, 8, %i4		! make room for next byte
	b,a	xfer
	!
	! if from address unaligned for double-word moves,
	! move bytes till it is, if count is < 56 it could take
	! longer to align the thing than to do the transfer
	! in word size chunks right away
	!
aldoubcp:
	cmp	%i2, 56			! if count < 56, use wordcp, it takes
	bl,a	alwordcp		! longer to align doubles than words
	mov	3, %o0			! mask for word alignment
	call	alignit			! copy bytes until aligned
	mov	7, %o0			! mask for double alignment
	!
	! source and destination are now double-word aligned
	! see if transfer is large enough to gain by loop unrolling
	!
	cmp	%i2, 512		! if less than 512 bytes
	bge,a	blkcopy			! just copy double-words (overwrite i3)
	mov	0x100, %i3		! blk copy chunk size for unrolled loop
	!
	! i3 has aligned count returned by alignit
	!
	and	%i2, 7, %i2		! unaligned leftover count
5:	ldd	[%i0], %o4		! read from address
	add	%i0, 8, %i0		! inc from address
	std	%o4, [%i1]		! write at destination address
	subcc	%i3, 8, %i3		! dec count
	bg	5b
	add	%i1, 8, %i1		! delay slot, inc to address
wcpchk:
	cmp	%i2, 4			! see if we can copy a word
	bl	bytecp			! if 3 or less bytes use bytecp
	!
	! for leftover bytes we fall into wordcp, if needed
	! (the "and" below also serves as the delay slot of the bl above)
	!
wordcp:
	and	%i2, 3, %i2		! unaligned leftover count
5:	ld	[%i0], %o4		! read from address
	add	%i0, 4, %i0		! inc from address
	st	%o4, [%i1]		! write at destination address
	subcc	%i3, 4, %i3		! dec count
	bg	5b
	add	%i1, 4, %i1		! delay slot, inc to address
	b,a	bytecp

	! we come here to align copies on word boundaries
alwordcp:
	call	alignit			! go word-align it
	mov	3, %o0			! bits that must be zero to be aligned
	b,a	wordcp
	!
	! byte copy, works with any alignment
	! entered at bytecp; the loop body at 1: runs after the load
	! in the annulled delay slot of the bge,a
	!
1:	add	%i0, 1, %i0		! inc from address
	stb	%o4, [%i1]		! write to address
	add	%i1, 1, %i1		! inc to address
bytecp:
	subcc	%i2, 1, %i2		! dec count
	bge,a	1b			! loop till done
	ldub	[%i0], %o4		! read from address
cpdone:
	st	%l6, [_u+U_LOFAULT]	! restore old u.u_lofault
	ret
	restore %g0, 0, %o0		! return (0)

/*
 * Common code used to align transfers on word and doubleword
 * boundaries.  Aligns source and destination and returns a count
 * of aligned bytes to transfer in %i3
 *
 * In:	%o0 = alignment mask (3 or 7), set in the delay slot of the call
 *	%i0 = from, %i1 = to, %i2 = count
 * Out:	%i0/%i1 advanced and %i2 reduced by the bytes copied,
 *	%i3 = %i2 with the mask bits cleared
 * Uses %o4 as the byte temporary.  Runs in the caller's window
 * (entered by call, leaves by retl).  Only the from address is tested;
 * the callers reach here only when from and to agree in the masked
 * address bits, so aligning one aligns the other.
 */
1:	inc	%i0			! inc from
	stb	%o4, [%i1]		! write a byte
	inc	%i1			! inc to
	dec	%i2			! dec count
alignit:
	btst	%o0, %i0		! %o0 is bit mask to check for alignment
	bnz,a	1b
	ldub	[%i0], %o4		! read next byte

	retl
	andn	%i2, %o0, %i3		! return size of aligned bytes

/*
 * Copy a page of memory.
 * Assumes double word alignment and a count >= 256.
 *
 * pgcopy(from, to, count)
 *	caddr_t from, to;
 *	u_int count;
 *
 * Shares blkcopy and the exit path at cpdone with bcopy.  cpdone stores
 * %l6 into u.u_lofault, so %l6 must hold a valid value in this window
 * as well: it is loaded with the current u.u_lofault below, which makes
 * that store a write-back of the unchanged value.  Without the load, an
 * undefined local register of the new window would be stored into
 * u.u_lofault.
 */
	ENTRY(pgcopy)
	save	%sp, -SA(MINFRAME), %sp	! get another window
	ld	[_u+U_LOFAULT], %l6	! cpdone writes %l6 back to u.u_lofault
	mov	0x100, %i3
	!
	! loops have been unrolled so that 64 instructions(16 cache-lines)
	! are used; 256 bytes are moved each time through the loop
	! i0 - from; i1 - to; i2 - count; i3 - chunksize
	! l0-l3 - tmps (ldd/std register pairs); o5 - computed jump target
	!
	! We read a whole cache line and then we write it to
	! minimize thrashing.
	!
	! Each group of four instructions moves 16 bytes and occupies
	! 16 bytes of code, highest offset first, so entering the table
	! at (instr - n) copies exactly the first n bytes (n a multiple
	! of 16); the tail code after instr relies on this.
	!
blkcopy:
	ldd	[%i0+0xf8], %l0		! 0xfc
	ldd	[%i0+0xf0], %l2
	std	%l0, [%i1+0xf8]
	std	%l2, [%i1+0xf0]

	ldd	[%i0+0xe8], %l0		! 0xec
	ldd	[%i0+0xe0], %l2
	std	%l0, [%i1+0xe8]
	std	%l2, [%i1+0xe0]

	ldd	[%i0+0xd8], %l0		! 0xdc
	ldd	[%i0+0xd0], %l2
	std	%l0, [%i1+0xd8]
	std	%l2, [%i1+0xd0]

	ldd	[%i0+0xc8], %l0		! 0xcc
	ldd	[%i0+0xc0], %l2
	std	%l0, [%i1+0xc8]
	std	%l2, [%i1+0xc0]

	ldd	[%i0+0xb8], %l0		! 0xbc
	ldd	[%i0+0xb0], %l2
	std	%l0, [%i1+0xb8]
	std	%l2, [%i1+0xb0]

	ldd	[%i0+0xa8], %l0		! 0xac
	ldd	[%i0+0xa0], %l2
	std	%l0, [%i1+0xa8]
	std	%l2, [%i1+0xa0]

	ldd	[%i0+0x98], %l0		! 0x9c
	ldd	[%i0+0x90], %l2
	std	%l0, [%i1+0x98]
	std	%l2, [%i1+0x90]

	ldd	[%i0+0x88], %l0		! 0x8c
	ldd	[%i0+0x80], %l2
	std	%l0, [%i1+0x88]
	std	%l2, [%i1+0x80]

	ldd	[%i0+0x78], %l0		! 0x7c
	ldd	[%i0+0x70], %l2
	std	%l0, [%i1+0x78]
	std	%l2, [%i1+0x70]

	ldd	[%i0+0x68], %l0		! 0x6c
	ldd	[%i0+0x60], %l2
	std	%l0, [%i1+0x68]
	std	%l2, [%i1+0x60]

	ldd	[%i0+0x58], %l0		! 0x5c
	ldd	[%i0+0x50], %l2
	std	%l0, [%i1+0x58]
	std	%l2, [%i1+0x50]

	ldd	[%i0+0x48], %l0		! 0x4c
	ldd	[%i0+0x40], %l2
	std	%l0, [%i1+0x48]
	std	%l2, [%i1+0x40]

	ldd	[%i0+0x38], %l0		! 0x3c
	ldd	[%i0+0x30], %l2
	std	%l0, [%i1+0x38]
	std	%l2, [%i1+0x30]

	ldd	[%i0+0x28], %l0		! 0x2c
	ldd	[%i0+0x20], %l2
	std	%l0, [%i1+0x28]
	std	%l2, [%i1+0x20]

	ldd	[%i0+0x18], %l0		! 0x1c
	ldd	[%i0+0x10], %l2
	std	%l0, [%i1+0x18]
	std	%l2, [%i1+0x10]

	ldd	[%i0+0x8], %l0		! 0x0c
	ldd	[%i0], %l2
	std	%l0, [%i1+0x8]
	std	%l2, [%i1]
instr:
	sub	%i2, %i3, %i2		! decrement count
	add	%i0, %i3, %i0		! increment from address
	cmp	%i2, 0x100		! enough to do another block?
	bge	blkcopy			! yes, do another chunk
	add	%i1, %i3, %i1		! increment to address
	tst	%i2			! all done yet?
	ble	cpdone			! yes, return
	cmp	%i2, 15			! can we do more cache lines
	bg,a	1f
	andn	%i2, 15, %i3		! %i3 bytes left, aligned (to 16 bytes)
	b	wcpchk
	andn	%i2, 3, %i3		! %i3 bytes left, aligned to 4 bytes
1:	set	instr, %o5		! address of copy instructions
	sub	%o5, %i3, %o5		! jmp address relative to instr
	jmp	%o5
	nop
#endif /* never */

/*
 * Block copy with possibly overlapped operands.
 *
 * ovbcopy(from, to, count)
 *	caddr_t from, to;
 *	u_int count;
 *
 * Leaf routine (no save): works in %o0 = from, %o1 = to, %o2 = count,
 * with %o3 as scratch.
 *   - count <= 0 (signed test): return immediately.
 *   - count <= abs(from - to): branch to _bcopy, which is expected to
 *     return to the caller of ovbcopy.
 *   - otherwise copy one byte at a time, backwards when from < to
 *     (unsigned compare) and forwards when from >= to.
 */
	ENTRY(ovbcopy)
	tst	%o2			! check count
	bg,a	1f			! count > 0, go do the copy
	subcc	%o0, %o1, %o3		! difference of from and to address

	retl				! nothing to do or bad arguments
	nop
1:	bneg,a	2f
	neg	%o3			! if < 0, make it positive
2:	cmp	%o2, %o3		! cmp size and abs(from - to)
	ble	_bcopy			! if size <= abs(diff): use bcopy,
	.empty				!   no overlap
	cmp	%o0, %o1		! compare from and to addresses
	blu	ov_bkwd			! if from < to, copy backwards
	nop
	!
	! Copy forwards.
	!
ov_fwd:
	ldub	[%o0], %o3		! read from address
	inc	%o0			! inc from address
	stb	%o3, [%o1]		! write to address
	deccc	%o2			! dec count
	bg	ov_fwd			! loop till done
	inc	%o1			! inc to address

	retl				! return
	nop
	!
	! Copy backwards.
	! %o2 is used both as the remaining count and as the byte index.
	!
ov_bkwd:
	deccc	%o2			! dec count
	ldub	[%o0 + %o2], %o3	! get byte at end of src
	bg	ov_bkwd			! loop till done
	stb	%o3, [%o1 + %o2]	! delay slot, store at end of dst

	retl				! return
	nop

#ifdef never
/*
 * Zero a block of storage, returning an error code if we
 * take a kernel pagefault which cannot be resolved.
 * Returns EIO on pagefault error, 0 if all ok
 *
 * int
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -