📄 balib.s

📁 vxworks5.5.1源代码。完整源代码
💻 S
📖 第 1 页 / 共 3 页
字号:
1 2 3 下一页
/* bALib.s - ARM assembler buffer manipulation routines *//* Copyright 1991-1998 Advanced RISC Machines Ltd. *//*modification history--------------------01g,17oct01,t_m  convert to FUNC_LABEL:01f,11oct01,jb  Enabling removal of pre-pended underscores for new compilers                 (Diab/Gnu elf)01e,15jul98,cdp  added big-endian support.01d,25feb98,cdp  replaced ARM_ARCH4 stuff by ARM_HAS_HALFWORD_INSTRUCTIONS.01c,27oct97,kkk  took out "***EOF***" line from end of file.01b,23may97,jpd  Amalgamated into VxWorks.01a,09jul96,ams  Ported from ARM asm.*//*DESCRIPTIONThese are buffer manipulation routines, written by ARM/Acorn. It wastaken from the ARM C Library in assembler and ported here to gas.*/#define _ASMLANGUAGE#include "vxWorks.h"#include "asm.h"	.data	.globl	FUNC(copyright_wind_river)	.long	FUNC(copyright_wind_river)#if (defined(PORTABLE))#define bALib_PORTABLE#endif#ifndef bALib_PORTABLE#if (_BYTE_ORDER == _BIG_ENDIAN)#define SLA LSL			/* shift towards low address end */#define SHA LSR			/* shift towards high address end */#else#define SLA LSR			/* shift towards low address end */#define SHA LSL			/* shift towards high address end */#endif/* Register aliases */src	.req	r0dst	.req	r1n	.req	r2tmp1	.req	r3tmp3	.req	r12/* globals */	.global	FUNC(bcopy)			/* copy buffer as fast as possible */	.global	FUNC(bcopyBytes)		/* copy buffer byte at a time */	.global	FUNC(bcopyWords)		/* copy buffer word at a time */	.global	FUNC(bcopyLongs)		/* copy buffer long at a time */	.global	FUNC(bfill)			/* fill buffer as fast as possible */	.global	FUNC(bfillBytes)		/* fill buffer byte at a time */	.text	.balign	4/********************************************************************************* bcopy - copy one buffer to another** This routine copies the first <nbytes> characters from <source> to* <destination>.  Overlapping buffers are handled correctly.  Copying is done* in the most efficient way possible.  
In general, the copy will be* significantly faster if both buffers are long-word aligned.** RETURNS: N/A** NOMANUAL** void bcopy*	(*	const char *	source,		/@ pointer to source buffer @/*	char *		destination,	/@ pointer to destination buffer @/*	int		nbytes		/@ number of bytes to copy @/*	)*/FUNC_LABEL(bcopy)	cmp	src, dst		/* copying up or down */	blo	CopyDown		/* Copy down then if lower */	moveq	pc, lr			/* dst == src, no move, RETURN */	stmfd	sp!, {lr}		/* Preserve lr */	/* Copy Up */	subs	n, n, #4		/* need at least 4 bytes */	blt	Up_TrailingBytes	/* < 4 bytes to go */	/*	 * word align the dst - first find out how many bytes must be	 * stored to do this.  If the number is 0 check the src too.	 */	ands	tmp3, dst, #3		/* eq means aligned! */	bne	Up_AlignDst	ands	tmp3, src, #3	bne	Up_SrcUnaligned 	/* more difficult! */	/*	 * We are here when source and destination are both aligned.	 * number of bytes to transfer is (n+4), n is >= 0.	 */Up_SrcDstAligned:	subs	n, n, #12-4		/* 12 bytes or more? */	blt	Up_TrailingWords	/*	 * We only have three registers to play with.  It is	 * worth gaining more only if the number of bytes to	 * transfer is greater than 12+8*<registers stacked>	 * We need to stack 8 (4+4) registers to gain 8 temporaries,	 * so look for >=44 bytes.  Since we would save 8*4 = 32	 * bytes at a time we actually compare with 64.	 */	subs	n, n, #32-12		/* test for n+32 to go. */	blt	Up_16			/* Less than 16 to go */	stmfd	sp!, {v1}		/* Save register */Up_Loop4:	/* loop loading 4 registers per time, twice (32 bytes) */	ldmia	src!, {tmp1, v1, tmp3, lr}	stmia	dst!, {tmp1, v1, tmp3, lr}	ldmia	src!, {tmp1, v1, tmp3, lr}	stmia	dst!, {tmp1, v1, tmp3, lr}	subs	n, n, #32	bge	Up_Loop4	/* see if we can handle another 8 */	cmn	n, #16	ldmgeia src!, {tmp1, v1, tmp3, lr}	stmgeia dst!, {tmp1, v1, tmp3, lr}	subge	n, n, #16	/*	 * Reload the register - note that we still have (n+32)	 * bytes to go, and that this is <16.	 
*/	ldmfd	sp!, {v1}Up_16:	/* Here when there are fewer than 16 bytes to go. */	adds	n, n, #32-12		/* (n-12) to go */Up_12:	/* Ok - do three words at a time. */	ldmgeia src!, {tmp1, tmp3, lr}	stmgeia dst!, {tmp1, tmp3, lr}	subges	n, n, #12	bge	Up_12Up_TrailingWords:	/* (n-12) bytes to go - 0, 1 or 2 words.  Check which. */	adds	n, n, #12-4		/* (n-4) to go */	blt	Up_TrailingBytes	/* < 4 bytes to go */	subs	n, n, #4	ldrlt	tmp1, [src], #4	strlt	tmp1, [dst], #4	ldmgeia src!, {tmp1, tmp3}	stmgeia dst!, {tmp1, tmp3}	subge	n, n, #4Up_TrailingBytes:	/* Here with less than 4 bytes to go */	adds	n, n, #4	ldmeqfd	sp!, {pc}		/* 0 bytes, RETURN */	cmp	n, #2			/* 1, 2 or 3 bytes */	ldrb	tmp1, [src], #1		/* 1 */	strb	tmp1, [dst], #1		/* 1 */	ldrgeb	tmp1, [src], #1		/* 2 */	strgeb	tmp1, [dst], #1		/* 2 */	ldrgtb	tmp1, [src], #1		/* 3 */	strgtb	tmp1, [dst], #1		/* 3 */	ldmfd	sp!, {pc}		/* Return *//************************************************************ * * word align dst - tmp3 contains current destination * alignment.  We can store at least 4 bytes here. */Up_AlignDst:	rsb	tmp3, tmp3, #4		/* 1-3 bytes to go */	cmp	tmp3, #2	ldrb	tmp1, [src], #1		/* 1 */	strb	tmp1, [dst], #1		/* 1 */	ldrgeb	tmp1, [src], #1		/* 2 */	strgeb	tmp1, [dst], #1		/* 2 */	ldrgtb	tmp1, [src], #1		/* 3 */	strgtb	tmp1, [dst], #1		/* 3 */	subs	n, n, tmp3		/* check number to go */	blt	Up_TrailingBytes	/* less than 4 bytes */	ands	tmp3, src, #3	beq	Up_SrcDstAligned	/* coaligned case */	/*	 * The source is not coaligned with the destination,	 * the destination IS currently word aligned.	 */Up_SrcUnaligned:	bic	src, src, #3		/* tmp3 holds extra! */	ldr	lr, [src], #4		/* 1-3 useful bytes */	cmp	tmp3, #2	bgt	Up_OneByte		/* one byte in tmp1 */	beq	Up_TwoBytes		/* two bytes in tmp1 *//* * The next three source bytes are in tmp1, one byte must * come from the next source word. At least four bytes * more must be stored.	
Check first to see if there are a * sufficient number of bytes to go to justify using stm/ldm * instructions. */Up_ThreeBytes:	cmp	n, #16-4		/* at least 16 bytes? */	blt	Up_LT16a		/* no			1	*/	sub	n, n, #16-4		/* (n+16) bytes to go	1	*/	/*	 * save some work registers.  The point at which this	 * is done is based on the ldm/stm time being = (n+3)+(n/4)S	 */	stmfd	sp!, {v1, v2}	/*	 * loop doing 16 bytes at a time.  There are currently	 * three useful bytes in lr.	 */Up_GE16:	mov	tmp1, lr, SLA #8	/* first three bytes	1	*/	ldmia	src!, {v1, v2, tmp3, lr}	/*		12/13	*/	orr	tmp1, tmp1, v1, SHA #24		/* word 1	1	*/	mov	v1, v1, SLA #8			/*		...	*/	orr	v1, v1, v2, SHA #24		/* word 2	2 (1+1)	*/	mov	v2, v2, SLA #8	orr	v2, v2, tmp3, SHA #24		/* word 3	2	*/	mov	tmp3, tmp3, SLA #8	orr	tmp3, tmp3, lr, SHA #24		/* word 4	2	*/	stmia	dst!, {tmp1, v1, v2, tmp3}	/*		12/13	*/	subs	n, n, #16			/*		1	*/	bge	Up_GE16				/*		4 / 1	*/	/*	 * loop timing (depends on alignment) for n loops:-	 *	 *	pre:	17	 *		((45/46/47)n - 3) for 32n bytes	 *	post:	13/14	 *	total:	(45/46/47)n+(27/28)	 *	32 bytes:	72-75	 *	64 bytes:	117-122	 *	96 bytes:	162-169	 */	ldmfd	sp!, {v1, v2}		/* Reload registers 12/13 ????	*/	adds	n, n, #16-4		/* check for at least 4	*/	blt	Up_LT4a			/* < 4 bytes		*/Up_LT16a:	mov	tmp3, lr, SLA #8	/* first three bytes	1	*/	ldr	lr, [src], #4		/* next four bytes	4	*/	orr	tmp3, tmp3, lr, SHA #24	/*			1	*/	str	tmp3, [dst], #4 	/*			4	*/	subs	n, n, #4		/*			1	*/	bge	Up_LT16a		/* tmp1 contains three bytes 1 / 4 */	/*	 * Loop timing:	 *	 *		15n-3	for 4n bytes	 *	32:	117	 *	64:	237	 */Up_LT4a:	/* Less than four bytes to go - readjust the src address. */	sub	src, src, #3	b	Up_TrailingBytes/* * The next two source bytes are in tmp1, two bytes must * come from the next source word. At least four bytes * more must be stored. */Up_TwoBytes:	cmp	n, #16-4		/* at least 16 bytes?		
*/	blt	Up_LT16b		/* no				*/	sub	n, n, #16-4		/* (n+16) bytes to go		*/	stmfd	sp!, {v1, v2}		/* save registers */	/*	 * loop doing 32 bytes at a time.  There are currently	 * two useful bytes in lr.	 */Up_32b:	mov	tmp1, lr, SLA #16	/* first two bytes		*/	ldmia	src!, {v1, v2, tmp3, lr}	orr	tmp1, tmp1, v1, SHA #16	/* word 1			*/	mov	v1, v1, SLA #16	orr	v1, v1, v2, SHA #16	/* word 2			*/	mov	v2, v2, SLA #16	orr	v2, v2, tmp3, SHA #16	/* word 3			*/	mov	tmp3, tmp3, SLA #16	orr	tmp3, tmp3, lr, SHA #16	/* word 4			*/	stmia	dst!, {tmp1, v1, v2, tmp3}	subs	n, n, #16	bge	Up_32b	ldmfd	sp!, {v1, v2}		/* Reload registers */	adds	n, n, #16-4		/* check number of bytes	*/	blt	Up_LT4bUp_LT16b:	mov	tmp3, lr, SLA #16	/* first two bytes		*/	ldr	lr, [src], #4		/* next four bytes		*/	orr	tmp3, tmp3, lr, SHA #16	str	tmp3, [dst], #4	subs	n, n, #4	bge	Up_LT16b		/* tmp1 contains two bytes	*/Up_LT4b:	/* Less than four bytes to go - readjust the src address. */	sub	src, src, #2	b	Up_TrailingBytes/* * The next source byte is in tmp1, three bytes must * come from the next source word. At least four bytes * more must be stored. */Up_OneByte:	cmp	n, #16-4		/* at least 16 bytes?		*/	blt	Up_LT16c		/* no				*/	sub	n, n, #16-4		/* (n+16) bytes to go		*/	stmfd	sp!, {v1, v2}		/* save registers */	/*	 * loop doing 32 bytes at a time.  
There is currently	 * one useful byte in lr	 */Up_32c:	mov	tmp1, lr, SLA #24	/* first byte			*/	ldmia	src!, {v1, v2, tmp3, lr}	orr	tmp1, tmp1, v1, SHA #8	/* word 1			*/	mov	v1, v1, SLA #24	orr	v1, v1, v2, SHA #8	/* word 2			*/	mov	v2, v2, SLA #24	orr	v2, v2, tmp3, SHA #8	/* word 3			*/	mov	tmp3, tmp3, SLA #24	orr	tmp3, tmp3, lr, SHA #8	/* word 4			*/	stmia	dst!, {tmp1, v1, v2, tmp3}	subs	n, n, #16	bge	Up_32c	ldmfd	sp!, {v1, v2}		/* Reload registers */	adds	n, n, #16-4		/* check number of bytes	*/	blt	Up_LT4cUp_LT16c:	mov	tmp3, lr, SLA #24	/* first byte			*/	ldr	lr, [src], #4		/* next four bytes		*/	orr	tmp3, tmp3, lr, SHA #8	str	tmp3, [dst], #4	subs	n, n, #4	bge	Up_LT16c		/* tmp1 contains one byte	*/Up_LT4c:	/* Less than four bytes to go - one already in tmp3. */	sub	src, src, #1	b	Up_TrailingBytes/********************************************************************** * Copy down code * ============== * *	This is exactly the same as the copy up code - *	but it copies in the opposite direction. */CopyDown:	add	src, src, n		/* points beyond end */	add	dst, dst, n	subs	n, n, #4		/* need at least 4 bytes */	blt	Down_TrailingBytes	/* < 4 bytes to go */	/*	 * word align the dst - first find out how many bytes	 * must be stored to do this.  If the number is 0	 * check the src too.	 */	ands	tmp3, dst, #3		/* eq means aligned! */	bne	Down_AlignDst	ands	tmp3, src, #3	bne	Down_SrcUnaligned	/* more difficult! */	/*	 * here when source and destination are both aligned.	 * number of bytes to transfer is (n+4), n is >= 0.	 */Down_SrcDstAligned:	subs	n, n, #12-4		/* 12 bytes or more? */	blt	Down_TrailingWords	/*	 * We only have three registers to play with.  It is	 * worth gaining more only if the number of bytes to	 * transfer is greater than 12+8*<registers stacked>	 * We need to stack 8 (4+4) registers to gain 8 temporaries,	 * so look for >=44 bytes.  Since we would save 8*4 = 32	 * bytes at a time we actually compare with 64.	 
*/	stmfd	sp!, {v1, lr}	subs	n, n, #32-12		/* n+32 to go. */	blt	Down_16aDown_32a:	/* loop loading 4 registers per time, twice (32 bytes) */	ldmdb	src!, {tmp1, v1, tmp3, lr}	stmdb	dst!, {tmp1, v1, tmp3, lr}	ldmdb	src!, {tmp1, v1, tmp3, lr}	stmdb	dst!, {tmp1, v1, tmp3, lr}	subs	n, n, #32	bge	Down_32aDown_16a:	/* see if we can handle another 16 */	cmn	n, #16	ldmgedb src!, {tmp1, v1, tmp3, lr}	stmgedb dst!, {tmp1, v1, tmp3, lr}	subge	n, n, #16	/* Here when there are fewer than 16 bytes to go. */	adds	n, n, #32-12		/* (n-12) to go */	/* Ok - do three words at a time. */	ldmgedb src!, {tmp1, tmp3, lr}	stmgedb dst!, {tmp1, tmp3, lr}	subge	n, n, #12	ldmfd	sp!, {v1, lr}		/* Restore registers */	/* (n-12) bytes to go - 0, 1 or 2 words.  Check which. */Down_TrailingWords:	adds	n, n, #12-4		/* (n-4) to go */	blt	Down_TrailingBytes	/* < 4 bytes to go */	subs	n, n, #4	ldrlt	tmp1, [src, #-4]!	strlt	tmp1, [dst, #-4]!	ldmgedb src!, {tmp1, tmp3}	stmgedb dst!, {tmp1, tmp3}	subge	n, n, #4Down_TrailingBytes:	/* Here with less than 4 bytes to go */	adds	n, n, #4	moveq	pc, lr			/* 0 bytes, RETURN	*/	cmp	n, #2			/* 1, 2 or 3 bytes	*/	ldrb	tmp1, [src, #-1]!	/* 1 */	strb	tmp1, [dst, #-1]!	/* 1 */	ldrgeb	tmp1, [src, #-1]!	/* 2 */	strgeb	tmp1, [dst, #-1]!	/* 2 */	ldrgtb	tmp1, [src, #-1]!	/* 3 */	strgtb	tmp1, [dst, #-1]!	/* 3 */	mov	pc, lr			/* RETURN *//************************************************************ * * word align dst - tmp3 contains current destination * alignment.  We can store at least 4 bytes here.  We are * going downwards - so tmp3 is the actual number of bytes * to store. */Down_AlignDst:	cmp	tmp3, #2		/* 1, 2 or 3 bytes */	ldrb	tmp1, [src, #-1]!	/* 1 */	strb	tmp1, [dst, #-1]!	/* 1 */	ldrgeb	tmp1, [src, #-1]!	/* 2 */	strgeb	tmp1, [dst, #-1]!	/* 2 */	ldrgtb	tmp1, [src, #-1]!	/* 3 */	strgtb	tmp1, [dst, #-1]!	
/* 3 */	subs	n, n, tmp3		/* check number to go */	blt	Down_TrailingBytes	/* less than 4 bytes */	ands	tmp3, src, #3	beq	Down_SrcDstAligned	/* coaligned case */	/*	 * The source is not coaligned with the destination,	 * the destination IS currently word aligned.	 */Down_SrcUnaligned:	bic	src, src, #3		/* tmp3 holds extra! */	ldr	tmp1, [src]		/* 1-3 useful bytes */	cmp	tmp3, #2	blt	Down_OneByte		/* one byte in tmp1 */	beq	Down_TwoBytes		/* two bytes in tmp1 *//* * The last three source bytes are in tmp1, one byte must * come from the previous source word. At least four bytes * more must be stored.	Check first to see if there are a * sufficient number of bytes to go to justify using stm/ldm * instructions. */Down_ThreeBytes:	cmp	n, #16-4		/* at least 16 bytes? */	blt	Down_LT16b		/* no */	sub	n, n, #16-4		/* (n+16) bytes to go */	stmfd	sp!, {v1, v2, lr}	/* save registers */	/*	 * loop doing 32 bytes at a time.  There are currently	 * three useful bytes in tmp1 (a4).	 */Down_32b:	mov	lr, tmp1, SHA #8	/* last three bytes	*/	ldmdb	src!, {tmp1, v1, v2, tmp3}	orr	lr, lr, tmp3, SLA #24	/* word 4		*/	mov	tmp3, tmp3, SHA #8	orr	tmp3, tmp3, v2, SLA #24	/* word 3		*/	mov	v2, v2, SHA #8	orr	v2, v2, v1, SLA #24	/* word 2		*/	mov	v1, v1, SHA #8	orr	v1, v1, tmp1, SLA #24	/* word 1		*/	stmdb	dst!, {v1, v2, tmp3, lr}	subs	n, n, #16	bge	Down_32b	ldmfd	sp!, {v1, v2, lr}	/* Reload registers */	adds	n, n, #16-4		/* check for at least 4	*/	blt	Down_LT4b		/* < 4 bytes		*/Down_LT16b:	mov	tmp3, tmp1, SHA #8	/* last three bytes	*/	ldr	tmp1, [src, #-4]!	/* previous four bytes	*/	orr	tmp3, tmp3, tmp1, SLA #24	str	tmp3, [dst, #-4]!	subs	n, n, #4
1 2 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -