📄 balib.s
字号:
/* bALib.s - ARM assembler buffer manipulation routines */

/* Copyright 1991-1998 Advanced RISC Machines Ltd. */

/*
modification history
--------------------
01g,17oct01,t_m  convert to FUNC_LABEL:
01f,11oct01,jb   Enabling removal of pre-pended underscores for new compilers
                 (Diab/Gnu elf)
01e,15jul98,cdp  added big-endian support.
01d,25feb98,cdp  replaced ARM_ARCH4 stuff by ARM_HAS_HALFWORD_INSTRUCTIONS.
01c,27oct97,kkk  took out "***EOF***" line from end of file.
01b,23may97,jpd  Amalgamated into VxWorks.
01a,09jul96,ams  Ported from ARM asm.
*/

/*
DESCRIPTION
These are buffer manipulation routines, written by ARM/Acorn.  They were
taken from the ARM C Library in assembler and ported here to gas.

This file is run through the C preprocessor before assembly (it relies on
#define/#include/#if), so every directive below must stay on its own line.
*/

#define _ASMLANGUAGE
#include "vxWorks.h"
#include "asm.h"

        /*
         * Emit one word in .data holding the address of the symbol
         * copyright_wind_river (name passed through the FUNC() macro).
         * NOTE(review): presumably this reference exists so that the
         * copyright object is pulled into the link - confirm.
         */

        .data
        .globl  FUNC(copyright_wind_river)
        .long   FUNC(copyright_wind_river)

/* Defining PORTABLE selects bALib_PORTABLE, which excludes the code below. */

#if (defined(PORTABLE))
#define bALib_PORTABLE
#endif

/*
 * Everything from here on is assembled only when bALib_PORTABLE is not
 * defined.  NOTE(review): the matching #endif is expected further down the
 * file, after the routines.
 */

#ifndef bALib_PORTABLE

/*
 * Endian-neutral shift names.  The unaligned-source copy paths build each
 * destination word from two adjacent source words; which physical shift
 * moves bytes "towards the low address end" depends on the byte order, so
 * the routines use SLA/SHA instead of naming LSL/LSR directly.
 */

#if (_BYTE_ORDER == _BIG_ENDIAN)
#define SLA LSL /* shift towards low address end */
#define SHA LSR /* shift towards high address end */
#else
#define SLA LSR /* shift towards low address end */
#define SHA LSL /* shift towards high address end */
#endif

/*
 * Register aliases.
 *
 * src, dst and n are r0-r2, matching the order of the three bcopy
 * arguments (source, destination, nbytes).  tmp1 (r3) and tmp3 (r12) are
 * work registers; there is no tmp2 alias.  The routines also use v1/v2 and
 * lr, which are not aliased here.
 * NOTE(review): v1/v2 are presumably provided by asm.h - confirm.
 */

src     .req    r0
dst     .req    r1
n       .req    r2
tmp1    .req    r3
tmp3    .req    r12

/* globals */

        .global FUNC(bcopy)             /* copy buffer as fast as possible */
        .global FUNC(bcopyBytes)        /* copy buffer byte at a time */
        .global FUNC(bcopyWords)        /* copy buffer word at a time */
        .global FUNC(bcopyLongs)        /* copy buffer long at a time */
        .global FUNC(bfill)             /* fill buffer as fast as possible */
        .global FUNC(bfillBytes)        /* fill buffer byte at a time */

        /* Code section; entry points are placed on a 4-byte boundary. */

        .text
        .balign 4

/*******************************************************************************
*
* bcopy - copy one buffer to another
*
* This routine copies the first <nbytes> characters from <source> to
* <destination>.  Overlapping buffers are handled correctly.  Copying is done
* in the most efficient way possible. 
In general, the copy will be* significantly faster if both buffers are long-word aligned.** RETURNS: N/A** NOMANUAL** void bcopy* (* const char * source, /@ pointer to source buffer @/* char * destination, /@ pointer to destination buffer @/* int nbytes /@ number of bytes to copy @/* )*/FUNC_LABEL(bcopy) cmp src, dst /* copying up or down */ blo CopyDown /* Copy down then if lower */ moveq pc, lr /* dst == src, no move, RETURN */ stmfd sp!, {lr} /* Preserve lr */ /* Copy Up */ subs n, n, #4 /* need at least 4 bytes */ blt Up_TrailingBytes /* < 4 bytes to go */ /* * word align the dst - first find out how many bytes must be * stored to do this. If the number is 0 check the src too. */ ands tmp3, dst, #3 /* eq means aligned! */ bne Up_AlignDst ands tmp3, src, #3 bne Up_SrcUnaligned /* more difficult! */ /* * We are here when source and destination are both aligned. * number of bytes to transfer is (n+4), n is >= 0. */Up_SrcDstAligned: subs n, n, #12-4 /* 12 bytes or more? */ blt Up_TrailingWords /* * We only have three registers to play with. It is * worth gaining more only if the number of bytes to * transfer is greater than 12+8*<registers stacked> * We need to stack 8 (4+4) registers to gain 8 temporaries, * so look for >=44 bytes. Since we would save 8*4 = 32 * bytes at a time we actually compare with 64. */ subs n, n, #32-12 /* test for n+32 to go. */ blt Up_16 /* Less than 16 to go */ stmfd sp!, {v1} /* Save register */Up_Loop4: /* loop loading 4 registers per time, twice (32 bytes) */ ldmia src!, {tmp1, v1, tmp3, lr} stmia dst!, {tmp1, v1, tmp3, lr} ldmia src!, {tmp1, v1, tmp3, lr} stmia dst!, {tmp1, v1, tmp3, lr} subs n, n, #32 bge Up_Loop4 /* see if we can handle another 8 */ cmn n, #16 ldmgeia src!, {tmp1, v1, tmp3, lr} stmgeia dst!, {tmp1, v1, tmp3, lr} subge n, n, #16 /* * Reload the register - note that we still have (n+32) * bytes to go, and that this is <16. */ ldmfd sp!, {v1}Up_16: /* Here when there are fewer than 16 bytes to go. 
*/ adds n, n, #32-12 /* (n-12) to go */Up_12: /* Ok - do three words at a time. */ ldmgeia src!, {tmp1, tmp3, lr} stmgeia dst!, {tmp1, tmp3, lr} subges n, n, #12 bge Up_12Up_TrailingWords: /* (n-12) bytes to go - 0, 1 or 2 words. Check which. */ adds n, n, #12-4 /* (n-4) to go */ blt Up_TrailingBytes /* < 4 bytes to go */ subs n, n, #4 ldrlt tmp1, [src], #4 strlt tmp1, [dst], #4 ldmgeia src!, {tmp1, tmp3} stmgeia dst!, {tmp1, tmp3} subge n, n, #4Up_TrailingBytes: /* Here with less than 4 bytes to go */ adds n, n, #4 ldmeqfd sp!, {pc} /* 0 bytes, RETURN */ cmp n, #2 /* 1, 2 or 3 bytes */ ldrb tmp1, [src], #1 /* 1 */ strb tmp1, [dst], #1 /* 1 */ ldrgeb tmp1, [src], #1 /* 2 */ strgeb tmp1, [dst], #1 /* 2 */ ldrgtb tmp1, [src], #1 /* 3 */ strgtb tmp1, [dst], #1 /* 3 */ ldmfd sp!, {pc} /* Return *//************************************************************ * * word align dst - tmp3 contains current destination * alignment. We can store at least 4 bytes here. */Up_AlignDst: rsb tmp3, tmp3, #4 /* 1-3 bytes to go */ cmp tmp3, #2 ldrb tmp1, [src], #1 /* 1 */ strb tmp1, [dst], #1 /* 1 */ ldrgeb tmp1, [src], #1 /* 2 */ strgeb tmp1, [dst], #1 /* 2 */ ldrgtb tmp1, [src], #1 /* 3 */ strgtb tmp1, [dst], #1 /* 3 */ subs n, n, tmp3 /* check number to go */ blt Up_TrailingBytes /* less than 4 bytes */ ands tmp3, src, #3 beq Up_SrcDstAligned /* coaligned case */ /* * The source is not coaligned with the destination, * the destination IS currently word aligned. */Up_SrcUnaligned: bic src, src, #3 /* tmp3 holds extra! */ ldr lr, [src], #4 /* 1-3 useful bytes */ cmp tmp3, #2 bgt Up_OneByte /* one byte in tmp1 */ beq Up_TwoBytes /* two bytes in tmp1 *//* * The next three source bytes are in tmp1, one byte must * come from the next source word. At least four bytes * more must be stored. Check first to see if there are a * sufficient number of bytes to go to justify using stm/ldm * instructions. */Up_ThreeBytes: cmp n, #16-4 /* at least 16 bytes? 
*/ blt Up_LT16a /* no 1 */ sub n, n, #16-4 /* (n+16) bytes to go 1 */ /* * save some work registers. The point at which this * is done is based on the ldm/stm time being = (n+3)+(n/4)S */ stmfd sp!, {v1, v2} /* * loop doing 16 bytes at a time. There are currently * three useful bytes in lr. */Up_GE16: mov tmp1, lr, SLA #8 /* first three bytes 1 */ ldmia src!, {v1, v2, tmp3, lr} /* 12/13 */ orr tmp1, tmp1, v1, SHA #24 /* word 1 1 */ mov v1, v1, SLA #8 /* ... */ orr v1, v1, v2, SHA #24 /* word 2 2 (1+1) */ mov v2, v2, SLA #8 orr v2, v2, tmp3, SHA #24 /* word 3 2 */ mov tmp3, tmp3, SLA #8 orr tmp3, tmp3, lr, SHA #24 /* word 4 2 */ stmia dst!, {tmp1, v1, v2, tmp3} /* 12/13 */ subs n, n, #16 /* 1 */ bge Up_GE16 /* 4 / 1 */ /* * loop timing (depends on alignment) for n loops:- * * pre: 17 * ((45/46/47)n - 3) for 32n bytes * post: 13/14 * total: (45/46/47)n+(27/28) * 32 bytes: 72-75 * 64 bytes: 117-122 * 96 bytes: 162-169 */ ldmfd sp!, {v1, v2} /* Reload registers 12/13 ???? */ adds n, n, #16-4 /* check for at least 4 */ blt Up_LT4a /* < 4 bytes */Up_LT16a: mov tmp3, lr, SLA #8 /* first three bytes 1 */ ldr lr, [src], #4 /* next four bytes 4 */ orr tmp3, tmp3, lr, SHA #24 /* 1 */ str tmp3, [dst], #4 /* 4 */ subs n, n, #4 /* 1 */ bge Up_LT16a /* tmp1 contains three bytes 1 / 4 */ /* * Loop timing: * * 15n-3 for 4n bytes * 32: 117 * 64: 237 */Up_LT4a: /* Less than four bytes to go - readjust the src address. */ sub src, src, #3 b Up_TrailingBytes/* * The next two source bytes are in tmp1, two bytes must * come from the next source word. At least four bytes * more must be stored. */Up_TwoBytes: cmp n, #16-4 /* at least 16 bytes? */ blt Up_LT16b /* no */ sub n, n, #16-4 /* (n+16) bytes to go */ stmfd sp!, {v1, v2} /* save registers */ /* * loop doing 32 bytes at a time. There are currently * two useful bytes in lr. 
*/Up_32b: mov tmp1, lr, SLA #16 /* first two bytes */ ldmia src!, {v1, v2, tmp3, lr} orr tmp1, tmp1, v1, SHA #16 /* word 1 */ mov v1, v1, SLA #16 orr v1, v1, v2, SHA #16 /* word 2 */ mov v2, v2, SLA #16 orr v2, v2, tmp3, SHA #16 /* word 3 */ mov tmp3, tmp3, SLA #16 orr tmp3, tmp3, lr, SHA #16 /* word 4 */ stmia dst!, {tmp1, v1, v2, tmp3} subs n, n, #16 bge Up_32b ldmfd sp!, {v1, v2} /* Reload registers */ adds n, n, #16-4 /* check number of bytes */ blt Up_LT4bUp_LT16b: mov tmp3, lr, SLA #16 /* first two bytes */ ldr lr, [src], #4 /* next four bytes */ orr tmp3, tmp3, lr, SHA #16 str tmp3, [dst], #4 subs n, n, #4 bge Up_LT16b /* tmp1 contains two bytes */Up_LT4b: /* Less than four bytes to go - readjust the src address. */ sub src, src, #2 b Up_TrailingBytes/* * The next source byte is in tmp1, three bytes must * come from the next source word. At least four bytes * more must be stored. */Up_OneByte: cmp n, #16-4 /* at least 16 bytes? */ blt Up_LT16c /* no */ sub n, n, #16-4 /* (n+16) bytes to go */ stmfd sp!, {v1, v2} /* save registers */ /* * loop doing 32 bytes at a time. There is currently * one useful byte in lr */Up_32c: mov tmp1, lr, SLA #24 /* first byte */ ldmia src!, {v1, v2, tmp3, lr} orr tmp1, tmp1, v1, SHA #8 /* word 1 */ mov v1, v1, SLA #24 orr v1, v1, v2, SHA #8 /* word 2 */ mov v2, v2, SLA #24 orr v2, v2, tmp3, SHA #8 /* word 3 */ mov tmp3, tmp3, SLA #24 orr tmp3, tmp3, lr, SHA #8 /* word 4 */ stmia dst!, {tmp1, v1, v2, tmp3} subs n, n, #16 bge Up_32c ldmfd sp!, {v1, v2} /* Reload registers */ adds n, n, #16-4 /* check number of bytes */ blt Up_LT4cUp_LT16c: mov tmp3, lr, SLA #24 /* first byte */ ldr lr, [src], #4 /* next four bytes */ orr tmp3, tmp3, lr, SHA #8 str tmp3, [dst], #4 subs n, n, #4 bge Up_LT16c /* tmp1 contains one byte */Up_LT4c: /* Less than four bytes to go - one already in tmp3. 
*/ sub src, src, #1 b Up_TrailingBytes/********************************************************************** * Copy down code * ============== * * This is exactly the same as the copy up code - * but it copies in the opposite direction. */CopyDown: add src, src, n /* points beyond end */ add dst, dst, n subs n, n, #4 /* need at least 4 bytes */ blt Down_TrailingBytes /* < 4 bytes to go */ /* * word align the dst - first find out how many bytes * must be stored to do this. If the number is 0 * check the src too. */ ands tmp3, dst, #3 /* eq means aligned! */ bne Down_AlignDst ands tmp3, src, #3 bne Down_SrcUnaligned /* more difficult! */ /* * here when source and destination are both aligned. * number of bytes to transfer is (n+4), n is >= 0. */Down_SrcDstAligned: subs n, n, #12-4 /* 12 bytes or more? */ blt Down_TrailingWords /* * We only have three registers to play with. It is * worth gaining more only if the number of bytes to * transfer is greater than 12+8*<registers stacked> * We need to stack 8 (4+4) registers to gain 8 temporaries, * so look for >=44 bytes. Since we would save 8*4 = 32 * bytes at a time we actually compare with 64. */ stmfd sp!, {v1, lr} subs n, n, #32-12 /* n+32 to go. */ blt Down_16aDown_32a: /* loop loading 4 registers per time, twice (32 bytes) */ ldmdb src!, {tmp1, v1, tmp3, lr} stmdb dst!, {tmp1, v1, tmp3, lr} ldmdb src!, {tmp1, v1, tmp3, lr} stmdb dst!, {tmp1, v1, tmp3, lr} subs n, n, #32 bge Down_32aDown_16a: /* see if we can handle another 16 */ cmn n, #16 ldmgedb src!, {tmp1, v1, tmp3, lr} stmgedb dst!, {tmp1, v1, tmp3, lr} subge n, n, #16 /* Here when there are fewer than 16 bytes to go. */ adds n, n, #32-12 /* (n-12) to go */ /* Ok - do three words at a time. */ ldmgedb src!, {tmp1, tmp3, lr} stmgedb dst!, {tmp1, tmp3, lr} subge n, n, #12 ldmfd sp!, {v1, lr} /* Restore registers */ /* (n-12) bytes to go - 0, 1 or 2 words. Check which. 
*/Down_TrailingWords: adds n, n, #12-4 /* (n-4) to go */ blt Down_TrailingBytes /* < 4 bytes to go */ subs n, n, #4 ldrlt tmp1, [src, #-4]! strlt tmp1, [dst, #-4]! ldmgedb src!, {tmp1, tmp3} stmgedb dst!, {tmp1, tmp3} subge n, n, #4Down_TrailingBytes: /* Here with less than 4 bytes to go */ adds n, n, #4 moveq pc, lr /* 0 bytes, RETURN */ cmp n, #2 /* 1, 2 or 3 bytes */ ldrb tmp1, [src, #-1]! /* 1 */ strb tmp1, [dst, #-1]! /* 1 */ ldrgeb tmp1, [src, #-1]! /* 2 */ strgeb tmp1, [dst, #-1]! /* 2 */ ldrgtb tmp1, [src, #-1]! /* 3 */ strgtb tmp1, [dst, #-1]! /* 3 */ mov pc, lr /* RETURN *//************************************************************ * * word align dst - tmp3 contains current destination * alignment. We can store at least 4 bytes here. We are * going downwards - so tmp3 is the actual number of bytes * to store. */Down_AlignDst: cmp tmp3, #2 /* 1, 2 or 3 bytes */ ldrb tmp1, [src, #-1]! /* 1 */ strb tmp1, [dst, #-1]! /* 1 */ ldrgeb tmp1, [src, #-1]! /* 2 */ strgeb tmp1, [dst, #-1]! /* 2 */ ldrgtb tmp1, [src, #-1]! /* 3 */ strgtb tmp1, [dst, #-1]! /* 3 */ subs n, n, tmp3 /* check number to go */ blt Down_TrailingBytes /* less than 4 bytes */ ands tmp3, src, #3 beq Down_SrcDstAligned /* coaligned case */ /* * The source is not coaligned with the destination, * the destination IS currently word aligned. */Down_SrcUnaligned: bic src, src, #3 /* tmp3 holds extra! */ ldr tmp1, [src] /* 1-3 useful bytes */ cmp tmp3, #2 blt Down_OneByte /* one byte in tmp1 */ beq Down_TwoBytes /* two bytes in tmp1 *//* * The last three source bytes are in tmp1, one byte must * come from the previous source word. At least four bytes * more must be stored. Check first to see if there are a * sufficient number of bytes to go to justify using stm/ldm * instructions. */Down_ThreeBytes: cmp n, #16-4 /* at least 16 bytes? */ blt Down_LT16b /* no */ sub n, n, #16-4 /* (n+16) bytes to go */ stmfd sp!, {v1, v2, lr} /* save registers */ /* * loop doing 32 bytes at a time. 
There are currently * three useful bytes in tmp1 (a4). */Down_32b: mov lr, tmp1, SHA #8 /* last three bytes */ ldmdb src!, {tmp1, v1, v2, tmp3} orr lr, lr, tmp3, SLA #24 /* word 4 */ mov tmp3, tmp3, SHA #8 orr tmp3, tmp3, v2, SLA #24 /* word 3 */ mov v2, v2, SHA #8 orr v2, v2, v1, SLA #24 /* word 2 */ mov v1, v1, SHA #8 orr v1, v1, tmp1, SLA #24 /* word 1 */ stmdb dst!, {v1, v2, tmp3, lr} subs n, n, #16 bge Down_32b ldmfd sp!, {v1, v2, lr} /* Reload registers */ adds n, n, #16-4 /* check for at least 4 */ blt Down_LT4b /* < 4 bytes */Down_LT16b: mov tmp3, tmp1, SHA #8 /* last three bytes */ ldr tmp1, [src, #-4]! /* previous four bytes */ orr tmp3, tmp3, tmp1, SLA #24 str tmp3, [dst, #-4]! subs n, n, #4
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -