/*
 * String handling functions for PowerPC.
 *
 * Copyright (C) 1996 Paul Mackerras.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/ppc_asm.tmpl>
#include <asm/processor.h>
#include <asm/errno.h>

#define CACHE_LINE_SIZE		128
#define LG_CACHE_LINE_SIZE	7
#define MAX_COPY_PREFETCH	1

#define COPY_16_BYTES		\
	lwz	r7,4(r4);	\
	lwz	r8,8(r4);	\
	lwz	r9,12(r4);	\
	lwzu	r10,16(r4);	\
	stw	r7,4(r6);	\
	stw	r8,8(r6);	\
	stw	r9,12(r6);	\
	stwu	r10,16(r6)

#define COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
8 ## n ## 7:			\
	stwu	r10,16(r6)

#define COPY_16_BYTES_EXCODE(n)			\
9 ## n ## 0:					\
	addi	r5,r5,-(16 * n);		\
	b	104f;				\
9 ## n ## 1:					\
	addi	r5,r5,-(16 * n);		\
	b	105f;				\
.section __ex_table,"a";			\
	.align	3;				\
	.llong	8 ## n ## 0b,9 ## n ## 0b;	\
	.llong	8 ## n ## 1b,9 ## n ## 0b;	\
	.llong	8 ## n ## 2b,9 ## n ## 0b;	\
	.llong	8 ## n ## 3b,9 ## n ## 0b;	\
	.llong	8 ## n ## 4b,9 ## n ## 1b;	\
	.llong	8 ## n ## 5b,9 ## n ## 1b;	\
	.llong	8 ## n ## 6b,9 ## n ## 1b;	\
	.llong	8 ## n ## 7b,9 ## n ## 1b;	\
.text

CACHELINE_BYTES = CACHE_LINE_SIZE
LG_CACHELINE_BYTES = LG_CACHE_LINE_SIZE
CACHELINE_MASK = (CACHE_LINE_SIZE-1)

/* r3 = dest, r4 = src */
_GLOBAL(strcpy)
	addi	r5,r3,-1
	addi	r4,r4,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r5)
	bne	1b
	blr

/* r3 = dest, r4 = src, r5 = max count */
_GLOBAL(strncpy)
	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r3,-1
	addi	r4,r4,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r6)
	bdnzf	2,1b		/* dec ctr, branch if ctr != 0 && !cr0.eq */
	blr

/* r3 = dest, r4 = src; find the NUL in dest, then append src */
_GLOBAL(strcat)
	addi	r5,r3,-1
	addi	r4,r4,-1
1:	lbzu	r0,1(r5)
	cmpwi	0,r0,0
	bne	1b
	addi	r5,r5,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r5)
	bne	1b
	blr

/* r3 = s1, r4 = s2; NUL test kept in cr1, difference test in cr0 */
_GLOBAL(strcmp)
	addi	r5,r3,-1
	addi	r4,r4,-1
1:	lbzu	r3,1(r5)
	cmpwi	1,r3,0
	lbzu	r0,1(r4)
	subf.	r3,r0,r3
	beqlr	1
	beq	1b
	blr

_GLOBAL(strlen)
	addi	r4,r3,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	bne	1b
	subf	r3,r3,r4
	blr

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero.  This requires that the destination
 * area is cacheable.  -- paulus
 */
_GLOBAL(cacheable_memzero)
	mr	r5,r4
	li	r4,0
	addi	r6,r3,-4
	cmplwi	0,r5,4
	blt	7f
	stwu	r4,4(r6)
	beqlr
	andi.	r0,r6,3
	add	r5,r0,r5
	subf	r6,r0,r6
	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
	add	r8,r7,r5
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1	/* total number of complete cachelines */
	ble	2f
	xori	r0,r7,CACHELINE_MASK & ~3
	srwi.	r0,r0,2
	beq	3f
	mtctr	r0
4:	stwu	r4,4(r6)
	bdnz	4b
3:	mtctr	r9
	li	r7,4
10:	dcbz	r7,r6
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
	addi	r5,r5,4
2:	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

_GLOBAL(memset)
	rlwimi	r4,r4,8,16,23	/* replicate the fill byte ... */
	rlwimi	r4,r4,16,0,15	/* ... across the whole word */
	addi	r6,r3,-4
	cmplwi	0,r5,4
	blt	7f
	stwu	r4,4(r6)
	beqlr
	andi.	r0,r6,3
	add	r5,r0,r5
	subf	r6,r0,r6
	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr
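/*
 * memmove below simply picks a safe copy direction for overlapping
 * buffers: if the destination starts at or below the source, a forward
 * copy (memcpy) never overwrites bytes it has yet to read; otherwise it
 * copies from the top down.  A rough C model of the dispatch, for
 * reference only -- the names are illustrative and this is not part of
 * the build:
 *
 *	void *model_memmove(void *dst, const void *src, size_t n)
 *	{
 *		if (dst <= src)
 *			return model_memcpy(dst, src, n);	// forward, low to high
 *		return model_backwards_memcpy(dst, src, n);	// high to low
 *	}
 */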
_GLOBAL(memmove)
	cmplw	0,r3,r4
	bgt	.backwards_memcpy
	/* fall through */

_GLOBAL(memcpy)
	srwi.	r7,r5,3
	addi	r6,r3,-4
	addi	r4,r4,-4
	beq	2f		/* if less than 8 bytes to do */
	andi.	r0,r6,3		/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,4(r4)
	lwzu	r8,8(r4)
	stw	r7,4(r6)
	stwu	r8,8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,4(r4)
	addi	r5,r5,-4
	stwu	r0,4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r4,r4,3
	addi	r6,r6,3
4:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	4b
	blr
5:	subfic	r0,r0,4
	mtctr	r0
6:	lbz	r7,4(r4)
	addi	r4,r4,1
	stb	r7,4(r6)
	addi	r6,r6,1
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

_GLOBAL(backwards_memcpy)
	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
	add	r6,r3,r5
	add	r4,r4,r5
	beq	2f
	andi.	r0,r6,3
	mtctr	r7
	bne	5f
1:	lwz	r7,-4(r4)
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,-4(r4)
	subi	r5,r5,4
	stwu	r0,-4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
5:	mtctr	r0
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

_GLOBAL(memcmp)
	cmpwi	0,r5,0
	ble-	2f
	mtctr	r5
	addi	r6,r3,-1
	addi	r4,r4,-1
1:	lbzu	r3,1(r6)
	lbzu	r0,1(r4)
	subf.	r3,r0,r3
	bdnzt	2,1b
	blr
2:	li	r3,0
	blr

_GLOBAL(memchr)
	cmpwi	0,r5,0
	ble-	2f
	mtctr	r5
	addi	r3,r3,-1
1:	lbzu	r0,1(r3)
	cmpw	0,r0,r4
	bdnzf	2,1b
	beqlr
2:	li	r3,0
	blr

_GLOBAL(__copy_tofrom_user)
	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
71:	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
73:	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	li	r7,1
	li	r3,4
	ble	111f
	li	r7,MAX_COPY_PREFETCH
111:	mtctr	r7
112:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	112b
#else /* MAX_COPY_PREFETCH == 1 */
	li	r3,CACHELINE_BYTES + 4
	dcbt	r11,r4
#endif /* MAX_COPY_PREFETCH */

	mtctr	r0
53:	dcbt	r3,r4
54:	dcbz	r11,r6
/* had to move these to keep extable in order */
	.section __ex_table,"a"
	.align	3
	.llong	70b,100f
	.llong	71b,101f
	.llong	72b,102f
	.llong	73b,103f
	.llong	54b,105f
	.text
/* the main body of the cacheline loop */
	COPY_16_BYTES_WITHEX(0)
#if CACHE_LINE_SIZE >= 32
	COPY_16_BYTES_WITHEX(1)
#if CACHE_LINE_SIZE >= 64
	COPY_16_BYTES_WITHEX(2)
	COPY_16_BYTES_WITHEX(3)
#if CACHE_LINE_SIZE >= 128
	COPY_16_BYTES_WITHEX(4)
	COPY_16_BYTES_WITHEX(5)
	COPY_16_BYTES_WITHEX(6)
	COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
41:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	li	r3,0
	blr
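/*
 * Fault recovery for __copy_tofrom_user.  Each load/store above that
 * touches user memory has an __ex_table entry pairing its address with
 * a fixup label below; on a page fault the exception handler looks the
 * faulting address up in that table and resumes at the fixup.  Each
 * fixup records which loop faulted (r3 = log2 of the bytes moved per
 * ctr tick: 0 for the byte loop, 2 for the word loop,
 * LG_CACHELINE_BYTES for the cacheline loop), so the code at 99: can
 * compute the residue.  A C sketch of that arithmetic, for reference
 * only -- the names are illustrative:
 *
 *	// leftover = bytes outside the loop, from r5
 *	size_t not_copied = leftover + ((size_t)ctr << r3);
 *
 * For a fault on the read side, the tail below also zeroes the rest of
 * the destination buffer before returning.
 */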
/* read fault, initial single-byte copy */
100:	li	r4,0
	b	90f
/* write fault, initial single-byte copy */
101:	li	r4,1
90:	subf	r5,r8,r5
	li	r3,0
	b	99f
/* read fault, initial word copy */
102:	li	r4,0
	b	91f
/* write fault, initial word copy */
103:	li	r4,1
91:	li	r3,2
	b	99f

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * 104f (if in read part) or 105f (if in write part), after updating r5
 */
	COPY_16_BYTES_EXCODE(0)
#if CACHE_LINE_SIZE >= 32
	COPY_16_BYTES_EXCODE(1)
#if CACHE_LINE_SIZE >= 64
	COPY_16_BYTES_EXCODE(2)
	COPY_16_BYTES_EXCODE(3)
#if CACHE_LINE_SIZE >= 128
	COPY_16_BYTES_EXCODE(4)
	COPY_16_BYTES_EXCODE(5)
	COPY_16_BYTES_EXCODE(6)
	COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:	li	r4,0
	b	92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:	li	r4,1
92:	li	r3,LG_CACHELINE_BYTES
	b	99f
/* read fault in final word loop */
108:	li	r4,0
	b	93f
/* write fault in final word loop */
109:	li	r4,1
93:	andi.	r5,r5,3
	li	r3,2
	b	99f
/* read fault in final byte loop */
110:	li	r4,0
	b	94f
/* write fault in final byte loop */
111:	li	r4,1
94:	li	r5,0
	li	r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r4 is 0 for read or 1 for write.
 */
99:	mfctr	r0
	slw	r3,r0,r3
	add	r3,r3,r5
	cmpwi	0,r4,0
	bne	120f
/* for read fault, clear out the destination: r3 bytes starting at 4(r6) */
	srwi.	r0,r3,2
	li	r9,0
	mtctr	r0
	beq	113f
112:	stwu	r9,4(r6)
	bdnz	112b
113:	andi.	r0,r3,3
	mtctr	r0
	beq	120f
114:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	114b
120:	blr

	.section __ex_table,"a"
	.align	3
	.llong	30b,108b
	.llong	31b,109b
	.llong	40b,110b
	.llong	41b,111b
	.llong	112b,120b
	.llong	114b,120b
	.text

_GLOBAL(__clear_user)
	addi	r6,r3,-4
	li	r3,0
	li	r5,0
	cmplwi	0,r4,4
	blt	7f
	/* clear a single word */
11:	stwu	r5,4(r6)
	beqlr
	/* clear word sized chunks */
	andi.	r0,r6,3
	add	r4,r0,r4
	subf	r6,r0,r6
	srwi	r0,r4,2
	andi.	r4,r4,3
	mtctr	r0
	bdz	7f
1:	stwu	r5,4(r6)
	bdnz	1b
	/* clear byte sized chunks */
7:	cmpwi	0,r4,0
	beqlr
	mtctr	r4
	addi	r6,r6,3
8:	stbu	r5,1(r6)
	bdnz	8b
	blr
90:	mr	r3,r4
	blr
91:	mfctr	r3
	slwi	r3,r3,2
	add	r3,r3,r4
	blr
92:	mfctr	r3
	blr

	.section __ex_table,"a"
	.align	3
	.llong	11b,90b
	.llong	1b,91b
	.llong	8b,92b
	.text

_GLOBAL(__strncpy_from_user)
	addi	r6,r3,-1
	addi	r4,r4,-1
	cmpwi	0,r5,0
	beq	2f
	mtctr	r5
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r6)
	bdnzf	2,1b		/* dec ctr, branch if ctr != 0 && !cr0.eq */
	beq	3f
2:	addi	r6,r6,1
3:	subf	r3,r3,r6
	blr
99:	li	r3,-EFAULT
	blr

	.section __ex_table,"a"
	.align	3
	.llong	1b,99b
	.text

/* r3 = str, r4 = len (> 0), r5 = top (highest addr) */
_GLOBAL(__strnlen_user)
	addi	r7,r3,-1
	subf	r6,r7,r5	/* top+1 - str */
	cmplw	0,r4,r6
	bge	0f
	mr	r6,r4
0:	mtctr	r6		/* ctr = min(len, top - str) */
1:	lbzu	r0,1(r7)	/* get next byte */
	cmpwi	0,r0,0
	bdnzf	2,1b		/* loop if --ctr != 0 && byte != 0 */
	addi	r7,r7,1
	subf	r3,r3,r7	/* number of bytes we have looked at */
	beqlr			/* return if we found a 0 byte */
	cmpw	0,r3,r4		/* did we look at all len bytes? */
	blt	99f		/* if not, must have hit top */
	addi	r3,r4,1		/* return len + 1 to indicate no null found */
	blr
99:	li	r3,0		/* bad address, return 0 */
	blr

	.section __ex_table,"a"
	.align	3
	.llong	1b,99b
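/*
 * A rough C model of __strnlen_user's return convention above, for
 * reference only -- the helper name is illustrative, not a real
 * kernel API:
 *
 *	// str = r3, len = r4, top = r5 (highest valid address)
 *	long model_strnlen_user(const char *str, long len, const char *top)
 *	{
 *		long n = (top - str) + 1;	// bytes up to and including top
 *		if (len < n)
 *			n = len;
 *		for (long i = 0; i < n; i++) {
 *			if (!user_byte_readable(str + i))
 *				return 0;	// faulted: bad address
 *			if (str[i] == '\0')
 *				return i + 1;	// length including the NUL
 *		}
 *		return n < len ? 0 : len + 1;	// hit top, or no NUL in len bytes
 *	}
 */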