string_vec.s
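/*
 * AltiVec (VMX) accelerated string primitives for 32-bit PowerPC.
 * This listing picks up at the fault-recovery tail and __ex_table entries
 * of memcpy_vec (the copy body itself precedes this excerpt), and then
 * defines strlen_vec, strcmp_vec, memcmp_vec and strcpy_vec.
 */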
35:	bne cr7,36f
352:	stvehx v3,r11,r9
	addi r9,r9,2
36:	bnslr cr7
362:	stvebx v3,r11,r9
	blr
37:	stvx v3,r3,r7
	li r3,0
	blr
	.align 4
38:	lvx v1,r4,r7
	addi r10,r10,-1
	mtcrf 0x02,r9
	addi r9,r9,16
	addi r0,r10,-2
	vperm v3,v0,v1,v2
	vor v0,v1,v1
382:	stvx v3,r3,r7
	addi r7,r7,16
	bdnzf 27,38b
	mtcrf 0x02,r11
	lis r8,0x104
	addi r9,r7,16
	ori r8,r8,32
	rlwinm r11,r0,29,3,31
	rlwinm r0,r0,0,0,28
	bgt cr7,43f
39:	addi r11,r4,256
	xoris r8,r8,0x6
	bns cr6,40f
	bdnz 40f
40:	lvx v1,r4,r7
	addi r11,r11,32
401:	lvx v6,r4,r9
	vperm v4,v0,v1,v2
	dst r11,r8,1
	DCBA_R3R7
	vperm v3,v1,v6,v2
	vor v0,v6,v6
402:	stvx v4,r3,r7
	addi r7,r9,16
	bdz 41f
41:	stvx v3,r3,r9
	addi r9,r7,16
	bdnz 40b
	bso cr6,42f
	b 32b
42:	lvx v1,r4,r7
	vperm v3,v0,v1,v2
	vor v0,v1,v1
422:	stvx v3,r3,r7
	addi r7,r7,16
	b 32b
43:	subf r10,r0,r10
	blt cr5,39b
	mtctr r11
	addi r11,r4,256
44:	lvx v1,r4,r7
	addi r9,r7,32
	addi r11,r11,128
443:	lvx v7,r4,r9
	addi r9,r9,32
447:	lvx v9,r4,r9
	addi r9,r9,32
451:	lvx v11,r4,r9
	addi r9,r7,16
441:	lvx v6,r4,r9
	addi r9,r9,32
445:	lvx v8,r4,r9
	addi r9,r9,32
449:	lvx v10,r4,r9
	addi r9,r9,32
	vperm v3,v0,v1,v2
453:	lvx v0,r4,r9
	vperm v4,v1,v6,v2
	dst r11,r8,1
	DCBA_R3R7
440:	stvx v3,r3,r7
	addi r7,r7,16
	vperm v5,v6,v7,v2
442:	stvx v4,r3,r7
	addi r7,r7,16
	vperm v6,v7,v8,v2
	DCBA_R3R7
444:	stvx v5,r3,r7
	addi r7,r7,16
	vperm v7,v8,v9,v2
446:	stvx v6,r3,r7
	addi r7,r7,16
	vperm v8,v9,v10,v2
	DCBA_R3R7
448:	stvx v7,r3,r7
	addi r7,r7,16
	vperm v9,v10,v11,v2
450:	stvx v8,r3,r7
	addi r7,r7,16
	vperm v10,v11,v0,v2
	DCBA_R3R7
452:	stvx v9,r3,r7
	addi r7,r7,16
454:	stvx v10,r3,r7
	addi r7,r7,16
	bdnz 44b
	mtctr r10
	addi r9,r7,16
	bns cr6,40b
	bdnz 40b

/* Intent of this exception table is to return:
 * r3 = bytes not copied (but preserve dst address in r3 til end)
 * r4 = 0 on read fault; 1 on write fault
 * Register usage here:
 * r5 = (preserve as total byte count til near end)
 * r6 = bytes not copied (move to r3 at end)
 * r7 = byte count index from memcpy_vec
 * r9 = alternate byte count index in 128B loop
 * r10= vectors (QWs remaining) after 128B loop
 * r11= next destination address (assume word-aligned)
 * For read fault, clear out the destination for bytes remaining
 * starting at r3(dst) + r5(byte count) - r6 (bytes remaining).
 */
/* read fault, initial single-byte copy */
100:	li r4,0
	mfctr r3
101:	stbu r4,1(r9)
	bdnz 101b
	blr
/* write fault, initial single-byte copy */
102:	li r4,1
	mfctr r3
	blr
/* read fault, initial vector(s) load */
103:	li r4,0
	b 91f
/* write fault, initial partial vector store */
104:	li r4,1
	subf r5,r7,r5	/* BC minus bytes in 1st vector already stored */
	add r3,r3,r7	/* dst plus bytes in 1st vector already stored. */
	b 91f
/* write fault, initial full vector store */
105:	li r4,1
91:	mr r6,r5
	b 98f
/* read fault in 16B loop(s) and 32B loop (treat as both loads fail) */
106:	li r4,0
	b 94f
/* write fault in 16B loop(s), 128B, and first write fault in 32B loop */
107:	li r4,1
	b 94f
/* second write fault in 32B loop */
108:	li r4,1
	add r11,r3,r5	/* Last dst byte + 1 */
	add r3,r3,r9	/* Current dst byte */
	b 95f
/* read fault in 128B loop (treat as all loads fail) */
112:	li r4,0
	mfctr r0
	slwi r0,r0,7	/* Convert 128B loop ctr to bytes */
	add r11,r3,r5
	slwi r10,r10,4	/* convert QW vectors remaining to bytes */
	add r3,r3,r7
	rlwinm r6,r11,0,28,31	/* Bytes in last vector(s) */
	rlwinm r3,r3,0,0,27
	add r6,r6,r10
	add r6,r6,r0
	b 98f
/* read fault, final vector(s) load */
114:	li r4,0
94:	add r11,r3,r5
	add r3,r3,r7
95:	rlwinm r3,r3,0,0,27
	subf r6,r3,r11
	b 98f
/* write fault, final partial vector store */
115:	li r4,1
	add r11,r3,r5
	add r3,r3,r7
	rlwinm r3,r3,0,0,27
	subf r6,r3,r11
	subf r6,r9,r6	/* minus bytes already stored */
	b 98f
/* write fault, final full vector store */
116:	li r4,1
	add r3,r3,r7
	rlwinm r3,r3,0,0,27
	li r6,16
	b 98f
/*
 * At this stage the number of bytes not copied is in r6
 * and r4 is 0 for read or 1 for write.
 * (Like the scalar version, assume dst is word-aligned.)
 */
98:	cmpwi 0,r4,0
	bne 120f
/* for read fault, clear out the destination: r6 bytes remaining */
	srwi. r0,r6,2
	addi r3,r3,-4
	subf r10,r6,r5
	mtctr r0
	beq 118f
117:	stwu r4,4(r3)
	bdnz 117b
118:	andi. r0,r6,3
	mtctr r0
	beq 120f
119:	stb r4,4(r3)
	addi r3,r3,1
	bdnz 119b
120:	mr r3,r6
	blr
121:	li r4,1
	mfctr r3
	rlwinm r3,r3,2,0,29
	andi. r0,r6,3
	add r3,r3,r0
	blr

#ifndef TEST_OUTSIDE_LINUX
	.section __ex_table,"a"
	.align 2
	.long 2b,100b
	.long 202b,102b
	.long 241b,103b
	.long 25b,103b
	.long 252b,104b
	.long 262b,104b
	.long 272b,104b
	.long 282b,104b
	.long 284b,104b
	.long 29b,105b
	.long 31b,106b
	.long 312b,107b
	.long 331b,114b
	.long 332b,115b
	.long 334b,115b
	.long 342b,115b
	.long 352b,115b
	.long 362b,115b
	.long 37b,116b
	.long 38b,106b
	.long 382b,107b
	.long 40b,106b
	.long 401b,106b
	.long 402b,107b
	.long 41b,108b
	.long 42b,106b
	.long 422b,107b
	.long 44b,112b
	.long 443b,112b
	.long 447b,112b
	.long 451b,112b
	.long 441b,112b
	.long 445b,112b
	.long 449b,112b
	.long 453b,112b
	.long 440b,107b
	.long 442b,107b
	.long 444b,107b
	.long 446b,107b
	.long 448b,107b
	.long 450b,107b
	.long 452b,107b
	.long 454b,107b
	.long 101b,102b
	.long 117b,121b
	.long 119b,102b
#endif

	.text
	.align 5
	.global strlen_vec
strlen_vec:
	lvxl v2,0,r3
	vxor v0,v0,v0
	lvsl v5,0,r3
	vnor v1,v0,v0
	rlwinm r5,r3,0,28,31
	vperm v2,v2,v1,v5
	mr r4,r3
	li r3,16
	vcmpequb. v4,v0,v2
	vsldoi v5,v0,v1,8
	bne cr6,2f
	subf r3,r5,r3
1:	lvxl v2,r4,r3
	addi r3,r3,16
	vcmpequb. v4,v0,v2
	beq cr6,1b
2:	vandc v3,v2,v5
	vsldoi v7,v0,v1,4
	vcmpequb. v4,v3,v5
	vsldoi v8,v0,v1,12
	beq cr6,10f
	vandc v3,v2,v8
	vsldoi v5,v0,v1,10
	vcmpequb. v4,v3,v8
	vsldoi v9,v0,v1,14
	beq cr6,6f
	vandc v3,v2,v9
	vsldoi v8,v0,v1,13
	vcmpequb. v4,v3,v9
	vsldoi v10,v0,v1,15
	beq cr6,4f
	vandc v3,v2,v10
	vcmpequb. v4,v3,v10
	beq cr6,3f
	addi r3,r3,-16
	blr
3:	addi r3,r3,-15
	blr
4:	vandc v3,v2,v8
	vcmpequb. v4,v3,v8
	beq cr6,5f
	addi r3,r3,-14
	blr
5:	addi r3,r3,-13
	blr
6:	vandc v3,v2,v5
	vsldoi v9,v0,v1,9
	vcmpequb. v4,v3,v5
	vsldoi v10,v0,v1,11
	beq cr6,8f
	vandc v3,v2,v10
	vcmpequb. v4,v3,v10
	beq cr6,7f
	addi r3,r3,-12
	blr
7:	addi r3,r3,-11
	blr
8:	vandc v3,v2,v9
	vcmpequb. v4,v3,v9
	beq cr6,9f
	addi r3,r3,-10
	blr
9:	addi r3,r3,-9
	blr
10:	vandc v3,v2,v7
	vsldoi v5,v0,v1,2
	vcmpequb. v4,v3,v7
	vsldoi v10,v0,v1,6
	beq cr6,14f
	vandc v3,v2,v10
	vsldoi v9,v0,v1,5
	vcmpequb. v4,v3,v10
	vsldoi v7,v0,v1,7
	beq cr6,12f
	vandc v3,v2,v7
	vcmpequb. v4,v3,v7
	beq cr6,11f
	addi r3,r3,-8
	blr
11:	addi r3,r3,-7
	blr
12:	vandc v3,v2,v9
	vcmpequb. v4,v3,v9
	beq cr6,13f
	addi r3,r3,-6
	blr
13:	addi r3,r3,-5
	blr
14:	vandc v3,v2,v5
	vsldoi v8,v0,v1,1
	vcmpequb. v4,v3,v5
	vsldoi v10,v0,v1,3
	beq cr6,16f
	vandc v3,v2,v10
	vcmpequb. v4,v3,v10
	beq cr6,15f
	addi r3,r3,-4
	blr
15:	addi r3,r3,-3
	blr
16:	vandc v3,v2,v8
	vcmpequb. v4,v3,v8
	beq cr6,17f
	addi r3,r3,-2
	blr
17:	addi r3,r3,-1
	blr

	.text
	.align 5
	.global strcmp_vec
strcmp_vec:
	lvxl v2,0,r3
	vxor v0,v0,v0
	addi r7,r4,16
	lvxl v3,0,r4
	vnor v1,v0,v0
	xor r8,r7,r4
	lvsl v6,0,r3
	vspltisb v4,8
	cmpi 2,0,r8,0x1000
	lvsl v10,0,r4
	vspltisb v12,1
	beq 2,8f
1:	andi. r8,r3,0xF
	lvxl v8,0,r7
	vslb v13,v4,v12
	andi. r9,r4,0xF
	vperm v2,v2,v1,v6
	subf. r0,r8,r9
	addi r5,r3,16
	vperm v9,v0,v1,v6
	lvsl v6,0,r0
	vor v7,v3,v3
	vperm v3,v3,v8,v10
	addi r4,r7,16
	vslb v11,v13,v12
	vor v3,v3,v9
	xor r3,r3,r3
	vcmpequb. v10,v2,v3
	vslb v14,v11,v12
	vnor v9,v10,v10
	bc 4,6*4+0,3f
	vcmpequb. v5,v0,v2
	bc 4,6*4+2,7f
	blt 6f
2:	lvxl v7,0,r4
	addi r4,r4,16
	lvxl v2,0,r5
	addi r5,r5,16
	vperm v3,v8,v7,v6
	vcmpequb. v10,v2,v3
	vnor v9,v10,v10
	bc 12,6*4+0,5f
3:	vcmpequb v5,v0,v2
	vsum4ubs v7,v4,v14
	vor v9,v9,v5
	vsro v12,v9,v11
	vsrw v11,v9,v4
	vsro v6,v9,v14
	vsrw v14,v9,v13
	vsro v13,v9,v7
	vor v9,v12,v6
	vsro v7,v14,v4
	vor v9,v9,v13
	vcmpgtuw v9,v9,v0
	vor v9,v9,v11
	vor v9,v9,v14
	vor v9,v9,v7
	vandc v11,v10,v9
	vcmpequb. v14,v11,v9
	vcmpgtub v7,v3,v2
	bc 12,6*4+2,4f
	vandc v11,v7,v9
	li r3,-1
	vcmpequb. v14,v11,v1
	bc 4,6*4+2,4f
	li r3,1
4:	blr
5:	vcmpequb. v5,v0,v2
	bc 4,6*4+2,7f
	lvxl v8,0,r4
	addi r4,r4,16
6:	lvxl v2,0,r5
	addi r5,r5,16
	vperm v3,v7,v8,v6
	vcmpequb. v10,v2,v3
	vnor v9,v10,v10
	bc 4,6*4+0,3b
	vcmpequb. v5,v0,v2
	bc 12,6*4+2,2b
7:	blr
8:	vcmpequb. v5,v0,v2
	bc 13,6*4+2,1b
	vcmpequb. v10,v2,v3
	bc 4,6*4+0,3b
	blr

	.text
	.align 5
	.global memcmp_vec
memcmp_vec:
	subf. r6,r4,r3
	cmpi cr1,0,r5,0
	cmpi cr7,0,r5,16
	add r9,r3,r5
	addi r7,r4,-1
	addi r11,r3,16
	beq 2f
	addi r10,r9,-1
	addi r8,r3,-1
	rlwinm r11,r11,0,0,27
	beq cr1,2f
	subf r11,r11,r10
	rlwinm r9,r9,0,28,31
	bgt cr7,3f
	mtctr r5
1:	lbzu r6,1(r7)
	lbzu r10,1(r8)
	subf. r3,r6,r10
	bdnzt 2,1b
	blr
2:	xor r3,r3,r3
	blr
3:	rlwinm r11,r11,28,4,31
	rlwinm r7,r4,0,28,31
	rlwinm r8,r3,0,28,31
	cmpi cr1,0,r11,0
	lvxl v0,0,r3
	subf. r7,r7,r8
	li r7,16
	lvxl v1,0,r4
	vor v2,v1,v1
	addi r5,r5,-1
	bge 4f
	lvxl v2,r4,r7
	addi r4,r4,16
	addi r5,r5,-16
4:	lvsl v3,0,r3
	vspltisb v4,8
	vxor v5,v5,v5
	lvsl v6,0,r4
	vspltisb v7,1
	vnor v8,v5,v5
	lvsr v10,0,r6
	cmpi cr5,0,r9,0
	vperm v11,v5,v8,v3
	lvsr v12,0,r9
	vperm v0,v0,v8,v3
	vperm v1,v1,v2,v6
	vslb v3,v4,v7
	vor v1,v1,v11
	vslb v6,v3,v7
	vcmpequb. v8,v0,v1
	vslb v7,v6,v7
	vnor v13,v8,v8
	bc 4,6*4+0,8f
	ble cr1,6f
	mtctr r11
5:	lvxl v9,r4,r7
	lvxl v0,r3,r7
	addi r7,r7,16
	vperm v1,v2,v9,v10
	vor v2,v9,v9
	vcmpequb. v8,v0,v1
	vnor v13,v8,v8
	bdnzt 24,5b
	bc 4,6*4+0,8f
6:	lvxl v9,r4,r5
	vperm v12,v5,v8,v12
	lvxl v0,r3,r7
	vperm v1,v2,v9,v10
	beq cr5,7f
	vor v1,v1,v12
	vor v0,v0,v12
7:	vcmpequb. v8,v0,v1
	vnor v13,v8,v8
	bc 4,6*4+0,8f
	xor r3,r3,r3
	blr
8:	vsum4ubs v2,v4,v7
	vsro v9,v13,v6
	vsrw v6,v13,v4
	vsro v10,v13,v7
	vsrw v7,v13,v3
	vsro v3,v13,v2
	vor v11,v9,v10
	vsro v2,v7,v4
	vor v11,v11,v3
	vcmpgtuw v11,v11,v5
	vor v11,v11,v6
	vor v11,v11,v7
	vor v11,v11,v2
	vor v1,v1,v11
	vor v0,v0,v11
	li r3,-1
	vcmpgtub. v8,v1,v0
	bclr 4,6*4+2
	li r3,1
	blr

	.text
	.align 5
	.global strcpy_vec
strcpy_vec:
	addi r5,r3,32
	subf. r6,r4,r3
	subf r7,r3,r4
	rlwinm r5,r5,0,0,26
	mr r8,r3
	beqlr
	bgt 1f
	mr r6,r7
1:	subf. r9,r3,r5
	addi r5,r8,4096
	cmpi cr7,0,r6,16
	mtctr r9
2:	lbzx r0,0,r4
	addi r4,r4,1
	cmpi cr1,0,r0,0
	stbx r0,0,r8
	addi r8,r8,1
	bdnzf 6,2b
	beqlr cr1
	li r11,4096
	rlwinm r5,r5,0,0,19
	mr r10,r4
	ble cr7,2b
	subf. r5,r8,r5
	rlwinm r5,r5,28,4,31
	lvsl v4,0,r4
	vxor v0,v0,v0
	ble 9f
	mtctr r5
3:	lvx v1,0,r10
	addi r10,r10,16
	bdz 10f
4:	lvx v2,0,r10
	addi r10,r10,16
	bdz 11f
5:	lvx v3,0,r10
	addi r10,r10,16
	bdz 12f
6:	vperm v5,v1,v2,v4
	vperm v6,v2,v3,v4
	vor v1,v3,v3
	vcmpequb. v7,v0,v5
	bne cr6,8f
	addi r4,r4,16
	vcmpequb. v7,v0,v6
	bne cr6,7f
	DCBA_R0R8
	addi r4,r4,16
	stvx v5,0,r8
	addi r8,r8,16
	stvx v6,0,r8
	addi r8,r8,16
	b 4b
7:	stvx v5,0,r8
	addi r8,r8,16
8:	lbzx r0,0,r4
	addi r4,r4,1
	cmpi cr1,0,r0,0
	stbx r0,0,r8
	addi r8,r8,1
	bne cr1,8b
	blr
9:	mtctr r11
	b 3b
10:	vcmpequb. v7,v0,v1
	bnl cr6,8b
	mtctr r11
	b 4b
11:	vcmpequb. v7,v0,v2
	bnl cr6,8b
	mtctr r11
	b 5b
12:	vcmpequb. v7,v0,v3
	bnl cr6,8b
	mtctr r11
	b 6b
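/*
 * The comment block ahead of the exception handlers above spells out the
 * recovery contract: the copy returns the number of bytes NOT copied, r4
 * records whether the fault hit a read (0) or a write (1), and after a read
 * fault the destination bytes that were never written get cleared.  The C
 * below is a minimal, hypothetical model of that contract only; the names
 * copy_until_fault() and recover() are illustrative stand-ins for the
 * AltiVec copy loops and the code at labels 98..120, not a real interface.
 *
 *	#include <stddef.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *
 *	// Pretend the copy takes a read fault after 'fault_at' bytes
 *	// (or never, if fault_at >= len); stands in for the vector loops.
 *	static size_t copy_until_fault(char *dst, const char *src,
 *				       size_t len, size_t fault_at)
 *	{
 *		size_t n = fault_at < len ? fault_at : len;
 *		memcpy(dst, src, n);
 *		return n;		// bytes actually copied
 *	}
 *
 *	// Equivalent of labels 98..120: on a read fault zero the tail of
 *	// the destination, then return the count of bytes not copied.
 *	static size_t recover(char *dst, size_t len, size_t copied,
 *			      int read_fault)
 *	{
 *		size_t remaining = len - copied;
 *		if (remaining && read_fault)
 *			memset(dst + copied, 0, remaining);
 *		return remaining;	// what ends up in r3 at label 120
 *	}
 *
 *	int main(void)
 *	{
 *		char src[32] = "source data for the copy test";
 *		char dst[32];
 *		memset(dst, 0xAA, sizeof dst);
 *
 *		size_t copied = copy_until_fault(dst, src, sizeof src, 20);
 *		size_t not_copied = recover(dst, sizeof src, copied, 1);
 *
 *		printf("bytes not copied: %zu\n", not_copied);	// prints 12
 *		return 0;
 *	}
 */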