📄 string_vec.s
字号:
/*
 * AltiVec versions (*_vec) of equivalent Linux library functions
 * found in /arch/ppc/lib/string.S from Linux 2.4.17. Suggest this
 * file be appended to that one when building a Linux kernel that
 * will employ these functions.
 *
 * Copyright (C) Motorola, Inc. 2003
 *
 * Revision history:
 * Rev 0.0 Original Chuck Corley 5/28/03
 * Contact at risc10@motorola.com
 * Commented source code for Altivec version available at
 * www.motorola.com/altivec
 *
 * AltiVec versions will only deal with L1_CACHE_LINE_SIZE=32
 */

#ifndef TEST_OUTSIDE_LINUX
#include "../kernel/ppc_asm.tmpl"
#include <linux/config.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#if 0
#define v0 vr0
#define v1 vr1
#define v2 vr2
#define v3 vr3
#define v4 vr4
#define v5 vr5
#define v6 vr6
#define v7 vr7
#define v8 vr8
#define v9 vr9
#define v10 vr10
#define v11 vr11
#define v12 vr12
#define v13 vr13
#define v14 vr14
#define v15 vr15
#endif
#else
#define EFAULT 0
#define L1_CACHE_LINE_SIZE 32
#define LG_L1_CACHE_LINE_SIZE 5
#define MAX_L1_COPY_PREFETCH 1
#endif

/* AltiVec versions of selected functions for use on AltiVec
 * enabled G4 and later microprocessors.
 */
#if defined(__GNUC__) || defined(__MWERKS__)
/* gcc and codewarrior don't assemble dcba */
#define DCBA_R3R7 .long 0x7c033dec
#define DCBA_R3R9 .long 0x7c034dec
#define DCBA_R0R8 .long 0x7c0045ec
#else
/*
 * Mnemonic forms of the encodings above.  0x7c033dec / 0x7c034dec have
 * RA = r3, so the mnemonics must name r3 as well; using r4 here would
 * touch a different buffer than the hand-encoded variants do.
 */
#define DCBA_R3R7 dcba r3,r7
#define DCBA_R3R9 dcba r3,r9
#define DCBA_R0R8 dcba 0,r8
#endif

	.text
	.align 5

/*
 * backwards_memcpy_vec, memmove_vec and cacheable_memcpy_vec are separate
 * global entry points; each is a single nop that falls through into
 * memcpy_vec.
 *
 * memcpy_vec register usage on entry (as used by the code below):
 *   r3 = destination, r4 = source, r5 = byte count.
 * Returns immediately when r3 == r4 or when r5 == 0.
 */
	.global backwards_memcpy_vec
backwards_memcpy_vec:
	nop
	.global memmove_vec
memmove_vec:
	nop
	.global cacheable_memcpy_vec
cacheable_memcpy_vec:
	nop
	.global memcpy_vec
memcpy_vec:
	subf.	r7,r4,r3		/* r7 = dst - src, sets cr0 */
	cmpi	cr1,0,r5,0		/* cr1: count == 0 ? */
	cmpi	cr7,0,r5,16		/* cr7: count > 16 ? */
	addi	r8,r4,-1		/* r8 = src - 1 (for lbzu) */
	addi	r9,r3,-1		/* r9 = dst - 1 (for stbu) */
	add	r10,r4,r5		/* r10 = src + count */
	beqlr				/* dst == src: nothing to do */
	add	r11,r3,r5		/* r11 = dst + count */
	subf	r0,r3,r4		/* r0 = src - dst */
	beqlr	cr1			/* count == 0: nothing to do */
	bgt	2f			/* dst > src: take the backward path */
	cmpi	cr5,0,r0,128		/* cr5: (src - dst) vs 128 */
	bgt	cr7,23f			/* more than 16 bytes: forward vector copy */
	mtctr	r5
1:	lbzu	r0,1(r8)		/* forward byte-at-a-time copy */
	stbu	r0,1(r9)
	bdnz	1b
	blr

2:	cmpi	cr5,0,r7,128		/* cr5: (dst - src) vs 128 */
	cmp	cr6,0,r7,r5		/* cr6: (dst - src) vs count */
	bgt	cr7,4f			/* more than 16 bytes: vector copy */
	mtctr	r5
3:	lbzu	r0,-1(r10)		/* backward byte-at-a-time copy */
	stbu	r0,-1(r11)
	bdnz	3b
	blr

4:	rlwinm	r8,r4,0,28,31		/* r8 = src & 0xF */
	rlwinm	r9,r3,0,28,31		/* r9 = dst & 0xF */
	bge	cr6,24f			/* (dst - src) >= count: copy forward */
	/*
	 * NOTE(review): r11 is loaded by the lis/ori pair below and then
	 * overwritten by the addi before anything reads it.  It looks like
	 * a stream-touch (dst) instruction was meant to consume it -
	 * confirm against the reference implementation.
	 */
	lis	r11,0x010c
	subf.	r8,r9,r8
	lvsr	v2,0,r7			/* permute control from (dst - src) */
	ori	r11,r11,0xffe0
	addi	r11,r10,-1
	bgt	5f
	addi	r8,r8,16
5:	rlwinm	r11,r11,0,0,27
	addi	r7,r5,-1
	subf	r0,r11,r10
	add	r11,r3,r7
	addi	r10,r3,16
	subf.	r8,r0,r8
	rlwinm	r0,r11,0,28,31
	rlwinm	r10,r10,0,0,27
	blt	6f
	lvx	v1,r4,r7
	addi	r4,r4,-16
6:	lvx	v0,r4,r7
	subf	r10,r10,r11
	cmpi	cr7,0,r0,0xF
	cmpi	cr1,0,r9,0
	rlwinm	r10,r10,28,4,31		/* r10 >>= 4: count of 16-byte vectors */
	add	r0,r3,r5
	cmpi	cr6,0,r10,0
	vperm	v3,v0,v1,v2
	vor	v1,v0,v0
	beq	cr7,10f			/* last dst byte is vector-end aligned: full store */
	mtcrf	0x01,r0			/* low 4 bits of (dst + count) into cr7 */
	rlwinm	r11,r11,0,0,27
	li	r9,0
	bnl	cr7,7f			/* partial store of the trailing vector: */
	stvewx	v3,r11,r9		/*   8 bytes */
	addi	r9,r9,4
	stvewx	v3,r11,r9
	addi	r9,r9,4
7:	bng	cr7,8f
	stvewx	v3,r11,r9		/*   4 bytes */
	addi	r9,r9,4
8:	bne	cr7,9f
	stvehx	v3,r11,r9		/*   2 bytes */
	addi	r9,r9,2
9:	bns	cr7,11f
	stvebx	v3,r11,r9		/*   1 byte */
	b	11f
10:	stvx	v3,r3,r7
11:	addi	r7,r7,-16
	ble	cr6,13f
	mtctr	r10
	cmpi	cr6,0,r10,4
12:	lvx	v0,r4,r7		/* backward 16-byte loop */
	vperm	v3,v0,v1,v2
	vor	v1,v0,v0
	stvx	v3,r3,r7
	addi	r7,r7,-16
	bdnzf	25,12b			/* loop while CTR != 0 and cr6.gt is clear */
	add	r9,r3,r7
	bgt	cr6,19f
13:	blt	14f
	addi	r4,r4,16
14:	lvx	v0,0,r4
	vperm	v3,v0,v1,v2
	subfic	r9,r3,16
	beq	cr1,18f			/* dst is 16-byte aligned: full store */
	mtcrf	0x01,r9
	li	r9,0
	bns	cr7,15f			/* partial store of the leading vector: */
	stvebx	v3,r3,r9		/*   1 byte */
	addi	r9,r9,1
15:	bne	cr7,16f
	stvehx	v3,r3,r9		/*   2 bytes */
	addi	r9,r9,2
16:	bng	cr7,17f
	stvewx	v3,r3,r9		/*   4 bytes */
	addi	r9,r9,4
17:	bnllr	cr7
	stvewx	v3,r3,r9		/*   8 bytes */
	addi	r9,r9,4
	stvewx	v3,r3,r9
	blr
18:	stvx	v3,0,r3
	blr

19:	lvx	v0,r4,r7
	mtcrf	0x02,r9
	vperm	v3,v0,v1,v2
	vor	v1,v0,v0
	addi	r9,r9,-16
	stvx	v3,r3,r7
	vor	v7,v0,v0
	addi	r7,r7,-16
	bdnzt	27,19b
	/*
	 * NOTE(review): r8 (lis/ori below) and r11 (addi below and inside
	 * the loop) are computed but not read anywhere in this backward
	 * loop - possibly operands of a dst that is absent here; confirm.
	 */
	lis	r8,0x102
	mtcrf	0x02,r3
	addi	r9,r7,-16
	ori	r8,r8,0xffe0
	addi	r11,r4,-64
	bso	cr6,20f
	bdnz	20f
20:	lvx	v6,r4,r7		/* backward 32-byte loop */
	addi	r11,r11,-32
	lvx	v1,r4,r9
	vperm	v3,v6,v7,v2
	DCBA_R3R9
	vperm	v4,v1,v6,v2
	vor	v7,v1,v1
	bdz	21f
21:	stvx	v3,r3,r7
	addi	r7,r9,-16
	stvx	v4,r3,r9
	addi	r9,r7,-16
	bdnz	20b
	bns	cr6,22f
	b	13b
22:	lvx	v1,r4,r7
	vperm	v4,v1,v7,v2
	stvx	v4,r3,r7
	b	13b

	/* Forward vector copy */
23:	rlwinm	r8,r4,0,28,31		/* r8 = src & 0xF */
	rlwinm	r9,r3,0,28,31		/* r9 = dst & 0xF */
24:	lis	r10,0x010c
	subf.	r8,r8,r9
	lvsr	v2,0,r7			/* permute control from (dst - src) */
	ori	r10,r10,32
	dst	r4,r10,0		/* start data stream 0 at src */
	addi	r10,r3,16
	addi	r11,r11,-1		/* r11 = address of last dst byte */
	bge	25f
	lvx	v0,0,r4
	addi	r4,r4,16
25:	lvx	v1,0,r4
	rlwinm	r10,r10,0,0,27
	cmpi	cr1,0,r9,0
	subf	r0,r3,r10
	subf	r10,r10,r11
	li	r7,0
	mtcrf	0x01,r0
	rlwinm	r10,r10,28,4,31		/* r10 >>= 4: count of 16-byte vectors */
	vperm	v3,v0,v1,v2
	vor	v0,v1,v1
	beq	cr1,29f			/* dst is 16-byte aligned: full store */
	bns	cr7,26f			/* partial store of the leading vector: */
	stvebx	v3,r3,r7		/*   1 byte */
	addi	r7,r7,1
26:	bne	cr7,27f
	stvehx	v3,r3,r7		/*   2 bytes */
	addi	r7,r7,2
27:	bng	cr7,28f
	stvewx	v3,r3,r7		/*   4 bytes */
	addi	r7,r7,4
28:	bnl	cr7,30f
	stvewx	v3,r3,r7		/*   8 bytes */
	addi	r7,r7,4
	stvewx	v3,r3,r7
	b	30f
29:	stvx	v3,0,r3
30:	rlwinm	r0,r11,0,28,31
	cmpi	cr6,0,r10,0
	li	r7,16
	cmpi	cr1,0,r0,0xF
	cmpi	cr7,0,r10,14
	ble	cr6,32f
	mtctr	r10
	cmpi	cr6,0,r10,4
31:	lvx	v1,r4,r7		/* forward 16-byte loop */
	vperm	v3,v0,v1,v2
	vor	v0,v1,v1
	stvx	v3,r3,r7
	addi	r7,r7,16
	bdnzf	25,31b			/* loop while CTR != 0 and cr6.gt is clear */
	add	r9,r3,r7
	addi	r10,r10,-1
	bgt	cr6,38f
32:	add	r11,r3,r5
	add	r10,r4,r5
	bge	33f
	addi	r10,r10,-16
33:	mtcrf	0x01,r11
	addi	r11,r11,-1
	addi	r0,r10,-1
	lvx	v1,0,r0
	dss	0			/* stop data streams 0 and 1 */
	dss	1
	vperm	v3,v0,v1,v2
	beq	cr1,37f			/* last dst byte is vector-end aligned: full store */
	rlwinm	r11,r11,0,0,27
	li	r9,0
	bnl	cr7,34f			/* partial store of the trailing vector: */
	stvewx	v3,r11,r9		/*   8 bytes */
	addi	r9,r9,4
	stvewx	v3,r11,r9
	addi	r9,r9,4
34:	bng	cr7,35f
	stvewx	v3,r11,r9		/*   4 bytes */
	addi	r9,r9,4
35:	bne	cr7,36f
	stvehx	v3,r11,r9		/*   2 bytes */
	addi	r9,r9,2
36:	bnslr	cr7
	stvebx	v3,r11,r9		/*   1 byte */
	blr
37:	stvx	v3,r3,r7
	blr

38:	lvx	v1,r4,r7
	addi	r10,r10,-1
	mtcrf	0x02,r9
	addi	r9,r9,16
	addi	r0,r10,-2
	vperm	v3,v0,v1,v2
	vor	v0,v1,v1
	stvx	v3,r3,r7
	addi	r7,r7,16
	bdnzf	27,38b
	mtcrf	0x02,r11
	lis	r8,0x104
	addi	r9,r7,16
	ori	r8,r8,32
	rlwinm	r11,r0,29,3,31		/* r11 = r0 >> 3 */
	rlwinm	r0,r0,0,0,28		/* r0 &= ~7 */
	bgt	cr7,43f
39:	addi	r11,r4,256
	xoris	r8,r8,0x6
	bns	cr6,40f
	bdnz	40f
40:	lvx	v1,r4,r7		/* forward 32-byte loop */
	addi	r11,r11,32
	lvx	v6,r4,r9
	vperm	v4,v0,v1,v2
	dst	r11,r8,1
	DCBA_R3R7
	vperm	v3,v1,v6,v2
	vor	v0,v6,v6
	bdz	41f
41:	stvx	v4,r3,r7
	addi	r7,r9,16
	stvx	v3,r3,r9
	addi	r9,r7,16
	bdnz	40b
	bso	cr6,42f
	b	32b
42:	lvx	v1,r4,r7
	vperm	v3,v0,v1,v2
	vor	v0,v1,v1
	stvx	v3,r3,r7
	addi	r7,r7,16
	b	32b
43:	subf	r10,r0,r10
	blt	cr5,39b			/* |dst - src| < 128: stay in the 32-byte loop */
	mtctr	r11
	addi	r11,r4,256
44:	lvx	v1,r4,r7		/* forward 128-byte loop */
	addi	r9,r7,32
	addi	r11,r11,128
	lvx	v7,r4,r9
	addi	r9,r9,32
	lvx	v9,r4,r9
	addi	r9,r9,32
	lvx	v11,r4,r9
	addi	r9,r7,16
	lvx	v6,r4,r9
	addi	r9,r9,32
	lvx	v8,r4,r9
	addi	r9,r9,32
	lvx	v10,r4,r9
	addi	r9,r9,32
	vperm	v3,v0,v1,v2
	lvx	v0,r4,r9
	vperm	v4,v1,v6,v2
	dst	r11,r8,1
	DCBA_R3R7
	stvx	v3,r3,r7
	addi	r7,r7,16
	vperm	v5,v6,v7,v2
	stvx	v4,r3,r7
	addi	r7,r7,16
	vperm	v6,v7,v8,v2
	DCBA_R3R7
	stvx	v5,r3,r7
	addi	r7,r7,16
	vperm	v7,v8,v9,v2
	stvx	v6,r3,r7
	addi	r7,r7,16
	vperm	v8,v9,v10,v2
	DCBA_R3R7
	stvx	v7,r3,r7
	addi	r7,r7,16
	vperm	v9,v10,v11,v2
	stvx	v8,r3,r7
	addi	r7,r7,16
	vperm	v10,v11,v0,v2
	DCBA_R3R7
	stvx	v9,r3,r7
	addi	r7,r7,16
	stvx	v10,r3,r7
	addi	r7,r7,16
	bdnz	44b
	mtctr	r10
	addi	r9,r7,16
	bns	cr6,40b
	bdnz	40b

/*
 * bcopy_vec: swaps r3 and r4 (via r0) and branches to memcpy_vec,
 * i.e. the first two arguments are in the opposite order to memcpy_vec.
 */
	.global bcopy_vec
bcopy_vec:
	mr	r0,r3
	mr	r3,r4
	mr	r4,r0
	b	memcpy_vec

	.text
	.align 4

/*
 * __clear_user_vec: moves r4 into r5, loads 0 into r4 and falls through
 * into memset_vec.
 *
 * memset_vec register usage on entry (as used by the code below):
 *   r3 = destination, r4 = fill byte, r5 = byte count.
 * Stores that have an __ex_table entry carry an extra numeric label
 * (32, 42, 52, ... 162) so the table can reference them.
 */
	.globl __clear_user_vec
__clear_user_vec:
	mr	r5,r4
	li	r4,0
	.globl memset_vec
memset_vec:
	cmpi	cr7,0,r5,16		/* cr7: count > 16 ? */
	cmpi	cr1,0,r5,0		/* cr1: count == 0 ? */
	rlwinm.	r8,r4,28,28,3
	addi	r9,r3,-1		/* r9 = dst - 1 (for stbu) */
	addi	r10,r3,16
	add	r6,r3,r5		/* r6 = dst + count */
	bgt	cr7,2f			/* more than 16 bytes: vector path */
	mtctr	r5
	beqlr	cr1			/* count == 0: nothing to do */
1:	stbu	r4,1(r9)		/* byte-at-a-time fill */
	bdnz	1b
	blr

2:	rlwinm	r10,r10,0,0,27
	addi	r11,r6,-1		/* r11 = address of last byte */
	subf	r9,r3,r10
	li	r7,0
	vxor	v0,v0,v0		/* v0 = 0 */
	subf	r10,r10,r11
	cmpi	cr1,0,r9,16
	beq	3f			/* cr0 from rlwinm. above: keep v0 = 0 */
	lvsl	v0,0,r8			/* build a vector with the fill byte */
	vspltisb	v1,4		/* splatted into every element       */
	lvsl	v2,0,r4
	vslb	v0,v0,v1
	vor	v0,v0,v2
	vspltb	v0,v0,0
3:	mtcrf	0x01,r9
	rlwinm	r10,r10,28,4,31		/* r10 >>= 4: count of 16-byte vectors */
	beq	cr1,7f			/* dst is 16-byte aligned: full store */
	bns	cr7,4f			/* partial store of the leading vector: */
32:	stvebx	v0,r3,r7		/*   1 byte */
	addi	r7,r7,1
4:	bne	cr7,5f
42:	stvehx	v0,r3,r7		/*   2 bytes */
	addi	r7,r7,2
5:	bng	cr7,6f
52:	stvewx	v0,r3,r7		/*   4 bytes */
	addi	r7,r7,4
6:	bnl	cr7,8f
62:	stvewx	v0,r3,r7		/*   8 bytes */
	addi	r7,r7,4
64:	stvewx	v0,r3,r7
	b	8f
7:	stvx	v0,0,r3
8:	rlwinm	r0,r11,0,28,31
	cmpi	cr6,0,r10,0
	li	r7,16
	cmpi	cr1,0,r0,0xF
	ble	cr6,10f
	mtctr	r10
	cmpi	cr6,0,r10,4
9:	stvx	v0,r3,r7		/* 16-byte loop */
	addi	r7,r7,16
	bdnzf	25,9b			/* loop while CTR != 0 and cr6.gt is clear */
	add	r9,r3,r7
	addi	r10,r10,-1
	bgt	cr6,16f
10:	mtcrf	0x01,r6
	beq	cr1,14f			/* last byte is vector-end aligned: full store */
	rlwinm	r11,r11,0,0,27
	li	r9,0
	bnl	cr7,11f			/* partial store of the trailing vector: */
102:	stvewx	v0,r11,r9		/*   8 bytes */
	addi	r9,r9,4
104:	stvewx	v0,r11,r9
	addi	r9,r9,4
11:	bng	cr7,12f
112:	stvewx	v0,r11,r9		/*   4 bytes */
	addi	r9,r9,4
12:	bne	cr7,13f
122:	stvehx	v0,r11,r9		/*   2 bytes */
	addi	r9,r9,2
13:	bnslr	cr7
132:	stvebx	v0,r11,r9		/*   1 byte */
	blr
14:	stvx	v0,r3,r7
	blr

16:	addi	r10,r10,-1
	mtcrf	0x02,r9
	addi	r9,r9,16
162:	stvx	v0,r3,r7
	addi	r7,r7,16
	bdnzf	27,16b
	mtcrf	0x02,r11
	bns	cr6,17f
	bdnz	17f
17:	stvx	v0,r3,r7		/* 32-byte loop */
	addi	r7,r7,16
	bdz	18f
18:	stvx	v0,r3,r7
	addi	r7,r7,16
	bdnz	17b
	bso	cr6,19f
	b	10b
19:	stvx	v0,r3,r7
	addi	r7,r7,16
	b	10b

/* Intent of this exception table appears to be to return the byte count */
/* remaining to be cleared when the current store error occurred. Chuck */
/* Memset doesn't require it but the code is identical to __clear_user */
/* FIRST FAILURE CHECKED BY RECOMPILATION WITH BRANCHES SUBSTITUTED
 * FOR STORES. chuckc 030515
 */
91:	mfctr	r3			/* Return byte count remaining */
	blr
92:	subf	r3,r7,r5		/* BC minus bytes already stored */
	blr
93:	mr	r3,r5			/* Nothing stored yet */
	blr
94:	add	r11,r3,r5
	rlwinm	r6,r11,0,28,31		/* Bytes in last vector */
	b	99f
95:	add	r11,r3,r5
	rlwinm	r6,r11,0,28,31
	subf	r3,r9,r6
	blr
96:	li	r3,16			/* 16 bytes in last vector to be stored. */
	blr
97:	add	r11,r3,r5
	rlwinm	r6,r11,0,27,31
99:	mfctr	r3
	rlwinm	r3,r3,4,0,27		/* CTR * 16 */
	add	r3,r3,r6
	blr
98:	add	r11,r3,r5
	rlwinm	r3,r11,0,27,31
	blr

#ifndef TEST_OUTSIDE_LINUX
	/* Each entry pairs a store that may fault with its fix-up handler. */
	.section __ex_table,"a"
	.align 2
	.long 1b,91b
	.long 32b,92b
	.long 42b,92b
	.long 52b,92b
	.long 62b,92b
	.long 64b,92b
	.long 7b,93b
	.long 9b,94b
	.long 102b,95b
	.long 104b,95b
	.long 112b,95b
	.long 122b,95b
	.long 132b,95b
	.long 14b,96b
	.long 162b,94b
	.long 17b,97b
	.long 18b,97b
	.long 19b,98b
#endif
	.text

/* Scalar __copy_tofrom_user always copies forward and never checks
 * for overlap, __copy_tofrom_user_vec will do the same except it will
 * check that overlap is > 128B before entering 128B loop when copying
 * forward.
 * The scalar version always assumes the destination and source
 * are word aligned. This routine will assume the same to simplify handling
 * exceptions. chuckc
 */
	.globl __copy_tofrom_user_vec
__copy_tofrom_user_vec:
	subf.	r7,r4,r3		/* r7 = dst - src, sets cr0 */
	cmpi	cr1,0,r5,0
	cmpi	cr7,0,r5,16
	addi	r8,r4,-1
	addi	r9,r3,-1
	add	r10,r4,r5
	beqlr
	add	r11,r3,r5
	subf	r0,r3,r4
	beqlr	cr1
	bgt	1f
	cmpi	cr5,0,r0,128		/* Overlap |(DST-SRC)|> 128B? */
	bgt	cr7,23f			/* b to v_memcpy */
1:	cmpi	cr5,0,r7,128		/* Overlap |(DST-SRC)|> 128B? */
	bgt	cr7,23f			/* b to v_memcpy */
	mtctr	r5
2:	lbzu	r0,1(r8)		/* forward byte-at-a-time copy */
202:	stbu	r0,1(r9)
	bdnz	2b
	li	r3,0			/* return 0 */
	blr

23:	rlwinm	r8,r4,0,28,31
	rlwinm	r9,r3,0,28,31
24:	lis	r10,0x010c
	subf.	r8,r8,r9
	lvsr	v2,0,r7
	ori	r10,r10,32
	dst	r4,r10,0
	addi	r10,r3,16
	addi	r11,r11,-1
	bge	25f
241:	lvx	v0,0,r4
	addi	r4,r4,16
25:	lvx	v1,0,r4
	rlwinm	r10,r10,0,0,27
	cmpi	cr1,0,r9,0
	subf	r0,r3,r10
	subf	r10,r10,r11
	li	r7,0
	mtcrf	0x01,r0
	rlwinm	r10,r10,28,4,31
	vperm	v3,v0,v1,v2
	vor	v0,v1,v1
	beq	cr1,29f
	bns	cr7,26f
252:	stvebx	v3,r3,r7
	addi	r7,r7,1
26:	bne	cr7,27f
262:	stvehx	v3,r3,r7
	addi	r7,r7,2
27:	bng	cr7,28f
272:	stvewx	v3,r3,r7
	addi	r7,r7,4
28:	bnl	cr7,30f
282:	stvewx	v3,r3,r7
	addi	r7,r7,4
284:	stvewx	v3,r3,r7
	b	30f
29:	stvx	v3,0,r3
30:	rlwinm	r0,r11,0,28,31
	cmpi	cr6,0,r10,0
	li	r7,16
	cmpi	cr1,0,r0,0xF
	cmpi	cr7,0,r10,14
	ble	cr6,32f
	mtctr	r10
	cmpi	cr6,0,r10,4
31:	lvx	v1,r4,r7
	vperm	v3,v0,v1,v2
	vor	v0,v1,v1
312:	stvx	v3,r3,r7
	addi	r7,r7,16
	bdnzf	25,31b
	add	r9,r3,r7
	addi	r10,r10,-1
	bgt	cr6,38f
32:	add	r11,r3,r5
	add	r10,r4,r5
	bge	33f
	addi	r10,r10,-16
33:	mtcrf	0x01,r11
	addi	r11,r11,-1
	addi	r0,r10,-1
331:	lvx	v1,0,r0
	dss	0
	dss	1
	vperm	v3,v0,v1,v2
	beq	cr1,37f
	rlwinm	r11,r11,0,0,27
	li	r9,0
	li	r3,0
	bnl	cr7,34f
332:	stvewx	v3,r11,r9
	addi	r9,r9,4
334:	stvewx	v3,r11,r9
	addi	r9,r9,4
34:	bng	cr7,35f
342:	stvewx	v3,r11,r9
	addi	r9,r9,4
/*
 * NOTE(review): the available source stops here.  Labels 35, 37 and 38
 * referenced above, and any exception table for this routine, are not
 * present in this copy - restore the remainder from the original file.
 */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -