⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 memcpy.s

📁 glibc 2.9,最新版的C语言库函数
💻 S
📖 第 1 页 / 共 2 页
字号:
     r5 == unadjusted len     r9 == adjusted Word length     r10 == src alignment (1-3)     r12 == adjuested src, not aligned     r31 == adjusted len     First we need to copy word upto but not crossing the next 32-byte     boundary. Then perform aligned loads just before and just after      the boundary and use shifts and or to gernerate the next aligned     word for dst. If more then 32 bytes remain we copy (unaligned src)     the next 7 words and repeat the loop until less then 32-bytes     remaim.     Then if more then 4 bytes remain we again use aligned loads,     shifts and or to generate the next dst word. We then process the     remaining words using unaligned loads as needed. Finally we check     if there more then 0 bytes (1-3) bytes remainting and use     halfword and or byte load/stores to complete the copy.*/    mr      4,12      /* restore unaligned adjusted src ptr */    clrlwi  0,12,27   /* Find dist from previous 32-byte boundary.  */    slwi    10,10,3   /* calculate number of bits to shift 1st word left */    cmplwi  cr5,0,16       subfic  8,0,32   /* Number of bytes to next 32-byte boundary.  */    mtcrf   0x01,8    cmplwi  cr1,10,16    subfic  9,10,32  /* number of bits to shift 2nd word right *//*  This test is reversed because the timing to compare the bytes to    32-byte boundary could not be meet.  So we compare the bytes from    previous 32-byte boundary and invert the test.  */    bge     cr5,L(wdu_h32_8)    .align  4    lwz   6,0(4)    lwz   7,4(4)    addi  12,4,16    /* generate alternate pointers to avoid agen */    addi  11,3,16    /* timing issues downstream.  */    stw   6,0(3)    stw   7,4(3)    subi  31,31,16    lwz   6,8(4)    lwz   7,12(4)    addi  4,4,16    stw   6,8(3)    stw   7,12(3)    addi  3,3,16    bf    28,L(wdu_h32_4)    lwz   6,0(12)    lwz   7,4(12)    subi  31,31,8    addi  4,4,8    stw   6,0(11)    stw   7,4(11)    addi  3,3,8    bf    29,L(wdu_h32_0)    lwz   6,8(12)    addi  4,4,4    subi  31,31,4    stw   6,8(11)    addi  3,3,4    b     L(wdu_h32_0)    .align  4L(wdu_h32_8):    bf    28,L(wdu_h32_4)    lwz   6,0(4)    lwz   7,4(4)    subi  31,31,8    bf    29,L(wdu_h32_8x)    stw   6,0(3)    stw   7,4(3)    lwz   6,8(4)    addi  4,4,12    subi  31,31,4    stw   6,8(3)    addi  3,3,12    b     L(wdu_h32_0)    .align  4L(wdu_h32_8x):    addi  4,4,8    stw   6,0(3)    stw   7,4(3)    addi  3,3,8    b     L(wdu_h32_0)    .align  4L(wdu_h32_4):    bf    29,L(wdu_h32_0)    lwz   6,0(4)    subi  31,31,4    addi  4,4,4    stw   6,0(3)    addi  3,3,4    .align  4L(wdu_h32_0):/*  set up for 32-byte boundry crossing word move and possibly 32-byte    move loop.  */    clrrwi  12,4,2    cmplwi  cr5,31,32    bge     cr1,L(wdu2_32)#if 0    b       L(wdu1_32)/*    cmplwi  cr1,10,8    beq     cr1,L(wdu1_32)    cmplwi  cr1,10,16    beq     cr1,L(wdu2_32)    cmplwi  cr1,10,24    beq     cr1,L(wdu3_32)*/L(wdu_32):    lwz     6,0(12)    cmplwi  cr6,31,4    srwi    8,31,5    /* calculate the 32 byte loop count */    slw     0,6,10     clrlwi  31,31,27   /* The remaining bytes, < 32.  */    blt     cr5,L(wdu_32tail)    mtctr   8    cmplwi  cr6,31,4    .align  4L(wdu_loop32):    /* copy 32 bytes at a time */    lwz   8,4(12)    addi  12,12,32    lwz   7,4(4)    srw   8,8,9     or    0,0,8    stw   0,0(3)    stw   7,4(3)    lwz   6,8(4)    lwz   7,12(4)    stw   6,8(3)    stw   7,12(3)    lwz   6,16(4)    lwz   7,20(4)    stw   6,16(3)    stw   7,20(3)    lwz   6,24(4)    lwz   7,28(4)    lwz   8,0(12)    addi  4,4,32    stw   6,24(3)    stw   7,28(3)    addi  3,3,32    slw   0,8,10     bdnz+ L(wdu_loop32)L(wdu_32tail):    mtcrf   0x01,31    cmplwi  cr5,31,16    blt     cr6,L(wdu_4tail)    /* calculate and store the final word */    lwz   8,4(12)    srw   8,8,9     or    6,0,8    b     L(wdu_32tailx)#endif    .align  4L(wdu1_32):    lwz     6,-1(4)    cmplwi  cr6,31,4    srwi    8,31,5    /* calculate the 32 byte loop count */    slwi    6,6,8    clrlwi  31,31,27   /* The remaining bytes, < 32.  */    blt     cr5,L(wdu1_32tail)    mtctr   8    cmplwi  cr6,31,4    lwz   8,3(4)    lwz   7,4(4)/*  Equivalent to: srwi   8,8,32-8;  or    6,6,8 */    rlwimi 6,8,8,(32-8),31    b      L(wdu1_loop32x)    .align  4L(wdu1_loop32):    /* copy 32 bytes at a time */    lwz   8,3(4)    lwz   7,4(4)    stw   10,-8(3)    stw   11,-4(3)/*  Equivalent to  srwi   8,8,32-8; or    6,6,8 */    rlwimi 6,8,8,(32-8),31L(wdu1_loop32x):    lwz   10,8(4)    lwz   11,12(4)    stw   6,0(3)    stw   7,4(3)    lwz   6,16(4)    lwz   7,20(4)    stw   10,8(3)    stw   11,12(3)    lwz   10,24(4)    lwz   11,28(4)    lwz   8,32-1(4)    addi  4,4,32    stw   6,16(3)    stw   7,20(3)    addi  3,3,32    slwi  6,8,8    bdnz+ L(wdu1_loop32)    stw   10,-8(3)    stw   11,-4(3)L(wdu1_32tail):    mtcrf   0x01,31    cmplwi  cr5,31,16    blt     cr6,L(wdu_4tail)    /* calculate and store the final word */    lwz   8,3(4)/*  Equivalent to: srwi   8,8,32-9;  or    6,6,8  */    rlwimi 6,8,8,(32-8),31    b     L(wdu_32tailx)L(wdu2_32):    bgt     cr1,L(wdu3_32)    lwz     6,-2(4)    cmplwi  cr6,31,4    srwi    8,31,5    /* calculate the 32 byte loop count */    slwi    6,6,16    clrlwi  31,31,27   /* The remaining bytes, < 32.  */    blt     cr5,L(wdu2_32tail)    mtctr   8    cmplwi  cr6,31,4    lwz   8,2(4)    lwz   7,4(4)/*  Equivalent to: srwi   8,8,32-8;  or    6,6,8 */    rlwimi 6,8,16,(32-16),31    b      L(wdu2_loop32x)    .align  4L(wdu2_loop32):    /* copy 32 bytes at a time */    lwz   8,2(4)    lwz   7,4(4)    stw   10,-8(3)    stw   11,-4(3)/*  Equivalent to  srwi   8,8,32-8; or    6,6,8 */    rlwimi 6,8,16,(32-16),31L(wdu2_loop32x):    lwz   10,8(4)    lwz   11,12(4)    stw   6,0(3)    stw   7,4(3)    lwz   6,16(4)    lwz   7,20(4)    stw   10,8(3)    stw   11,12(3)    lwz   10,24(4)    lwz   11,28(4)/*    lwz   8,0(12) */    lwz   8,32-2(4)    addi  4,4,32    stw   6,16(3)    stw   7,20(3)    addi  3,3,32    slwi  6,8,16    bdnz+ L(wdu2_loop32)    stw   10,-8(3)    stw   11,-4(3)L(wdu2_32tail):    mtcrf   0x01,31    cmplwi  cr5,31,16    blt     cr6,L(wdu_4tail)    /* calculate and store the final word */    lwz   8,2(4)/*  Equivalent to: srwi   8,8,32-9;  or    6,6,8  */    rlwimi 6,8,16,(32-16),31    b     L(wdu_32tailx)L(wdu3_32):/*    lwz     6,0(12) */    lwz     6,-3(4)    cmplwi  cr6,31,4    srwi    8,31,5    /* calculate the 32 byte loop count */    slwi    6,6,24    clrlwi  31,31,27   /* The remaining bytes, < 32.  */    blt     cr5,L(wdu3_32tail)    mtctr   8    cmplwi  cr6,31,4    lwz   8,1(4)    lwz   7,4(4)/*  Equivalent to: srwi   8,8,32-8;  or    6,6,8 */    rlwimi 6,8,24,(32-24),31    b      L(wdu3_loop32x)    .align  4L(wdu3_loop32):    /* copy 32 bytes at a time */    lwz   8,1(4)    lwz   7,4(4)    stw   10,-8(3)    stw   11,-4(3)/*  Equivalent to  srwi   8,8,32-8; or    6,6,8 */    rlwimi 6,8,24,(32-24),31L(wdu3_loop32x):    lwz   10,8(4)    lwz   11,12(4)    stw   6,0(3)    stw   7,4(3)    lwz   6,16(4)    lwz   7,20(4)    stw   10,8(3)    stw   11,12(3)    lwz   10,24(4)    lwz   11,28(4)    lwz   8,32-3(4)    addi  4,4,32    stw   6,16(3)    stw   7,20(3)    addi  3,3,32    slwi  6,8,24    bdnz+ L(wdu3_loop32)    stw   10,-8(3)    stw   11,-4(3)L(wdu3_32tail):    mtcrf   0x01,31    cmplwi  cr5,31,16    blt     cr6,L(wdu_4tail)    /* calculate and store the final word */    lwz   8,1(4)/*  Equivalent to: srwi   8,8,32-9;  or    6,6,8  */    rlwimi 6,8,24,(32-24),31    b     L(wdu_32tailx)    .align  4L(wdu_32tailx):    blt     cr5,L(wdu_t32_8)    lwz   7,4(4)    addi  12,4,16    /* generate alternate pointers to avoid agen */    addi  11,3,16    /* timing issues downstream.  */    stw   6,0(3)    stw   7,4(3)    subi  31,31,16    lwz   6,8(4)    lwz   7,12(4)    addi  4,4,16    stw   6,8(3)    stw   7,12(3)    addi  3,3,16    bf    28,L(wdu_t32_4x)    lwz   6,0(12)    lwz   7,4(12)    addi  4,4,8    subi  31,31,8    stw   6,0(11)    stw   7,4(11)    addi  3,3,8    bf    29,L(wdu_t32_0)    lwz   6,8(12)    addi  4,4,4    subi  31,31,4    stw   6,8(11)    addi  3,3,4    b     L(wdu_t32_0)    .align  4L(wdu_t32_4x):    bf    29,L(wdu_t32_0)    lwz   6,0(4)    addi  4,4,4    subi  31,31,4    stw   6,0(3)    addi  3,3,4    b     L(wdu_t32_0)    .align  4L(wdu_t32_8):    bf    28,L(wdu_t32_4)    lwz   7,4(4)    subi  31,31,8    bf    29,L(wdu_t32_8x)    stw   6,0(3)    stw   7,4(3)    lwz   6,8(4)    subi  31,31,4    addi  4,4,12    stw   6,8(3)    addi  3,3,12    b     L(wdu_t32_0)    .align  4L(wdu_t32_8x):    addi  4,4,8    stw   6,0(3)    stw   7,4(3)    addi  3,3,8    b     L(wdu_t32_0)    .align  4L(wdu_t32_4):    subi  31,31,4    stw   6,0(3)    addi  4,4,4    addi  3,3,4    .align  4L(wdu_t32_0):L(wdu_4tail):    cmplwi  cr6,31,0    beq   cr6,L(wdus_0)	/* If the tail is 0 bytes we are done!  */    bf    30,L(wdus_3)    lhz   7,0(4)    sth   7,0(3)     bf    31,L(wdus_0)    lbz   8,2(4)    stb   8,2(3)    mr    3,30    lwz   30,20(1)    lwz   31,24(1)    addi  1,1,32    blr    .align  4L(wdus_3):    bf    31,L(wus_0)    lbz   6,0(4)    stb   6,0(3)    .align  4L(wdus_0):  /* Return original dst pointer.  */    mr   3,30    lwz  30,20(1)    lwz  31,24(1)    addi 1,1,32    blrEND (BP_SYM (memcpy))libc_hidden_builtin_def (memcpy)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -