📄 memcpy.s
字号:
/* Tail end of memcpy: the copy paths that build each destination
   doubleword (DW) out of two neighbouring source doublewords, followed
   by the common 0-7 byte tail copy and the function return.

   The function entry and register setup are outside this view, so the
   roles below are read off the visible instructions only:
     r4      address used by the std instructions, advanced as DWs are
             written
     r5      address used by the ld instructions, advanced as DWs are
             read
     r6, r7  the two most recently loaded source doublewords
     r0, r8  scratch: r0 receives the merged doubleword that is stored
     CTR     loaded from r8; bdnz counts down the 32-byte loops with it
     cr6     r9 compared (unsigned) with 4
     cr7     low four bits of r11 (mtcrf 0x01,11); CR bits 30 and 31 are
             tested with bf
   cr0, cr1, cr5 and the values in r3, r6, r8-r12 and r31 are produced
   before this view.
   NOTE(review): r10 looks like the source misalignment in bytes (1-7),
   with cr0/cr5 comparing it against 4 and 2 - confirm against the code
   above.

   Every variant duN (N = 1..7) has the same shape and differs only in
   the shift amount S = 8*N:
       r0 = (older DW << S) | (newer DW >> (64 - S))  */

        mtcrf 0x01,11
        cmpldi cr6,9,4
        mtctr 8
        ld 7,8(5) /* pre load 2nd full doubleword. */
        /* Dispatch on the condition fields: cr0 "ge" selects the group
           handled at du4_do; otherwise cr5 picks du1 / du2 / du3.  */
        bge cr0, L(du4_do)
        blt cr5, L(du1_do)
        beq cr5, L(du2_do)
        b L(du3_do)

/* ---- variant 1: shift left by 8, right by 64-8 ---- */
        .align 4
L(du1_do):
        /* CR bit 30 clear: take the single-DW entry path instead.  */
        bf 30,L(du1_1dw)
        /* there are at least two DWs to copy */
        sldi 0,6, 8
        srdi 8,7, 64-8
        or 0,0,8
        ld 6,16(5)
        std 0,0(4)
        sldi 0,7, 8
        srdi 8,6, 64-8
        or 0,0,8
        ld 7,24(5)
        std 0,8(4)
        addi 4,4,16
        addi 5,5,32
        blt cr6,L(du1_fini) /* if total DWs = 3, then bypass loop */
        bf 31,L(du1_loop)
        /* there is a third DW to copy */
        sldi 0,6, 8
        srdi 8,7, 64-8
        or 0,0,8
        std 0,0(4)
        mr 6,7
        ld 7,0(5)
        addi 5,5,8
        addi 4,4,8
        beq cr6,L(du1_fini) /* if total DWs = 4, then bypass loop */
        b L(du1_loop)
        .align 4
L(du1_1dw):
        /* Merge one DW into r0; it is stored below only when CR bit 31
           is set, otherwise the loop recomputes and stores it.  */
        sldi 0,6, 8
        srdi 8,7, 64-8
        addi 5,5,16
        or 0,0,8
        bf 31,L(du1_loop)
        mr 6,7
        ld 7,0(5)
        addi 5,5,8
        std 0,0(4)
        addi 4,4,8
        .align 4
/* copy 32 bytes at a time */
L(du1_loop):
        /* Four merge/store steps per iteration; r6 and r7 alternate as
           the "older" and "newer" source doubleword.  */
        sldi 0,6, 8
        srdi 8,7, 64-8
        or 0,0,8
        ld 6,0(5)
        std 0,0(4)
        sldi 0,7, 8
        srdi 8,6, 64-8
        or 0,0,8
        ld 7,8(5)
        std 0,8(4)
        sldi 0,6, 8
        srdi 8,7, 64-8
        or 0,0,8
        ld 6,16(5)
        std 0,16(4)
        sldi 0,7, 8
        srdi 8,6, 64-8
        or 0,0,8
        ld 7,24(5)
        std 0,24(4)
        addi 5,5,32
        addi 4,4,32
        bdnz+ L(du1_loop)
        .align 4
L(du1_fini):
        /* calculate and store the final DW */
        sldi 0,6, 8
        srdi 8,7, 64-8
        or 0,0,8
        std 0,0(4)
        b L(du_done)

/* ---- variant 2: shift left by 16, right by 64-16 ---- */
        .align 4
L(du2_do):
        bf 30,L(du2_1dw)
        /* there are at least two DWs to copy */
        sldi 0,6, 16
        srdi 8,7, 64-16
        or 0,0,8
        ld 6,16(5)
        std 0,0(4)
        sldi 0,7, 16
        srdi 8,6, 64-16
        or 0,0,8
        ld 7,24(5)
        std 0,8(4)
        addi 4,4,16
        addi 5,5,32
        blt cr6,L(du2_fini) /* if total DWs = 3, then bypass loop */
        bf 31,L(du2_loop)
        /* there is a third DW to copy */
        sldi 0,6, 16
        srdi 8,7, 64-16
        or 0,0,8
        std 0,0(4)
        mr 6,7
        ld 7,0(5)
        addi 5,5,8
        addi 4,4,8
        beq cr6,L(du2_fini) /* if total DWs = 4, then bypass loop */
        b L(du2_loop)
        .align 4
L(du2_1dw):
        sldi 0,6, 16
        srdi 8,7, 64-16
        addi 5,5,16
        or 0,0,8
        bf 31,L(du2_loop)
        mr 6,7
        ld 7,0(5)
        addi 5,5,8
        std 0,0(4)
        addi 4,4,8
        .align 4
/* copy 32 bytes at a time */
L(du2_loop):
        sldi 0,6, 16
        srdi 8,7, 64-16
        or 0,0,8
        ld 6,0(5)
        std 0,0(4)
        sldi 0,7, 16
        srdi 8,6, 64-16
        or 0,0,8
        ld 7,8(5)
        std 0,8(4)
        sldi 0,6, 16
        srdi 8,7, 64-16
        or 0,0,8
        ld 6,16(5)
        std 0,16(4)
        sldi 0,7, 16
        srdi 8,6, 64-16
        or 0,0,8
        ld 7,24(5)
        std 0,24(4)
        addi 5,5,32
        addi 4,4,32
        bdnz+ L(du2_loop)
        .align 4
L(du2_fini):
        /* calculate and store the final DW */
        sldi 0,6, 16
        srdi 8,7, 64-16
        or 0,0,8
        std 0,0(4)
        b L(du_done)

/* ---- variant 3: shift left by 24, right by 64-24 ---- */
        .align 4
L(du3_do):
        bf 30,L(du3_1dw)
        /* there are at least two DWs to copy */
        sldi 0,6, 24
        srdi 8,7, 64-24
        or 0,0,8
        ld 6,16(5)
        std 0,0(4)
        sldi 0,7, 24
        srdi 8,6, 64-24
        or 0,0,8
        ld 7,24(5)
        std 0,8(4)
        addi 4,4,16
        addi 5,5,32
        blt cr6,L(du3_fini) /* if total DWs = 3, then bypass loop */
        bf 31,L(du3_loop)
        /* there is a third DW to copy */
        sldi 0,6, 24
        srdi 8,7, 64-24
        or 0,0,8
        std 0,0(4)
        mr 6,7
        ld 7,0(5)
        addi 5,5,8
        addi 4,4,8
        beq cr6,L(du3_fini) /* if total DWs = 4, then bypass loop */
        b L(du3_loop)
        .align 4
L(du3_1dw):
        sldi 0,6, 24
        srdi 8,7, 64-24
        addi 5,5,16
        or 0,0,8
        bf 31,L(du3_loop)
        mr 6,7
        ld 7,0(5)
        addi 5,5,8
        std 0,0(4)
        addi 4,4,8
        .align 4
/* copy 32 bytes at a time */
L(du3_loop):
        sldi 0,6, 24
        srdi 8,7, 64-24
        or 0,0,8
        ld 6,0(5)
        std 0,0(4)
        sldi 0,7, 24
        srdi 8,6, 64-24
        or 0,0,8
        ld 7,8(5)
        std 0,8(4)
        sldi 0,6, 24
        srdi 8,7, 64-24
        or 0,0,8
        ld 6,16(5)
        std 0,16(4)
        sldi 0,7, 24
        srdi 8,6, 64-24
        or 0,0,8
        ld 7,24(5)
        std 0,24(4)
        addi 5,5,32
        addi 4,4,32
        bdnz+ L(du3_loop)
        .align 4
L(du3_fini):
        /* calculate and store the final DW */
        sldi 0,6, 24
        srdi 8,7, 64-24
        or 0,0,8
        std 0,0(4)
        b L(du_done)

/* ---- second-level dispatch for variants 4..7 ---- */
        .align 4
L(du4_do):
        /* cr5 is recomputed here as r10 compared (unsigned) with 6;
           cr0 still holds the result produced before this view.  */
        cmpldi cr5, 10, 6
        beq cr0, L(du4_dox)
        blt cr5, L(du5_do)
        beq cr5, L(du6_do)
        b L(du7_do)

/* ---- variant 4: shift left by 32, right by 64-32 ---- */
L(du4_dox):
        bf 30,L(du4_1dw)
        /* there are at least two DWs to copy */
        sldi 0,6, 32
        srdi 8,7, 64-32
        or 0,0,8
        ld 6,16(5)
        std 0,0(4)
        sldi 0,7, 32
        srdi 8,6, 64-32
        or 0,0,8
        ld 7,24(5)
        std 0,8(4)
        addi 4,4,16
        addi 5,5,32
        blt cr6,L(du4_fini) /* if total DWs = 3, then bypass loop */
        bf 31,L(du4_loop)
        /* there is a third DW to copy */
        sldi 0,6, 32
        srdi 8,7, 64-32
        or 0,0,8
        std 0,0(4)
        mr 6,7
        ld 7,0(5)
        addi 5,5,8
        addi 4,4,8
        beq cr6,L(du4_fini) /* if total DWs = 4, then bypass loop */
        b L(du4_loop)
        .align 4
L(du4_1dw):
        sldi 0,6, 32
        srdi 8,7, 64-32
        addi 5,5,16
        or 0,0,8
        bf 31,L(du4_loop)
        mr 6,7
        ld 7,0(5)
        addi 5,5,8
        std 0,0(4)
        addi 4,4,8
        .align 4
/* copy 32 bytes at a time */
L(du4_loop):
        sldi 0,6, 32
        srdi 8,7, 64-32
        or 0,0,8
        ld 6,0(5)
        std 0,0(4)
        sldi 0,7, 32
        srdi 8,6, 64-32
        or 0,0,8
        ld 7,8(5)
        std 0,8(4)
        sldi 0,6, 32
        srdi 8,7, 64-32
        or 0,0,8
        ld 6,16(5)
        std 0,16(4)
        sldi 0,7, 32
        srdi 8,6, 64-32
        or 0,0,8
        ld 7,24(5)
        std 0,24(4)
        addi 5,5,32
        addi 4,4,32
        bdnz+ L(du4_loop)
        .align 4
L(du4_fini):
        /* calculate and store the final DW */
        sldi 0,6, 32
        srdi 8,7, 64-32
        or 0,0,8
        std 0,0(4)
        b L(du_done)

/* ---- variant 5: shift left by 40, right by 64-40 ---- */
        .align 4
L(du5_do):
        bf 30,L(du5_1dw)
        /* there are at least two DWs to copy */
        sldi 0,6, 40
        srdi 8,7, 64-40
        or 0,0,8
        ld 6,16(5)
        std 0,0(4)
        sldi 0,7, 40
        srdi 8,6, 64-40
        or 0,0,8
        ld 7,24(5)
        std 0,8(4)
        addi 4,4,16
        addi 5,5,32
        blt cr6,L(du5_fini) /* if total DWs = 3, then bypass loop */
        bf 31,L(du5_loop)
        /* there is a third DW to copy */
        sldi 0,6, 40
        srdi 8,7, 64-40
        or 0,0,8
        std 0,0(4)
        mr 6,7
        ld 7,0(5)
        addi 5,5,8
        addi 4,4,8
        beq cr6,L(du5_fini) /* if total DWs = 4, then bypass loop */
        b L(du5_loop)
        .align 4
L(du5_1dw):
        sldi 0,6, 40
        srdi 8,7, 64-40
        addi 5,5,16
        or 0,0,8
        bf 31,L(du5_loop)
        mr 6,7
        ld 7,0(5)
        addi 5,5,8
        std 0,0(4)
        addi 4,4,8
        .align 4
/* copy 32 bytes at a time */
L(du5_loop):
        sldi 0,6, 40
        srdi 8,7, 64-40
        or 0,0,8
        ld 6,0(5)
        std 0,0(4)
        sldi 0,7, 40
        srdi 8,6, 64-40
        or 0,0,8
        ld 7,8(5)
        std 0,8(4)
        sldi 0,6, 40
        srdi 8,7, 64-40
        or 0,0,8
        ld 6,16(5)
        std 0,16(4)
        sldi 0,7, 40
        srdi 8,6, 64-40
        or 0,0,8
        ld 7,24(5)
        std 0,24(4)
        addi 5,5,32
        addi 4,4,32
        bdnz+ L(du5_loop)
        .align 4
L(du5_fini):
        /* calculate and store the final DW */
        sldi 0,6, 40
        srdi 8,7, 64-40
        or 0,0,8
        std 0,0(4)
        b L(du_done)

/* ---- variant 6: shift left by 48, right by 64-48 ---- */
        .align 4
L(du6_do):
        bf 30,L(du6_1dw)
        /* there are at least two DWs to copy */
        sldi 0,6, 48
        srdi 8,7, 64-48
        or 0,0,8
        ld 6,16(5)
        std 0,0(4)
        sldi 0,7, 48
        srdi 8,6, 64-48
        or 0,0,8
        ld 7,24(5)
        std 0,8(4)
        addi 4,4,16
        addi 5,5,32
        blt cr6,L(du6_fini) /* if total DWs = 3, then bypass loop */
        bf 31,L(du6_loop)
        /* there is a third DW to copy */
        sldi 0,6, 48
        srdi 8,7, 64-48
        or 0,0,8
        std 0,0(4)
        mr 6,7
        ld 7,0(5)
        addi 5,5,8
        addi 4,4,8
        beq cr6,L(du6_fini) /* if total DWs = 4, then bypass loop */
        b L(du6_loop)
        .align 4
L(du6_1dw):
        sldi 0,6, 48
        srdi 8,7, 64-48
        addi 5,5,16
        or 0,0,8
        bf 31,L(du6_loop)
        mr 6,7
        ld 7,0(5)
        addi 5,5,8
        std 0,0(4)
        addi 4,4,8
        .align 4
/* copy 32 bytes at a time */
L(du6_loop):
        sldi 0,6, 48
        srdi 8,7, 64-48
        or 0,0,8
        ld 6,0(5)
        std 0,0(4)
        sldi 0,7, 48
        srdi 8,6, 64-48
        or 0,0,8
        ld 7,8(5)
        std 0,8(4)
        sldi 0,6, 48
        srdi 8,7, 64-48
        or 0,0,8
        ld 6,16(5)
        std 0,16(4)
        sldi 0,7, 48
        srdi 8,6, 64-48
        or 0,0,8
        ld 7,24(5)
        std 0,24(4)
        addi 5,5,32
        addi 4,4,32
        bdnz+ L(du6_loop)
        .align 4
L(du6_fini):
        /* calculate and store the final DW */
        sldi 0,6, 48
        srdi 8,7, 64-48
        or 0,0,8
        std 0,0(4)
        b L(du_done)

/* ---- variant 7: shift left by 56, right by 64-56 ---- */
        .align 4
L(du7_do):
        bf 30,L(du7_1dw)
        /* there are at least two DWs to copy */
        sldi 0,6, 56
        srdi 8,7, 64-56
        or 0,0,8
        ld 6,16(5)
        std 0,0(4)
        sldi 0,7, 56
        srdi 8,6, 64-56
        or 0,0,8
        ld 7,24(5)
        std 0,8(4)
        addi 4,4,16
        addi 5,5,32
        blt cr6,L(du7_fini) /* if total DWs = 3, then bypass loop */
        bf 31,L(du7_loop)
        /* there is a third DW to copy */
        sldi 0,6, 56
        srdi 8,7, 64-56
        or 0,0,8
        std 0,0(4)
        mr 6,7
        ld 7,0(5)
        addi 5,5,8
        addi 4,4,8
        beq cr6,L(du7_fini) /* if total DWs = 4, then bypass loop */
        b L(du7_loop)
        .align 4
L(du7_1dw):
        sldi 0,6, 56
        srdi 8,7, 64-56
        addi 5,5,16
        or 0,0,8
        bf 31,L(du7_loop)
        mr 6,7
        ld 7,0(5)
        addi 5,5,8
        std 0,0(4)
        addi 4,4,8
        .align 4
/* copy 32 bytes at a time */
L(du7_loop):
        sldi 0,6, 56
        srdi 8,7, 64-56
        or 0,0,8
        ld 6,0(5)
        std 0,0(4)
        sldi 0,7, 56
        srdi 8,6, 64-56
        or 0,0,8
        ld 7,8(5)
        std 0,8(4)
        sldi 0,6, 56
        srdi 8,7, 64-56
        or 0,0,8
        ld 6,16(5)
        std 0,16(4)
        sldi 0,7, 56
        srdi 8,6, 64-56
        or 0,0,8
        ld 7,24(5)
        std 0,24(4)
        addi 5,5,32
        addi 4,4,32
        bdnz+ L(du7_loop)
        .align 4
L(du7_fini):
        /* calculate and store the final DW */
        sldi 0,6, 56
        srdi 8,7, 64-56
        or 0,0,8
        std 0,0(4)
        b L(du_done)

/* ---- common exit of all variants: tail bytes and return ---- */
        .align 4
L(du_done):
        /* r0 = r31 with its three low-order bits cleared; cr7 = low four
           bits of r31, tested below with bf 29/30/31.
           NOTE(review): r31 looks like the byte count still to be
           copied at this stage - confirm against the code above.  */
        rldicr 0,31,0,60
        mtcrf 0x01,31
        beq cr1,0f /* If the tail is 0 bytes we are done! */
        /* Advance the pointers in r3 and r12 by r0.  */
        add 3,3,0
        add 12,12,0
/* At this point we have a tail of 0-7 bytes and we know that the
   destination is double word aligned. */
        /* CR bit 29 set: copy 4 bytes from 0(r12) to 0(r3).  */
4:      bf 29,2f
        lwz 6,0(12)
        addi 12,12,4
        stw 6,0(3)
        addi 3,3,4
        /* CR bit 30 set: copy 2 bytes.  */
2:      bf 30,1f
        lhz 6,0(12)
        addi 12,12,2
        sth 6,0(3)
        addi 3,3,2
        /* CR bit 31 set: copy the last byte.  */
1:      bf 31,0f
        lbz 6,0(12)
        stb 6,0(3)
0:
        /* Return original dst pointer. */
        /* r31 and r3 are reloaded from -8(r1) and -16(r1); the stores
           that filled those slots are outside this view.  */
        ld 31,-8(1)
        ld 3,-16(1)
        blr
END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS)
libc_hidden_builtin_def (memcpy)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -