📄 memmove.s
字号:
TS = 0TE = 1FROM = 2N = 3TMP = 3 /* N and TMP don't overlap */TMP1 = 4TEXT memcpy(SB), $-4 B _memmoveTEXT memmove(SB), $-4_memmove: MOVW R(TS), to+0(FP) /* need to save for return value */ MOVW from+4(FP), R(FROM) MOVW n+8(FP), R(N) ADD R(N), R(TS), R(TE) /* to end pointer */ CMP R(FROM), R(TS) BLS _forward_back: ADD R(N), R(FROM) /* from end pointer */ CMP $4, R(N) /* need at least 4 bytes to copy */ BLT _b1tail_b4align: /* align destination on 4 */ AND.S $3, R(TE), R(TMP) BEQ _b4aligned MOVBU.W -1(R(FROM)), R(TMP) /* pre-indexed */ MOVBU.W R(TMP), -1(R(TE)) /* pre-indexed */ B _b4align_b4aligned: /* is source now aligned? */ AND.S $3, R(FROM), R(TMP) BNE _bunaligned ADD $31, R(TS), R(TMP) /* do 32-byte chunks if possible */_b32loop: CMP R(TMP), R(TE) BLS _b4tail MOVM.DB.W (R(FROM)), [R4-R11] MOVM.DB.W [R4-R11], (R(TE)) B _b32loop_b4tail: /* do remaining words if possible */ ADD $3, R(TS), R(TMP)_b4loop: CMP R(TMP), R(TE) BLS _b1tail MOVW.W -4(R(FROM)), R(TMP1) /* pre-indexed */ MOVW.W R(TMP1), -4(R(TE)) /* pre-indexed */ B _b4loop_b1tail: /* remaining bytes */ CMP R(TE), R(TS) BEQ _return MOVBU.W -1(R(FROM)), R(TMP) /* pre-indexed */ MOVBU.W R(TMP), -1(R(TE)) /* pre-indexed */ B _b1tail_forward: CMP $4, R(N) /* need at least 4 bytes to copy */ BLT _f1tail_f4align: /* align destination on 4 */ AND.S $3, R(TS), R(TMP) BEQ _f4aligned MOVBU.P 1(R(FROM)), R(TMP) /* implicit write back */ MOVBU.P R(TMP), 1(R(TS)) /* implicit write back */ B _f4align_f4aligned: /* is source now aligned? */ AND.S $3, R(FROM), R(TMP) BNE _funaligned SUB $31, R(TE), R(TMP) /* do 32-byte chunks if possible */_f32loop: CMP R(TMP), R(TS) BHS _f4tail MOVM.IA.W (R(FROM)), [R4-R11] MOVM.IA.W [R4-R11], (R(TS)) B _f32loop_f4tail: SUB $3, R(TE), R(TMP) /* do remaining words if possible */_f4loop: CMP R(TMP), R(TS) BHS _f1tail MOVW.P 4(R(FROM)), R(TMP1) /* implicit write back */ MOVW.P R4, 4(R(TS)) /* implicit write back */ B _f4loop_f1tail: CMP R(TS), R(TE) BEQ _return MOVBU.P 1(R(FROM)), R(TMP) /* implicit write back */ MOVBU.P R(TMP), 1(R(TS)) /* implicit write back */ B _f1tail_return: MOVW to+0(FP), R0 RETRSHIFT = 4LSHIFT = 5OFFSET = 6BR0 = 7BW0 = 8BR1 = 8BW1 = 9BR2 = 9BW2 = 10BR3 = 10BW3 = 11_bunaligned: CMP $2, R(TMP) /* is R(TMP) < 2 ? */ MOVW.LT $8, R(RSHIFT) /* (R(n)<<24)|(R(n-1)>>8) */ MOVW.LT $24, R(LSHIFT) MOVW.LT $1, R(OFFSET) MOVW.EQ $16, R(RSHIFT) /* (R(n)<<16)|(R(n-1)>>16) */ MOVW.EQ $16, R(LSHIFT) MOVW.EQ $2, R(OFFSET) MOVW.GT $24, R(RSHIFT) /* (R(n)<<8)|(R(n-1)>>24) */ MOVW.GT $8, R(LSHIFT) MOVW.GT $3, R(OFFSET) ADD $16, R(TS), R(TMP) /* do 16-byte chunks if possible */ CMP R(TMP), R(TE) BLS _b1tail AND $~0x03, R(FROM) /* align source */ MOVW (R(FROM)), R(BR0) /* prime first block register */_bu16loop: CMP R(TMP), R(TE) BLS _bu1tail MOVW R(BR0)<<R(LSHIFT), R(BW3) MOVM.DB.W (R(FROM)), [R(BR0)-R(BR3)] ORR R(BR3)>>R(RSHIFT), R(BW3) MOVW R(BR3)<<R(LSHIFT), R(BW2) ORR R(BR2)>>R(RSHIFT), R(BW2) MOVW R(BR2)<<R(LSHIFT), R(BW1) ORR R(BR1)>>R(RSHIFT), R(BW1) MOVW R(BR1)<<R(LSHIFT), R(BW0) ORR R(BR0)>>R(RSHIFT), R(BW0) MOVM.DB.W [R(BW0)-R(BW3)], (R(TE)) B _bu16loop_bu1tail: ADD R(OFFSET), R(FROM) B _b1tailFW0 = 7FR0 = 8FW1 = 8FR1 = 9FW2 = 9FR2 = 10FW3 = 10FR3 = 11_funaligned: CMP $2, R(TMP) MOVW.LT $8, R(RSHIFT) /* (R(n+1)<<24)|(R(n)>>8) */ MOVW.LT $24, R(LSHIFT) MOVW.LT $3, R(OFFSET) MOVW.EQ $16, R(RSHIFT) /* (R(n+1)<<16)|(R(n)>>16) */ MOVW.EQ $16, R(LSHIFT) MOVW.EQ $2, R(OFFSET) MOVW.GT $24, R(RSHIFT) /* (R(n+1)<<8)|(R(n)>>24) */ MOVW.GT $8, R(LSHIFT) MOVW.GT $1, R(OFFSET) SUB $16, R(TE), R(TMP) /* do 16-byte chunks if possible */ CMP R(TMP), R(TS) BHS _f1tail AND $~0x03, R(FROM) /* align source */ MOVW.P 4(R(FROM)), R(FR3) /* prime last block register, implicit write back */_fu16loop: CMP R(TMP), R(TS) BHS _fu1tail MOVW R(FR3)>>R(RSHIFT), R(FW0) MOVM.IA.W (R(FROM)), [R(FR0)-R(FR3)] ORR R(FR0)<<R(LSHIFT), R(FW0) MOVW R(FR0)>>R(RSHIFT), R(FW1) ORR R(FR1)<<R(LSHIFT), R(FW1) MOVW R(FR1)>>R(RSHIFT), R(FW2) ORR R(FR2)<<R(LSHIFT), R(FW2) MOVW R(FR2)>>R(RSHIFT), R(FW3) ORR R(FR3)<<R(LSHIFT), R(FW3) MOVM.IA.W [R(FW0)-R(FW3)], (R(TS)) B _fu16loop_fu1tail: SUB R(OFFSET), R(FROM) B _f1tail
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -