// memcpy_fast.s
#include "kxmips.h"
.data
.text
.align 2
//-----------------------------------------------------------------------
// memcpy_fast
//
// Copies a2 bytes from the address in a1 to the address in a0 and
// returns the original value of a0 in v0.
//
// In:    a0 = destination address
//        a1 = source address
//        a2 = byte count (compared as unsigned)
// Out:   v0 = a0 as passed in
// Clobb: a0, a1, a2, t0-t4, t7, t8
// Stack: none (leaf routine, frame size 0)
//
// Strategy:
//   count < 4            -> byte copy ($copy_bytes)
//   dst not word-aligned -> copy 4-(dst&3) head bytes with lwr/lwl + swr
//   src and dst aligned  -> 32-byte unrolled loop, one 16-byte block,
//                           word loop, then a 1..3 byte tail via lw/sllv/swl
//   only dst aligned     -> 16-byte unrolled loop and word loop using
//                           lwr/lwl loads, then the byte copy for the tail
//
// Notes:
//  * The body is assembled under ".set noreorder": the instruction that
//    follows every branch or jump is its delay slot and executes whether
//    or not the branch is taken. Most delay slots here carry real work,
//    so instruction order must not be changed casually.
//  * The lwr@+0 / lwl@+3 pairing, the swr head store and the swl tail
//    store are the little-endian forms of the unaligned accesses; the
//    routine therefore targets a little-endian configuration.
//  * pref hint 0x0 is the "load" hint and 0x1 the "store" hint in the
//    MIPS32 PREF encoding; a1 is prefetched for load, a0 for store.
//  * The copy always proceeds toward increasing addresses and contains no
//    overlap check.
//-----------------------------------------------------------------------
LEAF_ENTRY(memcpy_fast)
.frame $sp,0,$31 // vars= 0, regs= 0/0, args= 0, gp= 0
.mask 0x00000000,0
.fmask 0x00000000,0
.set noreorder
.set nomacro
move v0,a0                      // return value = original destination
$copy_user:
pref 0x0,0(a1)                  // prefetch the first lines of source ...
pref 0x1,0(a0)                  // ... and destination
sltiu t2,a2,4                   // t2 = (count < 4), unsigned compare
andi t1,a0,0x3                  // t1 = dst & 3 (dst misalignment)
pref 0x0,32(a1)
pref 0x1,32(a0)
bnez t2,$copy_bytes_checklen    // fewer than 4 bytes: byte copy only
andi t0,a1,0x3                  // (delay slot) t0 = src & 3 (src misalignment)
pref 0x0,64(a1)
pref 0x1,64(a0)
bnez t1,$dst_unaligned          // dst not on a word boundary: fix the head first
nop                             // (delay slot)
// The delay slot of this branch is the srl at $both_aligned below. It is
// harmless when the branch is taken because the target recomputes t0, and
// on fall-through execution simply continues with the beqz after it.
bnez t0,$src_unaligned_dst_aligned
// ---- source and destination both word-aligned ------------------------
$both_aligned:
srl t0,a2,0x5                   // t0 = count / 32 (number of 32-byte blocks)
beqz t0,$cleanup_both_aligned   // no full 32-byte block
andi t8,a2,0x1f                 // (delay slot) t8 = count % 32 = loop stop value
pref 0x0,96(a1)
pref 0x1,96(a0)
nop
// 32 bytes per iteration; loads and stores are interleaved and the
// pointers are advanced mid-block, hence the negative store offsets.
$both_aligned_after_nop:
lw t0,0(a1)
lw t1,4(a1)
lw t2,8(a1)
lw t3,12(a1)
addiu a2,a2,-32                 // count -= 32
lw t4,16(a1)
lw t7,20(a1)
sw t0,0(a0)
sw t1,4(a0)
lw t0,24(a1)                    // t0/t1 are reused once their first values are stored
lw t1,28(a1)
addiu a1,a1,32                  // src += 32
addiu a0,a0,32                  // dst += 32
sw t2,-24(a0)                   // offsets are relative to the advanced dst
sw t3,-20(a0)
sw t4,-16(a0)
sw t7,-12(a0)
sw t0,-8(a0)
sw t1,-4(a0)
pref 0x0,256(a1)                // prefetch ahead of the advanced pointers
pref 0x1,256(a0)
bne a2,t8,$both_aligned_after_nop // loop until only count % 32 bytes remain
nop                             // (delay slot)
// Fewer than 32 bytes remain, both pointers word-aligned.
$cleanup_both_aligned:
beqz a2,$done
sltiu t0,a2,16                  // (delay slot) t0 = (remaining < 16)
bnez t0,$less_than_4units       // no 16-byte block to copy
andi t8,a2,0x3                  // (delay slot) t8 = remaining % 4 = tail byte count
lw t0,0(a1)                     // copy one 16-byte block
lw t1,4(a1)
lw t2,8(a1)
lw t3,12(a1)
addiu a2,a2,-16
addiu a1,a1,16
sw t0,0(a0)
sw t1,4(a0)
sw t2,8(a0)
sw t3,12(a0)
beqz a2,$done
addiu a0,a0,16                  // (delay slot) dst += 16
// Fewer than 16 bytes remain (non-zero), t8 = remaining % 4.
$less_than_4units:
beq t8,a2,$copy_bytes           // remaining < 4: no whole word left
nop                             // (delay slot)
$less_than_4units_after_nop:
lw t0,0(a1)                     // copy one word per iteration
addiu a1,a1,4
addiu a2,a2,-4
sw t0,0(a0)
bne t8,a2,$less_than_4units_after_nop // loop until only the tail bytes remain
addiu a0,a0,4                   // (delay slot) dst += 4
// Tail of 1..3 bytes on the aligned path: load a whole source word, shift
// the wanted low-order bytes to the top of the register and write them
// with a single swl ending at the last destination byte.
beqz a2,$done
addu t1,a0,a2                   // (delay slot) t1 = dst + remaining (one past the last byte)
li t2,32
sll t8,a2,0x3                   // t8 = remaining * 8 (bits to keep)
// NOTE(review): this full-word load reads up to 3 bytes beyond the
// requested source range; a1 is word-aligned on this path, so the read
// stays inside one aligned word.
lw t0,0(a1)
subu t2,t2,t8                   // t2 = 32 - bits to keep
sllv t0,t0,t2                   // move the low 'remaining' bytes to the top of t0
swl t0,-1(t1)                   // store those bytes to dst .. dst+remaining-1
jr ra
move a2,zero                    // (delay slot) a2 = 0
// ---- destination not word-aligned -------------------------------------
// On entry: t1 = dst & 3 (non-zero), t0 = src & 3, count >= 4.
$dst_unaligned:
lwr t3,0(a1)                    // t3 = unaligned word at src (lwr/lwl pair)
li t2,4
lwl t3,3(a1)
subu t2,t2,t1                   // t2 = 4 - (dst & 3) = bytes up to the next word boundary
xor t8,t0,t1                    // t8 == 0 when src and dst share the same misalignment
swr t3,0(a0)                    // store the low t2 bytes of t3 at dst .. word end
// NOTE(review): count >= 4 and t2 <= 3 on this path, so this branch
// appears never to be taken; only its delay slot matters.
beq a2,t2,$done
subu a2,a2,t2                   // (delay slot) count -= head bytes
addu a0,a0,t2                   // dst is now word-aligned
beqz t8,$both_aligned           // src became aligned as well: use the aligned path
addu a1,a1,t2                   // (delay slot) src += head bytes
// ---- destination aligned, source not ----------------------------------
$src_unaligned_dst_aligned:
srl t0,a2,0x4                   // t0 = count / 16 (number of 16-byte blocks)
pref 0x0,96(a1)
beqz t0,$cleanup_src_unaligned  // no full 16-byte block
andi t8,a2,0xf                  // (delay slot) t8 = count % 16 = loop stop value
pref 0x1,96(a0)
// 16 bytes per iteration: four lwr/lwl unaligned loads, four aligned stores.
$src_unaligned_dst_aligned_after_prefecth:
lwr t0,0(a1)
lwr t1,4(a1)
addiu a2,a2,-16                 // count -= 16
lwl t0,3(a1)
lwl t1,7(a1)
lwr t2,8(a1)
lwr t3,12(a1)
lwl t2,11(a1)
lwl t3,15(a1)
pref 0x0,288(a1)
addiu a1,a1,16                  // src += 16
sw t0,0(a0)
sw t1,4(a0)
sw t2,8(a0)
sw t3,12(a0)
pref 0x1,288(a0)
bne a2,t8,$src_unaligned_dst_aligned_after_prefecth // loop until only count % 16 bytes remain
addiu a0,a0,16                  // (delay slot) dst += 16
// Fewer than 16 bytes remain, source still unaligned.
$cleanup_src_unaligned:
beqz a2,$done
andi t8,a2,0x3                  // (delay slot) t8 = remaining % 4 = tail byte count
beq t8,a2,$copy_bytes           // remaining < 4: no whole word left
nop                             // (delay slot)
$cleanup_src_unaligned_after_nop:
lwr t0,0(a1)                    // one unaligned word per iteration
lwl t0,3(a1)
addiu a1,a1,4
addiu a2,a2,-4
sw t0,0(a0)
bne a2,t8,$cleanup_src_unaligned_after_nop // loop until only the tail bytes remain
addiu a0,a0,4                   // (delay slot) dst += 4
// ---- byte copy ---------------------------------------------------------
// Reached directly from the entry check (count < 4) or by falling out of
// the loop above with the tail count in a2.
$copy_bytes_checklen:
beqz a2,$done
nop                             // (delay slot)
// Copies 1..3 bytes; every path into this label has 1 <= a2 <= 3. Each
// store sits in the delay slot of the branch/jump that follows the load.
$copy_bytes:
lb t0,0(a1)
addiu a2,a2,-1
beqz a2,$done
sb t0,0(a0)                     // (delay slot) byte 0
lb t0,1(a1)
addiu a2,a2,-1
beqz a2,$done
sb t0,1(a0)                     // (delay slot) byte 1
lb t0,2(a1)
addiu a2,a2,-1
jr ra                           // third byte is the last possible one: return
sb t0,2(a0)                     // (delay slot) byte 2
$done:
jr ra
nop                             // (delay slot)
.set macro
.set reorder
.end memcpy_fast
// End of memcpy_fast.s