⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 memcpy_fast.s

📁 This is the source release kit for the following system configuration(s): - AMD Alchemy(TM) DBAu1200(
💻 S
字号:
#include "kxmips.h"

.data
.text
.align  2

//-----------------------------------------------------------------------
// memcpy_fast - forward (low-to-high) memory copy for 32-bit MIPS that
//               issues PREF hints ahead of the copy.
//
// In:    a0 = destination address
//        a1 = source address
//        a2 = number of bytes to copy
// Out:   v0 = destination address as passed in a0
// Uses:  t0-t4, t7, t8 as scratch; a0/a1/a2 are advanced/consumed.
//        No stack is used (.frame size 0, $sp is never touched).
//        Presumably callable as memcpy(dst, src, n) - confirm prototype
//        against the C-side declaration.
//
// Strategy:
//   * len < 4                  -> byte copy ($copy_bytes_checklen).
//   * dst not word aligned     -> $dst_unaligned copies 1..3 bytes so dst
//                                 becomes word aligned, then continues.
//   * dst and src both aligned -> 32-byte loop, optional 16-byte step,
//                                 word loop, then a partial-word tail.
//   * dst aligned, src not     -> 16-byte loop and word loop built from
//                                 lwr/lwl unaligned loads + aligned sw,
//                                 then a byte tail.
//
// The whole body is assembled under ".set noreorder": the instruction
// that follows every branch/jump is its delay slot and executes whether
// or not the branch is taken. Several delay slots below do real work.
//
// NOTE(review): the "lwr off / lwl off+3" load pairs, the "swr" head
// store and the "sllv + swl" tail are the little-endian forms of these
// idioms - confirm this file is only built for little-endian targets.
// NOTE(review): nothing here handles overlapping buffers.
//-----------------------------------------------------------------------
LEAF_ENTRY(memcpy_fast)
        .frame  $sp,0,$31               // vars= 0, regs= 0/0, args= 0, gp= 0
        .mask   0x00000000,0
        .fmask  0x00000000,0
        .set    noreorder
        .set    nomacro

        move	v0,a0                   // return value = original dst

$copy_user:
        // Classify the request (length, dst/src alignment) while the
        // first prefetches are in flight. PREF hint 0 = load, 1 = store.
        pref	0x0,0(a1)
        pref	0x1,0(a0)
        sltiu	t2,a2,4                 // t2 = (len < 4)
        andi	t1,a0,0x3               // t1 = dst misalignment (0..3)
        pref	0x0,32(a1)
        pref	0x1,32(a0)
        bnez	t2,$copy_bytes_checklen // fewer than 4 bytes: byte copy
        andi	t0,a1,0x3               // (delay slot) t0 = src misalignment (0..3)
        pref	0x0,64(a1)
        pref	0x1,64(a0)
        bnez	t1,$dst_unaligned       // dst needs aligning first
        nop
        // The delay slot of the next branch is the "srl" at $both_aligned.
        // It overwrites t0 after the branch decision has been made; the
        // taken target recomputes t0, so this is harmless.
        bnez	t0,$src_unaligned_dst_aligned

// ---- dst and src are both word aligned --------------------------------
$both_aligned:
        srl	t0,a2,0x5               // t0 = number of whole 32-byte blocks
        beqz	t0,$cleanup_both_aligned
        andi	t8,a2,0x1f              // (delay slot) t8 = len % 32 = loop exit value of a2
        pref	0x0,96(a1)
        pref	0x1,96(a0)
        nop                             // padding; purpose not evident here (possibly loop alignment)

$both_aligned_after_nop:
        // Copy 32 bytes per iteration; loads and stores are interleaved,
        // and the last six stores use negative offsets because a0 has
        // already been advanced by 32.
        lw	t0,0(a1)
        lw	t1,4(a1)
        lw	t2,8(a1)
        lw	t3,12(a1)
        addiu	a2,a2,-32               // len -= 32
        lw	t4,16(a1)
        lw	t7,20(a1)
        sw	t0,0(a0)
        sw	t1,4(a0)
        lw	t0,24(a1)               // t0/t1 reused for words 6 and 7
        lw	t1,28(a1)
        addiu	a1,a1,32                // src += 32
        addiu	a0,a0,32                // dst += 32
        sw	t2,-24(a0)
        sw	t3,-20(a0)
        sw	t4,-16(a0)
        sw	t7,-12(a0)
        sw	t0,-8(a0)
        sw	t1,-4(a0)
        pref	0x0,256(a1)
        pref	0x1,256(a0)
        bne	a2,t8,$both_aligned_after_nop   // loop until only len % 32 bytes remain
        nop

$cleanup_both_aligned:
        // Here a2 < 32 and both pointers are still word aligned.
        beqz	a2,$done
        sltiu	t0,a2,16                // (delay slot) t0 = (len < 16)
        bnez	t0,$less_than_4units    // fewer than 4 words left: skip the 16-byte step
        andi	t8,a2,0x3               // (delay slot) t8 = len % 4 (unchanged by the -16 below)
        lw	t0,0(a1)                // copy one 16-byte chunk
        lw	t1,4(a1)
        lw	t2,8(a1)
        lw	t3,12(a1)
        addiu	a2,a2,-16
        addiu	a1,a1,16
        sw	t0,0(a0)
        sw	t1,4(a0)
        sw	t2,8(a0)
        sw	t3,12(a0)
        beqz	a2,$done
        addiu	a0,a0,16                // (delay slot) dst += 16

$less_than_4units:
        // a2 is 1..15 here and t8 = a2 % 4.
        beq	t8,a2,$copy_bytes       // a2 < 4: no whole word left, copy bytes
        nop

$less_than_4units_after_nop:
        // Word-at-a-time loop until only t8 (0..3) bytes remain.
        lw	t0,0(a1)
        addiu	a1,a1,4
        addiu	a2,a2,-4
        sw	t0,0(a0)
        bne	t8,a2,$less_than_4units_after_nop
        addiu	a0,a0,4                 // (delay slot) dst += 4
        beqz	a2,$done
        addu	t1,a0,a2                // (delay slot) t1 = one past the last dst byte
        // Partial-word tail (1..3 bytes): load the whole aligned source
        // word, shift the wanted low-order bytes to the top of the
        // register, and store them with swl so that only the bytes from
        // a0 up to t1-1 are written.
        // NOTE(review): the lw reads up to 3 bytes past src+len; they lie
        // in the same aligned word as the last valid byte.
        li	t2,32
        sll	t8,a2,0x3               // t8 = remaining bits (8 * bytes)
        lw	t0,0(a1)
        subu	t2,t2,t8                // t2 = 32 - remaining bits
        sllv	t0,t0,t2                // move the low a2 bytes to the top of t0
        swl	t0,-1(t1)               // write those bytes, ending at the last dst byte
        jr	ra
        move	a2,zero                 // (delay slot) a2 = 0

// ---- dst is not word aligned (reached only with len >= 4) -------------
$dst_unaligned:
        // Load 4 source bytes with an unaligned lwr/lwl pair and store
        // just enough of them (t2 = 4 - dst misalignment) to bring dst up
        // to the next word boundary.
        lwr	t3,0(a1)
        li	t2,4
        lwl	t3,3(a1)
        subu	t2,t2,t1                // t2 = bytes needed to align dst (1..3)
        xor	t8,t0,t1                // t8 == 0 iff src and dst share the same misalignment
        swr	t3,0(a0)                // store the t2 leading bytes up to the word boundary
        // NOTE(review): with len >= 4 and t2 <= 3 this branch looks
        // never-taken; only its delay slot matters.
        beq	a2,t2,$done
        subu	a2,a2,t2                // (delay slot) len -= t2
        addu	a0,a0,t2                // dst is now word aligned
        beqz	t8,$both_aligned        // same misalignment: src is aligned now too
        addu	a1,a1,t2                // (delay slot) src += t2

// ---- dst word aligned, src not ----------------------------------------
$src_unaligned_dst_aligned:
        srl	t0,a2,0x4               // t0 = number of whole 16-byte blocks
        pref	0x0,96(a1)
        beqz	t0,$cleanup_src_unaligned
        andi	t8,a2,0xf               // (delay slot) t8 = len % 16 = loop exit value of a2
        pref	0x1,96(a0)

$src_unaligned_dst_aligned_after_prefecth:
        // Copy 16 bytes per iteration: four unaligned word loads
        // (lwr/lwl pairs) followed by four aligned word stores.
        lwr	t0,0(a1)
        lwr	t1,4(a1)
        addiu	a2,a2,-16               // len -= 16
        lwl	t0,3(a1)
        lwl	t1,7(a1)
        lwr	t2,8(a1)
        lwr	t3,12(a1)
        lwl	t2,11(a1)
        lwl	t3,15(a1)
        pref	0x0,288(a1)
        addiu	a1,a1,16                // src += 16
        sw	t0,0(a0)
        sw	t1,4(a0)
        sw	t2,8(a0)
        sw	t3,12(a0)
        pref	0x1,288(a0)
        bne	a2,t8,$src_unaligned_dst_aligned_after_prefecth
        addiu	a0,a0,16                // (delay slot) dst += 16

$cleanup_src_unaligned:
        // Here a2 < 16.
        beqz	a2,$done
        andi	t8,a2,0x3               // (delay slot) t8 = len % 4
        beq	t8,a2,$copy_bytes       // a2 < 4: no whole word left, copy bytes
        nop

$cleanup_src_unaligned_after_nop:
        // One word per iteration (unaligned load, aligned store) until
        // only t8 (0..3) bytes remain, then fall into the byte tail.
        lwr	t0,0(a1)
        lwl	t0,3(a1)
        addiu	a1,a1,4
        addiu	a2,a2,-4
        sw	t0,0(a0)
        bne	a2,t8,$cleanup_src_unaligned_after_nop
        addiu	a0,a0,4                 // (delay slot) dst += 4

// ---- byte tail: at most 3 bytes ---------------------------------------
$copy_bytes_checklen:
        beqz	a2,$done                // nothing left to copy
        nop

$copy_bytes:
        // Unrolled copy of 1..3 bytes (a2 is non-zero on entry). Each
        // store sits in the delay slot of the branch that tests whether
        // it was the last byte, so it is performed either way.
        lb	t0,0(a1)
        addiu	a2,a2,-1
        beqz	a2,$done
        sb	t0,0(a0)                // (delay slot) byte 0
        lb	t0,1(a1)
        addiu	a2,a2,-1
        beqz	a2,$done
        sb	t0,1(a0)                // (delay slot) byte 1
        lb	t0,2(a1)
        addiu	a2,a2,-1
        jr	ra                      // third byte is always the last one
        sb	t0,2(a0)                // (delay slot) byte 2

$done:
        jr	ra                      // v0 still holds the original dst
        nop

        .set    macro
        .set    reorder
        .end    memcpy_fast

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -