📄 memcpy.asm
字号:
comment ~
Copyright (C) 2008 Rouslan Dimitrov
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
~
.686
.model flat
.xmm
.code
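; Both the source and the destination pointer must be 16-byte aligned
; (the copy uses movaps loads and movntps stores, which require aligned operands).
; NOTE: According to MSDN, a __fastcall callee should preserve ecx,
; but that does not seem to be the case in VS2005; this routine overwrites ecx.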
alias <@MemCopy@12> = <memcpy>
;-----------------------------------------------------------------------
; memcpy (exported through the alias @MemCopy@12)
;
; Copies byteCount bytes from the source buffer to the destination buffer
; using aligned SSE loads and non-temporal (cache-bypassing) stores:
; whole 64-byte chunks first, then 16-byte chunks, then single bytes.
;
; ABI:   32-bit, register arguments in ecx/edx plus one stack dword that
;        the callee pops (ret 4). The decorated alias suggests __fastcall
;        with 12 bytes of arguments -- NOTE(review): confirm against the
;        C prototype used by callers.
; In:    ecx     = destination pointer
;        edx     = source pointer
;        [esp+4] = byteCount at entry ([esp+12] after the two USES pushes)
; Out:   nothing; eax is a loop counter and holds no result
; Clobb: eax, ecx, xmm0-xmm3, flags (esi/edi are saved and restored by USES)
; Pre:   both pointers 16-byte aligned (movaps/movntps require aligned
;        operands); the byte tail uses rep movsb, which copies upward only
;        when the direction flag is clear -- assumed to be guaranteed by
;        the calling convention.
;-----------------------------------------------------------------------
memcpy proc uses esi edi
        mov     esi, edx                ; esi = source cursor
        mov     edi, ecx                ; edi = destination cursor
        prefetchnta [esi]               ; non-temporal prefetch hint for the first source line
        mov     eax, [esp+12]           ; byteCount (above saved edi, saved esi, return address)
        mov     ecx, eax
        and     ecx, 63                 ; ecx = byteCount mod 64, handled after the main loop
        shr     eax, 6                  ; eax = number of whole 64-byte chunks
        jz      $tail16                 ; fewer than 64 bytes: skip the main loop

$copy64:                                ; invariant: eax = 64-byte chunks left, > 0
        prefetchnta [esi+64]            ; hint for the next chunk (a prefetch is only a hint)
        movaps  xmm0, [esi+0]           ; load one 64-byte chunk
        movaps  xmm1, [esi+16]
        movaps  xmm2, [esi+32]
        movaps  xmm3, [esi+48]
        movntps [edi+0], xmm0           ; store it with non-temporal stores
        movntps [edi+16], xmm1
        movntps [edi+32], xmm2
        movntps [edi+48], xmm3
        add     esi, 64
        add     edi, 64
        dec     eax
        jnz     $copy64

$tail16:                                ; invariant: ecx = bytes left, 0..63
        cmp     ecx, 16
        jb      $tail1                  ; less than one 16-byte chunk left
        movaps  xmm0, [esi]
        movntps [edi], xmm0
        add     esi, 16
        add     edi, 16
        sub     ecx, 16
        jmp     $tail16

$tail1:                                 ; ecx = bytes left, 0..15
        rep movsb                       ; copies nothing when ecx = 0

        ; movntps stores are weakly ordered: fence them so the copied data
        ; is globally visible before any store the caller issues afterwards.
        sfence
        ret     4                       ; callee pops byteCount
memcpy endp
end
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -