📄 x86inc.asm
字号:
;*****************************************************************************
;* x86inc.asm
;*****************************************************************************
;* Copyright (C) 2005-2008 x264 project
;*
;* Authors: Loren Merritt <lorenm@u.washington.edu>
;*          Anton Mitrofanov <BugMaster@narod.ru>
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
;*****************************************************************************

; Select the 64-bit ABI flavour. Exactly one of WIN64 / UNIX64 is defined when
; ARCH_X86_64 is set; neither is defined for 32-bit builds.
; 64-bit Windows objects may be requested either as win32 output with a 64-bit
; machine, or directly as win64 / x64 output, so all three names are accepted.
%ifdef ARCH_X86_64
    %ifidn __OUTPUT_FORMAT__,win32
        %define WIN64
    %elifidn __OUTPUT_FORMAT__,win64
        %define WIN64
    %elifidn __OUTPUT_FORMAT__,x64
        %define WIN64
    %else
        %define UNIX64
    %endif
%endif

; FIXME: All of the 64bit asm functions that take a stride as an argument
; via register, assume that the high dword of that register is filled with 0.
; This is true in practice (since we never do any 64bit arithmetic on strides,
; and x264's strides are all positive), but is not guaranteed by the ABI.

; Name of the .rodata section.
; Kludge: Something on OS X fails to align .rodata even given an align attribute,
; so use a different read-only section.
%macro SECTION_RODATA 0
    %ifidn __OUTPUT_FORMAT__,macho64
        SECTION .text align=16
    %elifidn __OUTPUT_FORMAT__,macho
        SECTION .text align=16
        fakegot:
    %else
        SECTION .rodata align=16
    %endif
%endmacro

; PIC support macros.
; x86_64 can't fit 64bit address literals in most instruction types,
; so shared objects (under the assumption that they might be anywhere
; in memory) must use an address mode that does fit.
; So all accesses to global variables must use this macro, e.g.
;     mov eax, [foo GLOBAL]
; instead of
;     mov eax, [foo]
;
; x86_32 doesn't require PIC.
; Some distros prefer shared objects to be PIC, but nothing breaks if
; the code contains a few textrels, so we'll skip that complexity.

; PIC is forced on for WIN64 and forced off for 32-bit builds; on other
; 64-bit targets whatever the build system defined is left untouched.
%ifdef WIN64
    %define PIC
%elifndef ARCH_X86_64
    %undef PIC
%endif
%ifdef PIC
    %define GLOBAL wrt rip
%else
    %define GLOBAL
%endif

; Macros to eliminate most code duplication between x86_32 and x86_64:
; Currently this works only for leaf functions which load all their arguments
; into registers at the start, and make no other use of the stack. Luckily that
; covers most of x264's asm.

; PROLOGUE:
; %1 = number of arguments. loads them from stack if needed.
; %2 = number of registers used. pushes callee-saved regs if needed.
; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
; %4 = list of names to define to registers
; PROLOGUE can also be invoked by adding the same options to cglobal

; e.g.
; cglobal foo, 2,3,0, dst, src, tmp
; declares a function (foo), taking two args (dst and src), one local variable
; (tmp) and no xmm registers. The xmm count (%3) must be given whenever names
; follow, because the name list starts at %4.

; TODO Some functions can use some args directly from the stack. If they're the
; last args then you can just not declare them, but if they're in the middle
; we need more flexible macro.

; RET:
; Pops anything that was pushed by PROLOGUE

; REP_RET:
; Same, but if it doesn't pop anything it becomes a 2-byte ret, for athlons
; which are slow when a normal ret follows a branch.

; registers:
; rN and rNq are the native-size register holding function argument N
; rNd, rNw, rNb are dword, word, and byte size
; rNm is the original location of arg N (a register or on the stack), dword
; rNmp is native size

; DECLARE_REG index, qword_reg, dword_reg, word_reg, byte_reg, arg_location
; Defines the rN / rNq / rNd / rNw / rNb / rNm / rNmp aliases for argument N.
%macro DECLARE_REG 6
    %define r%1q %2
    %define r%1d %3
    %define r%1w %4
    %define r%1b %5
    %define r%1m %6
    %ifid %6 ; i.e. it's a register
        %define r%1mp %2
    %elifdef ARCH_X86_64 ; memory
        %define r%1mp qword %6
    %else
        %define r%1mp dword %6
    %endif
    %define r%1  %2
%endmacro

; DECLARE_REG_SIZE word_reg, byte_reg
; Gives a named register (ax, bx, ...) the same q/d/w/b size suffixes that the
; numbered rN registers have, under both the r- and e- prefixes. On 32-bit
; builds the r-prefixed name is additionally mapped onto the e-register.
%macro DECLARE_REG_SIZE 2
    %define r%1q r%1
    %define e%1q r%1
    %define r%1d e%1
    %define e%1d e%1
    %define r%1w %1
    %define e%1w %1
    %define r%1b %2
    %define e%1b %2
%ifndef ARCH_X86_64
    %define r%1  e%1
%endif
%endmacro

DECLARE_REG_SIZE ax, al
DECLARE_REG_SIZE bx, bl
DECLARE_REG_SIZE cx, cl
DECLARE_REG_SIZE dx, dl
DECLARE_REG_SIZE si, sil
DECLARE_REG_SIZE di, dil
DECLARE_REG_SIZE bp, bpl

; t# defines for when per-arch register allocation is more complex than just function arguments
; DECLARE_REG_TMP a, b, c, ... maps t0 -> r<a>, t1 -> r<b>, t2 -> r<c>, ...
; (relies on CAT_XDEFINE, which is defined outside this part of the file).
%macro DECLARE_REG_TMP 1-*
    %assign %%i 0
    %rep %0
        CAT_XDEFINE t, %%i, r%1
        %assign %%i %%i+1
        %rotate 1
    %endrep
%endmacro

; DECLARE_REG_TMP_SIZE n, ... defines the sized aliases tNq/tNd/tNw/tNb so that
; they follow whatever tN currently expands to.
%macro DECLARE_REG_TMP_SIZE 0-*
    %rep %0
        %define t%1q t%1 %+ q
        %define t%1d t%1 %+ d
        %define t%1w t%1 %+ w
        %define t%1b t%1 %+ b
        %rotate 1
    %endrep
%endmacro

DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7

; Size in bytes of a general-purpose register / stack slot.
%ifdef ARCH_X86_64
    %define gprsize 8
%else
    %define gprsize 4
%endif

; PUSH / POP: like push / pop, but also keep stack_offset in sync so that the
; stack-relative argument locations (rNm) stay valid.
%macro PUSH 1
    push %1
    %assign stack_offset stack_offset+gprsize
%endmacro

%macro POP 1
    pop %1
    %assign stack_offset stack_offset-gprsize
%endmacro

; SUB / ADD: like sub / add; stack_offset is adjusted only when the destination
; operand is spelled exactly "rsp".
%macro SUB 2
    sub %1, %2
    %ifidn %1, rsp
        %assign stack_offset stack_offset+(%2)
    %endif
%endmacro

%macro ADD 2
    add %1, %2
    %ifidn %1, rsp
        %assign stack_offset stack_offset-(%2)
    %endif
%endmacro

; movifnidn dst, src: emit "mov dst, src" unless both operands are textually
; identical (in which case the move would be a no-op).
%macro movifnidn 2
    %ifnidn %1, %2
        mov %1, %2
    %endif
%endmacro

; movsxdifnidn dst, src: same as movifnidn, but using movsxd.
%macro movsxdifnidn 2
    %ifnidn %1, %2
        movsxd %1, %2
    %endif
%endmacro

; ASSERT expr: assemble-time check; raises an error when expr evaluates to 0.
%macro ASSERT 1
    %if (%1) == 0
        %error assert failed
    %endif
%endmacro

; DEFINE_ARGS name0, name1, ...
; Binds <nameN>q/d/w/b to rNq/rNd/rNw/rNb. Names bound by a previous invocation
; are undefined first; the count of live names is kept in n_arg_names.
; (relies on CAT_UNDEF / CAT_XDEFINE, defined outside this part of the file).
%macro DEFINE_ARGS 0-*
    %ifdef n_arg_names
        %assign %%i 0
        %rep n_arg_names
            CAT_UNDEF arg_name %+ %%i, q
            CAT_UNDEF arg_name %+ %%i, d
            CAT_UNDEF arg_name %+ %%i, w
            CAT_UNDEF arg_name %+ %%i, b
            CAT_UNDEF arg_name, %%i
            %assign %%i %%i+1
        %endrep
    %endif

    %assign %%i 0
    %rep %0
        %xdefine %1q r %+ %%i %+ q
        %xdefine %1d r %+ %%i %+ d
        %xdefine %1w r %+ %%i %+ w
        %xdefine %1b r %+ %%i %+ b
        CAT_XDEFINE arg_name, %%i, %1
        %assign %%i %%i+1
        %rotate 1
    %endrep
    %assign n_arg_names %%i
%endmacro

%ifdef WIN64 ; Windows x64 ;=================================================

; Args 0-3 arrive in rcx, rdx, r8, r9. Args 4-6 live on the stack and are
; loaded into rdi, rsi, rax by PROLOGUE. Stack argument N is found at
; [rsp + stack_offset + 8 + N*8]: 8 bytes of return address, then one 8-byte
; slot per argument (the first four slots being the 32-byte shadow space).
DECLARE_REG 0, rcx, ecx, cx,  cl,  ecx
DECLARE_REG 1, rdx, edx, dx,  dl,  edx
DECLARE_REG 2, r8,  r8d, r8w, r8b, r8d
DECLARE_REG 3, r9,  r9d, r9w, r9b, r9d
DECLARE_REG 4, rdi, edi, di,  dil, [rsp + stack_offset + 40]
DECLARE_REG 5, rsi, esi, si,  sil, [rsp + stack_offset + 48]
DECLARE_REG 6, rax, eax, ax,  al,  [rsp + stack_offset + 56]
%define r7m [rsp + stack_offset + 64]
%define r8m [rsp + stack_offset + 72]

; Load stack argument reg_id into r<reg_id>, but only if the function declared
; more than reg_id arguments.
%macro LOAD_IF_USED 2 ; reg_id, number_of_args
    %if %1 < %2
        mov r%1, [rsp + stack_offset + 8 + %1*8]
    %endif
%endmacro

; WIN64 PROLOGUE: see the parameter description near the top of this file.
; - Saves r4/r5 (rdi/rsi, callee-saved on Win64) when more than 4 GPRs are used.
; - Saves xmm6 and above (callee-saved on Win64) when more than 6 xmm registers
;   are used, in a freshly reserved stack area.
; - Loads stack arguments 4..6 and binds the optional argument names.
%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
    ASSERT %2 >= %1
    %assign regs_used %2
    ASSERT regs_used <= 7
    %if %0 > 2
        %assign xmm_regs_used %3
    %else
        %assign xmm_regs_used 0
    %endif
    ASSERT xmm_regs_used <= 16
    %if regs_used > 4
        push r4
        push r5
        %assign stack_offset stack_offset+16
    %endif
    %if xmm_regs_used > 6
        ; NOTE(review): the extra 16 bytes and the +8 bias presumably keep the
        ; movdqa slots 16-byte aligned given rsp % 16 == 8 at entry - confirm.
        sub rsp, (xmm_regs_used-6)*16+16
        %assign stack_offset stack_offset+(xmm_regs_used-6)*16+16
        %assign %%i xmm_regs_used
        %rep (xmm_regs_used-6)
            %assign %%i %%i-1
            movdqa [rsp + (%%i-6)*16+8], xmm %+ %%i
        %endrep
    %endif
    LOAD_IF_USED 4, %1
    LOAD_IF_USED 5, %1
    LOAD_IF_USED 6, %1
    DEFINE_ARGS %4
%endmacro

; RESTORE_XMM_INTERNAL base_reg
; Reloads the xmm registers saved by PROLOGUE from [base_reg + ...] and releases
; the save area by adding its size to base_reg. Emits nothing when no xmm
; registers were saved. Does not touch stack_offset or xmm_regs_used.
%macro RESTORE_XMM_INTERNAL 1
    %if xmm_regs_used > 6
        %assign %%i xmm_regs_used
        %rep (xmm_regs_used-6)
            %assign %%i %%i-1
            movdqa xmm %+ %%i, [%1 + (%%i-6)*16+8]
        %endrep
        add %1, (xmm_regs_used-6)*16+16
    %endif
%endmacro

; RESTORE_XMM base_reg
; Same as RESTORE_XMM_INTERNAL, and additionally updates the bookkeeping so
; that later code sees the save area as released.
%macro RESTORE_XMM 1
    RESTORE_XMM_INTERNAL %1
    ; Undo exactly what PROLOGUE added to stack_offset: the whole expression
    ; must be subtracted, and only if a save area was reserved at all.
    %if xmm_regs_used > 6
        %assign stack_offset stack_offset-((xmm_regs_used-6)*16+16)
    %endif
    %assign xmm_regs_used 0
%endmacro

; RET: restores what PROLOGUE saved (xmm save area first, then r5/r4 in reverse
; push order). The definition continues beyond the visible part of this file.
%macro RET 0
    RESTORE_XMM_INTERNAL rsp
    %if regs_used > 4
        pop r5
        pop r4
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -