📄 quant-a.asm.svn-base
字号:
;*****************************************************************************
;* quant-a.asm: h264 encoder library
;*****************************************************************************
;* Copyright (C) 2005 x264 project
;*
;* Authors: Alex Izvorski <aizvorksi@gmail.com>
;*          Christian Heine <sennindemokrit@gmx.net>
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
;*****************************************************************************

;*****************************************************************************
;*                                                                           *
;*  Revision history:                                                        *
;*                                                                           *
;*  2005.07.26  quant 4x4 & 8x8 MMX functions (AI)                           *
;*  2005.09.04  quant MMXEXT (added precision) and DC (CH)                   *
;*  2005.09.21  faster MMX and added MMXEXT16 (CH)                           *
;*                                                                           *
;*****************************************************************************

BITS 64

%include "amd64inc.asm"

SECTION .rodata

pd_1:   times 2 dd 1

SECTION .text

cglobal x264_quant_2x2_dc_core15_mmx
cglobal x264_quant_4x4_dc_core15_mmx
cglobal x264_quant_4x4_core15_mmx
cglobal x264_quant_8x8_core15_mmx

; The SSSE3 bodies below are only assembled under HAVE_SSE3, so their symbols
; are exported under the same guard: the object then never declares a global
; that it does not define.
%ifdef HAVE_SSE3
cglobal x264_quant_4x4_dc_core15_ssse3
cglobal x264_quant_4x4_core15_ssse3
cglobal x264_quant_8x8_core15_ssse3
%endif

cglobal x264_quant_2x2_dc_core16_mmxext
cglobal x264_quant_4x4_dc_core16_mmxext
cglobal x264_quant_4x4_core16_mmxext
cglobal x264_quant_8x8_core16_mmxext

cglobal x264_quant_2x2_dc_core32_mmxext
cglobal x264_quant_4x4_dc_core32_mmxext
cglobal x264_quant_4x4_core32_mmxext
cglobal x264_quant_8x8_core32_mmxext

cglobal x264_dequant_4x4_mmx
cglobal x264_dequant_8x8_mmx

;-----------------------------------------------------------------------------
; MMX_QUANT_AC_START
;   Loads the scalar arguments shared by the MMX AC quantisers.
;   In:  parm1q = &dct[0][0]       (not touched here)
;        parm2q = &quant_mf[0][0]  (not touched here)
;        parm3d = i_qbits, parm4d = f
;   Out: mm6 = i_qbits in the low dword
;        mm7 = f replicated into both dwords
;-----------------------------------------------------------------------------
%macro MMX_QUANT_AC_START 0
    movd        mm6, parm3d         ; i_qbits
    movd        mm7, parm4d         ; f
    punpckldq   mm7, mm7            ; f in each dword
%endmacro

;-----------------------------------------------------------------------------
; MMX_QUANT15_DC_START
;   Loads the scalar arguments of the MMX DC quantisers.
;   In:  parm1q = &dct[0][0]       (not touched here)
;        parm2d = i_qmf, parm3d = i_qbits, parm4d = f
;   Out: mm5 = low word of i_qmf replicated into all four words
;        mm6 = i_qbits in the low dword
;        mm7 = f replicated into both dwords
;-----------------------------------------------------------------------------
%macro MMX_QUANT15_DC_START 0
    movd        mm5, parm2d         ; i_qmf
    movd        mm6, parm3d         ; i_qbits
    movd        mm7, parm4d         ; f
    punpcklwd   mm5, mm5
    punpcklwd   mm5, mm5            ; i_qmf in each word
    punpckldq   mm7, mm7            ; f in each dword
%endmacro

;-----------------------------------------------------------------------------
; SSE2_QUANT_AC_START
;   XMM counterpart of MMX_QUANT_AC_START.
;   In:  parm3d = i_qbits, parm4d = f
;   Out: xmm6 = i_qbits in the low dword
;        xmm7 = f replicated into all four dwords
;   NOTE(review): xmm6/xmm7 are callee-saved under the Microsoft x64 ABI and
;   are not preserved here - confirm which ABIs amd64inc.asm targets.
;-----------------------------------------------------------------------------
%macro SSE2_QUANT_AC_START 0
    movd        xmm6, parm3d        ; i_qbits
    movd        xmm7, parm4d        ; f
    pshufd      xmm7, xmm7, 0       ; f in each dword
%endmacro

;-----------------------------------------------------------------------------
; SSE2_QUANT15_DC_START
;   XMM counterpart of MMX_QUANT15_DC_START.
;   In:  parm2d = i_qmf, parm3d = i_qbits, parm4d = f
;   Out: xmm5 = low word of i_qmf replicated into all eight words
;        xmm6 = i_qbits in the low dword
;        xmm7 = f replicated into all four dwords
;-----------------------------------------------------------------------------
%macro SSE2_QUANT15_DC_START 0
    movd        xmm5, parm2d        ; i_qmf
    movd        xmm6, parm3d        ; i_qbits
    movd        xmm7, parm4d        ; f
    pshuflw     xmm5, xmm5, 0
    punpcklqdq  xmm5, xmm5          ; i_qmf in each word
    pshufd      xmm7, xmm7, 0       ; f in each dword
%endmacro

;-----------------------------------------------------------------------------
; MMX_QUANT15_1x4  dct, mf, qbits, f
;   Quantises four int16 coefficients in place:
;       coeff = sign(coeff) * ((abs(coeff) * mf + f) >> qbits)
;   with the shifted 32-bit results packed back to words by signed saturation.
;
;   %1 (m64)      dct[y][x]
;   %2 (m64/mmx)  quant_mf[y][x] or quant_mf[0][0] (as int16_t)
;   %3 (mmx)      i_qbits in the low doubleword
;   %4 (mmx)      f as doublewords
;   trashes mm0-mm2,mm4
;-----------------------------------------------------------------------------
%macro MMX_QUANT15_1x4 4
    movq        mm0, %1             ; load dct coeffs
    pxor        mm4, mm4
    pcmpgtw     mm4, mm0            ; sign(coeff): all-ones where coeff < 0
    pxor        mm0, mm4
    psubw       mm0, mm4            ; abs(coeff)
    movq        mm2, mm0
    pmullw      mm0, %2             ; low  16 bits of abs(coeff) * mf
    pmulhw      mm2, %2             ; high 16 bits (signed multiply)
    movq        mm1, mm0
    punpcklwd   mm0, mm2            ; 32-bit products of coeffs 0-1
    punpckhwd   mm1, mm2            ; 32-bit products of coeffs 2-3
    paddd       mm0, %4             ; round with f
    paddd       mm1, %4
    psrad       mm0, %3
    psrad       mm1, %3
    packssdw    mm0, mm1            ; pack
    pxor        mm0, mm4            ; restore sign
    psubw       mm0, mm4
    movq        %1, mm0             ; store
%endmacro

;-----------------------------------------------------------------------------
; SSSE3_QUANT15_1x8  dct, mf, qbits, f
;   Eight-coefficient XMM version of MMX_QUANT15_1x4; pabsw/psignw replace
;   the compare/xor/subtract sign handling.
;
;   %1 (m128)      dct row(s); accessed with movdqa, so 16-byte aligned
;   %2 (m128/xmm)  eight int16 multipliers
;   %3 (xmm)       i_qbits in the low doubleword
;   %4 (xmm)       f as doublewords
;   trashes xmm0-xmm2,xmm4
;-----------------------------------------------------------------------------
%macro SSSE3_QUANT15_1x8 4
    movdqa      xmm0, %1            ; load dct coeffs
    movdqa      xmm4, xmm0          ; save sign
    pabsw       xmm0, xmm0
    movdqa      xmm2, xmm0
    pmullw      xmm0, %2            ; low  16 bits of abs(coeff) * mf
    pmulhw      xmm2, %2            ; high 16 bits (signed multiply)
    movdqa      xmm1, xmm0
    punpcklwd   xmm0, xmm2          ; 32-bit products of coeffs 0-3
    punpckhwd   xmm1, xmm2          ; 32-bit products of coeffs 4-7
    paddd       xmm0, %4            ; round with f
    paddd       xmm1, %4
    psrad       xmm0, %3
    psrad       xmm1, %3
    packssdw    xmm0, xmm1          ; pack
    psignw      xmm0, xmm4          ; restore sign
    movdqa      %1, xmm0            ; store
%endmacro

ALIGN 16
;-----------------------------------------------------------------------------
;   void x264_quant_2x2_dc_core15_mmx( int16_t dct[2][2],
;       int const i_qmf, int const i_qbits, int const f );
;
;   Quantises the four DC coefficients in place with one multiplier.
;   Uses mm0-mm2, mm4-mm7; no emms is issued here.
;   NOTE(review): presumably the caller is responsible for emms - confirm.
;-----------------------------------------------------------------------------
x264_quant_2x2_dc_core15_mmx:
    MMX_QUANT15_DC_START
    MMX_QUANT15_1x4 [parm1q], mm5, mm6, mm7
    ret

ALIGN 16
;-----------------------------------------------------------------------------
;   void x264_quant_4x4_dc_core15_mmx( int16_t dct[4][4],
;       int const i_qmf, int const i_qbits, int const f );
;
;   Quantises sixteen coefficients in place with one multiplier, four words
;   per unrolled step. parm1q is advanced by 8 bytes per step.
;-----------------------------------------------------------------------------
x264_quant_4x4_dc_core15_mmx:
    MMX_QUANT15_DC_START
%rep 4
    MMX_QUANT15_1x4 [parm1q], mm5, mm6, mm7
    add         parm1q, byte 8      ; next four int16 coefficients
%endrep
    ret

ALIGN 16
;-----------------------------------------------------------------------------
;   void x264_quant_4x4_core15_mmx( int16_t dct[4][4],
;       int const quant_mf[4][4], int const i_qbits, int const f );
;
;   Quantises sixteen coefficients in place, each with its own multiplier.
;   Every step narrows four 32-bit quant_mf entries to int16 (signed
;   saturation) before multiplying. parm1q and parm2q are advanced.
;-----------------------------------------------------------------------------
x264_quant_4x4_core15_mmx:
    MMX_QUANT_AC_START
%rep 4
    movq        mm5, [parm2q]       ; quant_mf entries 0-1 (dwords)
    packssdw    mm5, [parm2q+8]     ; + entries 2-3, packed to four words
    MMX_QUANT15_1x4 [parm1q], mm5, mm6, mm7
    add         parm2q, byte 16     ; next four int32 multipliers
    add         parm1q, byte 8      ; next four int16 coefficients
%endrep
    ret

ALIGN 16
;-----------------------------------------------------------------------------
;   void x264_quant_8x8_core15_mmx( int16_t dct[8][8],
;       int const quant_mf[8][8], int const i_qbits, int const f );
;
;   Same scheme as x264_quant_4x4_core15_mmx, unrolled 16 times to cover
;   64 coefficients. parm1q and parm2q are advanced.
;-----------------------------------------------------------------------------
x264_quant_8x8_core15_mmx:
    MMX_QUANT_AC_START
%rep 16
    movq        mm5, [parm2q]       ; quant_mf entries 0-1 (dwords)
    packssdw    mm5, [parm2q+8]     ; + entries 2-3, packed to four words
    MMX_QUANT15_1x4 [parm1q], mm5, mm6, mm7
    add         parm2q, byte 16     ; next four int32 multipliers
    add         parm1q, byte 8      ; next four int16 coefficients
%endrep
    ret

%ifdef HAVE_SSE3

ALIGN 16
;-----------------------------------------------------------------------------
;   void x264_quant_4x4_dc_core15_ssse3( int16_t dct[4][4],
;       int const i_qmf, int const i_qbits, int const f );
;
;   Quantises sixteen coefficients in place with one multiplier, eight words
;   per step. dct is accessed with movdqa (16-byte alignment required).
;-----------------------------------------------------------------------------
x264_quant_4x4_dc_core15_ssse3:
    SSE2_QUANT15_DC_START
    SSSE3_QUANT15_1x8 [parm1q], xmm5, xmm6, xmm7
    SSSE3_QUANT15_1x8 [parm1q+16], xmm5, xmm6, xmm7
    ret

ALIGN 16
;-----------------------------------------------------------------------------
;   void x264_quant_4x4_core15_ssse3( int16_t dct[4][4],
;       int const quant_mf[4][4], int const i_qbits, int const f );
;
;   Quantises sixteen coefficients in place, each with its own multiplier.
;   Step x reads 32 bytes of quant_mf at offset 32*x and 16 bytes of dct at
;   offset 16*x; both are accessed aligned (movdqa / memory operand).
;-----------------------------------------------------------------------------
x264_quant_4x4_core15_ssse3:
    SSE2_QUANT_AC_START
%assign x 0
%rep 2
    movdqa      xmm5, [parm2q+32*x]     ; quant_mf entries 0-3 (dwords)
    packssdw    xmm5, [parm2q+32*x+16]  ; + entries 4-7, packed to eight words
    SSSE3_QUANT15_1x8 [parm1q+16*x], xmm5, xmm6, xmm7
    %assign x x+1
%endrep
    ret

ALIGN 16
;-----------------------------------------------------------------------------
;   void x264_quant_8x8_core15_ssse3( int16_t dct[8][8],
;       int const quant_mf[8][8], int const i_qbits, int const f );
;
;   Same scheme as x264_quant_4x4_core15_ssse3, unrolled 8 times to cover
;   64 coefficients.
;-----------------------------------------------------------------------------
x264_quant_8x8_core15_ssse3:
    SSE2_QUANT_AC_START
%assign x 0
%rep 8
    movdqa      xmm5, [parm2q+32*x]     ; quant_mf entries 0-3 (dwords)
    packssdw    xmm5, [parm2q+32*x+16]  ; + entries 4-7, packed to eight words
    SSSE3_QUANT15_1x8 [parm1q+16*x], xmm5, xmm6, xmm7
    %assign x x+1
%endrep
    ret

%endif ; HAVE_SSE3

; ============================================================================

;-----------------------------------------------------------------------------
; MMXEXT_QUANT16_DC_START
;   Loads the scalar arguments of the MMXEXT 16-bit DC quantisers.
;   In:  parm1q = &dct[0][0]       (not touched here)
;        parm2d = i_qmf, parm3d = i_qbits, parm4d = f
;   Out: mm5 = low word of i_qmf replicated into all four words
;        mm6 = i_qbits in the low dword
;        mm7 = f replicated into both dwords
;-----------------------------------------------------------------------------
%macro MMXEXT_QUANT16_DC_START 0
    movd        mm5, parm2d         ; i_qmf
    movd        mm6, parm3d         ; i_qbits
    movd        mm7, parm4d         ; f
    pshufw      mm5, mm5, 0         ; i_qmf in each word
    punpckldq   mm7, mm7            ; f in each dword
%endmacro

%macro MMXEXT_QUANT16_1x4 4
;;; %1      (m64)       dct[y][x]
;;; %2      (m64/mmx)   quant_mf[y][x] or quant_mf[0][0] (as uint16_t)
;;; %3      (mmx)       i_qbits in the low doubleword
;;; %4      (mmx)       f as doublewords
;;; trashes mm0-mm2,mm4
    movq        mm0, %1             ; load dct coeffs
    pxor        mm4, mm4
    pcmpgtw     mm4, mm0            ; sign(coeff)
    pxor        mm0, mm4
    psubw       mm0, mm4            ; abs(coeff)
    movq        mm2, mm0
    pmullw      mm0, %2
    pmulhuw     mm2, %2
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -