📄 quant-a.asm.svn-base

📁 H.264源码程序
💻 SVN-BASE
📖 第 1 页 / 共 2 页
字号:
12 下一页
;*****************************************************************************
;* quant-a.asm: h264 encoder library
;*****************************************************************************
;* Copyright (C) 2005 x264 project
;*
;* Authors: Alex Izvorski <aizvorksi@gmail.com>
;*          Christian Heine <sennindemokrit@gmx.net>
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
;*****************************************************************************

;*****************************************************************************
;*                                                                           *
;*  Revision history:                                                        *
;*                                                                           *
;*  2005.07.26  quant 4x4 & 8x8 MMX functions (AI)                           *
;*  2005.09.04  quant MMXEXT (added precision) and DC (CH)                   *
;*  2005.09.21  faster MMX and added MMXEXT16 (CH)                           *
;*                                                                           *
;*****************************************************************************

; Syntax: NASM (Intel operand order), 64-bit mode.
; cglobal and the parmNq / parmNd argument names come from amd64inc.asm
; (not visible here) -- presumably ABI argument-register aliases; confirm there.
; None of the MMX routines below executes emms; presumably the caller is
; expected to do so before using x87 code -- TODO confirm.

BITS 64

%include "amd64inc.asm"

SECTION .rodata
pd_1:  times 2 dd 1

SECTION .text

cglobal x264_quant_2x2_dc_core15_mmx
cglobal x264_quant_4x4_dc_core15_mmx
cglobal x264_quant_4x4_core15_mmx
cglobal x264_quant_8x8_core15_mmx

cglobal x264_quant_4x4_dc_core15_ssse3
cglobal x264_quant_4x4_core15_ssse3
cglobal x264_quant_8x8_core15_ssse3

cglobal x264_quant_2x2_dc_core16_mmxext
cglobal x264_quant_4x4_dc_core16_mmxext
cglobal x264_quant_4x4_core16_mmxext
cglobal x264_quant_8x8_core16_mmxext

cglobal x264_quant_2x2_dc_core32_mmxext
cglobal x264_quant_4x4_dc_core32_mmxext
cglobal x264_quant_4x4_core32_mmxext
cglobal x264_quant_8x8_core32_mmxext

cglobal x264_dequant_4x4_mmx
cglobal x264_dequant_8x8_mmx

;-----------------------------------------------------------------------------
; MMX_QUANT_AC_START
;   Loads the per-call constants for the AC (per-coefficient quant_mf) MMX
;   routines.
;   Out: mm6 = i_qbits in the low dword (shift count)
;        mm7 = f replicated into both dwords
;   parm1q (&dct[0][0]) and parm2q (&quant_mf[0][0]) are used in place by the
;   callers of this macro, so nothing is loaded for them.
;-----------------------------------------------------------------------------
%macro MMX_QUANT_AC_START 0
;   mov         rdi, rdi        ; &dct[0][0]
;   mov         rsi, rsi        ; &quant_mf[0][0]
    movd        mm6, parm3d     ; i_qbits
    movd        mm7, parm4d     ; f
    punpckldq   mm7, mm7        ; f in each dword
%endmacro

;-----------------------------------------------------------------------------
; MMX_QUANT15_DC_START
;   Loads the per-call constants for the DC (single i_qmf) MMX routines.
;   Out: mm5 = low 16 bits of i_qmf replicated into all four words
;        mm6 = i_qbits in the low dword (shift count)
;        mm7 = f replicated into both dwords
;-----------------------------------------------------------------------------
%macro MMX_QUANT15_DC_START 0
;   mov         rdi, rdi        ; &dct[0][0]
    movd        mm5, parm2d     ; i_qmf
    movd        mm6, parm3d     ; i_qbits
    movd        mm7, parm4d     ; f
    punpcklwd   mm5, mm5
    punpcklwd   mm5, mm5        ; i_qmf in each word
    punpckldq   mm7, mm7        ; f in each dword
%endmacro

;-----------------------------------------------------------------------------
; SSE2_QUANT_AC_START
;   XMM counterpart of MMX_QUANT_AC_START.
;   Out: xmm6 = i_qbits in the low dword (shift count)
;        xmm7 = f replicated into all four dwords
;   NOTE(review): xmm6/xmm7 are callee-saved under the Microsoft x64 ABI and
;   are not saved here -- confirm this file is never built for Win64.
;-----------------------------------------------------------------------------
%macro SSE2_QUANT_AC_START 0
    movd       xmm6, parm3d     ; i_qbits
    movd       xmm7, parm4d     ; f
    pshufd     xmm7, xmm7, 0    ; f in each dword
%endmacro

;-----------------------------------------------------------------------------
; SSE2_QUANT15_DC_START
;   XMM counterpart of MMX_QUANT15_DC_START.
;   Out: xmm5 = low 16 bits of i_qmf replicated into all eight words
;        xmm6 = i_qbits in the low dword (shift count)
;        xmm7 = f replicated into all four dwords
;-----------------------------------------------------------------------------
%macro SSE2_QUANT15_DC_START 0
    movd       xmm5, parm2d     ; i_qmf
    movd       xmm6, parm3d     ; i_qbits
    movd       xmm7, parm4d     ; f
    pshuflw    xmm5, xmm5, 0
    punpcklqdq xmm5, xmm5       ; i_qmf in each word
    pshufd     xmm7, xmm7, 0    ; f in each dword
%endmacro

;-----------------------------------------------------------------------------
; MMX_QUANT15_1x4
;   Quantizes four int16 coefficients in place:
;       coef = sign(coef) * ( ( abs(coef) * mf + f ) >> i_qbits )
;   The 16x16 product is formed with a *signed* high multiply (pmulhw) and the
;   shifted result is packed back to words with signed saturation.
;-----------------------------------------------------------------------------
%macro MMX_QUANT15_1x4 4
;;; %1      (m64)       dct[y][x]
;;; %2      (m64/mmx)   quant_mf[y][x] or quant_mf[0][0] (as int16_t)
;;; %3      (mmx)       i_qbits in the low doubleword
;;; %4      (mmx)       f as doublewords
;;; trashes mm0-mm2,mm4
    movq        mm0, %1     ; load dct coeffs
    pxor        mm4, mm4
    pcmpgtw     mm4, mm0    ; sign(coeff)
    pxor        mm0, mm4
    psubw       mm0, mm4    ; abs(coeff)
    movq        mm2, mm0
    pmullw      mm0, %2     ; low  16 bits of abs(coeff) * mf
    pmulhw      mm2, %2     ; high 16 bits of abs(coeff) * mf (signed)
    movq        mm1, mm0
    punpcklwd   mm0, mm2    ; interleave low/high halves into 32-bit products
    punpckhwd   mm1, mm2
    paddd       mm0, %4     ; round with f
    paddd       mm1, %4
    psrad       mm0, %3
    psrad       mm1, %3
    packssdw    mm0, mm1    ; pack
    pxor        mm0, mm4    ; restore sign
    psubw       mm0, mm4
    movq         %1, mm0    ; store
%endmacro

;-----------------------------------------------------------------------------
; SSSE3_QUANT15_1x8
;   Eight-coefficient variant of MMX_QUANT15_1x4; uses pabsw / psignw in place
;   of the compare/xor/sub sign handling.
;   %1      (m128)      dct coefficients; accessed with movdqa, so the
;                       address must be 16-byte aligned
;   %2      (m128/xmm)  quant_mf as int16_t in each word
;   %3      (xmm)       i_qbits in the low doubleword
;   %4      (xmm)       f as doublewords
;   trashes xmm0-xmm2,xmm4
;-----------------------------------------------------------------------------
%macro SSSE3_QUANT15_1x8 4
    movdqa     xmm0, %1     ; load dct coeffs
    movdqa     xmm4, xmm0   ; save sign
    pabsw      xmm0, xmm0
    movdqa     xmm2, xmm0
    pmullw     xmm0, %2
    pmulhw     xmm2, %2
    movdqa     xmm1, xmm0
    punpcklwd  xmm0, xmm2
    punpckhwd  xmm1, xmm2
    paddd      xmm0, %4     ; round with f
    paddd      xmm1, %4
    psrad      xmm0, %3
    psrad      xmm1, %3
    packssdw   xmm0, xmm1   ; pack
    psignw     xmm0, xmm4   ; restore sign
    movdqa       %1, xmm0   ; store
%endmacro

ALIGN 16
;-----------------------------------------------------------------------------
;   void x264_quant_2x2_dc_core15_mmx( int16_t dct[2][2],
;       int const i_qmf, int const i_qbits, int const f );
;-----------------------------------------------------------------------------
; One 1x4 step covers all four coefficients.  Clobbers mm0-mm2, mm4-mm7.
x264_quant_2x2_dc_core15_mmx:
    MMX_QUANT15_DC_START
    MMX_QUANT15_1x4 [parm1q], mm5, mm6, mm7
    ret

ALIGN 16
;-----------------------------------------------------------------------------
;   void x264_quant_4x4_dc_core15_mmx( int16_t dct[4][4],
;       int const i_qmf, int const i_qbits, int const f );
;-----------------------------------------------------------------------------
; Four unrolled 1x4 steps; parm1q is advanced by 8 bytes (4 coeffs) per step.
; Clobbers mm0-mm2, mm4-mm7 and parm1q.
x264_quant_4x4_dc_core15_mmx:
    MMX_QUANT15_DC_START

%rep 4
    MMX_QUANT15_1x4 [parm1q], mm5, mm6, mm7
    add         parm1q, byte 8
%endrep

    ret

ALIGN 16
;-----------------------------------------------------------------------------
;   void x264_quant_4x4_core15_mmx( int16_t dct[4][4],
;       int const quant_mf[4][4], int const i_qbits, int const f );
;-----------------------------------------------------------------------------
; Per step: four 32-bit quant_mf entries (16 bytes) are narrowed to int16 with
; signed saturation into mm5, then applied to four coefficients (8 bytes).
; Clobbers mm0-mm2, mm4-mm7, parm1q and parm2q.
x264_quant_4x4_core15_mmx:
    MMX_QUANT_AC_START

%rep 4
    movq        mm5, [parm2q]
    packssdw    mm5, [parm2q+8]
    MMX_QUANT15_1x4 [parm1q], mm5, mm6, mm7
    add         parm2q, byte 16
    add         parm1q, byte 8
%endrep

    ret

ALIGN 16
;-----------------------------------------------------------------------------
;   void x264_quant_8x8_core15_mmx( int16_t dct[8][8],
;       int const quant_mf[8][8], int const i_qbits, int const f );
;-----------------------------------------------------------------------------
; Same step as the 4x4 AC routine, unrolled 16 times (16 * 4 = 64 coeffs).
; Clobbers mm0-mm2, mm4-mm7, parm1q and parm2q.
x264_quant_8x8_core15_mmx:
    MMX_QUANT_AC_START

%rep 16
    movq        mm5, [parm2q]
    packssdw    mm5, [parm2q+8]
    MMX_QUANT15_1x4 [parm1q], mm5, mm6, mm7
    add         parm2q, byte 16
    add         parm1q, byte 8
%endrep

    ret

; NOTE(review): the guard is named HAVE_SSE3, yet the guarded code uses the
; SSSE3 instructions pabsw / psignw -- confirm what the build flag really tests.
%ifdef HAVE_SSE3
ALIGN 16
;-----------------------------------------------------------------------------
;   void x264_quant_4x4_dc_core15_ssse3( int16_t dct[4][4],
;       int const i_qmf, int const i_qbits, int const f );
;-----------------------------------------------------------------------------
; Two 1x8 steps cover the 16 coefficients.  dct is accessed with movdqa and
; must therefore be 16-byte aligned.  Clobbers xmm0-xmm2, xmm4-xmm7.
x264_quant_4x4_dc_core15_ssse3:
    SSE2_QUANT15_DC_START
    SSSE3_QUANT15_1x8 [parm1q], xmm5, xmm6, xmm7
    SSSE3_QUANT15_1x8 [parm1q+16], xmm5, xmm6, xmm7
    ret

ALIGN 16
;-----------------------------------------------------------------------------
;   void x264_quant_4x4_core15_ssse3( int16_t dct[4][4],
;       int const quant_mf[4][4], int const i_qbits, int const f );
;-----------------------------------------------------------------------------
; Per step x: eight 32-bit quant_mf entries (32 bytes) are narrowed to int16
; with signed saturation into xmm5, then applied to eight coefficients
; (16 bytes).  Pointers are not advanced; offsets are computed from x at
; assembly time.  dct and quant_mf must be 16-byte aligned (movdqa / memory
; operand of packssdw).  Clobbers xmm0-xmm2, xmm4-xmm7.
x264_quant_4x4_core15_ssse3:
    SSE2_QUANT_AC_START
%assign x 0
%rep 2
    movdqa      xmm5, [parm2q+32*x]
    packssdw    xmm5, [parm2q+32*x+16]
    SSSE3_QUANT15_1x8 [parm1q+16*x], xmm5, xmm6, xmm7
    %assign x x+1
%endrep
    ret

ALIGN 16
;-----------------------------------------------------------------------------
;   void x264_quant_8x8_core15_ssse3( int16_t dct[8][8],
;       int const quant_mf[8][8], int const i_qbits, int const f );
;-----------------------------------------------------------------------------
; Same step as the 4x4 AC routine, unrolled 8 times (8 * 8 = 64 coeffs).
; Same alignment requirements and clobbers.
x264_quant_8x8_core15_ssse3:
    SSE2_QUANT_AC_START
%assign x 0
%rep 8
    movdqa      xmm5, [parm2q+32*x]
    packssdw    xmm5, [parm2q+32*x+16]
    SSSE3_QUANT15_1x8 [parm1q+16*x], xmm5, xmm6, xmm7
    %assign x x+1
%endrep
    ret
%endif ; HAVE_SSE3

; ============================================================================

;-----------------------------------------------------------------------------
; MMXEXT_QUANT16_DC_START
;   Same outputs as MMX_QUANT15_DC_START, but broadcasts i_qmf with a single
;   pshufw (an MMXEXT instruction) instead of two punpcklwd.
;   Out: mm5 = low 16 bits of i_qmf replicated into all four words
;        mm6 = i_qbits in the low dword (shift count)
;        mm7 = f replicated into both dwords
;-----------------------------------------------------------------------------
%macro MMXEXT_QUANT16_DC_START 0
;   mov         rdi, rdi        ; &dct[0][0]
    movd        mm5, parm2d     ; i_qmf
    movd        mm6, parm3d     ; i_qbits
    movd        mm7, parm4d     ; f
    pshufw      mm5, mm5, 0     ; i_qmf in each word
    punpckldq   mm7, mm7        ; f in each dword
%endmacro

%macro MMXEXT_QUANT16_1x4 4
;;; %1      (m64)       dct[y][x]
;;; %2      (m64/mmx)   quant_mf[y][x] or quant_mf[0][0] (as uint16_t)
;;; %3      (mmx)       i_qbits in the low doubleword
;;; %4      (mmx)       f as doublewords
;;; trashes mm0-mm2,mm4
    movq        mm0, %1     ; load dct coeffs
    pxor        mm4, mm4
    pcmpgtw     mm4, mm0    ; sign(coeff)
    pxor        mm0, mm4
    psubw       mm0, mm4    ; abs(coeff)
    movq        mm2, mm0
    pmullw      mm0, %2
    pmulhuw     mm2, %2     ; unsigned high multiply (vs. pmulhw in the core15 path)
; NOTE(review): the listing is cut off here (page 1 of 2); the remainder of
; this macro, including its %endmacro, is not visible in this chunk.
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -