
📄 reduced_mmx.asm

📁 A compression/decompression package written in C, with detailed source code.
💻 ASM
📖 Page 1 of 2
;/*****************************************************************************
; *
; *  XVID MPEG-4 VIDEO CODEC
; *  - Reduced-Resolution utilities -
; *
; *  Copyright(C) 2002 Pascal Massimino <skal@planet-d.net>
; *
; *  XviD is free software; you can redistribute it and/or modify it
; *  under the terms of the GNU General Public License as published by
; *  the Free Software Foundation; either version 2 of the License, or
; *  (at your option) any later version.
; *
; *  This program is distributed in the hope that it will be useful,
; *  but WITHOUT ANY WARRANTY; without even the implied warranty of
; *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; *  GNU General Public License for more details.
; *
; *  You should have received a copy of the GNU General Public License
; *  along with this program; if not, write to the Free Software
; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
; *
; * $Id: reduced_mmx.asm,v 1.6 2004/08/29 10:02:38 edgomez Exp $
; *
; *************************************************************************/

BITS 32

%macro cglobal 1
	%ifdef PREFIX
		%ifdef MARK_FUNCS
			global _%1:function %1.endfunc-%1
			%define %1 _%1:function %1.endfunc-%1
		%else
			global _%1
			%define %1 _%1
		%endif
	%else
		%ifdef MARK_FUNCS
			global %1:function %1.endfunc-%1
		%else
			global %1
		%endif
	%endif
%endmacro

;===========================================================================

%ifdef FORMAT_COFF
SECTION .rodata
%else
SECTION .rodata align=16
%endif

align 16
Up31    dw  3, 1, 3, 1
Up13    dw  1, 3, 1, 3
Up93    dw  9, 3, 9, 3
Up39    dw  3, 9, 3, 9
Cst0    dw  0, 0, 0, 0
Cst2    dw  2, 2, 2, 2
Cst3    dw  3, 3, 3, 3
Cst32   dw 32,32,32,32
Cst2000 dw  2, 0, 0, 0
Cst0002 dw  0, 0, 0, 2
Mask_ff dw 0xff,0xff,0xff,0xff

;===========================================================================

SECTION .text

cglobal xvid_Copy_Upsampled_8x8_16To8_mmx
cglobal xvid_Add_Upsampled_8x8_16To8_mmx
cglobal xvid_Copy_Upsampled_8x8_16To8_xmm
cglobal xvid_Add_Upsampled_8x8_16To8_xmm
cglobal xvid_HFilter_31_mmx
cglobal xvid_VFilter_31_x86
cglobal xvid_HFilter_31_x86
cglobal xvid_Filter_18x18_To_8x8_mmx
cglobal xvid_Filter_Diff_18x18_To_8x8_mmx

;//////////////////////////////////////////////////////////////////////
;// 8x8 -> 16x16 upsampling (16b)
;//////////////////////////////////////////////////////////////////////

%macro MUL_PACK 4     ; %1/%2: regs   %3/%4: Up13/Up31
  pmullw %1,  %3 ; [Up13]
  pmullw mm4, %4 ; [Up31]
  pmullw %2,  %3 ; [Up13]
  pmullw mm5, %4 ; [Up31]
  paddsw %1, [Cst2]
  paddsw %2, [Cst2]
  paddsw %1, mm4
  paddsw %2, mm5
%endmacro

    ; MMX-way of reordering columns...

%macro COL03 3    ;%1/%2: regs, %3: row   -output: mm4/mm5
  movq %1, [edx+%3*16+0*2]   ; %1  = 0|1|2|3
  movq %2, [edx+%3*16+1*2]   ; %2  = 1|2|3|4
  movq mm5, %1               ; mm5 = 0|1|2|3
  movq mm4, %1               ; mm4 = 0|1|2|3
  punpckhwd mm5, %2          ; mm5 = 2|3|3|4
  punpcklwd mm4, %2          ; mm4 = 0|1|1|2
  punpcklwd %1, %1           ; %1  = 0|0|1|1
  punpcklwd %2, mm5          ; %2  = 1|2|2|3
  punpcklwd %1, mm4          ; %1  = 0|0|0|1
%endmacro

%macro COL47 3    ;%1-%2: regs, %3: row   -output: mm4/mm5
  movq mm5, [edx+%3*16+4*2]  ; mm5 = 4|5|6|7
  movq %1, [edx+%3*16+3*2]   ; %1  = 3|4|5|6
  movq %2, mm5               ; %2  = 4|5|6|7
  movq mm4, mm5              ; mm4 = 4|5|6|7
  punpckhwd %2, %2           ; %2  = 6|6|7|7
  punpckhwd mm5, %2          ; mm5 = 6|7|7|7
  movq %2, %1                ; %2  = 3|4|5|6
  punpcklwd %1, mm4          ; %1  = 3|4|4|5
  punpckhwd %2, mm4          ; %2  = 5|6|6|7
  punpcklwd mm4, %2          ; mm4 = 4|5|5|6
%endmacro
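
  ;---------------------------------------------------------------------
  ; Annotation: a minimal scalar C sketch of what COL03/COL47 + MUL_PACK
  ; compute per row (my reading of the macros above, not code from the
  ; XviD sources). The column reordering pairs each sample with its
  ; left/right neighbour (edges replicated), and Up13/Up31 apply the 3:1
  ; bilinear weights; results stay scaled by 4, with the rounder '2'
  ; already folded in.
  ;
  ;   #include <stdint.h>
  ;
  ;   /* H: one 16-wide upsampled row, still scaled by 4 */
  ;   static void upsample_row_x4(int16_t H[16], const int16_t s[8])
  ;   {
  ;       for (int j = 0; j < 8; j++) {
  ;           int left  = s[j > 0 ? j - 1 : 0];   /* edge replication */
  ;           int right = s[j < 7 ? j + 1 : 7];
  ;           H[2*j]     = (int16_t)(3*s[j] + left  + 2);
  ;           H[2*j + 1] = (int16_t)(3*s[j] + right + 2);
  ;       }
  ;   }
  ;---------------------------------------------------------------------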
%macro MIX_ROWS 4   ; %1/%2: prev   %3/%4: cur (preserved)   mm4/mm5: output
  ; we need to perform: (%1,%3) -> (%1 = 3*%1+%3, mm4 = 3*%3+%1), %3 preserved.
  movq mm4, [Cst3]
  movq mm5, [Cst3]
  pmullw mm4, %3
  pmullw mm5, %4
  paddsw mm4, %1
  paddsw mm5, %2
  pmullw %1, [Cst3]
  pmullw %2, [Cst3]
  paddsw %1, %3
  paddsw %2, %4
%endmacro

;===========================================================================
;
; void xvid_Copy_Upsampled_8x8_16To8_mmx(uint8_t *Dst,
;                                        const int16_t *Src, const int BpS);
;
;===========================================================================

  ; Note: we can use ">>2" instead of "/4" here, since we
  ; are (supposed to be) averaging positive values

%macro STORE_1 2
  psraw %1, 2
  psraw %2, 2
  packuswb %1, %2
  movq [ecx], %1
%endmacro

%macro STORE_2 2    ; pack and store (%1,%2) + (mm4,mm5)
  psraw %1, 4
  psraw %2, 4
  psraw mm4, 4
  psraw mm5, 4
  packuswb %1, %2
  packuswb mm4, mm5
  movq [ecx], %1
  movq [ecx+eax], mm4
  lea ecx, [ecx+2*eax]
%endmacro

;//////////////////////////////////////////////////////////////////////

align 16
xvid_Copy_Upsampled_8x8_16To8_mmx:  ; 344c

  mov ecx, [esp+4]  ; Dst
  mov edx, [esp+8]  ; Src
  mov eax, [esp+12] ; BpS

  movq mm6, [Up13]
  movq mm7, [Up31]

  COL03 mm0, mm1, 0
  MUL_PACK mm0, mm1, mm6, mm7
  movq mm4, mm0
  movq mm5, mm1
  STORE_1 mm4, mm5
  add ecx, eax

  COL03 mm2, mm3, 1
  MUL_PACK mm2, mm3, mm6, mm7
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_2 mm0, mm1

  COL03 mm0, mm1, 2
  MUL_PACK mm0, mm1, mm6, mm7
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_2 mm2, mm3

  COL03 mm2, mm3, 3
  MUL_PACK mm2, mm3, mm6, mm7
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_2 mm0, mm1

  COL03 mm0, mm1, 4
  MUL_PACK mm0, mm1, mm6, mm7
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_2 mm2, mm3

  COL03 mm2, mm3, 5
  MUL_PACK mm2, mm3, mm6, mm7
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_2 mm0, mm1

  COL03 mm0, mm1, 6
  MUL_PACK mm0, mm1, mm6, mm7
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_2 mm2, mm3

  COL03 mm2, mm3, 7
  MUL_PACK mm2, mm3, mm6, mm7
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_2 mm0, mm1

  STORE_1 mm2, mm3

  mov ecx, [esp+4]
  add ecx, 8

  COL47 mm0, mm1, 0
  MUL_PACK mm0, mm1, mm6, mm7
  movq mm4, mm0
  movq mm5, mm1
  STORE_1 mm4, mm5
  add ecx, eax

  COL47 mm2, mm3, 1
  MUL_PACK mm2, mm3, mm6, mm7
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_2 mm0, mm1

  COL47 mm0, mm1, 2
  MUL_PACK mm0, mm1, mm6, mm7
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_2 mm2, mm3

  COL47 mm2, mm3, 3
  MUL_PACK mm2, mm3, mm6, mm7
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_2 mm0, mm1

  COL47 mm0, mm1, 4
  MUL_PACK mm0, mm1, mm6, mm7
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_2 mm2, mm3

  COL47 mm2, mm3, 5
  MUL_PACK mm2, mm3, mm6, mm7
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_2 mm0, mm1

  COL47 mm0, mm1, 6
  MUL_PACK mm0, mm1, mm6, mm7
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_2 mm2, mm3

  COL47 mm2, mm3, 7
  MUL_PACK mm2, mm3, mm6, mm7
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_2 mm0, mm1

  STORE_1 mm2, mm3

  ret
.endfunc
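
  ;---------------------------------------------------------------------
  ; Annotation: the vertical pass in the same scalar sketch (again my
  ; reading of the code above, not XviD's reference C). MIX_ROWS blends
  ; adjacent x4-scaled rows with 3:1 weights, giving x16-scaled values
  ; (hence '>>4' in STORE_2); the top/bottom rows carry only the
  ; horizontal weighting, hence '>>2' in STORE_1. The per-row rounder '2'
  ; accumulates to 3*2+2 = 8 across MIX_ROWS, so '>>4' rounds to nearest
  ; with no extra constant; packuswb clamps results to 0..255.
  ;
  ;   static uint8_t clamp255(int v)
  ;   {
  ;       return (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v);
  ;   }
  ;
  ;   /* assumes upsample_row_x4() from the sketch above */
  ;   static void copy_upsampled_c(uint8_t *Dst, const int16_t *Src, int BpS)
  ;   {
  ;       int16_t H[8][16];
  ;       for (int i = 0; i < 8; i++)
  ;           upsample_row_x4(H[i], Src + 8*i);
  ;       for (int x = 0; x < 16; x++) {
  ;           Dst[ 0*BpS + x] = clamp255(H[0][x] >> 2);            /* STORE_1 */
  ;           Dst[15*BpS + x] = clamp255(H[7][x] >> 2);
  ;           for (int i = 0; i < 7; i++) {                        /* STORE_2 */
  ;               Dst[(2*i+1)*BpS + x] = clamp255((3*H[i][x]   + H[i+1][x]) >> 4);
  ;               Dst[(2*i+2)*BpS + x] = clamp255((3*H[i+1][x] + H[i][x])   >> 4);
  ;           }
  ;       }
  ;   }
  ;---------------------------------------------------------------------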
;===========================================================================
;
; void xvid_Add_Upsampled_8x8_16To8_mmx(uint8_t *Dst,
;                                       const int16_t *Src, const int BpS);
;
;===========================================================================

    ; Note: grrr... the 'pcmpgtw' stuff is the "/4" and "/16" operators
    ; implemented with ">>2" and ">>4" using:
    ;       x/4  = ( (x-(x<0))>>2 ) + (x<0)
    ;       x/16 = ( (x-(x<0))>>4 ) + (x<0)

%macro STORE_ADD_1 2
    ; We subtract the rounder '2' for corner pixels,
    ; since when 'x' is negative, (x*4 + 2)/4 is *not*
    ; equal to 'x'. In fact, the correct relation is:
    ;         (x*4 + 2)/4 = x - (x<0)
    ; So, better revert to (x*4)/4 = x.
  psubsw %1, [Cst2000]
  psubsw %2, [Cst0002]

  pxor mm6, mm6
  pxor mm7, mm7
  pcmpgtw mm6, %1
  pcmpgtw mm7, %2
  paddsw %1, mm6
  paddsw %2, mm7
  psraw %1, 2
  psraw %2, 2
  psubsw %1, mm6
  psubsw %2, mm7

    ; mix with destination [ecx]
  movq mm6, [ecx]
  movq mm7, [ecx]
  punpcklbw mm6, [Cst0]
  punpckhbw mm7, [Cst0]
  paddsw %1, mm6
  paddsw %2, mm7
  packuswb %1, %2
  movq [ecx], %1
%endmacro

%macro STORE_ADD_2 2
  pxor mm6, mm6
  pxor mm7, mm7
  pcmpgtw mm6, %1
  pcmpgtw mm7, %2
  paddsw %1, mm6
  paddsw %2, mm7
  psraw %1, 4
  psraw %2, 4
  psubsw %1, mm6
  psubsw %2, mm7

  pxor mm6, mm6
  pxor mm7, mm7
  pcmpgtw mm6, mm4
  pcmpgtw mm7, mm5
  paddsw mm4, mm6
  paddsw mm5, mm7
  psraw mm4, 4
  psraw mm5, 4
  psubsw mm4, mm6
  psubsw mm5, mm7

    ; mix with destination
  movq mm6, [ecx]
  movq mm7, [ecx]
  punpcklbw mm6, [Cst0]
  punpckhbw mm7, [Cst0]
  paddsw %1, mm6
  paddsw %2, mm7

  movq mm6, [ecx+eax]
  movq mm7, [ecx+eax]
  punpcklbw mm6, [Cst0]
  punpckhbw mm7, [Cst0]
  paddsw mm4, mm6
  paddsw mm5, mm7

  packuswb %1, %2
  packuswb mm4, mm5
  movq [ecx], %1
  movq [ecx+eax], mm4
  lea ecx, [ecx+2*eax]
%endmacro

;//////////////////////////////////////////////////////////////////////

align 16
xvid_Add_Upsampled_8x8_16To8_mmx:  ; 579c

  mov ecx, [esp+4]  ; Dst
  mov edx, [esp+8]  ; Src
  mov eax, [esp+12] ; BpS

  COL03 mm0, mm1, 0
  MUL_PACK mm0, mm1, [Up13], [Up31]
  movq mm4, mm0
  movq mm5, mm1
  STORE_ADD_1 mm4, mm5
  add ecx, eax

  COL03 mm2, mm3, 1
  MUL_PACK mm2, mm3, [Up13], [Up31]
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_ADD_2 mm0, mm1

  COL03 mm0, mm1, 2
  MUL_PACK mm0, mm1, [Up13], [Up31]
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_ADD_2 mm2, mm3

  COL03 mm2, mm3, 3
  MUL_PACK mm2, mm3, [Up13], [Up31]
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_ADD_2 mm0, mm1

  COL03 mm0, mm1, 4
  MUL_PACK mm0, mm1, [Up13], [Up31]
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_ADD_2 mm2, mm3

  COL03 mm2, mm3, 5
  MUL_PACK mm2, mm3, [Up13], [Up31]
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_ADD_2 mm0, mm1

  COL03 mm0, mm1, 6
  MUL_PACK mm0, mm1, [Up13], [Up31]
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_ADD_2 mm2, mm3

  COL03 mm2, mm3, 7
  MUL_PACK mm2, mm3, [Up13], [Up31]
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_ADD_2 mm0, mm1

  STORE_ADD_1 mm2, mm3

  mov ecx, [esp+4]
  add ecx, 8

  COL47 mm0, mm1, 0
  MUL_PACK mm0, mm1, [Up13], [Up31]
  movq mm4, mm0
  movq mm5, mm1
  STORE_ADD_1 mm4, mm5
  add ecx, eax

  COL47 mm2, mm3, 1
  MUL_PACK mm2, mm3, [Up13], [Up31]
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_ADD_2 mm0, mm1

  COL47 mm0, mm1, 2
  MUL_PACK mm0, mm1, [Up13], [Up31]
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_ADD_2 mm2, mm3

  COL47 mm2, mm3, 3
  MUL_PACK mm2, mm3, [Up13], [Up31]
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_ADD_2 mm0, mm1

  COL47 mm0, mm1, 4
  MUL_PACK mm0, mm1, [Up13], [Up31]
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_ADD_2 mm2, mm3

  COL47 mm2, mm3, 5
  MUL_PACK mm2, mm3, [Up13], [Up31]
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_ADD_2 mm0, mm1

  COL47 mm0, mm1, 6
  MUL_PACK mm0, mm1, [Up13], [Up31]
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_ADD_2 mm2, mm3

  COL47 mm2, mm3, 7
  MUL_PACK mm2, mm3, [Up13], [Up31]
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_ADD_2 mm0, mm1

  STORE_ADD_1 mm2, mm3

  ret
.endfunc
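
  ;---------------------------------------------------------------------
  ; Annotation: the branchless signed-division identity used above, as a
  ; small self-checking C program (illustration only). pcmpgtw against
  ; zero yields an all-ones word (-1) for negative values; paddsw, psraw
  ; and psubsw then turn the flooring arithmetic shift into truncation
  ; toward zero.
  ;
  ;   #include <assert.h>
  ;
  ;   /* x/4 = ((x - (x<0)) >> 2) + (x<0), '>>' being an arithmetic shift */
  ;   static int div4_trunc(int x)
  ;   {
  ;       int m = -(x < 0);             /* pcmpgtw mask: 0 or -1 */
  ;       return ((x + m) >> 2) - m;    /* paddsw, psraw, psubsw */
  ;   }
  ;
  ;   int main(void)
  ;   {
  ;       for (int x = -32768; x <= 32767; x++)
  ;           assert(div4_trunc(x) == x / 4);   /* C's '/' truncates */
  ;       return 0;
  ;   }
  ;---------------------------------------------------------------------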
;===========================================================================
;
; void xvid_Copy_Upsampled_8x8_16To8_xmm(uint8_t *Dst,
;                                        const int16_t *Src, const int BpS);
;
;===========================================================================

  ; xmm version can take (little) advantage of 'pshufw'

%macro COL03_SSE 3    ;%1/%2: regs, %3: row   -trashes mm4/mm5
  movq %2, [edx+%3*16+0*2]               ; <- 0|1|2|3
  pshufw %1,  %2,  (0+0*4+0*16+1*64)     ; %1 = 0|0|0|1
  pshufw mm4, %2,  (0+1*4+1*16+2*64)     ; mm4= 0|1|1|2
  pshufw %2,  %2,  (1+2*4+2*16+3*64)     ; %2 = 1|2|2|3
  pshufw mm5, [edx+%3*16+2*2],  (0+1*4+1*16+2*64) ; mm5 = 2|3|3|4
%endmacro

%macro COL47_SSE 3    ;%1-%2: regs, %3: row   -trashes mm4/mm5
  pshufw %1, [edx+%3*16+2*2],  (1+2*4+2*16+3*64) ; 3|4|4|5
  movq mm5, [edx+%3*16+2*4]                      ; <- 4|5|6|7
  pshufw mm4, mm5,  (0+1*4+1*16+2*64)            ; 4|5|5|6
  pshufw %2,  mm5,  (1+2*4+2*16+3*64)            ; 5|6|6|7
  pshufw mm5, mm5,  (2+3*4+3*16+3*64)            ; 6|7|7|7
%endmacro

;//////////////////////////////////////////////////////////////////////

align 16
xvid_Copy_Upsampled_8x8_16To8_xmm:  ; 315c

  mov ecx, [esp+4]  ; Dst
  mov edx, [esp+8]  ; Src
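
  ;---------------------------------------------------------------------
  ; Annotation: in the pshufw immediates above, each result word i is
  ; selected by bits 2i..2i+1 of the immediate, so (0+1*4+1*16+2*64)
  ; picks source words 0,1,1,2. A C model of the selection (illustration
  ; only, not code from the XviD sources):
  ;
  ;   #include <stdint.h>
  ;
  ;   static void pshufw_model(uint16_t d[4], const uint16_t s[4], unsigned imm8)
  ;   {
  ;       for (int i = 0; i < 4; i++)
  ;           d[i] = s[(imm8 >> (2*i)) & 3];   /* 2-bit selector per word */
  ;   }
  ;---------------------------------------------------------------------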
