⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 quantize_mpeg_mmx.asm

📁 这是一个压缩解压包,用C语言进行编程的,里面有详细的源代码.
💻 ASM
📖 第 1 页 / 共 2 页
字号:
;/**************************************************************************
; *
; *  XVID MPEG-4 VIDEO CODEC
; *  - 3dne Quantization/Dequantization -
; *
; *  Copyright (C) 2002-2003 Peter Ross <pross@xvid.org>
; *                2002-2003 Michael Militzer <isibaar@xvid.org>
; *                2002-2003 Pascal Massimino <skal@planet-d.net>
; *
; *  This program is free software ; you can redistribute it and/or modify
; *  it under the terms of the GNU General Public License as published by
; *  the Free Software Foundation ; either version 2 of the License, or
; *  (at your option) any later version.
; *
; *  This program is distributed in the hope that it will be useful,
; *  but WITHOUT ANY WARRANTY ; without even the implied warranty of
; *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; *  GNU General Public License for more details.
; *
; *  You should have received a copy of the GNU General Public License
; *  along with this program ; if not, write to the Free Software
; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
; *
; * $Id: quantize_mpeg_mmx.asm,v 1.5 2004/08/29 10:02:38 edgomez Exp $
; *
; *************************************************************************/

; NOTE(review): recovered from the fused text "%define SATURATEBITS 32";
; assumed to be two directives that lost the line break between them.
; Confirm against a pristine copy of the file.
%define SATURATE

BITS 32

;-----------------------------------------------------------------------------
; cglobal <name>
;   Exports <name>. With PREFIX defined the exported symbol gets a leading
;   underscore and <name> is redefined to the prefixed spelling; with
;   MARK_FUNCS defined the symbol is declared as a function whose size is
;   <name>.endfunc - <name>, so every exported routine needs a local
;   .endfunc label after its last instruction.
;-----------------------------------------------------------------------------

%macro cglobal 1
  %ifdef PREFIX
    %ifdef MARK_FUNCS
      global _%1:function %1.endfunc-%1
      %define %1 _%1:function %1.endfunc-%1
    %else
      global _%1
      %define %1 _%1
    %endif
  %else
    %ifdef MARK_FUNCS
      global %1:function %1.endfunc-%1
    %else
      global %1
    %endif
  %endif
%endmacro

;-----------------------------------------------------------------------------
; cextern <name>
;   Imports <name>, applying the same leading-underscore rule as cglobal
;   when PREFIX is defined.
;-----------------------------------------------------------------------------

%macro cextern 1
  %ifdef PREFIX
    extern _%1
    %define %1 _%1
  %else
    extern %1
  %endif
%endmacro

;=============================================================================
; Local data (Read Only)
;
; Every table entry is one quadword holding four identical 16-bit words, so
; a single movq loads a ready-to-use operand for a four-lane MMX word
; instruction. Tables indexed by quant are addressed as
; [table + quant*8 - 8], i.e. entry 0 belongs to quant = 1.
;=============================================================================

%ifdef FORMAT_COFF
SECTION .rodata
%else
SECTION .rodata align=16
%endif

mmx_one:
  times 4 dw 1

;-----------------------------------------------------------------------------
; divide by 2Q table
;
; Entry for quant = (1 << 17) / (2*quant) + 1, a reciprocal scaled by 2^17.
;-----------------------------------------------------------------------------

ALIGN 16
mmx_div:
  times 4 dw 65535 ; the div by 2 formula will overflow for the case
                   ; quant=1 but we don't care much because quant=1
                   ; is handled by a different piece of code that
                   ; doesn't use this table.
                   ; The quant=2 entry generated below is 32769, which is
                   ; also outside the signed 16-bit range that pmulhw
                   ; works in; the quantizers in this file branch to a
                   ; shift-only loop for quant 1 and quant 2 before they
                   ; load anything from this table.
%assign quant 2
%rep 30
  times 4 dw (1<<17) / (quant*2) + 1
  %assign quant quant+1
%endrep

%define VM18P 3
%define VM18Q 4

;-----------------------------------------------------------------------------
; quantd table
;
; Entry for quant = (VM18P*quant + VM18Q/2) / VM18Q, i.e. 3*quant/4 rounded
; to nearest with integer arithmetic.
;-----------------------------------------------------------------------------

quantd:
%assign quant 1
%rep 31
  times 4 dw ((VM18P*quant) + (VM18Q/2)) / VM18Q
  %assign quant quant+1
%endrep

;-----------------------------------------------------------------------------
; multiply by quant table
;
; Entry for quant = quant itself (the values stored are Q, not 2Q).
;-----------------------------------------------------------------------------

mmx_mul_quant:
%assign quant 1
%rep 31
  times 4 dw quant
  %assign quant quant+1
%endrep

;-----------------------------------------------------------------------------
; saturation limits
;-----------------------------------------------------------------------------

ALIGN 16

mmx_32767_minus_2047:
  times 4 dw (32767-2047)
mmx_32768_minus_2048:
  times 4 dw (32768-2048)
mmx_2047:
  times 4 dw 2047
mmx_minus_2048:
  times 4 dw (-2048)
zero:
  times 4 dw 0

;=============================================================================
; Code
;=============================================================================

SECTION .text

cglobal quant_mpeg_intra_mmx
cglobal quant_mpeg_inter_mmx
cglobal dequant_mpeg_intra_mmx
cglobal dequant_mpeg_inter_mmx

;-----------------------------------------------------------------------------
;
; uint32_t quant_mpeg_intra_mmx(int16_t * coeff,
;                               const int16_t const * data,
;                               const uint32_t quant,
;                               const uint32_t dcscalar,
;                               const uint16_t
;                               *mpeg_matrices);
;
;-----------------------------------------------------------------------------
;
; Quantizes one block of 64 intra coefficients.
;
; Stack arguments (32-bit, read after four register pushes):
;   [esp + 16 +  4]  coeff          output, 64 words
;   [esp + 16 +  8]  data           input, 64 words
;   [esp + 16 + 12]  quant          only the low byte is tested against 1, 2
;   [esp + 16 + 16]  dcscalar       divisor for the DC term; a value of 0
;                                   makes idiv fault, nothing guards it here
;   [esp + 16 + 20]  mpeg_matrices  words at byte offset 0 are halved and
;                                   added as a rounding term, words at byte
;                                   offset 256 are used as multipliers
;
; Register roles:
;   edi = coeff, esi = data, ebx = mpeg_matrices
;   ecx = loop index in quadwords (advances by 2, stops at 16)
;   mm5 = quantd entry for quant, mm7 = mmx_div entry for quant (generic
;         path only)
;
; Returns 0 in eax. ecx, esi, edi and ebx are restored; edx and the MMX
; registers mm0-mm5 and mm7 are overwritten. No emms is executed here.
;
;-----------------------------------------------------------------------------

;-----------------------------------------------------------------------------
; MPEG_INTRA_SCALE_PAIR
;   Shared front half of the three intra loops; handles eight coefficients.
;   In:   esi, ebx, ecx, mm5 as described above
;   Out:  mm0, mm3 = scaled magnitudes with quantd already added
;         mm1, mm4 = sign masks (0xFFFF in each lane whose input was < 0)
;   Uses: mm2 as scratch
;-----------------------------------------------------------------------------

%macro MPEG_INTRA_SCALE_PAIR 0
  movq mm0, [esi + 8*ecx]       ; first four input words
  movq mm3, [esi + 8*ecx + 8]   ; next four input words
  pxor mm1, mm1
  pxor mm4, mm4
  pcmpgtw mm1, mm0              ; mask = (0 > x)
  pcmpgtw mm4, mm3
  pxor mm0, mm1                 ; (x ^ mask) - mask  ==  |x|
  pxor mm3, mm4
  psubw mm0, mm1
  psubw mm3, mm4
  psllw mm0, 4                  ; level << 4
  psllw mm3, 4
  movq mm2, [ebx + 8*ecx]
  psrlw mm2, 1                  ; matrix[i] >> 1, rounding term
  paddw mm0, mm2
  movq mm2, [ebx + 256 + ecx*8]
  pmulhw mm0, mm2               ; high word of the signed product. Stands in
                                ; for "/ matrix[i]", so the table at +256
                                ; presumably holds fixed-point reciprocals;
                                ; TODO confirm where mpeg_matrices is built
  movq mm2, [ebx + 8*ecx + 8]
  psrlw mm2, 1
  paddw mm3, mm2
  movq mm2, [ebx + 256 + ecx*8 + 8]
  pmulhw mm3, mm2
  paddw mm0, mm5                ; + quantd
  paddw mm3, mm5
%endmacro

;-----------------------------------------------------------------------------
; MPEG_INTRA_SIGN_STORE
;   Shared back half of the three intra loops: puts the original signs back
;   on mm0/mm3 using the masks in mm1/mm4 and writes the eight results to
;   coeff at the current index.
;-----------------------------------------------------------------------------

%macro MPEG_INTRA_SIGN_STORE 0
  pxor mm0, mm1                 ; (v ^ mask) - mask negates the lanes whose
  pxor mm3, mm4                 ; input was negative
  psubw mm0, mm1
  psubw mm3, mm4
  movq [edi + 8*ecx], mm0
  movq [edi + 8*ecx + 8], mm3
%endmacro

ALIGN 16
quant_mpeg_intra_mmx:

  push ecx
  push esi
  push edi
  push ebx

  mov edi, [esp + 16 + 4]       ; coeff
  mov esi, [esp + 16 + 8]       ; data
  mov eax, [esp + 16 + 12]      ; quant
  mov ebx, [esp + 16 + 20]      ; mpeg_quant_matrices

  movq mm5, [quantd + eax * 8 - 8] ; quantd -> mm5

  xor ecx, ecx

  ; quant 1 and quant 2 divide by 2 and by 4, which is a plain shift; they
  ; also must not use the mmx_div multiplier (see the table's comment).
  cmp al, 1
  jz near .q1loop

  cmp al, 2
  jz near .q2loop

  movq mm7, [mmx_div + eax * 8 - 8] ; multipliers[quant] -> mm7

ALIGN 16
.loop:
  MPEG_INTRA_SCALE_PAIR
  pmulhw mm0, mm7               ; mm0 = (mm0 / 2Q) >> 16
  pmulhw mm3, mm7
  psrlw mm0, 1                  ; additional shift by 1 => 16 + 1 = 17
  psrlw mm3, 1
  MPEG_INTRA_SIGN_STORE

  add ecx, 2
  cmp ecx, 16
  jnz near .loop

.done:
  ; DC term: coeff[0] = (data[0] +/- dcscalar/2) / (int32_t)dcscalar.
  ; Half the divisor is added for positive input and subtracted otherwise,
  ; so the truncating idiv rounds to nearest. This overwrites the coeff[0]
  ; value the loop above stored.
  mov ecx, [esp + 16 + 16]      ; dcscalar
  mov edx, ecx
  movsx eax, word [esi]         ; data[0]
  shr edx, 1                    ; edx = dcscalar / 2
  test eax, eax
  jg .gtzero

  sub eax, edx
  jmp short .mul

.gtzero:
  add eax, edx

.mul:
  cdq                           ; expand eax -> edx:eax
  idiv ecx                      ; eax = edx:eax / dcscalar
  mov [edi], ax                 ; coeff[0] = ax

  pop ebx
  pop edi
  pop esi
  pop ecx

  xor eax, eax                  ; return(0);
  ret

ALIGN 16
.q1loop:
  MPEG_INTRA_SCALE_PAIR
  psrlw mm0, 1                  ; mm0 >>= 1   (/2)
  psrlw mm3, 1
  MPEG_INTRA_SIGN_STORE

  add ecx, 2
  cmp ecx, 16
  jnz near .q1loop
  jmp near .done

ALIGN 16
.q2loop:
  MPEG_INTRA_SCALE_PAIR
  psrlw mm0, 2                  ; mm0 >>= 2   (/4)
  psrlw mm3, 2
  MPEG_INTRA_SIGN_STORE

  add ecx, 2
  cmp ecx, 16
  jnz near .q2loop
  jmp near .done
.endfunc:

;-----------------------------------------------------------------------------
;
; uint32_t quant_mpeg_inter_mmx(int16_t * coeff,
;                               const int16_t const * data,
;                               const uint32_t quant,
;                               const uint16_t *mpeg_matrices);
;
;-----------------------------------------------------------------------------

ALIGN 16
quant_mpeg_inter_mmx:

  push ecx
  push esi
  push edi
  push ebx

  mov edi, [esp + 16 + 4]       ; coeff
  mov esi, [esp + 16 + 8]       ; data
  mov eax, [esp + 16 + 12]  ; quant
  mov ebx, [esp + 16 + 16]		; mpeg_quant_matrices

  xor ecx, ecx

  pxor mm5, mm5                 ; sum

  cmp al, 1
  jz near .q1loop

  cmp al, 2
  jz near .q2loop

  movq mm7, [mmx_div + eax * 8 - 8] ; divider

ALIGN 16
.loop
  movq mm0, [esi + 8*ecx]       ; mm0 = [1st]
  movq mm3, [esi + 8*ecx + 8]   ;
  pxor mm1, mm1                 ; mm1 = 0
  pxor mm4, mm4                 ;
  pcmpgtw mm1, mm0              ; mm1 = (0 > mm0)
  pcmpgtw mm4, mm3              ;
  pxor mm0, mm1                 ; mm0 = |mm0|
  pxor mm3, mm4                 ;
  psubw mm0, mm1                ; displace
  psubw mm3, mm4                ;
  psllw mm0, 4
  psllw mm3, 4
  movq mm2, [ebx + 512 + 8*ecx]
  psrlw mm2, 1
  paddw mm0, mm2

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -