quantize_mpeg_xmm.asm
;/****************************************************************************
; *
; *  XVID MPEG-4 VIDEO CODEC
; *  - 3dne Quantization/Dequantization -
; *
; *  Copyright (C) 2002-2003 Peter Ross <pross@xvid.org>
; *                2002      Jaan Kalda
; *                2004      Andre Werthmann <wertmann@aei.mpg.de>
; *
; *  This program is free software ; you can redistribute it and/or modify
; *  it under the terms of the GNU General Public License as published by
; *  the Free Software Foundation ; either version 2 of the License, or
; *  (at your option) any later version.
; *
; *  This program is distributed in the hope that it will be useful,
; *  but WITHOUT ANY WARRANTY ; without even the implied warranty of
; *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; *  GNU General Public License for more details.
; *
; *  You should have received a copy of the GNU General Public License
; *  along with this program ; if not, write to the Free Software
; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
; *
; * $Id: quantize_mpeg_xmm.asm,v 1.1 2005/01/05 23:02:15 edgomez Exp $
; *
; ***************************************************************************/

; _3dne functions are compatible with iSSE, but are optimized specifically
; for K7 pipelines

%define SATURATE

BITS 64

%macro cglobal 1
  %ifdef PREFIX
    %ifdef MARK_FUNCS
      global _%1:function %1.endfunc-%1
      %define %1 _%1:function %1.endfunc-%1
    %else
      global _%1
      %define %1 _%1
    %endif
  %else
    %ifdef MARK_FUNCS
      global %1:function %1.endfunc-%1
    %else
      global %1
    %endif
  %endif
%endmacro

;=============================================================================
; Local data
;=============================================================================

%ifdef FORMAT_COFF
SECTION .rodata
%else
SECTION .rodata align=16
%endif

ALIGN 8
mmzero:
  dd 0,0
mmx_one:
  times 4 dw 1

;-----------------------------------------------------------------------------
; divide by 2Q table
;-----------------------------------------------------------------------------

ALIGN 16
mmx_divs:        ;i>2
%assign i 1
%rep 31
  times 4 dw ((1 << 15) / i + 1)
  %assign i i+1
%endrep

ALIGN 16
mmx_div:         ;quant>2
  times 4 dw 65535 ; the div by 2 formula will overflow for the case
                   ; quant=1 but we don't care much because quant=1
                   ; is handled by a different piece of code that
                   ; doesn't use this table.
%assign quant 2
%rep 31
  times 4 dw ((1 << 16) / quant + 1)
  %assign quant quant+1
%endrep

%macro FIXX 1
dw (1 << 16) / (%1) + 1
%endmacro

%define nop4 db 08Dh, 074h, 026h,0
%define nop3 add esp, byte 0
%define nop2 mov esp, esp
%define nop7 db 08dh, 02ch, 02dh,0,0,0,0
%define nop6 add ebp, dword 0

;-----------------------------------------------------------------------------
; quantd table
;-----------------------------------------------------------------------------

%define VM18P 3
%define VM18Q 4

ALIGN 16
quantd:
%assign i 1
%rep 31
  times 4 dw (((VM18P*i) + (VM18Q/2)) / VM18Q)
  %assign i i+1
%endrep

;-----------------------------------------------------------------------------
; multiply by 2Q table
;-----------------------------------------------------------------------------

ALIGN 16
mmx_mul_quant:
%assign i 1
%rep 31
  times 4 dw i
  %assign i i+1
%endrep

;-----------------------------------------------------------------------------
; saturation limits
;-----------------------------------------------------------------------------

ALIGN 16
mmx_32767_minus_2047:
  times 4 dw (32767-2047)
mmx_32768_minus_2048:
  times 4 dw (32768-2048)
mmx_2047:
  times 4 dw 2047
mmx_minus_2048:
  times 4 dw (-2048)
zero:
  times 4 dw 0

int_div:
  dd 0
%assign i 1
%rep 255
  dd (1 << 17) / (i) + 1
  %assign i i+1
%endrep
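;-----------------------------------------------------------------------------
; Note on the reciprocal tables (added comment, not part of the original
; source): mmx_divs, mmx_div and int_div replace integer division by a
; multiplication with a fixed-point reciprocal, so the quantisation loops
; never need a real DIV. A minimal worked example, assuming quant = 4 and a
; displaced level of 100 in the 1 < quant < 20 path:
;
;   mmx_divs entry = (1 << 15) / 4 + 1 = 8193
;   pmulhuw        -> (100 * 8193) >> 16 = 12    ; == 100 / (2*4), truncated
;
; The "+ 1" rounds the stored reciprocal up so the truncated high word does
; not come out one short of the exact quotient for in-range coefficients;
; int_div plays the same role for data[0] / dcscalar, paired with a 17-bit
; arithmetic shift.
;-----------------------------------------------------------------------------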
;=============================================================================
; Code
;=============================================================================

SECTION .text align=16

cglobal quant_mpeg_intra_x86_64
cglobal quant_mpeg_inter_x86_64
cglobal dequant_mpeg_intra_x86_64
cglobal dequant_mpeg_inter_x86_64

;-----------------------------------------------------------------------------
;
; uint32_t quant_mpeg_intra_x86_64(int16_t * coeff,
;                                  const int16_t const * data,
;                                  const uint32_t quant,
;                                  const uint32_t dcscalar,
;                                  const uint16_t *mpeg_matrices);
;
; Ported from its 32bit xmm cousin
;
;-----------------------------------------------------------------------------

ALIGN 16
quant_mpeg_intra_x86_64:
  mov rax, rsi                  ; data
  mov r9, rcx                   ; save dcscalar
  mov rcx, rdx                  ; quant
  mov rdx, rdi                  ; coeff
  push rbx

  mov rdi, r8                   ; mpeg_quant_matrices

  mov rsi, -14
  pxor mm0, mm0
  pxor mm3, mm3
  cmp rcx, byte 1
  je near .q1loop
  cmp rcx, byte 19
  jg near .lloop

ALIGN 16
.loop
  movq mm1, [rax + 8*rsi+112]   ; mm0 = [1st]
  psubw mm0, mm1                ;-mm1
  movq mm4, [rax + 8*rsi + 120] ;
  psubw mm3, mm4                ;-mm4
  pmaxsw mm0, mm1               ;|src|
  pmaxsw mm3, mm4
;  nop2
  psraw mm1, 15                 ;sign src
  psraw mm4, 15
  psllw mm0, 4                  ;level << 4
  psllw mm3, 4
  paddw mm0, [rdi + 128 + 8*rsi+112]
  paddw mm3, [rdi + 128 + 8*rsi+120]
  movq mm5, [rdi + 384 + 8*rsi+112]
  movq mm7, [rdi + 384 + 8*rsi+120]
  pmulhuw mm5, mm0
  pmulhuw mm7, mm3
;  mov esp, esp
  movq mm2, [rdi + 8*rsi+112]
  movq mm6, [rdi + 8*rsi+120]
  pmullw mm2, mm5
  pmullw mm6, mm7
  psubw mm0, mm2
  psubw mm3, mm6
;  nop4
  lea r11, [quantd wrt rip]
  movq mm2, [r11 + rcx * 8 - 8]
  lea r11, [mmx_divs wrt rip]
  movq mm6, [r11 + rcx * 8 - 8]
  paddw mm5, mm2
  paddw mm7, mm2
;  mov esp, esp
  pmulhuw mm0, [rdi + 256 + 8*rsi+112]
  pmulhuw mm3, [rdi + 256 + 8*rsi+120]
  paddw mm5, mm0
  paddw mm7, mm3
  pxor mm0, mm0
  pxor mm3, mm3
  pmulhuw mm5, mm6              ; mm0 = (mm0 / 2Q) >> 16
  pmulhuw mm7, mm6              ; (level + quantd) / quant (0<quant<32)
  pxor mm5, mm1                 ; mm0 *= sign(mm0)
  pxor mm7, mm4                 ;
  psubw mm5, mm1                ; undisplace
  psubw mm7, mm4                ;
  movq [rdx + 8*rsi+112], mm5
  movq [rdx + 8*rsi +120], mm7
  add rsi, byte 2
  jng near .loop

.done
; calculate  data[0] // (int32_t)dcscalar)
;  mov esi, [esp + 12 + 16]     ; dcscalar
  mov rsi, r9                   ; dcscalar
  movsx rcx, word [rax]
  mov rdi, rcx
;  mov edx, [esp + 12 + 16]
  mov r11, rdx                  ; save rdx
  mov rdx, r9
  shr edx, 1                    ; ebx = dcscalar /2
  sar edi, 31                   ; cdq is vectorpath
  xor edx, edi                  ; ebx = eax V -eax -1
  sub ecx, edi
  add ecx, edx
;;  mov rdx, [dword esp + 12 + 4]
  mov rdx, r11                  ; restore rdx
  lea r11, [int_div wrt rip]
  mov rsi, [r11+4*rsi]
  imul ecx, esi
  sar ecx, 17
  lea rbx, [byte rcx + 1]
  cmovs rcx, rbx
;  idiv cx                      ; ecx = edi:ecx / dcscalar
;  mov ebx, [esp]
;  mov edi, [esp+4]
;  mov esi, [esp+8]
;  add esp, byte 12             ; pops...
  pop rbx
;  mov [rdx], rcx               ; coeff[0] = ax
  mov [rdx], cx                 ; coeff[0] = cx
  xor rax, rax
  ret

ALIGN 16
.q1loop
  movq mm1, [rax + 8*rsi+112]   ; mm0 = [1st]
  psubw mm0, mm1                ;-mm1
  movq mm4, [rax + 8*rsi+120]   ;
  psubw mm3, mm4                ;-mm4
  pmaxsw mm0, mm1               ;|src|
  pmaxsw mm3, mm4
;  nop2
  psraw mm1, 15                 ;sign src
  psraw mm4, 15
  psllw mm0, 4                  ; level << 4
  psllw mm3, 4
  paddw mm0, [rdi + 128 + 8*rsi+112]    ;mm0 is to be divided
  paddw mm3, [rdi + 128 + 8*rsi+120]    ;intra1 contains fix for division by 1
  movq mm5, [rdi + 384 + 8*rsi+112]     ;with rounding down
  movq mm7, [rdi + 384 + 8*rsi+120]
  pmulhuw mm5, mm0
  pmulhuw mm7, mm3              ;mm7: first approx of division
;  mov esp, esp
  movq mm2, [rdi + 8*rsi+112]
  movq mm6, [rdi + 8*rsi+120]   ; divs for q<=16
  pmullw mm2, mm5               ;test value <= original
  pmullw mm6, mm7
  psubw mm0, mm2                ;mismatch
  psubw mm3, mm6
;  nop4
  lea r11, [quantd wrt rip]
  movq mm2, [r11 + rcx * 8 - 8]
  paddw mm5, mm2                ;first approx with quantd
  paddw mm7, mm2
;  mov esp, esp
  pmulhuw mm0, [rdi + 256 + 8*rsi+112]  ;correction
  pmulhuw mm3, [rdi + 256 + 8*rsi+120]
  paddw mm5, mm0                ;final result with quantd
  paddw mm7, mm3
  pxor mm0, mm0
  pxor mm3, mm3
;  mov esp, esp
  psrlw mm5, 1                  ; (level + quantd) /2 (quant = 1)
  psrlw mm7, 1
  pxor mm5, mm1                 ; mm0 *= sign(mm0)
  pxor mm7, mm4                 ;
  psubw mm5, mm1                ; undisplace
  psubw mm7, mm4                ;
  movq [rdx + 8*rsi+112], mm5
  movq [rdx + 8*rsi +120], mm7
  add rsi, byte 2
  jng near .q1loop
  jmp near .done

ALIGN 8
.lloop
  movq mm1, [rax + 8*rsi+112]   ; mm0 = [1st]
  psubw mm0, mm1                ;-mm1
  movq mm4, [rax + 8*rsi+120]
  psubw mm3, mm4                ;-mm4
  pmaxsw mm0, mm1               ;|src|
  pmaxsw mm3, mm4
;  nop2
  psraw mm1, 15                 ;sign src
  psraw mm4, 15
  psllw mm0, 4                  ; level << 4
  psllw mm3, 4                  ;
  paddw mm0, [rdi + 128 + 8*rsi+112]    ;mm0 is to be divided, intra1 contains fix for division by 1
  paddw mm3, [rdi + 128 + 8*rsi+120]
  movq mm5, [rdi + 384 + 8*rsi+112]
  movq mm7, [rdi + 384 + 8*rsi+120]
  pmulhuw mm5, mm0
  pmulhuw mm7, mm3              ;mm7: first approx of division
;  mov esp, esp
  movq mm2, [rdi + 8*rsi+112]
  movq mm6, [rdi + 8*rsi+120]
  pmullw mm2, mm5               ;test value <= original
  pmullw mm6, mm7
  psubw mm0, mm2                ;mismatch
  psubw mm3, mm6
;  nop4
  lea r11, [quantd wrt rip]
  movq mm2, [r11 + rcx * 8 - 8]
  lea r11, [mmx_div wrt rip]
  movq mm6, [r11 + rcx * 8 - 8] ; divs for q<=16
  paddw mm5, mm2                ;first approx with quantd
  paddw mm7, mm2
;  mov esp, esp
  pmulhuw mm0, [rdi + 256 + 8*rsi+112]  ;correction
  pmulhuw mm3, [rdi + 256 + 8*rsi+120]
  paddw mm5, mm0                ;final result with quantd
  paddw mm7, mm3
  pxor mm0, mm0
  pxor mm3, mm3
;  mov esp, esp
  pmulhuw mm5, mm6              ; mm0 = (mm0 / 2Q) >> 16
  pmulhuw mm7, mm6              ; (level + quantd) / quant (0<quant<32)
  psrlw mm5, 1                  ; (level + quantd) / (2*quant)
  psrlw mm7, 1
  pxor mm5, mm1                 ; mm0 *= sign(mm0)
  pxor mm7, mm4                 ;
  psubw mm5, mm1                ; undisplace
  psubw mm7, mm4                ;
  movq [rdx + 8*rsi+112], mm5
  movq [rdx + 8*rsi +120], mm7
  add rsi, byte 2
  jng near .lloop
  jmp near .done
.endfunc
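;-----------------------------------------------------------------------------
; Summary of the intra path above (added comment, not part of the original
; source). As far as the tables used suggest, each AC coefficient is formed
; roughly as
;
;   level    = |data[i]| << 4
;   coeff[i] = sign(data[i]) * (level / intra_matrix[i] + quantd[quant])
;                            / (2 * quant)
;
; where intra_matrix is the matrix stored at the start of mpeg_matrices, the
; division by its entries uses the pre-scaled copies at rdi+128/+256/+384,
; and the division by 2*quant uses mmx_divs or mmx_div; coeff[0] is handled
; separately with dcscalar in the .done block.
;-----------------------------------------------------------------------------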
;-----------------------------------------------------------------------------
;
; uint32_t quant_mpeg_inter_x86_64(int16_t * coeff,
;                                  const int16_t const * data,
;                                  const uint32_t quant,
;                                  const uint16_t *mpeg_matrices);
;
; Ported from its 32bit xmm cousin
;
;-----------------------------------------------------------------------------

ALIGN 16
quant_mpeg_inter_x86_64:
  mov rax, rsi                  ; data
  mov r8, rdi                   ; save coeff
  mov rdi, rcx                  ; mpeg_matrices
  mov rcx, rdx                  ; quant
  mov rdx, r8                   ; coeff
  push rbx