⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 taudiofilterconvert.asm

📁 从FFMPEG转换而来的H264解码程序,VC下编译..
💻 ASM
字号:
; optimized floating point conversion routines from Avisynth (c) Klaus Post 2001 - 2004

BITS 32

; cglobal <name>
;   Export <name> as a global symbol.
;   Without PREFIX the symbol is exported under its plain name.
;   With PREFIX defined it is exported as _<name>, and <name> is %define'd
;   to _<name> so that later uses of the plain name in this file (including
;   the label that defines the routine) expand to the prefixed symbol.
%macro cglobal 1
%ifndef PREFIX
    global  %1
%else
    global  _%1
    %define %1 _%1
%endif
%endmacro

section .text

align 16

cglobal convert_16_32_mmx
;-----------------------------------------------------------------------
; convert_16_32_mmx
;   Widen 16-bit samples to 32-bit: every output dword holds the input
;   word in its upper 16 bits and zero in its lower 16 bits.
; Stack arguments (offsets are valid after the three pushes below):
;   [esp+12+ 4] - inbuf   : source, 16-bit samples
;   [esp+12+ 8] - outbuf  : destination, 32-bit samples
;   [esp+12+12] - c_loop  : sample count. Four samples are handled per
;                           iteration, so a count that is not a multiple
;                           of 4 is processed rounded up to the next one;
;                           a count of 0 returns without touching memory.
; Clobbers: eax, mm0, mm1, flags (esi, edi, ebx are saved and restored).
;-----------------------------------------------------------------------
convert_16_32_mmx:
        push    esi
        push    edi
        push    ebx

        xor     eax, eax                ; eax = input byte offset
        mov     ebx, [esp+12+12]
        shl     ebx, 1                  ; ebx = number of input bytes
        test    ebx, ebx
        jz      .done                   ; empty input: nothing to convert
        mov     esi, [esp+12+ 4]
        mov     edi, [esp+12+ 8]
        align 16
.next4:
        ; The destinations must be zero before EVERY unpack: punpcklwd
        ; keeps the destination's own low words as the low half of each
        ; result dword, so a register left over from the previous
        ; iteration would leak an old sample into the low 16 bits.
        pxor    mm0, mm0
        pxor    mm1, mm1
        punpcklwd mm0, [esi+eax]        ; hi dword = s1<<16, lo dword = s0<<16
        punpcklwd mm1, [esi+eax+4]      ; hi dword = s3<<16, lo dword = s2<<16
        movq    [edi+2*eax], mm0        ; output offset is twice the input offset
        movq    [edi+2*eax+8], mm1
        add     eax, 8
        cmp     eax, ebx
        jb      .next4                  ; unsigned compare: these are byte offsets
        emms                            ; release the MMX state before returning

.done:
        pop     ebx
        pop     edi
        pop     esi
        ret

align 16

; Scale factor applied before the float -> int conversion. Four copies are
; stored; the routine below loads the first two into mm7.
multiplier_float_16 dd 32768.0,32768.0,32768.0,32768.0

cglobal convert_float_16_3dnow
;-----------------------------------------------------------------------
; convert_float_16_3dnow
;   Convert 32-bit float samples to 16-bit integer samples: each sample
;   is multiplied by 32768.0, converted to a 32-bit integer with pf2id
;   and narrowed to 16 bits with signed saturation (packssdw).
; Stack arguments (offsets are valid after the three pushes below):
;   [esp+12+ 4] - inbuf   : source, 32-bit floats
;   [esp+12+ 8] - outbuf  : destination, 16-bit samples
;   [esp+12+12] - c_loop  : sample count. Four samples are handled per
;                           iteration, so a count that is not a multiple
;                           of 4 is processed rounded up to the next one;
;                           a count of 0 returns without touching memory.
; Uses 3DNow! instructions (pfmul, pf2id).
; Clobbers: eax, mm1, mm2, mm7, flags (esi, ebx, edi are saved/restored).
;-----------------------------------------------------------------------
convert_float_16_3dnow:
        push    esi
        push    ebx
        push    edi

        xor     eax, eax                ; eax = output byte offset
        mov     ebx, [esp+12+12]
        shl     ebx, 1                  ; ebx = output bytes (2 per sample)
        test    ebx, ebx
        jz      .done                   ; empty input: nothing to convert
        mov     esi, [esp+12+4]
        mov     edi, [esp+12+8]
        movq    mm7, [multiplier_float_16]
        align 16
.next4:
        movq    mm1, [esi+eax*2]        ; floats b | a (input offset = 2 * output offset)
        movq    mm2, [esi+eax*2+8]      ; floats d | c
        pfmul   mm1, mm7                ; scale to the 16-bit range
        pfmul   mm2, mm7
        pf2id   mm1, mm1                ; int32(b) | int32(a)
        pf2id   mm2, mm2                ; int32(d) | int32(c)
        packssdw mm1, mm2               ; four words d c b a, signed-saturated
        movq    [edi+eax], mm1
        add     eax, 8
        cmp     eax, ebx
        jb      .next4                  ; unsigned compare: these are byte offsets
        emms                            ; release the MMX state before returning

.done:
        pop     edi
        pop     ebx
        pop     esi
        ret

align 16

; Scale factor applied before the float -> int conversion. Four copies are
; stored; the routine below loads the first two into mm7.
multiplier_float_32 dd 2147483647.0,2147483647.0,2147483647.0,2147483647.0
; Not referenced by the routine below; kept in case other code uses it.
limit_float_32      dd 2147483520.0,2147483520.0,2147483520.0,2147483520.0

cglobal convert_float_32_3dnow
;-----------------------------------------------------------------------
; convert_float_32_3dnow
;   Convert 32-bit float samples to 32-bit integer samples: each sample
;   is multiplied by multiplier_float_32 and converted with pf2id.
;   NOTE(review): products outside the int32 range rely on pf2id's own
;   out-of-range behaviour; no explicit clamp is done here - confirm
;   against the 3DNow! manual.
; Stack arguments (offsets are valid after the three pushes below):
;   [esp+12+ 4] - inbuf   : source, 32-bit floats
;   [esp+12+ 8] - outbuf  : destination, 32-bit integers
;   [esp+12+12] - c_loop  : sample count. Four samples are handled per
;                           iteration, so a count that is not a multiple
;                           of 4 is processed rounded up to the next one;
;                           a count of 0 returns without touching memory.
; Uses 3DNow! instructions (pfmul, pf2id).
; Clobbers: eax, mm1, mm2, mm7, flags (esi, ebx, edi are saved/restored).
;-----------------------------------------------------------------------
convert_float_32_3dnow:
        push    esi
        push    ebx
        push    edi

        xor     eax, eax                ; eax = byte offset (same for input and output)
        mov     ebx, [esp+12+12]
        shl     ebx, 2                  ; ebx = total bytes (4 per sample)
        test    ebx, ebx
        jz      .done                   ; empty input: nothing to convert
        mov     esi, [esp+12+4]
        mov     edi, [esp+12+8]
        movq    mm7, [multiplier_float_32]
        align 16
.next4:
        movq    mm1, [esi+eax]          ; floats b | a
        movq    mm2, [esi+eax+8]        ; floats d | c
        pfmul   mm1, mm7                ; scale to the 32-bit range
        pfmul   mm2, mm7
        pf2id   mm1, mm1                ; int32(b) | int32(a)
        pf2id   mm2, mm2                ; int32(d) | int32(c)
        movq    [edi+eax], mm1
        movq    [edi+eax+8], mm2
        add     eax, 16
        cmp     eax, ebx
        jb      .next4                  ; unsigned compare: these are byte offsets
        emms                            ; release the MMX state before returning

.done:
        pop     edi
        pop     ebx
        pop     esi
        ret

align 16

; 1/32768 for both lanes of mm7: multiplying by it divides by 32768.
divisor_float_16 dd 0.000030517578125,0.000030517578125

cglobal convert_16_float_3dnow
;-----------------------------------------------------------------------
; convert_16_float_3dnow
;   Convert 16-bit integer samples to 32-bit floats: each sample is
;   converted with pi2fw and multiplied by 1/32768.
; Stack arguments (offsets are valid after the three pushes below):
;   [esp+12+ 4] - inbuf   : source, 16-bit samples
;   [esp+12+ 8] - outbuf  : destination, 32-bit floats
;   [esp+12+12] - c_loop  : sample count. Four samples are handled per
;                           iteration, so a count that is not a multiple
;                           of 4 is processed rounded up to the next one;
;                           a count of 0 returns without touching memory.
; Uses 3DNow! instructions (pfmul) and pi2fw.
; Clobbers: eax, mm0, mm1, mm6, mm7, flags (esi, ebx, edi saved/restored).
;-----------------------------------------------------------------------
convert_16_float_3dnow:
        push    esi
        push    ebx
        push    edi

        xor     eax, eax                ; eax = input byte offset
        mov     ebx, [esp+12+12]
        shl     ebx, 1                  ; ebx = input bytes (2 per sample)
        test    ebx, ebx
        jz      .done                   ; empty input: nothing to convert
        mov     esi, [esp+12+4]
        mov     edi, [esp+12+8]
        movq    mm7, [divisor_float_16]
        pxor    mm6, mm6                ; zero source for the unpacks below
        align 16
.next4:
        movq    mm0, [esi+eax]          ; words d c | b a
        movq    mm1, mm0
        punpcklwd mm0, mm6              ; dwords 0:b | 0:a (sample in the low word)
        punpckhwd mm1, mm6              ; dwords 0:d | 0:c
        pi2fw   mm0, mm0                ; float(b) | float(a), taken from the low words
        pi2fw   mm1, mm1                ; float(d) | float(c)
        pfmul   mm0, mm7                ; x / 32768.0
        pfmul   mm1, mm7
        movq    [edi+eax*2], mm0        ; output offset is twice the input offset
        movq    [edi+eax*2+8], mm1
        add     eax, 8
        cmp     eax, ebx
        jb      .next4                  ; unsigned compare: these are byte offsets
        emms                            ; release the MMX state before returning

.done:
        pop     edi
        pop     ebx
        pop     esi
        ret

align 16

; Approximately 1/2147483647 for both lanes of mm7: multiplying by it
; scales a 32-bit integer sample down to roughly the -1.0 .. 1.0 range.
divisor_float_32 dd 4.656612875245796924105750827168e-10,4.656612875245796924105750827168e-10

cglobal convert_32_float_3dnow
;-----------------------------------------------------------------------
; convert_32_float_3dnow
;   Convert 32-bit integer samples to 32-bit floats: each sample is
;   converted with pi2fd and multiplied by divisor_float_32.
; Stack arguments (offsets are valid after the three pushes below):
;   [esp+12+ 4] - inbuf   : source, 32-bit integers
;   [esp+12+ 8] - outbuf  : destination, 32-bit floats
;   [esp+12+12] - c_loop  : sample count. Four samples are handled per
;                           iteration, so a count that is not a multiple
;                           of 4 is processed rounded up to the next one;
;                           a count of 0 returns without touching memory.
; Uses 3DNow! instructions (pi2fd, pfmul).
; Clobbers: eax, mm1, mm2, mm7, flags (esi, ebx, edi are saved/restored).
;-----------------------------------------------------------------------
convert_32_float_3dnow:
        push    esi
        push    ebx
        push    edi

        xor     eax, eax                ; eax = byte offset (same for input and output)
        mov     ebx, [esp+12+12]
        shl     ebx, 2                  ; ebx = total bytes (4 per sample)
        test    ebx, ebx
        jz      .done                   ; empty input: nothing to convert
        mov     esi, [esp+12+4]
        mov     edi, [esp+12+8]
        movq    mm7, [divisor_float_32]
        align 16
.next4:
        movq    mm1, [esi+eax]          ; int32 b | a
        movq    mm2, [esi+eax+8]        ; int32 d | c
        pi2fd   mm1, mm1                ; float(b) | float(a)
        pi2fd   mm2, mm2                ; float(d) | float(c)
        pfmul   mm1, mm7                ; x * divisor_float_32
        pfmul   mm2, mm7
        movq    [edi+eax], mm1
        movq    [edi+eax+8], mm2
        add     eax, 16
        cmp     eax, ebx
        jb      .next4                  ; unsigned compare: these are byte offsets
        emms                            ; release the MMX state before returning

.done:
        pop     edi
        pop     ebx
        pop     esi
        ret

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -