📄 taudiofilterconvert.asm.svn-base
字号:
; optimized floating point conversion routines from Avisynth (c) Klaus Post 2001 - 2004BITS 32%macro cglobal 1 %ifdef PREFIX global _%1 %define %1 _%1 %else global %1 %endif%endmacrosection .textalign 16cglobal convert_16_32_mmx;[esp+8+ 4] - inbuf;[esp+8+ 8] - outbuf;[esp+8+12] - c_loopconvert_16_32_mmx push esi push edi push ebx xor eax,eax mov ebx,[esp+12+12] shl ebx,1 mov esi,[esp+12+ 4] mov edi,[esp+12+ 8] pxor mm0,mm0 pxor mm1,mm1 align 16sl1_mmx: punpcklwd mm0,[esi+eax] punpcklwd mm1,[esi+eax+4] movq [edi+2*eax],mm0 movq [edi+2*eax+8],mm1 add eax,8 cmp eax,ebx jne sl1_mmx emms pop ebx pop edi pop esi retalign 16multiplier_float_16 dd 32768.0,32768.0,32768.0,32768.0cglobal convert_float_16_3dnow;[esp+12+ 4] - inbuf;[esp+12+ 8] - samples;[esp+12+12] - c_loopconvert_float_16_3dnow push esi push ebx push edi xor eax,eax ; count mov ebx, [esp+12+12] shl ebx, 1 ; in output bytes (*2) mov esi, [esp+12+4]; mov edi, [esp+12+8]; movq mm7,[multiplier_float_16] ;pshufw mm7,mm7, 01000100b align 16c16f_loop: movq mm1, [esi+eax*2] ; b b | a a movq mm2, [esi+eax*2+8] ; d d | c c pfmul mm1,mm7 ; x * 32 bit pfmul mm2,mm7 ; x * 32 bit pf2id mm1, mm1 ; xb=int(b) | xa=int(a) pf2id mm2, mm2 ; xb=int(d) | xa=int(c) packssdw mm1,mm2 movq [edi+eax], mm1 ; store xb | xa add eax,8 cmp eax, ebx jne c16f_loop emms pop edi pop ebx pop esi retalign 16multiplier_float_32 dd 2147483647.0,2147483647.0,2147483647.0,2147483647.0limit_float_32 dd 2147483520.0,2147483520.0,2147483520.0,2147483520.0cglobal convert_float_32_3dnow;[esp+12+ 4] - inbuf;[esp+12+ 8] - samples;[esp+12+12] - c_loopconvert_float_32_3dnow push esi push ebx push edi xor eax,eax ; count mov ebx, [esp+12+12] shl ebx, 2 ; in output bytes (*4) mov esi, [esp+12+4]; mov edi, [esp+12+8]; movq mm7,[multiplier_float_32] ;pshufw mm7,mm7, 01000100b align 16c32f_loop: movq mm1, [esi+eax] ; b b | a a movq mm2, [esi+eax+8] ; d d | c c pfmul mm1,mm7 ; x * 32 bit pfmul mm2,mm7 ; x * 32 bit pf2id mm1, mm1 ; xb=int(b) | xa=int(a) pf2id mm2, mm2 ; xb=int(d) | xa=int(c) movq [edi+eax], mm1 ; store xb | xa movq [edi+eax+8], mm2 ; store xd | xc add eax,16 cmp eax, ebx jne c32f_loop emms pop edi pop ebx pop esi retalign 16divisor_float_16 dd 0.000030517578125,0.000030517578125cglobal convert_16_float_3dnow;[esp+12+ 4] - inbuf;[esp+12+ 8] - samples;[esp+12+12] - c_loopconvert_16_float_3dnow push esi push ebx push edi xor eax,eax ; count mov ebx, [esp+12+12] shl ebx, 1 ; Number of input bytes. mov esi, [esp+12+4]; mov edi, [esp+12+8]; movq mm7,[divisor_float_16] ;pshufw mm7,mm7, 01000100b pxor mm6,mm6 align 16c16_loop_3dnow movq mm0, [esi+eax] ; d c | b a movq mm1, mm0 punpcklwd mm0, mm6 ; b b | a a punpckhwd mm1, mm6 ; d d | c c pi2fw mm0, mm0 ; xb=float(b) | xa=float(a) pi2fw mm1, mm1 ; xb=float(d) | xa=float(c) pfmul mm0,mm7 ; x / 32768.0 pfmul mm1,mm7 ; x / 32768.0 movq [edi+eax*2], mm0 ; store xb | xa movq [edi+eax*2+8], mm1 ; store xd | xc add eax,8 cmp eax, ebx jne c16_loop_3dnow emms pop edi pop ebx pop esi retalign 16divisor_float_32 dd 4.656612875245796924105750827168e-10,4.656612875245796924105750827168e-10cglobal convert_32_float_3dnow;[esp+12+ 4] - inbuf;[esp+12+ 8] - samples;[esp+12+12] - c_loopconvert_32_float_3dnow push esi push ebx push edi xor eax,eax ; count mov ebx, [esp+12+12] shl ebx, 2 ; in input bytes (*4) mov esi, [esp+12+4]; mov edi, [esp+12+8]; movq mm7,[divisor_float_32] ;pshufw mm7,mm7, 01000100b align 16c32_loop_3dnow: movq mm1, [esi+eax] ; b b | a a movq mm2, [esi+eax+8] ; d d | c c pi2fd mm1, mm1 ; xb=float(b) | xa=float(a) pi2fd mm2, mm2 ; xb=float(d) | xa=float(c) pfmul mm1,mm7 ; x / 32768.0 pfmul mm2,mm7 ; x / 32768.0 movq [edi+eax], mm1 ; store xb | xa movq [edi+eax+8], mm2 ; store xd | xc add eax,16 cmp eax, ebx jne c32_loop_3dnow emms pop edi pop ebx pop esi ret
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -