📄 taudiofilter.cpp.svn-base
字号:
/*
 * Copyright (c) 2003-2006 Milan Cutka
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "stdafx.h"
#include "TaudioFilter.h"
#include "IffdshowBase.h"
#include "IffdshowDec.h"
#include "IffdshowDecAudio.h"
#include "dither.h"
#include "TfilterSettings.h"
#include "Tconfig.h"
#include "TpresetSettingsAudio.h"
#include "simd.h"

// Forwards Ideci to the Tfilter base, stores Iparent (cast to TaudioFilters*)
// as the parent, and resets the dither state: no dither object yet and both
// cached settings set to -1.
TaudioFilter::TaudioFilter(IffdshowBase *Ideci,Tfilters *Iparent):Tfilter(Ideci),parent((TaudioFilters*)Iparent)
{
 deciA=deci;
 dither=NULL;
 oldnoiseshaping=oldsfout=-1;
}

// Releases the dither object; deleting a null pointer is a no-op, so no
// explicit null test is needed.
TaudioFilter::~TaudioFilter()
{
 delete dither;
}

// Asks buf for enough bytes to hold numsamples blocks of sf.blockAlign() bytes
// each (at least one block, even when numsamples is 0) and returns whatever
// buf.alloc() returns.
void* TaudioFilter::alloc_buffer(const TsampleFormat &sf,size_t numsamples,Tbuffer &buf)
{
 size_t neededlen=std::max(size_t(1),numsamples)*sf.blockAlign();
 return buf.alloc(neededlen);
}

//----------------------- generic integer conversions --------------------
// Converts count samples from in to out by shifting each sample by the
// difference between the two bit depths reported by TsampleFormatInfo:
// left when the output is wider, right otherwise. Returns out.
template<class Tin,class Tout> static Tout* convert(const Tin *in,Tout * const out,size_t count)
{
 unsigned int bpsIn =TsampleFormatInfo<Tin >::bps();
 unsigned int bpsOut=TsampleFormatInfo<Tout>::bps();
 if (bpsIn<bpsOut)
  for (size_t i=0;i<count;i++)
   out[i]=Tout(Tout(in[i])<<(bpsOut-bpsIn));
 else
  for (size_t i=0;i<count;i++)
   out[i]=Tout(in[i]>>(bpsIn-bpsOut));
 return out;
}

//---------------------------- int16 -> int32 ----------------------------
#ifndef WIN64
// Defined outside this file (C linkage).
extern "C" void convert_16_32_mmx(const int16_t *inbuf,int32_t *outbuf,unsigned int c_loop);
#else
// WIN64 replacement for the external routine, written with the instruction-named
// helpers (presumably from simd.h - confirm).
// c_loop is a sample count; each iteration consumes 8 input bytes (4 samples)
// and stores 16 output bytes, so c_loop must be a multiple of 4.
static void convert_16_32_mmx(const int16_t *inbuf,int32_t *outbuf,unsigned int c_loop)
{
 int eax=0;
 int ebx=c_loop;
 ebx<<=1; // number of input bytes to process
 const unsigned char *esi=(const unsigned char*)inbuf;
 __m64 mm0=_mm_setzero_si64(),mm1=mm0;
 for (unsigned char *edi=(unsigned char*)outbuf;eax!=ebx;eax+=8)
  {
   punpcklwd (mm0,esi+eax);
   punpcklwd (mm1,esi+eax+4);
   movq (edi+2*eax,mm0);
   movq (edi+2*eax+8,mm1);
  }
 _mm_empty();
}
#endif

// int16 -> int32 specialization. When the MMX flag is set in Tconfig::cpu_flags,
// the largest multiple of 4 samples goes through convert_16_32_mmx and the
// remaining 0-3 samples are shifted left by 16 in the scalar loop; without MMX
// every sample takes the scalar loop. Returns outbuf.
template<> int32_t* convert<int16_t,int32_t>(const int16_t *inbuf,int32_t * const outbuf,size_t count)
{
 size_t c_miss=Tconfig::cpu_flags&FF_CPU_MMX?count&3:count;
 size_t c_loop=count-c_miss;
 if (c_loop)
  convert_16_32_mmx(inbuf,outbuf,(unsigned int)c_loop);
 for (size_t i=0;i<c_miss;i++)
  outbuf[i+c_loop]=int32_t(inbuf[i+c_loop])<<16;
 return outbuf;
}

//---------------------------- int32 -> int16 ----------------------------
// SIMD int32 -> int16 kernel parameterized on a register policy _mm that
// supplies the register type (_mm::__m), its size in bytes (_mm::size) and
// empty(). Each iteration loads two registers of int32 samples, arithmetic-
// shifts them right by 16, packs them with signed saturation and stores one
// register of int16 samples.
// c_loop is a sample count and must be a multiple of _mm::size/2.
template<class _mm> static void convert_32_16_simd(const int32_t *inbuf,int16_t *outbuf,unsigned int c_loop)
{
 int eax=0;
 int ebx=c_loop;
 ebx<<=1; // number of output bytes to produce
 const unsigned char *esi=(const unsigned char*)inbuf;
 for (unsigned char *edi=(unsigned char*)outbuf;eax!=ebx;eax+=_mm::size)
  {
   typename _mm::__m mm0,mm1;
   movq (mm0,esi+2*eax);
   movq (mm1,esi+2*eax+_mm::size);
   psrad (mm0,16);
   psrad (mm1,16);
   packssdw (mm0,mm1);
   movq (edi+eax,mm0);
  }
 _mm::empty();
}

// int32 -> int16 specialization. The SSE2 kernel is chosen only when it is
// compiled in, the CPU flag is set and both buffers are 16-byte aligned;
// otherwise the MMX kernel is used when the MMX flag is set. Samples that do
// not fill a whole SIMD iteration (count&7 for SSE2, count&3 for MMX, all of
// them without MMX) are converted by the scalar >>16 loop. Returns outbuf.
template<> int16_t* convert<int32_t,int16_t>(const int32_t *inbuf,int16_t * const outbuf,size_t count)
{
#ifdef __SSE2__
 bool sse2=Tconfig::cpu_flags&FF_CPU_SSE2 && (intptr_t(inbuf)&15)==0 &&(intptr_t(outbuf)&15)==0;
#else
 bool sse2=false;
#endif
 size_t c_miss=sse2?count&7:(Tconfig::cpu_flags&FF_CPU_MMX?count&3:count);
 size_t c_loop=count-c_miss;
 if (c_loop)
  #ifdef __SSE2__
  if (sse2)
   convert_32_16_simd<Tsse2>(inbuf,outbuf,(unsigned int)c_loop);
  else
  #endif
   convert_32_16_simd<Tmmx>(inbuf,outbuf,(unsigned int)c_loop);
 for (size_t i=0;i<c_miss;i++)
  outbuf[i+c_loop]=int16_t(inbuf[i+c_loop]>>16);
 return outbuf;
}

//---------------------------- float -> int16 ----------------------------
#ifndef WIN64
 // Defined outside this file (C linkage).
 extern "C" void convert_float_16_3dnow(const float *inbuf,int16_t *samples,unsigned int c_loop);
#else
 #define convert_float_16_3dnow NULL
#endif

// SSE float -> int16 kernel: scales by 32768, clamps the positive side with
// minps and packs to int16 with signed saturation.
// Handles 4 samples (16 input bytes, 8 output bytes) per iteration, so c_loop
// (a sample count) must be a multiple of 4. Input is read with unaligned loads.
static void convert_float_16_sse(const float *inbuf,int16_t *samples,unsigned int c_loop)
{
 const __m128 multiplier_float_16=_mm_set_ps1(32768.0);
 __m128 xmm7;
 movaps (xmm7, multiplier_float_16);
 const unsigned char *eax=(const unsigned char*)inbuf;
 int ebx=0;
 int ecx=c_loop;
 ecx<<=1; // number of output bytes to produce
 for (unsigned char *edx=(unsigned char*)samples;ecx!=ebx;ebx+=8)
  {
   __m128 xmm0,xmm1;
   movups (xmm0, eax+ebx*2);
   mulps (xmm0, xmm7);
   minps (xmm0, xmm7);// ; x=min(x, MAX_SHORT) -- +ve Signed Saturation > 2^31
   movhlps (xmm1, xmm0);
   __m64 mm0,mm1;
   cvtps2pi( mm0, xmm0);
   cvtps2pi( mm1, xmm1);
   packssdw (mm0, mm1);
   movq (edx+ebx, mm0);
  }
 _mm_empty();
}

#ifdef __SSE2__
// SSE2 float -> int16 kernel: same scaling and clamping as the SSE version but
// handles 8 samples (32 input bytes, 16 output bytes) per iteration, so c_loop
// must be a multiple of 8. The store uses movdqa, so samples is expected to be
// 16-byte aligned; input is read with unaligned loads.
static void convert_float_16_sse2(const float *inbuf,int16_t *samples,unsigned int c_loop)
{
 const __m128 multiplier_float_16=_mm_set_ps1(32768.0);
 __m128 xmm7;
 movaps (xmm7, multiplier_float_16);
 const unsigned char *eax=(const unsigned char*)inbuf;
 int ecx=c_loop;
 int edx=0;
 ecx<<=1; // number of output bytes to produce
 for (unsigned char *edi=(unsigned char*)samples;ecx!=edx;edx+=16)
  {
   __m128 xmm0,xmm1;
   movups (xmm0, eax+edx*2);      // xd | xc | xb | xa
   movups (xmm1, eax+edx*2+16);   // xh | xg | xf | xe
   mulps (xmm0, xmm7);            // *= MAX_SHORT
   mulps (xmm1, xmm7);            // *= MAX_SHORT
   minps (xmm0, xmm7);            // x=min(x, MAX_SHORT) -- +ve Signed Saturation > 2^31
   minps (xmm1, xmm7);            // x=min(x, MAX_SHORT) -- +ve Signed Saturation > 2^31
   __m128i xmm2,xmm3;
   cvtps2dq (xmm2, xmm0);         // float -> dd | cc | bb | aa
   cvtps2dq (xmm3, xmm1);         // float -> hh | gg | ff | ee
   packssdw (xmm2, xmm3);         // h g | f e | d c | b a -- +/-ve Signed Saturation > 2^15
   movdqa (edi+edx-16+16, xmm2);  // store h g | f e | d c | b a
  }
}
#else
 #define convert_float_16_sse2 NULL
#endif

//---------------------------- float -> int32 ----------------------------
#ifndef WIN64
 // Defined outside this file (C linkage).
 extern "C" void convert_float_32_3dnow(const float *inbuf,int32_t *samples,unsigned int c_loop);
#else
 #define convert_float_32_3dnow NULL
#endif

// SSE float -> int32 kernel: scales by 2147483647.0f and converts with
// cvtps2pi. The cmpnltps mask is XORed into the converted values so that
// lanes which were not below the multiplier are flipped from 0x80000000 to
// 0x7FFFFFFF.
// Handles 4 samples (16 bytes in, 16 bytes out) per iteration, so c_loop
// (a sample count) must be a multiple of 4.
static void convert_float_32_sse(const float *inbuf,int32_t *samples,unsigned int c_loop)
{
 const __m128 multiplier_float_32=_mm_set_ps1(2147483647.0f);
 __m128 xmm7=multiplier_float_32;
 const unsigned char *eax=(const unsigned char*)inbuf;
 int ecx= c_loop;
 int edx=0;
 ecx<<=2; // number of bytes to process (same for input and output)
 for (unsigned char *edi=(unsigned char*)samples;ecx!=edx;edx+=16)
  {
   __m128 xmm0,xmm1;
   movups (xmm0, eax+edx);        //; xd | xc | xb | xa
   mulps (xmm0, xmm7 );           //; *= MAX_INT
   movhlps (xmm1, xmm0 );         //; xx | xx | xd | xc
   __m64 mm0,mm1;
   cvtps2pi (mm0, xmm0 );         //; float -> bb | aa -- -ve Signed Saturation
   cmpnltps (xmm0, xmm7 );        //;!(xd | xc | xb | xa < MAX_INT)
   cvtps2pi (mm1, xmm1 );         // float -> dd | cc -- -ve Signed Saturation
                                  // md | mc | mb | ma -- YUCK!!!
   pxor (mm0, &xmm0);             // 0x80000000 -> 0x7FFFFFFF if +ve saturation
   pxor (mm1, (uint8_t*)&xmm0+8);
   movq (edi+edx-16+16, mm0);     //store bb | aa
   movq (edi+edx-8+16, mm1);      // store dd | cc
  }
 _mm_empty();
}

#ifdef __SSE2__
// SSE2 float -> int32 kernel: same scaling and saturation fix-up as the SSE
// version but handles 8 samples (32 bytes in, 32 bytes out) per iteration, so
// c_loop must be a multiple of 8. Stores use movdqa, so samples is expected to
// be 16-byte aligned; input is read with unaligned loads.
static void convert_float_32_sse2(const float *inbuf,int32_t *samples,unsigned int c_loop)
{
 const __m128 multiplier_float_32=_mm_set_ps1(2147483647.0f);
 __m128 xmm7=multiplier_float_32;
 //movss (xmm7, multiplier_float_32);
 //shufps xmm7, xmm7, 00000000b
 const unsigned char *eax=(const unsigned char*)inbuf;
 int ecx=c_loop;
 int edx=0;
 ecx<<= 2; // number of bytes to process (same for input and output)
 for (unsigned char *edi=(unsigned char*)samples;ecx!=edx;edx+=32)
  {
   __m128 xmm0,xmm1;
   movups (xmm0, eax+edx);        // xd | xc | xb | xa
   movups (xmm1, eax+edx+16);     // xh | xg | xf | xe
   mulps (xmm0, xmm7);            // *= MAX_INT
   mulps (xmm1, xmm7);            // *= MAX_INT
   // Bloody Intel and their "indefinite integer value" it
   // is no use to man or beast, we need proper saturation
   // like AMD does with their 3DNow instructions. Grrr!!!
   __m128i xmm2,xmm3;
   cvtps2dq (xmm2, xmm0);         // float -> dd | cc | bb | aa -- -ve Signed Saturation
   cvtps2dq (xmm3, xmm1);         // float -> hh | gg | ff | ee -- -ve Signed Saturation
   cmpnltps (xmm0, xmm7);         // !(xd | xc | xb | xa < MAX_INT)
   cmpnltps (xmm1, xmm7);         // !(xh | xg | xf | xe < MAX_INT)
   pxor (xmm2, _mm_castps_si128(xmm0)); // 0x80000000 -> 0x7FFFFFFF if +ve saturation
   pxor (xmm3, _mm_castps_si128(xmm1));
   movdqa (edi+edx-32+32, xmm2);  // store dd | cc | bb | aa
   movdqa (edi+edx-16+32, xmm3);  // store hh | gg | ff | ee
  }
}
#else
 #define convert_float_32_sse2 NULL
#endif

// Chooses a float -> Tout conversion routine at run time from the supplied
// function pointers and Tconfig::cpu_flags.
// NOTE(review): this definition continues past the end of the visible source;
// only the SSE2 branch and the start of the 3DNow! branch are shown here.
template<class Tout> struct TconvertFromFloat
{
 typedef void (*TconvertFromFloatFc)(const float *inbuf,Tout *samples,unsigned int c_loop);
 static Tout* convert(const float *inbuf,Tout *samples,size_t count,TconvertFromFloatFc fc_3dnow,TconvertFromFloatFc fc_sse,TconvertFromFloatFc fc_sse2)
  {
   size_t c_miss,c_loop;
   if (fc_sse2 && Tconfig::cpu_flags&FF_CPU_SSE2 && !((intptr_t)samples&3))
    {
     // Convert leading samples one at a time until the output pointer is
     // 16-byte aligned (or the input runs out).
     while (((intptr_t)samples&15) && count)
      {
       *samples++=TsampleFormatInfo<Tout>::limit(*inbuf++*TsampleFormatInfo<Tout>::max());
       count-=1;
      }
     c_miss=count&7;
     c_loop=count-c_miss;
     if (c_loop)
      fc_sse2(inbuf,samples,(unsigned int)c_loop);
    }
  #ifndef WIN64
   else if (fc_3dnow && Tconfig::cpu_flags&FF_CPU_3DNOW)
    {
     c_miss=count&3;
     c_loop=count-c_miss;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -