📄 taudiofilter.cpp
字号:
/*
* Copyright (c) 2003-2006 Milan Cutka
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "stdafx.h"
#include "TaudioFilter.h"
#include "IffdshowBase.h"
#include "IffdshowDec.h"
#include "IffdshowDecAudio.h"
#include "dither.h"
#include "TfilterSettings.h"
#include "Tconfig.h"
#include "TpresetSettingsAudio.h"
#include "simd.h"
// Constructs an audio filter.
//  Ideci   - decoder interface, forwarded to the Tfilter base class
//  Iparent - owning filter collection; stored after a cast to TaudioFilters*
// The dither object starts out unset and both "old" cache values start at -1.
TaudioFilter::TaudioFilter(IffdshowBase *Ideci,Tfilters *Iparent):
    Tfilter(Ideci),
    parent((TaudioFilters*)Iparent)
{
    // deci is not declared in this block; presumably it is set up by the Tfilter base.
    deciA=deci;
    dither=NULL;
    oldsfout=-1;
    oldnoiseshaping=-1;
}
// Releases the dither object, if one was ever created.
TaudioFilter::~TaudioFilter()
{
    // Applying delete to a null pointer does nothing, so no explicit check is needed.
    delete dither;
}
// Requests room for numsamples sample blocks of format sf from buf.
//  sf         - sample format; supplies the size of one block via blockAlign()
//  numsamples - number of sample blocks wanted; a request for 0 is treated as 1
//  buf        - buffer object that performs the allocation
// Returns whatever buf.alloc() returns for the computed byte count.
void* TaudioFilter::alloc_buffer(const TsampleFormat &sf,size_t numsamples,Tbuffer &buf)
{
    const size_t blocks=numsamples>size_t(1)?numsamples:size_t(1);
    const size_t bytes=blocks*sf.blockAlign();
    return buf.alloc(bytes);
}
//----------------------- generic integer conversions --------------------
template<class Tin,class Tout> static Tout* convert(const Tin *in,Tout * const out,size_t count)
{
unsigned int bpsIn =TsampleFormatInfo<Tin >::bps();
unsigned int bpsOut=TsampleFormatInfo<Tout>::bps();
if (bpsIn<bpsOut)
for (size_t i=0;i<count;i++)
out[i]=Tout(Tout(in[i])<<(bpsOut-bpsIn));
else
for (size_t i=0;i<count;i++)
out[i]=Tout(in[i]>>(bpsIn-bpsOut));
return out;
}
//---------------------------- int16 -> int32 ----------------------------
// int16 -> int32 widening kernel.
// Non-WIN64 builds: only declared here (extern "C"); the definition is not in this file.
// WIN64 builds: implemented below with the punpcklwd/movq helpers, which are not
// defined in this file (presumably simd.h wrappers mirroring the x86 instructions
// of the same name).
//  inbuf  - source 16-bit samples
//  outbuf - destination 32-bit samples
//  c_loop - number of samples to convert; 4 samples are handled per iteration and
//           the loop ends only on an exact match (eax!=ebx), so it must be a
//           multiple of 4 (the convert<int16_t,int32_t> caller strips count&3 first)
#ifndef WIN64
extern "C" void convert_16_32_mmx(const int16_t *inbuf,int32_t *outbuf,unsigned int c_loop);
#else
static void convert_16_32_mmx(const int16_t *inbuf,int32_t *outbuf,unsigned int c_loop)
{
// eax: byte offset into the input; the output is addressed at 2*eax.
int eax=0;
// ebx: total input size in bytes (2 bytes per sample).
int ebx=c_loop;
ebx<<=1;
const unsigned char *esi=(const unsigned char*)inbuf;
// NOTE(review): mm0/mm1 are zeroed only once, before the loop. If punpcklwd
// interleaves the destination's own low words with the loaded samples (as the x86
// instruction does), later iterations would carry earlier data into the low
// 16 bits of some outputs, unlike the scalar "<<16" path -- confirm against simd.h.
__m64 mm0=_mm_setzero_si64(),mm1=mm0;
for (unsigned char *edi=(unsigned char*)outbuf;eax!=ebx;eax+=8)
{
// Two input samples (4 bytes) feed each register ...
punpcklwd (mm0,esi+eax);
punpcklwd (mm1,esi+eax+4);
// ... and each register is stored as 8 output bytes (two 32-bit samples).
movq (edi+2*eax,mm0);
movq (edi+2*eax+8,mm1);
}
// Leave MMX state so x87 floating-point code can be used afterwards.
_mm_empty();
}
#endif
// int16 -> int32 specialization.
// When the CPU reports MMX, the largest multiple-of-4 prefix is handed to
// convert_16_32_mmx and only the remaining 0..3 samples are converted here;
// without MMX every sample goes through the scalar loop.
//  inbuf  - source 16-bit samples
//  outbuf - destination 32-bit samples (returned to the caller)
//  count  - number of samples to convert
template<> int32_t* convert<int16_t,int32_t>(const int16_t *inbuf,int32_t * const outbuf,size_t count)
{
    const size_t scalarCount=(Tconfig::cpu_flags&FF_CPU_MMX)?(count&3):count;
    const size_t simdCount=count-scalarCount;
    if (simdCount!=0)
        convert_16_32_mmx(inbuf,outbuf,(unsigned int)simdCount);
    // Scalar tail: place each 16-bit sample in the upper half of the 32-bit word.
    for (size_t pos=simdCount;pos<count;++pos)
        outbuf[pos]=int32_t(inbuf[pos])<<16;
    return outbuf;
}
//---------------------------- int32 -> int16 ----------------------------
// int32 -> int16 narrowing kernel, parameterized on a SIMD traits class _mm that
// supplies the register type (_mm::__m), its size in bytes (_mm::size) and a
// clean-up hook (_mm::empty()). The movq/psrad/packssdw helpers are not defined
// in this file (presumably simd.h wrappers around the matching x86 instructions).
//  inbuf  - source 32-bit samples
//  outbuf - destination 16-bit samples
//  c_loop - number of samples to convert; each iteration produces _mm::size output
//           bytes and the loop ends only on an exact match, so c_loop*2 must be a
//           multiple of _mm::size
template<class _mm> static void convert_32_16_simd(const int32_t *inbuf,int16_t *outbuf,unsigned int c_loop)
{
    const unsigned char *srcBytes=(const unsigned char*)inbuf;
    unsigned char *dstBytes=(unsigned char*)outbuf;
    // Output size in bytes (2 bytes per sample).
    int dstEnd=c_loop;
    dstEnd<<=1;
    // Byte offset into the output; the input sits at twice this offset.
    int dstOffset=0;
    while (dstOffset!=dstEnd)
    {
        typename _mm::__m first,second;
        // Load two registers' worth of 32-bit samples.
        movq (first,srcBytes+2*dstOffset);
        movq (second,srcBytes+2*dstOffset+_mm::size);
        // Keep the upper 16 bits of every sample ...
        psrad (first,16);
        psrad (second,16);
        // ... and pack both registers into one register of 16-bit samples.
        packssdw (first,second);
        movq (dstBytes+dstOffset,first);
        dstOffset+=_mm::size;
    }
    _mm::empty();
}
// int32 -> int16 specialization.
// Picks the widest usable SIMD kernel and converts the leftover samples one by one:
//  - SSE2 (only when compiled with __SSE2__, the CPU reports it and both buffers
//    are 16-byte aligned): blocks of 8 samples
//  - MMX: blocks of 4 samples
//  - neither: everything is handled by the scalar loop
//  inbuf  - source 32-bit samples
//  outbuf - destination 16-bit samples (returned to the caller)
//  count  - number of samples to convert
template<> int16_t* convert<int32_t,int16_t>(const int32_t *inbuf,int16_t * const outbuf,size_t count)
{
#ifdef __SSE2__
    const bool useSse2=(Tconfig::cpu_flags&FF_CPU_SSE2)
                       && (intptr_t(inbuf)&15)==0
                       && (intptr_t(outbuf)&15)==0;
#else
    const bool useSse2=false;
#endif
    size_t scalarCount;
    if (useSse2)
        scalarCount=count&7;
    else if (Tconfig::cpu_flags&FF_CPU_MMX)
        scalarCount=count&3;
    else
        scalarCount=count;
    const size_t simdCount=count-scalarCount;
    if (simdCount!=0)
    {
#ifdef __SSE2__
        if (useSse2)
            convert_32_16_simd<Tsse2>(inbuf,outbuf,(unsigned int)simdCount);
        else
#endif
            convert_32_16_simd<Tmmx>(inbuf,outbuf,(unsigned int)simdCount);
    }
    // Scalar tail: keep the upper 16 bits of each 32-bit sample.
    for (size_t pos=simdCount;pos<count;++pos)
        outbuf[pos]=int16_t(inbuf[pos]>>16);
    return outbuf;
}
//---------------------------- float -> int16 ----------------------------
// 3DNow! float -> int16 kernel.
// Non-WIN64 builds: only declared here (extern "C"); the definition is not in this file.
// WIN64 builds: the name expands to NULL, so any use of it as a function pointer
// yields a null pointer (presumably meaning "not available" -- confirm at call sites).
#ifndef WIN64
extern "C" void convert_float_16_3dnow(const float *inbuf,int16_t *samples,unsigned int c_loop);
#else
#define convert_float_16_3dnow NULL
#endif
// float -> int16 conversion kernel using SSE arithmetic and MMX-typed results.
// The helper calls (movaps, movups, mulps, minps, movhlps, cvtps2pi, packssdw, movq)
// are not defined in this file; presumably they are simd.h wrappers that mirror
// the x86 instructions they are named after.
//  inbuf   - source float samples
//  samples - destination 16-bit samples
//  c_loop  - number of samples to convert; 4 are handled per iteration and the
//            loop ends only on an exact match (ecx!=ebx), so it must be a multiple of 4
static void convert_float_16_sse(const float *inbuf,int16_t *samples,unsigned int c_loop)
{
// Scale factor for every sample; the same value doubles as the upper clamp below.
const __m128 multiplier_float_16=_mm_set_ps1(32768.0);
__m128 xmm7;
movaps (xmm7, multiplier_float_16);
const unsigned char *eax=(const unsigned char*)inbuf;
// ebx: byte offset into the int16 output; the float input is read at ebx*2.
int ebx=0;
// ecx: total output size in bytes (2 bytes per sample).
int ecx=c_loop;
ecx<<=1;
for (unsigned char *edx=(unsigned char*)samples;ecx!=ebx;ebx+=8)
{
__m128 xmm0,xmm1;
// Load four floats and scale them.
movups (xmm0, eax+ebx*2);
mulps (xmm0, xmm7);
minps (xmm0, xmm7);// ; x=min(x, MAX_SHORT) -- +ve Signed Saturation > 2^31
// Bring the upper two floats of xmm0 into xmm1 so that each cvtps2pi below
// has two values to convert.
movhlps (xmm1, xmm0);
__m64 mm0,mm1;
cvtps2pi( mm0, xmm0);
cvtps2pi( mm1, xmm1);
// Combine both pairs of 32-bit integers into four 16-bit samples.
packssdw (mm0, mm1);
// Store 8 bytes = 4 output samples.
movq (edx+ebx, mm0);
}
// Leave MMX state so x87 floating-point code can be used afterwards.
_mm_empty();
}
// float -> int16 conversion kernel, SSE2 variant (8 samples per iteration).
// Only compiled when __SSE2__ is defined; otherwise the name expands to NULL.
// The helper calls (movaps, movups, mulps, minps, cvtps2dq, packssdw, movdqa) are
// not defined in this file; presumably they are simd.h wrappers that mirror the
// x86 instructions they are named after.
//  inbuf   - source float samples
//  samples - destination 16-bit samples; written with movdqa, which presumably
//            requires 16-byte alignment (TconvertFromFloat::convert advances
//            samples to a 16-byte boundary before calling its SSE2 kernel)
//  c_loop  - number of samples to convert; the loop ends only on an exact match
//            (ecx!=edx), so it must be a multiple of 8
#ifdef __SSE2__
static void convert_float_16_sse2(const float *inbuf,int16_t *samples,unsigned int c_loop)
{
// Scale factor for every sample; the same value doubles as the upper clamp below.
const __m128 multiplier_float_16=_mm_set_ps1(32768.0);
__m128 xmm7;
movaps (xmm7, multiplier_float_16);
const unsigned char *eax=(const unsigned char*)inbuf;
// ecx: total output size in bytes (2 bytes per sample).
int ecx=c_loop;
// edx: byte offset into the int16 output; the float input is read at edx*2.
int edx=0;
ecx<<=1;
for (unsigned char *edi=(unsigned char*)samples;ecx!=edx;edx+=16)
{
__m128 xmm0,xmm1;
movups (xmm0, eax+edx*2); // xd | xc | xb | xa
movups (xmm1, eax+edx*2+16); // xh | xg | xf | xe
mulps (xmm0, xmm7); // *= MAX_SHORT
mulps (xmm1, xmm7); // *= MAX_SHORT
minps (xmm0, xmm7); // x=min(x, MAX_SHORT) -- +ve Signed Saturation > 2^31
minps (xmm1, xmm7); // x=min(x, MAX_SHORT) -- +ve Signed Saturation > 2^31
__m128i xmm2,xmm3;
cvtps2dq (xmm2, xmm0); // float -> dd | cc | bb | aa
cvtps2dq (xmm3, xmm1); // float -> hh | gg | ff | ee
packssdw (xmm2, xmm3); // h g | f e | d c | b a -- +/-ve Signed Saturation > 2^15
// The "-16+16" cancels out: the store goes to edi+edx.
movdqa (edi+edx-16+16, xmm2); // store h g | f e | d c | b a
}
// No _mm_empty() here: only __m128/__m128i variables are used in this function.
}
#else
#define convert_float_16_sse2 NULL
#endif
//---------------------------- float -> int32 ----------------------------
// 3DNow! float -> int32 kernel.
// Non-WIN64 builds: only declared here (extern "C"); the definition is not in this file.
// WIN64 builds: the name expands to NULL, so any use of it as a function pointer
// yields a null pointer (presumably meaning "not available" -- confirm at call sites).
#ifndef WIN64
extern "C" void convert_float_32_3dnow(const float *inbuf,int32_t *samples,unsigned int c_loop);
#else
#define convert_float_32_3dnow NULL
#endif
// float -> int32 conversion kernel using SSE arithmetic and MMX-typed results.
// The helper calls (movups, mulps, movhlps, cvtps2pi, cmpnltps, pxor, movq) are not
// defined in this file; presumably they are simd.h wrappers that mirror the x86
// instructions they are named after.
//  inbuf   - source float samples
//  samples - destination 32-bit samples
//  c_loop  - number of samples to convert; 4 are handled per iteration and the
//            loop ends only on an exact match (ecx!=edx), so it must be a multiple of 4
// Statement order inside the loop matters: xmm0 is turned into a comparison mask
// in place, so every read of the scaled samples has to come before cmpnltps.
static void convert_float_32_sse(const float *inbuf,int32_t *samples,unsigned int c_loop)
{
// NOTE(review): 2147483647 has no exact float representation; the constant
// presumably ends up as 2147483648.0f -- confirm this is intended.
const __m128 multiplier_float_32=_mm_set_ps1(2147483647.0f);
__m128 xmm7=multiplier_float_32;
const unsigned char *eax=(const unsigned char*)inbuf;
// ecx: total size in bytes; input and output both use 4 bytes per sample, so
// the single offset edx addresses both buffers.
int ecx= c_loop;
int edx=0;
ecx<<=2;
for (unsigned char *edi=(unsigned char*)samples;ecx!=edx;edx+=16)
{
__m128 xmm0,xmm1;
movups (xmm0, eax+edx); //; xd | xc | xb | xa
mulps (xmm0, xmm7 ); //; *= MAX_INT
movhlps (xmm1, xmm0 ); //; xx | xx | xd | xc
__m64 mm0,mm1;
cvtps2pi (mm0, xmm0 ); //; float -> bb | aa -- -ve Signed Saturation
// From here on xmm0 no longer holds samples but a per-lane mask.
cmpnltps (xmm0, xmm7 ); //;!(xd | xc | xb | xa < MAX_INT)
cvtps2pi (mm1, xmm1 ); // float -> dd | cc -- -ve Signed Saturation
// md | mc | mb | ma -- YUCK!!!
// The mask is read back through the address of xmm0: its first 8 bytes are
// applied to mm0 and the following 8 bytes to mm1.
pxor (mm0, &xmm0); // 0x80000000 -> 0x7FFFFFFF if +ve saturation
pxor (mm1, (uint8_t*)&xmm0+8);
// The "+16" cancels the negative constants: stores go to edi+edx and edi+edx+8.
movq (edi+edx-16+16, mm0); //store bb | aa
movq (edi+edx-8+16, mm1); // store dd | cc
}
// Leave MMX state so x87 floating-point code can be used afterwards.
_mm_empty();
}
// float -> int32 conversion kernel, SSE2 variant (8 samples per iteration).
// Only compiled when __SSE2__ is defined; otherwise the name expands to NULL.
// The helper calls (movups, mulps, cvtps2dq, cmpnltps, pxor, movdqa) are not defined
// in this file; presumably they are simd.h wrappers that mirror the x86
// instructions they are named after.
//  inbuf   - source float samples
//  samples - destination 32-bit samples; written with movdqa, which presumably
//            requires 16-byte alignment (TconvertFromFloat::convert advances
//            samples to a 16-byte boundary before calling its SSE2 kernel)
//  c_loop  - number of samples to convert; the loop ends only on an exact match
//            (ecx!=edx), so it must be a multiple of 8
// Statement order inside the loop matters: xmm0/xmm1 are turned into comparison
// masks in place, so the integer conversions have to come before cmpnltps.
#ifdef __SSE2__
static void convert_float_32_sse2(const float *inbuf,int32_t *samples,unsigned int c_loop)
{
// NOTE(review): 2147483647 has no exact float representation; the constant
// presumably ends up as 2147483648.0f -- confirm this is intended.
const __m128 multiplier_float_32=_mm_set_ps1(2147483647.0f);
__m128 xmm7=multiplier_float_32;
//movss (xmm7, multiplier_float_32);
//shufps xmm7, xmm7, 00000000b
const unsigned char *eax=(const unsigned char*)inbuf;
// ecx: total size in bytes; input and output both use 4 bytes per sample, so
// the single offset edx addresses both buffers.
int ecx=c_loop;
int edx=0;
ecx<<= 2;
for (unsigned char *edi=(unsigned char*)samples;ecx!=edx;edx+=32)
{
__m128 xmm0,xmm1;
movups (xmm0, eax+edx); // xd | xc | xb | xa
movups (xmm1, eax+edx+16); // xh | xg | xf | xe
mulps (xmm0, xmm7); // *= MAX_INT
mulps (xmm1, xmm7); // *= MAX_INT
// Bloody Intel and their "indefinite integer value" it
// is no use to man or beast, we need proper saturation
// like AMD does with their 3DNow instructions. Grrr!!!
__m128i xmm2,xmm3;
cvtps2dq (xmm2, xmm0); // float -> dd | cc | bb | aa -- -ve Signed Saturation
cvtps2dq (xmm3, xmm1); // float -> hh | gg | ff | ee -- -ve Signed Saturation
// From here on xmm0/xmm1 no longer hold samples but per-lane masks.
cmpnltps (xmm0, xmm7); // !(xd | xc | xb | xa < MAX_INT)
cmpnltps (xmm1, xmm7); // !(xh | xg | xf | xe < MAX_INT)
pxor (xmm2, _mm_castps_si128(xmm0)); // 0x80000000 -> 0x7FFFFFFF if +ve saturation
pxor (xmm3, _mm_castps_si128(xmm1));
// The "+32" cancels the negative constants: stores go to edi+edx and edi+edx+16.
movdqa (edi+edx-32+32, xmm2); // store dd | cc | bb | aa
movdqa (edi+edx-16+32, xmm3); // store hh | gg | ff | ee
}
// No _mm_empty() here: only __m128/__m128i variables are used in this function.
}
#else
#define convert_float_32_sse2 NULL
#endif
template<class Tout> struct TconvertFromFloat
{
typedef void (*TconvertFromFloatFc)(const float *inbuf,Tout *samples,unsigned int c_loop);
static Tout* convert(const float *inbuf,Tout *samples,size_t count,TconvertFromFloatFc fc_3dnow,TconvertFromFloatFc fc_sse,TconvertFromFloatFc fc_sse2)
{
size_t c_miss,c_loop;
if (fc_sse2 && Tconfig::cpu_flags&FF_CPU_SSE2 && !((intptr_t)samples&3))
{
while (((intptr_t)samples&15) && count)
{
*samples++=TsampleFormatInfo<Tout>::limit(*inbuf++*TsampleFormatInfo<Tout>::max());
count-=1;
}
c_miss=count&7;
c_loop=count-c_miss;
if (c_loop)
fc_sse2(inbuf,samples,(unsigned int)c_loop);
}
#ifndef WIN64
else if (fc_3dnow && Tconfig::cpu_flags&FF_CPU_3DNOW)
{
c_miss=count&3;
c_loop=count-c_miss;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -