📄 taudiofilter.cpp

📁 从FFMPEG转换而来的H264解码程序,VC下编译..
💻 CPP
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*
 * Copyright (c) 2003-2006 Milan Cutka
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

#include "stdafx.h"
#include "TaudioFilter.h"
#include "IffdshowBase.h"
#include "IffdshowDec.h"
#include "IffdshowDecAudio.h"
#include "dither.h"
#include "TfilterSettings.h"
#include "Tconfig.h"
#include "TpresetSettingsAudio.h"
#include "simd.h"

// Creates an audio filter.
//  Ideci   - passed straight through to the Tfilter base-class constructor.
//  Iparent - owning filter collection; kept in `parent` after an unchecked
//            C-style cast to TaudioFilters*.
TaudioFilter::TaudioFilter(IffdshowBase *Ideci,Tfilters *Iparent):Tfilter(Ideci),parent((TaudioFilters*)Iparent)
{
 // `deci` is not declared in this block - presumably a member set up by
 // Tfilter(Ideci); TODO confirm.
 deciA=deci;
 // Start without a dither object (the destructor deletes it if one is created).
 // NOTE(review): -1 in oldnoiseshaping/oldsfout looks like a "nothing seen yet"
 // marker - confirm against the code that reads them.
 dither=NULL;oldnoiseshaping=oldsfout=-1;
}
// Releases the dither object, if one was ever created.
TaudioFilter::~TaudioFilter()
{
 // deleting a null pointer is a no-op, so no explicit guard is required
 delete dither;
}

// Sizes a request for `numsamples` blocks of format `sf` and forwards it to
// `buf.alloc()`, returning whatever that call returns.
//  sf         - sample format; only sf.blockAlign() is consulted here
//  numsamples - number of sample blocks wanted; 0 is treated as 1
//  buf        - buffer object that performs the actual allocation
void* TaudioFilter::alloc_buffer(const TsampleFormat &sf,size_t numsamples,Tbuffer &buf)
{
 // an empty request is still sized as one block
 const size_t blocks=numsamples>0?numsamples:size_t(1);
 const size_t bytes=blocks*sf.blockAlign();
 return buf.alloc(bytes);
}

//----------------------- generic integer conversions --------------------
template<class Tin,class Tout> static Tout* convert(const Tin *in,Tout * const out,size_t count)
{
 unsigned int bpsIn =TsampleFormatInfo<Tin >::bps();
 unsigned int bpsOut=TsampleFormatInfo<Tout>::bps();
 if (bpsIn<bpsOut)
  for (size_t i=0;i<count;i++)
   out[i]=Tout(Tout(in[i])<<(bpsOut-bpsIn));
 else
  for (size_t i=0;i<count;i++)
   out[i]=Tout(in[i]>>(bpsIn-bpsOut));
 return out;
}

//---------------------------- int16 -> int32 ----------------------------
// convert_16_32_mmx: widens c_loop 16-bit samples from inbuf into 32-bit
// samples in outbuf.  When WIN64 is not defined the routine is only declared
// here (C linkage, defined elsewhere); when WIN64 is defined the version below
// is used instead.
#ifndef WIN64
extern "C" void convert_16_32_mmx(const int16_t *inbuf,int32_t *outbuf,unsigned int c_loop);
#else
//  inbuf  - source 16-bit samples
//  outbuf - destination 32-bit samples
//  c_loop - sample count.  The loop only stops when the byte offset is exactly
//           c_loop*2 and it advances 8 bytes at a time, so c_loop has to be a
//           multiple of 4 (the specialization that calls this strips the low
//           two bits of count first).
// punpcklwd/movq are not defined in this file - presumably the simd.h wrappers
// for the MMX instructions of the same name; TODO confirm.
static void convert_16_32_mmx(const int16_t *inbuf,int32_t *outbuf,unsigned int c_loop)
{
 // eax: byte offset into the input; the output offset is twice that
 int eax=0;
 // ebx: input length in bytes (2 bytes per 16-bit sample)
 int ebx=c_loop;
 ebx<<=1;
 const unsigned char *esi=(const unsigned char*)inbuf;
 // NOTE(review): mm0/mm1 are zeroed once and then reused by every iteration.
 // If punpcklwd interleaves the register's previous low words with the loaded
 // samples, the low 16 bits of later outputs could carry stale data rather
 // than zero - verify against simd.h.
 __m64 mm0=_mm_setzero_si64(),mm1=mm0;
 for (unsigned char *edi=(unsigned char*)outbuf;eax!=ebx;eax+=8)
  {
   // input bytes [eax, eax+4) and [eax+4, eax+8) are unpacked separately...
   punpcklwd (mm0,esi+eax);
   punpcklwd (mm1,esi+eax+4);
   // ...and stored at output bytes 2*eax and 2*eax+8
   movq (edi+2*eax,mm0);
   movq (edi+2*eax+8,mm1);
  }
 // leave MMX state so that x87 floating point code can run afterwards
 _mm_empty();
}
#endif
// int16 -> int32 specialization.  When the CPU reports MMX, the largest
// multiple-of-4 prefix goes through convert_16_32_mmx and only the remaining
// 0..3 samples are shifted one by one; without MMX every sample takes the
// scalar path.
//  inbuf  - source 16-bit samples
//  outbuf - destination 32-bit samples (must hold `count` elements)
//  count  - number of samples
// Returns `outbuf`.
template<> int32_t* convert<int16_t,int32_t>(const int16_t *inbuf,int32_t * const outbuf,size_t count)
{
 size_t tail=count;               // samples left for the scalar loop
 if (Tconfig::cpu_flags&FF_CPU_MMX)
  tail=count&3;
 const size_t bulk=count-tail;    // samples handled by the MMX routine
 if (bulk!=0)
  convert_16_32_mmx(inbuf,outbuf,(unsigned int)bulk);
 for (size_t pos=bulk;pos<count;pos++)
  outbuf[pos]=int32_t(inbuf[pos])<<16;
 return outbuf;
}

//---------------------------- int32 -> int16 ----------------------------
// SIMD int32 -> int16 kernel, parameterized on a register policy `_mm`
// (the visible callers instantiate it with Tmmx and Tsse2).
//  inbuf  - source 32-bit samples
//  outbuf - destination 16-bit samples
//  c_loop - sample count.  The loop stops only when the output byte offset is
//           exactly c_loop*2 and it advances by _mm::size each pass, so c_loop
//           must make that offset land on the end (the caller trims count to
//           a multiple of 4 or 8 beforehand).
// movq/psrad/packssdw are not defined in this file - presumably simd.h
// wrappers overloaded for the register type; TODO confirm.
template<class _mm> static void convert_32_16_simd(const int32_t *inbuf,int16_t *outbuf,unsigned int c_loop)
{
 const unsigned char *src=(const unsigned char*)inbuf;
 unsigned char *dst=(unsigned char*)outbuf;
 const int outBytes=int(c_loop)<<1;   // 2 bytes per output sample
 int outOfs=0;                        // byte offset into the output
 while (outOfs!=outBytes)
  {
   typename _mm::__m lo,hi;
   // the input is twice as wide as the output, hence 2*outOfs
   movq (lo,src+2*outOfs);
   movq (hi,src+2*outOfs+_mm::size);
   // keep the upper 16 bits of every 32-bit sample
   psrad (lo,16);
   psrad (hi,16);
   // merge both registers into one register of 16-bit values
   packssdw (lo,hi);
   movq (dst+outOfs,lo);
   outOfs+=_mm::size;
  }
 _mm::empty();
}
// int32 -> int16 specialization.  Chooses a SIMD kernel for the bulk of the
// data and finishes the remainder with a scalar shift:
//  - SSE2 kernel (only when built with __SSE2__, the CPU reports SSE2 and both
//    buffers are 16-byte aligned): remainder is count&7
//  - otherwise the MMX kernel when the CPU reports MMX: remainder is count&3
//  - otherwise everything is scalar
//  inbuf  - source 32-bit samples
//  outbuf - destination 16-bit samples (must hold `count` elements)
//  count  - number of samples
// Returns `outbuf`.
template<> int16_t* convert<int32_t,int16_t>(const int32_t *inbuf,int16_t * const outbuf,size_t count)
{
#ifdef __SSE2__
 const bool useSse2=(Tconfig::cpu_flags&FF_CPU_SSE2) && (intptr_t(inbuf)&15)==0 && (intptr_t(outbuf)&15)==0;
#else
 const bool useSse2=false;
#endif
 size_t tail;                     // samples left for the scalar loop
 if (useSse2)
  tail=count&7;
 else if (Tconfig::cpu_flags&FF_CPU_MMX)
  tail=count&3;
 else
  tail=count;
 const size_t bulk=count-tail;    // samples handled by a SIMD kernel
 if (bulk!=0)
  {
#ifdef __SSE2__
   if (useSse2)
    convert_32_16_simd<Tsse2>(inbuf,outbuf,(unsigned int)bulk);
   else
#endif
    convert_32_16_simd<Tmmx>(inbuf,outbuf,(unsigned int)bulk);
  }
 for (size_t pos=bulk;pos<count;pos++)
  outbuf[pos]=int16_t(inbuf[pos]>>16);
 return outbuf;
}

//---------------------------- float -> int16 ----------------------------
#ifndef WIN64
 extern "C" void convert_float_16_3dnow(const float *inbuf,int16_t *samples,unsigned int c_loop);
#else
 #define convert_float_16_3dnow NULL
#endif
static void convert_float_16_sse(const float *inbuf,int16_t *samples,unsigned int c_loop)
{
 const __m128 multiplier_float_16=_mm_set_ps1(32768.0);
 __m128 xmm7;
 movaps (xmm7, multiplier_float_16);
 const unsigned char *eax=(const unsigned char*)inbuf;
 int ebx=0;
 int ecx=c_loop;
 ecx<<=1;
 for (unsigned char *edx=(unsigned char*)samples;ecx!=ebx;ebx+=8)
  {
   __m128 xmm0,xmm1;
   movups  (xmm0, eax+ebx*2);
   mulps   (xmm0, xmm7);
   minps   (xmm0, xmm7);//                  ; x=min(x, MAX_SHORT)  --  +ve Signed Saturation > 2^31
   movhlps (xmm1, xmm0);
   __m64 mm0,mm1;
   cvtps2pi( mm0, xmm0);
   cvtps2pi( mm1, xmm1);
   packssdw (mm0, mm1);
   movq (edx+ebx, mm0);
  }
 _mm_empty();
}
#ifdef __SSE2__
static void convert_float_16_sse2(const float *inbuf,int16_t *samples,unsigned int c_loop)
{
 const __m128 multiplier_float_16=_mm_set_ps1(32768.0);
 __m128 xmm7;
 movaps (xmm7, multiplier_float_16);
 const unsigned char *eax=(const unsigned char*)inbuf;
 int ecx=c_loop;
 int edx=0;
 ecx<<=1;
 for (unsigned char  *edi=(unsigned char*)samples;ecx!=edx;edx+=16)
  {
   __m128 xmm0,xmm1;
   movups   (xmm0, eax+edx*2);           // xd | xc | xb | xa
   movups   (xmm1, eax+edx*2+16);        // xh | xg | xf | xe
   mulps    (xmm0, xmm7);                  // *= MAX_SHORT
   mulps    (xmm1, xmm7);                  // *= MAX_SHORT
   minps    (xmm0, xmm7);                  // x=min(x, MAX_SHORT)  --  +ve Signed Saturation > 2^31
   minps    (xmm1, xmm7);                // x=min(x, MAX_SHORT)  --  +ve Signed Saturation > 2^31
   __m128i xmm2,xmm3;
   cvtps2dq (xmm2, xmm0);                 // float -> dd | cc | bb | aa
   cvtps2dq (xmm3, xmm1);                  // float -> hh | gg | ff | ee
   packssdw (xmm2, xmm3);                  // h g | f e | d c | b a  --  +/-ve Signed Saturation > 2^15
   movdqa   (edi+edx-16+16, xmm2);          // store h g | f e | d c | b a
  }
}
#else
 #define convert_float_16_sse2 NULL
#endif

//---------------------------- float -> int32 ----------------------------
// When WIN64 is not defined the _3dnow variant is an external C-linkage
// routine; when it is defined the name is a macro for NULL.
#ifndef WIN64
 extern "C" void convert_float_32_3dnow(const float *inbuf,int32_t *samples,unsigned int c_loop);
#else
 #define convert_float_32_3dnow NULL
#endif
// float -> int32 kernel, four samples per pass.
//  inbuf   - source floats (loaded with movups)
//  samples - destination 32-bit samples
//  c_loop  - sample count.  The loop stops only when the byte offset is
//            exactly c_loop*4 and it advances 16 bytes per pass, so c_loop
//            must be a multiple of 4.
// The lower-case instruction-named helpers are not defined in this file -
// presumably simd.h wrappers for the SSE/MMX instructions of the same name;
// TODO confirm.
static void convert_float_32_sse(const float *inbuf,int32_t *samples,unsigned int c_loop)
{
 // 2147483647 cannot be represented exactly in single precision, so the
 // literal below rounds to 2147483648.0f (2^31).
 const __m128 multiplier_float_32=_mm_set_ps1(2147483647.0f);
 __m128 xmm7=multiplier_float_32;
 const unsigned char *eax=(const unsigned char*)inbuf;
 // ecx: total length in bytes (input and output samples are both 4 bytes)
 int ecx= c_loop;
 // edx: byte offset shared by input and output
 int edx=0;
 ecx<<=2;
 for (unsigned char *edi=(unsigned char*)samples;ecx!=edx;edx+=16)
  {
   __m128 xmm0,xmm1;
   movups   (xmm0, eax+edx);             // xd | xc | xb | xa
   mulps    (xmm0, xmm7   );               // scale by the multiplier
   movhlps  (xmm1, xmm0   );               // xx | xx | xd | xc
   __m64 mm0,mm1;
   cvtps2pi (mm0, xmm0    );               // float -> bb | aa  (negative side saturates)
   // Order matters: xmm0 is overwritten with the compare mask only after its
   // low half has been converted and its high half copied to xmm1.
   cmpnltps (xmm0, xmm7   );      // mask = !(xd | xc | xb | xa < multiplier)
   cvtps2pi (mm1, xmm1    );               // float -> dd | cc  (negative side saturates)
   // The mask register is read through a pointer: its low 8 bytes cover
   // lanes a/b, the next 8 bytes cover lanes c/d.
   pxor     (mm0, &xmm0);         // 0x80000000 -> 0x7FFFFFFF where the positive side saturated
   pxor     (mm1, (uint8_t*)&xmm0+8);
   // the -16+16 / -8+16 terms cancel to offsets 0 and 8 from edi+edx
   movq     (edi+edx-16+16, mm0);           // store bb | aa
   movq     (edi+edx-8+16, mm1);            // store dd | cc
  }
 // MMX registers were used above; reset the state for x87 code
 _mm_empty();
}
#ifdef __SSE2__
// float -> int32 kernel, eight samples per pass.
//  inbuf   - source floats (loaded with movups)
//  samples - destination 32-bit samples, written with movdqa
//            (NOTE(review): presumably requires 16-byte alignment - confirm;
//            the visible caller aligns `samples` to 16 bytes first)
//  c_loop  - sample count.  The loop stops only when the byte offset is
//            exactly c_loop*4 and it advances 32 bytes per pass, so c_loop
//            must be a multiple of 8.
// The lower-case instruction-named helpers are not defined in this file -
// presumably simd.h wrappers; TODO confirm.
static void convert_float_32_sse2(const float *inbuf,int32_t *samples,unsigned int c_loop)
{
 // 2147483647 cannot be represented exactly in single precision, so the
 // literal below rounds to 2147483648.0f (2^31).
 const __m128 multiplier_float_32=_mm_set_ps1(2147483647.0f);
 __m128 xmm7=multiplier_float_32;
 //movss    (xmm7, multiplier_float_32);
 //shufps   xmm7, xmm7, 00000000b
 const unsigned char *eax=(const unsigned char*)inbuf;
 // ecx: total length in bytes (input and output samples are both 4 bytes)
 int ecx=c_loop;
 // edx: byte offset shared by input and output
 int edx=0;
 ecx<<= 2;
 for (unsigned char *edi=(unsigned char*)samples;ecx!=edx;edx+=32)
  {
   __m128 xmm0,xmm1;
   movups   (xmm0, eax+edx);             // xd | xc | xb | xa
   movups   (xmm1, eax+edx+16);          // xh | xg | xf | xe
   mulps    (xmm0, xmm7);                  // scale by the multiplier
   mulps    (xmm1, xmm7);                  // scale by the multiplier
   // The float->int conversion does not saturate positive overflow (the
   // original author's note: it yields an "indefinite integer value"), so
   // the result is patched up with a compare mask below.
   __m128i xmm2,xmm3;
   cvtps2dq (xmm2, xmm0);                  // float -> dd | cc | bb | aa  (negative side saturates)
   cvtps2dq (xmm3, xmm1);                  // float -> hh | gg | ff | ee  (negative side saturates)
   // Order matters: xmm0/xmm1 are overwritten with the masks only after
   // they have been converted.
   cmpnltps (xmm0, xmm7);                  // mask = !(xd | xc | xb | xa < multiplier)
   cmpnltps (xmm1, xmm7);                  // mask = !(xh | xg | xf | xe < multiplier)

   pxor     (xmm2, _mm_castps_si128(xmm0));                  // 0x80000000 -> 0x7FFFFFFF where the positive side saturated
   pxor     (xmm3, _mm_castps_si128(xmm1));
   // the -32+32 / -16+32 terms cancel to offsets 0 and 16 from edi+edx
   movdqa   (edi+edx-32+32, xmm2);          // store dd | cc | bb | aa
   movdqa   (edi+edx-16+32, xmm3);          // store hh | gg | ff | ee
  }
}
#else
 // built without __SSE2__: the name becomes a null pointer
 #define convert_float_32_sse2 NULL
#endif

template<class Tout> struct TconvertFromFloat
{
 typedef void (*TconvertFromFloatFc)(const float *inbuf,Tout *samples,unsigned int c_loop);
 static Tout* convert(const float *inbuf,Tout *samples,size_t count,TconvertFromFloatFc fc_3dnow,TconvertFromFloatFc fc_sse,TconvertFromFloatFc fc_sse2)
  {
   size_t c_miss,c_loop;
   if (fc_sse2 && Tconfig::cpu_flags&FF_CPU_SSE2 && !((intptr_t)samples&3))
    {
     while (((intptr_t)samples&15) && count)
      {
       *samples++=TsampleFormatInfo<Tout>::limit(*inbuf++*TsampleFormatInfo<Tout>::max());
       count-=1;
      }
     c_miss=count&7;
     c_loop=count-c_miss;
     if (c_loop)
      fc_sse2(inbuf,samples,(unsigned int)c_loop);
    }
   #ifndef WIN64
   else if (fc_3dnow && Tconfig::cpu_flags&FF_CPU_3DNOW)
    {
     c_miss=count&3;
     c_loop=count-c_miss;
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -