⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 timgfilterdct.cpp

📁 从FFMPEG转换而来的H264解码程序,VC下编译..
💻 CPP
📖 第 1 页 / 共 3 页
字号:
/*
 * Copyright (c) 2002-2006 Milan Cutka
 * Copyright (c) 2002 Tom Barry.  All rights reserved.
 *      trbarry@trbarry.com
 * idct, fdct, quantization and dequantization routines from XviD
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

#include "stdafx.h"
#include "Tconfig.h"
#include "TimgFilterDCT.h"
#include "TdctSettings.h"

// DCT routines with C linkage that are defined outside this translation unit.
// The constructor installs them as the fdct/idct callbacks when the matching
// MMXEXT / MMX CPU flag is set (and WIN64 is not defined). Each one receives the
// work block of 16-bit values and returns nothing, so the result is presumably
// written back into the same buffer.
extern "C" void Skl_IDct16_MMX (short *block);
extern "C" void Skl_IDct16_SSE (short *block);
extern "C" void fdct_mmx_skal (short *block);
extern "C" void fdct_xmm_skal (short *block);
// Constructs the DCT filter: picks the forward/inverse DCT implementation from
// the CPU feature flags and allocates the coefficient work buffer.
//
// Selection order (first match wins):
//   1. SSE2 pair    - only compiled in when __SSE2__ is defined
//   2. MMXEXT pair  - only compiled in when WIN64 is not defined
//   3. MMX pair     - only compiled in when WIN64 is not defined
//   4. plain C pair - fallback; also builds the clipping table via idct_c_init()
//
// NOTE(review): with WIN64 defined but __SSE2__ undefined the trailing `else`
// has no preceding `if`, so that combination would not compile - confirm the
// build never uses it.
TimgFilterDCT::TimgFilterDCT(IffdshowBase *Ideci,Tfilters *Iparent):TimgFilter(Ideci,Iparent)
{
#ifdef __SSE2__
 if (Tconfig::cpu_flags&FF_CPU_SSE2)
  {
   fdct=fdct_sse2;
   idct=idct_sse2;
  }
#endif
#ifndef WIN64
 // The `else` below exists only when the SSE2 `if` above was compiled in.
 #ifdef __SSE2__
 else
 #endif
 if (Tconfig::cpu_flags&FF_CPU_MMXEXT)
  {
   fdct=fdct_xmm_skal;
   idct=Skl_IDct16_SSE;
  }
 else if (Tconfig::cpu_flags&FF_CPU_MMX)
  {
   fdct=fdct_mmx_skal;
   idct=Skl_IDct16_MMX;
  }
#endif
 else
  {
   fdct=fdct_c;
   idct=idct_c;
   idct_c_init(); // fills the static iclip/iclp table
  }
 // oldMode=-1 makes the first process() call see a mode change, which forces the
 // factor table to be built; oldfac/oldmatrix get placeholder first entries.
 oldfac[0]=INT_MAX;oldMode=-1;oldmatrix[0]=0;
 // Room for 64 shorts (one 8x8 block); the second argument is presumably the
 // byte alignment. NOTE(review): the returned pointer is not checked here.
 pWorkArea=(short*)aligned_malloc(64*sizeof(short),16);
}
// Releases the work buffer obtained with aligned_malloc() in the constructor.
TimgFilterDCT::~TimgFilterDCT()
{
 aligned_free(pWorkArea);
}

void TimgFilterDCT::multiply(void)
{
 const char * const factors8=(const char*)&factors[0][0];

 *(__m64*)(pWorkArea+0*8+0)=_mm_srai_pi16(_mm_mullo_pi16(*(__m64*)(pWorkArea+0*8+0),*(__m64*)(factors8+0*16  )),3);
 *(__m64*)(pWorkArea+0*8+4)=_mm_srai_pi16(_mm_mullo_pi16(*(__m64*)(pWorkArea+0*8+4),*(__m64*)(factors8+0*16+8)),3);

 *(__m64*)(pWorkArea+1*8+0)=_mm_srai_pi16(_mm_mullo_pi16(*(__m64*)(pWorkArea+1*8+0),*(__m64*)(factors8+1*16  )),3);
 *(__m64*)(pWorkArea+1*8+4)=_mm_srai_pi16(_mm_mullo_pi16(*(__m64*)(pWorkArea+1*8+4),*(__m64*)(factors8+1*16+8)),3);

 *(__m64*)(pWorkArea+2*8+0)=_mm_srai_pi16(_mm_mullo_pi16(*(__m64*)(pWorkArea+2*8+0),*(__m64*)(factors8+2*16  )),3);
 *(__m64*)(pWorkArea+2*8+4)=_mm_srai_pi16(_mm_mullo_pi16(*(__m64*)(pWorkArea+2*8+4),*(__m64*)(factors8+2*16+8)),3);

 *(__m64*)(pWorkArea+3*8+0)=_mm_srai_pi16(_mm_mullo_pi16(*(__m64*)(pWorkArea+3*8+0),*(__m64*)(factors8+3*16  )),3);
 *(__m64*)(pWorkArea+3*8+4)=_mm_srai_pi16(_mm_mullo_pi16(*(__m64*)(pWorkArea+3*8+4),*(__m64*)(factors8+3*16+8)),3);

 *(__m64*)(pWorkArea+4*8+0)=_mm_srai_pi16(_mm_mullo_pi16(*(__m64*)(pWorkArea+4*8+0),*(__m64*)(factors8+4*16  )),3);
 *(__m64*)(pWorkArea+4*8+4)=_mm_srai_pi16(_mm_mullo_pi16(*(__m64*)(pWorkArea+4*8+4),*(__m64*)(factors8+4*16+8)),3);

 *(__m64*)(pWorkArea+5*8+0)=_mm_srai_pi16(_mm_mullo_pi16(*(__m64*)(pWorkArea+5*8+0),*(__m64*)(factors8+5*16  )),3);
 *(__m64*)(pWorkArea+5*8+4)=_mm_srai_pi16(_mm_mullo_pi16(*(__m64*)(pWorkArea+5*8+4),*(__m64*)(factors8+5*16+8)),3);

 *(__m64*)(pWorkArea+6*8+0)=_mm_srai_pi16(_mm_mullo_pi16(*(__m64*)(pWorkArea+6*8+0),*(__m64*)(factors8+6*16  )),3);
 *(__m64*)(pWorkArea+6*8+4)=_mm_srai_pi16(_mm_mullo_pi16(*(__m64*)(pWorkArea+6*8+4),*(__m64*)(factors8+6*16+8)),3);

 *(__m64*)(pWorkArea+7*8+0)=_mm_srai_pi16(_mm_mullo_pi16(*(__m64*)(pWorkArea+7*8+0),*(__m64*)(factors8+7*16  )),3);
 *(__m64*)(pWorkArea+7*8+4)=_mm_srai_pi16(_mm_mullo_pi16(*(__m64*)(pWorkArea+7*8+4),*(__m64*)(factors8+7*16+8)),3);
}

// Quantises the 64 coefficients of one block in place with the H.263 inter
// rule: the magnitude is reduced by quant/2, anything that then falls below
// 2*quant becomes 0, and the rest is divided by 2*quant using a fixed-point
// reciprocal (SCALEBITS fractional bits). The sign is restored afterwards.
//
// coeff - 64 coefficients, overwritten with the quantised levels
// quant - quantiser. 0..31 use the precomputed reciprocal table (0 yields an
//         all-zero block); larger values get the same reciprocal computed on
//         the fly instead of indexing past the end of the table.
// The third parameter is unused.
void TimgFilterDCT::quant_h263_inter(int16_t * coeff,const uint32_t quant, const uint16_t *)
{
 #define SCALEBITS       16
 #define FIX(X)          ((1L << SCALEBITS) / (X) + 1)
 static const uint32_t multipliers[32] =
  {
   0,       FIX(2),  FIX(4),  FIX(6),
   FIX(8),  FIX(10), FIX(12), FIX(14),
   FIX(16), FIX(18), FIX(20), FIX(22),
   FIX(24), FIX(26), FIX(28), FIX(30),
   FIX(32), FIX(34), FIX(36), FIX(38),
   FIX(40), FIX(42), FIX(44), FIX(46),
   FIX(48), FIX(50), FIX(52), FIX(54),
   FIX(56), FIX(58), FIX(60), FIX(62)
  };
 #undef FIX

 // Table entry q is (1<<SCALEBITS)/(2*q)+1. Outside the table the same value is
 // derived as ((1<<SCALEBITS)/q)/2+1, which equals it and cannot overflow.
 const uint32_t mult = quant < 32 ? multipliers[quant]
                                  : uint32_t(((1UL << SCALEBITS) / quant) / 2 + 1);
 const uint16_t quant_m_2 = uint16_t(quant << 1);
 const uint16_t quant_d_2 = uint16_t(quant >> 1);

 for (int i = 0; i < 64; i++) {
         const bool negative = coeff[i] < 0;

         // Work on the magnitude in 32 bits: -(-32768) does not fit in int16_t
         // and would otherwise wrap and be zeroed by the threshold test.
         int32_t level = negative ? -int32_t(coeff[i]) : int32_t(coeff[i]);
         level -= quant_d_2;

         if (level < quant_m_2) {
                 coeff[i] = 0;
                 continue;
         }

         const int16_t quantised = int16_t((uint32_t(level) * mult) >> SCALEBITS);
         coeff[i] = negative ? int16_t(-quantised) : quantised;
 }
 #undef SCALEBITS
}

// Reconstructs the 64 levels of one block in place with the H.263 inter rule:
// level*2*quant plus (or minus, for negative levels) an offset of quant when
// quant is odd and quant-1 when it is even. The result is saturated to
// [-2048, 2047]; zero levels stay zero.
//
// data  - 64 quantised levels, overwritten with the reconstructed values
// quant - quantiser that was used for quantisation
// The third parameter is unused.
void TimgFilterDCT::dequant_h263_inter(int16_t * data,const uint32_t quant,const uint16_t *)
{
 const uint16_t quant_m_2 = uint16_t(quant << 1);
 const uint16_t quant_add = uint16_t (quant & 1 ? quant : quant - 1);

 for (int i = 0; i < 64; i++) {
         const int32_t level = data[i];

         if (level == 0)
                 continue;

         // The product is kept in 32 bits until after the range check. Storing
         // it in an int16_t first lets large values wrap around and slip past
         // the saturation.
         if (level < 0) {
                 const int32_t value = level * quant_m_2 - quant_add;
                 data[i] = int16_t(value >= -2048 ? value : -2048);
         } else {
                 const int32_t value = level * quant_m_2 + quant_add;
                 data[i] = int16_t(value <= 2047 ? value : 2047);
         }
 }
}

// Mode-1 coefficient pass: quantises the work block with the H.263 inter rule
// and immediately reconstructs it again, using the current `quant` for both
// steps.
void TimgFilterDCT::h263(void)
{
 int16_t * const block=pWorkArea;

 quant_h263_inter(block,quant);
 dequant_h263_inter(block,quant);
}

// Quantises the 64 coefficients of one block in place with the MPEG inter
// rule: each magnitude is multiplied by 16, divided (with rounding) by its
// matrix weight, and then divided by 2*quant using a fixed-point reciprocal
// (SCALEBITS fractional bits). The sign is restored afterwards.
//
// coeff               - 64 coefficients, overwritten with the quantised levels
// quant               - quantiser. 0..31 use the precomputed reciprocal table
//                       (0 yields an all-zero block); larger values get the
//                       same reciprocal computed on the fly instead of indexing
//                       past the end of the table.
// mpeg_quant_matrices - 64 weights, one per coefficient. Every weight is used
//                       as a divisor and must be non-zero.
void TimgFilterDCT::quant_mpeg_inter(int16_t * coeff,const uint32_t quant,const uint16_t * mpeg_quant_matrices)
{
 #define SCALEBITS 17
 #define FIX(X)    ((1UL << SCALEBITS) / (X) + 1)
 static const uint32_t multipliers[32] =
  {
   0,       FIX(2),  FIX(4),  FIX(6),
   FIX(8),  FIX(10), FIX(12), FIX(14),
   FIX(16), FIX(18), FIX(20), FIX(22),
   FIX(24), FIX(26), FIX(28), FIX(30),
   FIX(32), FIX(34), FIX(36), FIX(38),
   FIX(40), FIX(42), FIX(44), FIX(46),
   FIX(48), FIX(50), FIX(52), FIX(54),
   FIX(56), FIX(58), FIX(60), FIX(62)
  };
 #undef FIX

 // Table entry q is (1<<SCALEBITS)/(2*q)+1. Outside the table the same value is
 // derived as ((1<<SCALEBITS)/q)/2+1, which equals it and cannot overflow.
 const uint32_t mult = quant < 32 ? multipliers[quant]
                                  : uint32_t(((1UL << SCALEBITS) / quant) / 2 + 1);
 const uint16_t *inter_matrix = mpeg_quant_matrices;

 for (int i = 0; i < 64; i++) {
         if (coeff[i] == 0)
                 continue;

         const bool negative = coeff[i] < 0;
         uint32_t level = negative ? uint32_t(-coeff[i]) : uint32_t(coeff[i]);

         // (16*|c| + w/2) / w : weight division rounded to nearest
         level = ((level << 4) + (inter_matrix[i] >> 1)) / inter_matrix[i];
         level = (level * mult) >> SCALEBITS;

         coeff[i] = negative ? int16_t(-(int16_t) level) : int16_t(level);
 }
 #undef SCALEBITS
}

// Reconstructs the 64 levels of one block in place with the MPEG inter rule:
// ((2*|level|+1) * weight * quant) >> 4, saturated to 2047 for positive and
// 2048 (stored as -2048) for negative levels. Zero levels stay zero.
// Afterwards the low bits of all results are XOR-ed together; when that parity
// is even the lowest bit of the last entry is flipped (mismatch control).
//
// data                - 64 quantised levels, overwritten with the results
// quant               - quantiser that was used for quantisation
// mpeg_quant_matrices - 64 weights, one per coefficient
void TimgFilterDCT::dequant_mpeg_inter(int16_t * data,const uint32_t quant,const uint16_t * mpeg_quant_matrices)
{
 unsigned int parity = 0;

 for (int idx = 0; idx != 64; ++idx) {
         const int16_t coded = data[idx];

         // A zero stays zero and contributes nothing to the parity.
         if (coded == 0)
                 continue;

         const uint16_t weight = mpeg_quant_matrices[idx];

         if (coded < 0) {
                 int32_t magnitude = -coded;

                 magnitude = ((2 * magnitude + 1) * weight * quant) >> 4;
                 data[idx] = int16_t(magnitude <= 2048 ? -magnitude : -2048);
         } else {
                 uint32_t magnitude = coded;

                 magnitude = ((2 * magnitude + 1) * weight * quant) >> 4;
                 data[idx] = int16_t(magnitude <= 2047 ? magnitude : 2047);
         }

         parity ^= (unsigned int)data[idx] & 1u;
 }

 /* mismatch control: force the XOR of all low bits to be odd */
 if (parity == 0)
         data[63] ^= 1;
}

// Mode-2 coefficient pass: quantises the work block with the MPEG inter rule
// and immediately reconstructs it again. The `factors` table, viewed as 64
// unsigned 16-bit weights, serves as the quantisation matrix for both steps.
void TimgFilterDCT::mpeg(void)
{
 const uint16_t * const matrix=(const uint16_t*)&factors[0][0];

 quant_mpeg_inter(pWorkArea,quant,matrix);
 dequant_mpeg_inter(pWorkArea,quant,matrix);
}

// Runs the DCT-domain filter over the first (luma) plane of one picture and
// hands the picture to the next filter in the queue.
//
// Every complete 8x8 block is widened to 16 bit, forward-transformed, modified
// by the routine selected through cfg->mode (1: H.263 quant/dequant, 2: MPEG
// quant/dequant, anything else: per-coefficient factors), inverse-transformed
// and stored with unsigned saturation. Columns and rows that do not fill a
// whole block are copied from the source unchanged. When the picture rectangle
// is smaller than 8x8 no DCT processing is done at all.
//
// it   - position of this filter in the queue; ++it receives the picture
// pict - picture being filtered
// cfg0 - filter settings, used as TdctSettings
// Returns the result of parent->deliverSample().
HRESULT TimgFilterDCT::process(TfilterQueue::iterator it,TffPict &pict,const TfilterSettingsVideo *cfg0)
{
 const TdctSettings *cfg=(const TdctSettings*)cfg0;
 init(pict,cfg->full,cfg->half);
 if (pictRect.dx>=8 && pictRect.dy>=8)
  {
   // Re-select the coefficient routine only when the mode setting changed.
   bool modechange=oldMode!=cfg->mode;
   if (modechange)
    switch (oldMode=cfg->mode)
     {
      case 1:processDct=&TimgFilterDCT::h263;break;
      case 2:processDct=&TimgFilterDCT::mpeg;break;
      default:
      case 0:processDct=&TimgFilterDCT::multiply;break;
     }
   // Mode 0: rebuild the factor table when the mode or any of the eight factor
   // settings changed. Settings are in 1/1000 units; the product is stored
   // times 8 because multiply() shifts right by 3.
   if (oldMode==0 && (modechange || memcmp(oldfac,&cfg->fac0,sizeof(oldfac))!=0))
    {
     memcpy(oldfac,&cfg->fac0,sizeof(oldfac));
     for (int i=0;i<=7;i++)
      for (int j=0;j<=7;j++)
       factors[i][j]=(short)((oldfac[i]/1000.0) * (oldfac[j]/1000.0) * 8);
    }
   // Mode 2: rebuild the quantisation matrix when the mode or the matrix
   // settings changed. This has to be memcmp: memcpy returns its destination
   // pointer, which is never 0, so the table was rebuilt on every frame.
   if (oldMode==2 && (modechange || memcmp(oldmatrix,&cfg->matrix0,sizeof(oldmatrix))!=0))
    {
     memcpy(oldmatrix,&cfg->matrix0,sizeof(oldmatrix));
     const unsigned char *m=(const unsigned char*)&cfg->matrix0;
     for (int i=0;i<8;i++)
      for (int j=0;j<8;j++)
       factors[i][j]=(short)limit<int>(*m++,1,255); // weights are divisors: keep them >=1
    }
   quant=cfg->quant;
   const unsigned char *srcY;
   getCur(FF_CSPS_MASK_YUV_PLANAR,pict,cfg->full,&srcY,NULL,NULL,NULL);
   unsigned char *dstY;
   getNext(csp1,pict,cfg->full,&dstY,NULL,NULL,NULL);

   // Width covered by whole 8-pixel blocks.
   unsigned int cycles=dx1[0]&~7;

   // Leftover columns on the right edge are copied through for the full height.
   if (dx1[0]&7)
    TffPict::copy(dstY+cycles,stride2[0],srcY+cycles,stride1[0],dx1[0]&7,dy1[0]);

   __m64 m0=_mm_setzero_si64();
   const stride_t stride1_0=stride1[0],stride2_0=stride2[0];
   // Height covered by whole 8-row blocks. The previous bound `y<=dy1[0]-7`
   // ran one block too many when the height was 7 modulo 8 (touching a row
   // past the plane and pushing the tail copy below out of bounds), and could
   // underflow for heights below 7.
   const unsigned int blockRows=dy1[0]&~7;
   for (unsigned int y=0;y<blockRows;srcY+=8*stride1_0,dstY+=8*stride2_0,y+=8)
    {
     const unsigned char *srcLn=srcY;unsigned char *dstLn=dstY,*dstLnEnd=dstLn+cycles;
     for (;dstLn<dstLnEnd;srcLn+=8,dstLn+=8)
      {
       // Load 8 rows of 8 bytes and zero-extend them to 16 bit in the work block.
       __m64 mm0=*(const __m64*)(srcLn+0*stride1_0);
       __m64 mm2=*(const __m64*)(srcLn+1*stride1_0);
       *(__m64*)(pWorkArea+ 0)=_mm_unpacklo_pi8(mm0,m0);
       *(__m64*)(pWorkArea+ 4)=_mm_unpackhi_pi8(mm0,m0);
       *(__m64*)(pWorkArea+ 8)=_mm_unpacklo_pi8(mm2,m0);
       *(__m64*)(pWorkArea+12)=_mm_unpackhi_pi8(mm2,m0);

       mm0=*(const __m64*)(srcLn+2*stride1_0);
       mm2=*(const __m64*)(srcLn+3*stride1_0);
       *(__m64*)(pWorkArea+16)=_mm_unpacklo_pi8(mm0,m0);
       *(__m64*)(pWorkArea+20)=_mm_unpackhi_pi8(mm0,m0);
       *(__m64*)(pWorkArea+24)=_mm_unpacklo_pi8(mm2,m0);
       *(__m64*)(pWorkArea+28)=_mm_unpackhi_pi8(mm2,m0);

       mm0=*(const __m64*)(srcLn+4*stride1_0);
       mm2=*(const __m64*)(srcLn+5*stride1_0);
       *(__m64*)(pWorkArea+32)=_mm_unpacklo_pi8(mm0,m0);
       *(__m64*)(pWorkArea+36)=_mm_unpackhi_pi8(mm0,m0);
       *(__m64*)(pWorkArea+40)=_mm_unpacklo_pi8(mm2,m0);
       *(__m64*)(pWorkArea+44)=_mm_unpackhi_pi8(mm2,m0);

       mm0=*(const __m64*)(srcLn+6*stride1_0);
       mm2=*(const __m64*)(srcLn+7*stride1_0);
       *(__m64*)(pWorkArea+48)=_mm_unpacklo_pi8(mm0,m0);
       *(__m64*)(pWorkArea+52)=_mm_unpackhi_pi8(mm0,m0);
       *(__m64*)(pWorkArea+56)=_mm_unpacklo_pi8(mm2,m0);
       *(__m64*)(pWorkArea+60)=_mm_unpackhi_pi8(mm2,m0);

       // forward DCT -> coefficient pass for the current mode -> inverse DCT
       fdct(pWorkArea);
       (this->*processDct)();
       idct(pWorkArea);

       // Pack each row back to 8 bytes with unsigned saturation.
       *(__m64*)(dstLn+0*stride2_0)=_mm_packs_pu16(*(__m64*)(pWorkArea+0*8),*(__m64*)(pWorkArea+0*8+4));
       *(__m64*)(dstLn+1*stride2_0)=_mm_packs_pu16(*(__m64*)(pWorkArea+1*8),*(__m64*)(pWorkArea+1*8+4));
       *(__m64*)(dstLn+2*stride2_0)=_mm_packs_pu16(*(__m64*)(pWorkArea+2*8),*(__m64*)(pWorkArea+2*8+4));
       *(__m64*)(dstLn+3*stride2_0)=_mm_packs_pu16(*(__m64*)(pWorkArea+3*8),*(__m64*)(pWorkArea+3*8+4));
       *(__m64*)(dstLn+4*stride2_0)=_mm_packs_pu16(*(__m64*)(pWorkArea+4*8),*(__m64*)(pWorkArea+4*8+4));
       *(__m64*)(dstLn+5*stride2_0)=_mm_packs_pu16(*(__m64*)(pWorkArea+5*8),*(__m64*)(pWorkArea+5*8+4));
       *(__m64*)(dstLn+6*stride2_0)=_mm_packs_pu16(*(__m64*)(pWorkArea+6*8),*(__m64*)(pWorkArea+6*8+4));
       *(__m64*)(dstLn+7*stride2_0)=_mm_packs_pu16(*(__m64*)(pWorkArea+7*8),*(__m64*)(pWorkArea+7*8+4));
      }
    }
   // Leave MMX state before any code that may use the x87 FPU runs.
   _mm_empty();
   // srcY/dstY now point just below the last whole block row: copy the
   // remaining rows through unchanged.
   if (dy1[0]&7)
    TffPict::copy(dstY,stride2[0],srcY,stride1[0],dx1[0],dy1[0]&7);
  }
 return parent->deliverSample(++it,pict);
}

// Storage for the static clipping table: iclip holds the 1024 entries and iclp
// is pointed at iclip+512 by idct_c_init() so it can be indexed with -512..511.
short TimgFilterDCT::iclip[1024],*TimgFilterDCT::iclp;

// Builds the static clipping table: iclp is centred on iclip so that iclp[v]
// is valid for v in -512..511 and holds v limited to the range -256..255.
void TimgFilterDCT::idct_c_init(void)
{
	iclp = iclip + 512;

	short *entry = iclip;   // iclip[0] is the slot for value -512
	for (int value = -512; value < 512; ++value)
		*entry++ = (short)limit(value,-256,255);
}
void TimgFilterDCT::idct_c(short *block)
{

	/*
	 * idct_int32_init() must be called before the first call to this

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -