//#define IS_SSE2
// Copyright (c) 2002 Tom Barry. All rights reserved.
// trbarry@trbarry.com
// Requires Avisynth source code to compile for Avisynth
// Avisynth Copyright 2000 Ben Rudiak-Gould.
// http://www.math.berkeley.edu/~benrg/avisynth.html
/////////////////////////////////////////////////////////////////////////////
//
// This file is subject to the terms of the GNU General Public License as
// published by the Free Software Foundation. A copy of this license is
// included with this software distribution in the file COPYING. If you
// do not have a copy, you may obtain a copy by writing to the Free
// Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
//
// This software is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details
//
// Also, this program is "Philanthropy-Ware". That is, if you like it and
// feel the need to reward or inspire the author then please feel free (but
// not obligated) to consider joining or donating to the Electronic Frontier
// Foundation. This will help keep cyber space free of barbed wire and bullsh*t.
//
/////////////////////////////////////////////////////////////////////////////
/*
From the Readme_TomsMoComp.txt file:
TomsMoComp - Motion Compensating Deinterlace
TomsMoComp.dll is an Avisynth filter that uses motion compensation and adaptive processing to deinterlace a video source.
Just unzip the contents into your Avisynth directory, or somewhere. As the script shows,
I made a subdirectory under Avisynth just to keep it separate.
Here is one of the .avs scripts I was testing:
LoadPlugin("d:\AVISynth\TomsMoComp\Debug\TomsMoComp.dll")
clip = AVISource("c:\vcr\bikes.avi")
return clip.TomsMoComp(...)
It specifies the file spec (change yours), turns TopFirst on, and sets SearchEffort=10.
So far I've tested it only with Avisynth/VirtualDub.
TomsMoComp Parm list:
return clip.TomsMoComp(TopFirst, SearchEffort, Vertical_Filter)
All the values are integer, 0=no, 1=yes:
TopFirst - assume the top field, lines 0,2,4,... should be displayed first.
The default is the supposedly more common BottomFirst (not for me).
SearchEffort - determines how much effort (CPU time) will be used to find
moved pixels. Currently accepts values from 0-45, with 0 being practically
just a smarter bob.
Known issues and limitations:
1) Assumes YUV (YUY2) frame-based input. Use an Avisynth function to convert first if
needed.
For now, see www.trbarry.com/TomsMoComp.zip
THIS PROGRAM IS STILL NEW AND EXPERIMENTAL!
Please send comments to Tom Barry (trbarry@trbarry.com)
*/
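// Example call implied by the readme above (illustrative values only: TopFirst on
// and SearchEffort=10, as the readme mentions; Vertical_Filter off):
//
//   return clip.TomsMoComp(1, 10, 0)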
#include <malloc.h>
#include <string.h>
#include <unknwn.h>
#include "TomsMoComp_ff.h"
#include "../../inttypes.h"
#ifdef __GNUC__
#define __forceinline __attribute__((__always_inline__)) inline
#endif
#include "../../simd.h"
#include "../../compiler.h"
#pragma warning(disable: 4305 4309 4799 4127 4701)
static const __int64 ShiftMask=0xfefffefffefffeffULL; // to avoid shifting chroma to luma
static const __int64 YMask =0x00ff00ff00ff00ffULL;
static const __int64 UVMask=0xff00ff00ff00ff00ULL;
static const __int64 Max_Mov=0x0404040404040404LL;
static const __int64 FOURS=0x0404040404040404LL;
static const __int64 TENS =0x0a0a0a0a0a0a0a0aLL;
static const __int64 ONES =0x0101010101010101LL;
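// The v_pavgb() helper used throughout this file comes from ../../simd.h; the sketch
// below only illustrates its assumed per-byte semantics and is not called anywhere.
// On plain MMX the byte average is presumably emulated with word shifts, which is why
// ShiftMask clears the lowest bit of every high (chroma) byte first -- otherwise that
// bit would be shifted into the neighbouring luma byte ("to avoid shifting chroma to
// luma" above).
static inline void scalar_pavgb_sketch(unsigned char dst[8], const unsigned char src[8])
{
for (int i = 0; i < 8; i++)
{
// per-byte average, rounded down (a real pavgb instruction rounds up)
dst[i] = (unsigned char)((dst[i] >> 1) + (src[i] >> 1));
}
}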
typedef void (*TsearchLoopFc)(stride_t _src_pitch, stride_t _dst_pitch,
int _rowsize, const uint8_t* _pWeaveSrc, const uint8_t* _pWeaveSrcP,
uint8_t* _pWeaveDest, int _TopFirst, const uint8_t* _pCopySrc,
const uint8_t* _pCopySrcP, int _FldHeight);
template<class Tsimd,bool IS_SSE2,int BPP,bool USE_YV12,bool USE_VERTICAL_FILTER> struct TtomsMoComp
{
private:
template<bool DBL_RESIZE,bool SKIP_SEARCH,int SearchRange> struct TsearchLoop
{
static __forceinline void merge4PIXavg(const unsigned char *PADDR1,const unsigned char *PADDR2,__m64 &mm0,__m64 &mm1,__m64 &mm2,__m64 &mm3,__m64 &mm5,__m64 &mm7)
{
movq (mm0, PADDR1); /* our 4 pixels */
movq (mm1, PADDR2); /* our pixel2 value */
movq (mm2, mm0); /* another copy of our pixel1 value */
movq (mm3, mm1); /* another copy of our pixel2 value */
psubusb (mm2, mm1);
psubusb (mm3, mm0);
por (mm2, mm3);
Tsimd::v_pavgb(mm0, mm1, mm3, ShiftMask); /* avg of 2 pixels */
movq (mm3, mm2); /* another copy of our weights */
pxor (mm1, mm1);
psubusb (mm3, mm7); /* nonzero where old weights lower, else 0 */
pcmpeqb (mm3, mm1); /* now ff where new better, else 00 */
pcmpeqb (mm1, mm3); /* here ff where old better, else 00 */
pand (mm0, mm3); /* keep only better new pixels */
pand (mm2, mm3); /* and weights */
pand (mm5, mm1); /* keep only better old pixels */
pand (mm7, mm1);
por (mm5, mm0); /* and merge new & old vals */
por (mm7, mm2);
}
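// Illustrative scalar model of merge4PIXavg() above for a single byte lane (an
// unused reference sketch, not called by the filter): the candidate value is the
// average of the two mirrored pixels, its weight is their absolute difference, and
// it replaces the running best value/weight (held in mm5/mm7) wherever its weight
// is not larger than the old one.
static void merge4PIXavg_scalar_sketch(unsigned char p1, unsigned char p2, unsigned char &bestVal, unsigned char &bestWeight)
{
unsigned char weight = (unsigned char)(p1 > p2 ? p1 - p2 : p2 - p1); // |p1 - p2|, what the psubusb/por pair computes
unsigned char value = (unsigned char)((p1 + p2) >> 1); // candidate value, what v_pavgb computes
if (weight <= bestWeight) // the "ff where new better" mask
{
bestVal = value;
bestWeight = weight;
}
}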
static __forceinline void merge4PIXavgH(const unsigned char *PADDR1A, const unsigned char *PADDR1B, const unsigned char *PADDR2A, const unsigned char *PADDR2B,__m64 &mm0,__m64 &mm1,__m64 &mm2,__m64 &mm3,__m64 &mm5,__m64 &mm7)
{
movq (mm0, PADDR1A); /* our 4 pixels */
movq (mm1, PADDR2A); /* our pixel2 value */
movq (mm2, PADDR1B); /* our 4 pixels */
movq (mm3, PADDR2B); /* our pixel2 value */
Tsimd::v_pavgb(mm0, mm2, mm2, ShiftMask);
Tsimd::v_pavgb(mm1, mm3, mm3, ShiftMask);
movq (mm2, mm0); /* another copy of our pixel1 value */
movq (mm3, mm1); /* another copy of our pixel2 value */
psubusb (mm2, mm1);
psubusb (mm3, mm0);
por (mm2, mm3);
Tsimd::v_pavgb(mm0, mm1, mm3, ShiftMask); /* avg of 2 pixels */
movq (mm3, mm2); /* another copy of our weights */
pxor (mm1, mm1);
psubusb (mm3, mm7); /* nonzero where old weights lower, else 0 */
pcmpeqb (mm3, mm1); /* now ff where new better, else 00 */
pcmpeqb (mm1, mm3); /* here ff where old better, else 00 */
pand (mm0, mm3); /* keep only better new pixels */
pand (mm2, mm3); /* and weights */
pand (mm5, mm1); /* keep only better old pixels */
pand (mm7, mm1);
por (mm5, mm0); /* and merge new & old vals */
por (mm7, mm2);
}
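// merge4PIXavgH() above does the same merge, except each candidate pixel is first
// the horizontal average of an address pair (PADDR1A/PADDR1B and PADDR2A/PADDR2B),
// giving half-pel candidates. Scalar model for one byte lane (unused sketch):
static void merge4PIXavgH_scalar_sketch(unsigned char p1a, unsigned char p1b, unsigned char p2a, unsigned char p2b, unsigned char &bestVal, unsigned char &bestWeight)
{
merge4PIXavg_scalar_sketch((unsigned char)((p1a + p1b) >> 1), (unsigned char)((p2a + p2b) >> 1), bestVal, bestWeight);
}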
template<int r> static __forceinline void search(stride_t &ecx,const unsigned char* const esi,const unsigned char* const edi,__m64 &mm0,__m64 &mm1,__m64 &mm2,__m64 &mm3,__m64 &mm5,__m64 &mm7)
{
if (r==3 || r==5)
{
merge4PIXavg(edi+ecx-BPP, esi+ecx+BPP,mm0,mm1,mm2,mm3,mm5,mm7); // left, right
merge4PIXavg(edi+ecx+BPP, esi+ecx-BPP,mm0,mm1,mm2,mm3,mm5,mm7); // right, left
}
if (r==5)
{
merge4PIXavgH(edi+ecx-BPP, edi+ecx, esi+ecx, esi+ecx+BPP,mm0,mm1,mm2,mm3,mm5,mm7); // left, right
merge4PIXavgH(edi+ecx+BPP, edi+ecx, esi+ecx, esi+ecx-BPP,mm0,mm1,mm2,mm3,mm5,mm7); // right, left
}
if (r==21 || r==99)
{
merge4PIXavg(edi-6, esi+2*ecx+6,mm0,mm1,mm2,mm3,mm5,mm7); // up left, down right
merge4PIXavg(edi+6, esi+2*ecx-6,mm0,mm1,mm2,mm3,mm5,mm7); // up right, down left
merge4PIXavg(edi+ecx-6, esi+ecx+6,mm0,mm1,mm2,mm3,mm5,mm7); // left, right
merge4PIXavg(edi+ecx+6, esi+ecx-6,mm0,mm1,mm2,mm3,mm5,mm7); // right, left
merge4PIXavg(edi+2*ecx-6, esi+6,mm0,mm1,mm2,mm3,mm5,mm7); // down left, up right
merge4PIXavg(edi+2*ecx+6, esi-6,mm0,mm1,mm2,mm3,mm5,mm7); // down right, up left
}
if (r==9 || r==11 || r==13 || r==15 || r==19 || r==21 || r==99)
{
merge4PIXavg(edi-BPP, esi+2*ecx+BPP,mm0,mm1,mm2,mm3,mm5,mm7); // up left, down right
merge4PIXavg(edi+BPP, esi+2*ecx-BPP,mm0,mm1,mm2,mm3,mm5,mm7); // up right, down left
merge4PIXavg(edi+2*ecx-BPP, esi+BPP,mm0,mm1,mm2,mm3,mm5,mm7); // down left, up right
merge4PIXavg(edi+2*ecx+BPP, esi-BPP,mm0,mm1,mm2,mm3,mm5,mm7); // down right, up left
merge4PIXavg(edi+ecx-BPP, esi+ecx+BPP,mm0,mm1,mm2,mm3,mm5,mm7); // left, right
merge4PIXavg(edi+ecx+BPP, esi+ecx-BPP,mm0,mm1,mm2,mm3,mm5,mm7); // right, left
}
if (r==11 || r==13 || r==19)
{
merge4PIXavgH(edi+ecx-BPP, edi+ecx, esi+ecx, esi+ecx+BPP,mm0,mm1,mm2,mm3,mm5,mm7); // left, right
merge4PIXavgH(edi+ecx+BPP, edi+ecx, esi+ecx, esi+ecx-BPP,mm0,mm1,mm2,mm3,mm5,mm7); // right, left
}
if (r!=0 && !USE_YV12)
por (mm7, UVMask);
if (r==13)
{
merge4PIXavgH(edi+2*ecx, edi+ecx, esi+ecx, esi,mm0,mm1,mm2,mm3,mm5,mm7); // down, up
merge4PIXavgH(edi, edi+ecx, esi+ecx, esi+2*ecx,mm0,mm1,mm2,mm3,mm5,mm7); // up, down
}
if (r==99)
{
merge4PIXavg(edi-8, esi+2*ecx+8,mm0,mm1,mm2,mm3,mm5,mm7); // up left, down right
merge4PIXavg(edi+8, esi+2*ecx-8,mm0,mm1,mm2,mm3,mm5,mm7); // up right, down left
merge4PIXavg(edi+ecx-8, esi+ecx+8,mm0,mm1,mm2,mm3,mm5,mm7); // left, right
merge4PIXavg(edi+ecx+8, esi+ecx-8,mm0,mm1,mm2,mm3,mm5,mm7); // right, left
merge4PIXavg(edi+2*ecx-8, esi+8,mm0,mm1,mm2,mm3,mm5,mm7); // down left, up right
merge4PIXavg(edi+2*ecx+8, esi-8,mm0,mm1,mm2,mm3,mm5,mm7); // down right, up left
}
if (r==15 || r==19 || r==21 || r==99)
{
merge4PIXavg(edi-2*BPP, esi+2*ecx+2*BPP,mm0,mm1,mm2,mm3,mm5,mm7); // up left, down right
merge4PIXavg(edi+2*BPP, esi+2*ecx-2*BPP,mm0,mm1,mm2,mm3,mm5,mm7); // up right, down left
merge4PIXavg(edi+ecx-2*BPP, esi+ecx+2*BPP,mm0,mm1,mm2,mm3,mm5,mm7); // left, right
merge4PIXavg(edi+ecx+2*BPP, esi+ecx-2*BPP,mm0,mm1,mm2,mm3,mm5,mm7); // right, left
merge4PIXavg(edi+2*ecx-2*BPP, esi+2*BPP,mm0,mm1,mm2,mm3,mm5,mm7); // down left, up right
merge4PIXavg(edi+2*ecx+2*BPP, esi-2*BPP,mm0,mm1,mm2,mm3,mm5,mm7); // down right, up left
}
if (r==19)
{
merge4PIXavgH(edi+2*ecx, edi+ecx, esi+ecx, esi,mm0,mm1,mm2,mm3,mm5,mm7); // down, up
merge4PIXavgH(edi, edi+ecx, esi+ecx, esi+2*ecx,mm0,mm1,mm2,mm3,mm5,mm7); // up, down
}
if (r==9 || r==11 || r==13 || r==15 || r==19 || r==21 || r==99)
{
merge4PIXavg(edi+2*ecx, esi,mm0,mm1,mm2,mm3,mm5,mm7); // down, up
merge4PIXavg(edi, esi+2*ecx,mm0,mm1,mm2,mm3,mm5,mm7); // up, down
}
if (r!=0)
{
paddusb (mm7, ONES); // bias toward no motion
merge4PIXavg(edi+ecx,esi+ecx,mm0,mm1,mm2,mm3,mm5,mm7); // center, in old and new
}
}
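// Worked one-lane example of the "bias toward no motion" step above (illustration
// only, never called): paddusb(mm7, ONES) adds 1 to every accumulated weight before
// the unmoved centre candidate is merged, so on an effective tie the static pixel
// wins over a moved one.
static void no_motion_bias_sketch()
{
unsigned char bestVal = 120, bestWeight = 3; // best moved candidate so far (mm5/mm7)
bestWeight = (unsigned char)(bestWeight + 1); // paddusb(mm7, ONES)
merge4PIXavg_scalar_sketch(121, 124, bestVal, bestWeight); // centre pair: weight 3, value 122
// bestVal is now 122 and bestWeight 3: the unmoved centre candidate won the tie
}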
static void SearchLoop( stride_t src_pitch, stride_t dst_pitch,
int rowsize, const uint8_t* pWeaveSrc, const uint8_t* pWeaveSrcP,
uint8_t* pWeaveDest, int TopFirst, const uint8_t* pCopySrc,
const uint8_t* pCopySrcP, int FldHeight)
{
uint8_t *pDest;
const uint8_t *pSrcP;
const uint8_t *pSrc;
const uint8_t *pBob;
const uint8_t *pBobP;
int LineCt;
stride_t src_pitch2;
if (DBL_RESIZE)
src_pitch2 = src_pitch; // even & odd lines are not interleaved in DScaler
else
src_pitch2 = 2 * src_pitch; // even & odd lines are interleaved in Avisynth
stride_t dst_pitch2 = 2 * dst_pitch;
int y;
int Last8 = (rowsize-8); // ofs to last 8 bytes in row
stride_t dst_pitchw = dst_pitch; // local stor so asm can ref
pSrc = pWeaveSrc; // points 1 weave line above
pSrcP = pWeaveSrcP; // "
if (DBL_RESIZE)
{
if (USE_VERTICAL_FILTER)
pDest = pWeaveDest; // but will appear 1 line lower than without VF
else
pDest = pWeaveDest;
pBob = pCopySrc;
pBobP = pCopySrcP; // not used
LineCt = FldHeight-1;
}
else
{
// Not DBL_RESIZE here
if (USE_VERTICAL_FILTER)
pDest = pWeaveDest + dst_pitch;
else
pDest = pWeaveDest + dst_pitch2;
if (TopFirst)
{
pBob = pCopySrc + src_pitch2; // remember one weave line just copied previously
pBobP = pCopySrcP + src_pitch2;
}
else
{
pBob = pCopySrc;
pBobP = pCopySrcP;
}
LineCt = FldHeight - 2;
}
__m64 Min_Vals,Max_Vals;
for (y=1; y <= LineCt; y++)
{
__m64 mm0,mm1,mm2,mm3,mm4,mm5,mm6,mm7;
// Loop general reg usage
//
// eax - pBobP, then pDest
// ebx - pBob
// ecx - src_pitch2
// edx - current offset
// edi - prev weave pixels, 1 line up
// esi - next weave pixels, 1 line up
const unsigned char *ebx= pBob;
stride_t ecx= src_pitch2,edx;
const unsigned char *esi;
unsigned char *edi,*eax;
if (IS_SSE2)
{
// sse2 code deleted for now
}
else
{
ebx= pBob;
ecx= src_pitch2;
// simple bob first 8 bytes
if (USE_VERTICAL_FILTER)
{
movq (mm0, ebx);
movq (mm1, ebx+ecx);
movq (mm2, mm0);
Tsimd::v_pavgb(mm2, mm1, mm3, ShiftMask); // halfway between
Tsimd::v_pavgb(mm0, mm2, mm3, ShiftMask); // 1/4 way
Tsimd::v_pavgb(mm1, mm2, mm3, ShiftMask); // 3/4 way
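// i.e. with the vertical filter the two output lines are interpolated 1/4 and 3/4
// of the way between the two source field lines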
edi= pDest;
stride_t eaxx= dst_pitchw;
Tsimd::movntq (edi, mm0);
Tsimd::movntq (edi+eaxx, mm1);
// simple bob last 8 bytes
edx= Last8;
esi= ebx+edx;
movq (mm0, esi);
movq (mm1, esi+ecx);
movq (mm2, mm0);
Tsimd::v_pavgb (mm2, mm1, mm3, ShiftMask); // halfway between
Tsimd::v_pavgb (mm0, mm2, mm3, ShiftMask); // 1/4 way
Tsimd::v_pavgb (mm1, mm2, mm3, ShiftMask); // 3/4 way
edi+= edx; // last 8 bytes of dest
Tsimd::movntq(edi, mm0);
Tsimd::movntq(edi+eaxx, mm1);
}
else
{
movq (mm0, ebx);
//pavgb mm0, qword ptr[ebx+ecx]
Tsimd::v_pavgb(mm0, ebx+ecx, mm2, ShiftMask);
edi= pDest;
Tsimd::movntq(edi, mm0);