⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ff_kerneldeint.cpp

📁 从FFMPEG转换而来的H264解码程序,VC下编译..
💻 CPP
📖 第 1 页 / 共 5 页
字号:
/*
	KernelDeint() deinterlacing plugin for Avisynth.

	Based on the original KernelDeint plugin (c) 2003 Donald A. Graft
	MMX optimizations + new motion mask code (c) 2004 Kurt B. Pruenner

	This program is free software; you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program; if not, write to the Free Software
	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include <stddef.h>
#include "../../mem_align.h"
#include <malloc.h>
#include <string.h>
#include <unknwn.h>
#include "../../inttypes.h"
#include "../../simd.h"
#include "ff_kernelDeint.h"
#include "../../compiler.h"
#include <assert.h>

#pragma warning(disable:4127)
#pragma warning(disable:4799)
#pragma warning(disable:963)

#if !defined(DEBUG) && (!defined(__INTEL_COMPILER) && !defined(__GNUC__))
 #pragma message("Microsoft compilers are unable to produce optimized binary of ff_kernelDeint, use GCC or Intel C++ Compiler instead.")
#endif

// Packed 64-bit constants. Each name spells the hex pattern that is replicated
// across the quadword (per 16-bit word, per byte, or per 32-bit dword).
// None of them is referenced in the part of the file visible here; presumably
// they are operands for MMX code further down - TODO confirm.
static const __int64 qword_4354h=0x4354435443544354LL;
static const __int64 qword_15c2h=0x15c215c215c215c2LL;
static const __int64 qword_0ed9h=0x0ed90ed90ed90ed9LL;
static const __int64 qword_0354h=0x0354035403540354LL;
static const __int64 qword_03f8h=0x03f803f803f803f8LL;
static const __int64 qword_0fh=0x0f0f0f0f0f0f0f0fLL;
static const __int64 qword_14h=0x1414141414141414LL;
static const __int64 qword_10h=0x1010101010101010LL;
static const __int64 qword_140fh=0x140f140f140f140fLL;
// Written with the ULL suffix because the top bit is set, so the literal does
// not fit in a signed 64-bit value.
static const __int64 qword_ff00h=0xff00ff00ff00ff00ULL;
static const __int64 qword_000000ffh=0x000000ff000000ffLL;
static const __int64 qword_00ff0000h=0x00ff000000ff0000LL;

// One field (every second line) of a source picture, held in buffers that this
// object allocates with aligned_malloc() and releases in its destructor.
//
// The accessor names (GetReadPtr/GetWritePtr/GetPitch/GetRowSize/GetHeight)
// take a plane index: 0 for the first plane, 1 and 2 for the two half-size
// planes that exist only in the YV12 case.
//
// The object is deliberately non-copyable: it owns raw buffers, so a
// compiler-generated copy would leave two objects freeing the same memory.
struct TVideoFrame
{
private:
 int Bpp; // bytes per horizontal sample: 1 for YV12, 2 otherwise (set in the constructor)
 // Puts every member into the "empty frame" state (null pointers, zero sizes).
 void init(void)
  {
   memset(ptr,0,sizeof(ptr));
   memset(dx,0,sizeof(dx));
   memset(dy,0,sizeof(dy));
   memset(stride,0,sizeof(stride));
   Bpp=0;
  }
 // Copying is disabled (declared, intentionally never defined): the implicit
 // member-wise copy would duplicate the ptr[] values and cause a double free
 // when both objects are destroyed.
 TVideoFrame(const TVideoFrame&);
 TVideoFrame& operator=(const TVideoFrame&);
public:
 // Creates an empty frame that owns no buffers.
 TVideoFrame(void)
  {
   init();
  }
 // Extracts one field from a source picture into freshly allocated buffers.
 //   yv12      - true: three planes, planes 1 and 2 halved in both directions;
 //               false: a single plane with 2 bytes per sample.
 //   Idx, Idy  - dimensions of the full source picture; each stored plane gets
 //               Idy/2 lines (further halved for planes 1 and 2).
 //   src, srcStride - per-plane source pointers and strides.
 //   Istride   - stride used for every allocated plane.
 //   field     - index of the first source line to take (0 or 1 in the callers
 //               visible in this file); lines are then taken with a doubled
 //               source stride, i.e. every second line.
 //   copy      - plane copy routine; called as
 //               copy(dst,dstStride,src,srcStride,bytesPerRow,rows).
 //               Its declaration is not visible here - argument meaning is
 //               inferred from this call site, TODO confirm.
 TVideoFrame(bool yv12,unsigned int Idx,unsigned int Idy,const unsigned char *src[3],stride_t srcStride[3],stride_t Istride,int field,IkernelDeint::Tcopy *copy)
  {
   init();
   Bpp=yv12?1:2;
   static const int shift[3]={0,1,1}; // per-plane right shift applied to width and height
   for (int i=0;i<(yv12?3:1);i++)
    {
     dx[i]=Idx>>shift[i];dy[i]=(Idy/2)>>shift[i];
     stride[i]=Istride;
     ptr[i]=(unsigned char*)aligned_malloc(stride[i]*dy[i]);
     copy(ptr[i],stride[i],src[i]+field*srcStride[i],srcStride[i]*2,Bpp*dx[i],dy[i]);
    }
  }
 // Releases every plane buffer that was allocated.
 ~TVideoFrame(void)
  {
   for (int i=0;i<3;i++)
    if (ptr[i]) aligned_free(ptr[i]);
  }
 unsigned char* ptr[3];      // plane buffers (null when the plane is not present)
 unsigned int dx[3],dy[3];   // plane width in samples / height in lines
 stride_t stride[3];         // distance between consecutive lines of a plane
 const unsigned char* GetReadPtr(int plane=0) {return ptr[plane];}
 unsigned char* GetWritePtr(int plane=0) {return ptr[plane];}
 stride_t GetPitch(int plane=0) {return stride[plane];}
 unsigned int GetRowSize(int plane=0) {return Bpp*dx[plane];} // row length in bytes
 unsigned int GetHeight(int plane=0) {return dy[plane];}
};
// Plain (non-owning, non-reference-counted) pointer alias.
typedef TVideoFrame *PVideoFrame;

// Boolean tags used as template arguments to select the pixel layout.
static const bool YV12=true,YUY2=false;

// Per-layout constants, selected by the boolean tag above.
// Note: in the primary template the parameter is itself named YV12, which
// shadows the global constant inside that declaration only.
template<bool YV12> struct TshowMotionMaskTraits;
// Specialization for the YV12 tag (true): one byte per pixel.
template<> struct TshowMotionMaskTraits<YV12>
{
 typedef unsigned char Tpixel;
 // mask2 is the bitwise complement of 235 (0xeb) truncated to Tpixel.
 static const Tpixel mask=0xff,mask2=(Tpixel)~235;
};
// Specialization for the YUY2 tag (false): an unsigned long per pixel unit,
// with the constants placed in the low byte of each 16-bit half.
template<> struct TshowMotionMaskTraits<YUY2>
{
 typedef unsigned long Tpixel;
 // mask2 is the bitwise complement of 0x00eb00eb (0xeb == 235).
 static const Tpixel mask=0x00ff00ff,mask2=(Tpixel)~0x00eb00eb;
};

// A heap-allocated 2-D mask buffer whose row pitch is rounded up to a multiple
// of 16 bytes. The buffer is obtained with aligned_malloc() in the constructor
// and released with aligned_free() in the destructor.
//
// The object is deliberately non-copyable: it owns `buffer`, so a
// compiler-generated copy would make two objects free the same memory.
struct KernelDeintMask
{
 // Mask with one byte per element: byteWidth == width and pitch == bytePitch.
 KernelDeintMask(int Iwidth,int Iheight):width(Iwidth),byteWidth(Iwidth),height(Iheight),size(1)
  {
   bytePitch=pitch=(width+15)&(-16); // round the row length up to a multiple of 16
   buffer=(unsigned char*)aligned_malloc(bytePitch*height);
  }
 // Mask with Isize bytes per element.
 KernelDeintMask(int Iwidth,int Iheight,int Isize):width(Iwidth),height(Iheight),size(Isize)
  {
   byteWidth=width*size;
   bytePitch=(byteWidth+15)&(-16); // round the row length in bytes up to a multiple of 16
   pitch=bytePitch/size;           // the same pitch expressed in elements
   buffer=(unsigned char*)aligned_malloc(bytePitch*height);
  }
 ~KernelDeintMask()
  {
   aligned_free(buffer);
  }
 // Fills the whole buffer, padding included, with the byte value b.
 void set(int b)
  {
   memset(buffer,b,bytePitch*height);
  }

 int width;          // row length in elements
 int byteWidth;      // row length in bytes (width*size)
 int height;         // number of rows
 stride_t bytePitch; // distance between rows in bytes (multiple of 16)
 stride_t pitch;     // distance between rows in elements (bytePitch/size)
 int size;           // bytes per element

 unsigned char *buffer;

private:
 // Copying is disabled (declared, intentionally never defined): the implicit
 // member-wise copy would duplicate `buffer` and cause a double free.
 KernelDeintMask(const KernelDeintMask&);
 KernelDeintMask& operator=(const KernelDeintMask&);
};

class TkernelDeint :public IkernelDeint
{
private:
 bool isYV12;
 int order,threshold;
 unsigned int width,height;
 bool sharp,twoway,linked,map,bob;
 KernelDeintMask *fullsizeMask;
 KernelDeintMask *halfsizeMask;
 Tcopy *copy;

 unsigned char *scratch;int scratchPitch;
 void (TkernelDeint::*Deinterlace_0fc)(int plane,int n,int order,KernelDeintMask* mask,unsigned char *dst[3],stride_t dstStride[3]);
 bool masksFilled;
public:
 TkernelDeint(bool IisYV12,unsigned int Iwidth,unsigned int Iheight,unsigned int rowsize,int Iorder,int Ithreshold,bool Isharp,bool Itwoway,bool Ilinked,bool Imap,bool Ibob,int /*cpuflags*/,Tcopy *Icopy):
  width(Iwidth),height(Iheight),
  isYV12(IisYV12),
  copy(Icopy),
  order(Iorder),
  threshold(Ithreshold),
  sharp(Isharp),
  twoway(Itwoway),
  linked(Ilinked),
  map(Imap),
  bob(Ibob),
  fieldsptr(0),fieldstart(0),

  masksFilled(false)
  {
   if (isYV12)
    {
     fullsizeMask=new KernelDeintMask(width,height/2);
     halfsizeMask=new KernelDeintMask(width/2,height/4);
     if (!sharp && !twoway)
      Deinterlace_0fc=&TkernelDeint::Deinterlace_0<false,false,8,YV12>;
     else if (!sharp && twoway)
      Deinterlace_0fc=&TkernelDeint::Deinterlace_0<false,true,8,YV12>;
     else if (sharp && !twoway)
      Deinterlace_0fc=&TkernelDeint::Deinterlace_0<true,false,8,YV12>;
     else if (sharp && twoway)
      Deinterlace_0fc=&TkernelDeint::Deinterlace_0<true,true,8,YV12>;
    }
   else
    {
     fullsizeMask=new KernelDeintMask(width,height/2,2); //YUY2
     halfsizeMask=NULL;
     if (!sharp && !twoway)
      Deinterlace_0fc=&TkernelDeint::Deinterlace_0<false,false,8,YUY2>;
     else if (!sharp && twoway)
      Deinterlace_0fc=&TkernelDeint::Deinterlace_0<false,true,8,YUY2>;
     else if (sharp && !twoway)
      Deinterlace_0fc=&TkernelDeint::Deinterlace_0<true,false,8,YUY2>;
     else if (sharp && twoway)
      Deinterlace_0fc=&TkernelDeint::Deinterlace_0<true,true,8,YUY2>;
    }

   scratchPitch=(rowsize+15)&(-16);
   scratch=(unsigned char*)aligned_malloc(scratchPitch*8*3);

   framenum=0;memset(fields,0,sizeof(fields));
  }
 // Frees every stored field, both motion masks and the scratch buffer.
 ~TkernelDeint()
  {
   // delete on a null pointer is a no-op, so empty slots need no test.
   for (PVideoFrame *slot=fields;slot!=fields+NFIELDS;++slot)
    delete *slot;
   delete fullsizeMask;
   delete halfsizeMask; // stays NULL in the YUY2 case
   aligned_free(scratch);
  }
 // Destroys this object with `delete this`; the object must not be used
 // after the call returns.
 STDMETHODIMP_(void) destroy(void)
  {
   delete this;
  }
 // Replaces the field order setting. The parameter shadows the member of the
 // same name, hence the explicit this->.
 STDMETHODIMP_(void) setOrder(int order)
  {
   this->order=order;
  }
private:
 enum {PLANAR_Y=0,PLANAR_U=1,PLANAR_V=2};
 int framenum;
 // Produces one output picture into dst. Hinting is done by the caller.
 // When bobframe is 0 the two fields of `cur` are first captured into the
 // fields[] ring buffer; any other bobframe value reuses the fields already
 // stored. The frame counter advances after every call unless bob is set, in
 // which case it advances only after the bobframe==1 call.
 STDMETHODIMP_(void) getFrame(const unsigned char *cur[3],stride_t srcStride[3],unsigned char *dst[3],stride_t dstStride[3],int bobframe)
  {
   if (bobframe==0)
    {
     // Store the field starting at line 0, then the one starting at line 1,
     // each replacing the oldest entry in its slot.
     for (int parity=0;parity<2;parity++)
      {
       delete fields[fieldsptr];
       fields[fieldsptr]=new TVideoFrame(isYV12,width,height,cur,srcStride,scratchPitch,parity,copy);
       fieldsptr++;
      }
     fieldstart+=2;
     if (fieldsptr==NFIELDS)
      fieldsptr=0;
    }

   const int outputIndex=framenum*2+bobframe;
   if (isYV12)
    KernelDeint<YV12>(dst,dstStride,outputIndex);
   else
    KernelDeint<YUY2>(dst,dstStride,outputIndex);

   const bool frameDone=!bob || bobframe==1;
   if (frameDone)
    framenum++;

   _mm_empty();
  }
 static const int NFIELDS=6;
 PVideoFrame fields[NFIELDS];int fieldstart,fieldsptr;
 // Returns a stored field for field number n from the fields[] ring buffer.
 // Negative n is clamped to 0. If the selected slot is empty, the loop steps
 // back and tries again until it finds a non-null slot.
 PVideoFrame GetField(int n)
  {
   PVideoFrame field=NULL;
   do
    {
     if (n<0) n=0;
     // Translate the field number into a slot index relative to the write
     // position: fieldstart counts the fields stored so far, fieldsptr is the
     // next slot to be written.
     n=fieldsptr-(fieldstart-n);
     // NOTE(review): only a single wrap is applied; if n is more than NFIELDS
     // fields behind fieldstart the index stays negative - confirm callers
     // never ask for fields that old.
     if (n<0) n+=NFIELDS;
     // XOR with (1-order) swaps the two slots of a pair when order is 0 and
     // leaves the index unchanged when order is 1 (assumes order is 0 or 1 -
     // TODO confirm).
     field=fields[n^(1-order)];
     // NOTE(review): on a retry, n now holds a slot index minus one, yet it is
     // fed through the field-number formula above again - confirm intended.
     n--;
    } while (!field);
   return field;
  }

 // Fills halfsizeMask from fullsizeMask: every output row is built from two
 // consecutive full-size rows, and every 8 output bytes from 16 input bytes of
 // each of those rows.
 //
 // movq/por/psrlw/packuswb/pxor/pcmpgtb are not defined in the visible part of
 // this file; presumably they are helpers from simd.h that mirror the MMX
 // instructions of the same names - the per-line comments below assume that.
 // The local names esi/edx/edi/ecx suggest a transcription of inline assembly.
 // _mm_empty() is not called here (it is commented out at the end).
 static void HalveMotionMask_YV12_MMX(KernelDeintMask* halfsizeMask, KernelDeintMask* fullsizeMask)
  {
   unsigned char* halfMaskPtr=halfsizeMask->buffer;
   stride_t halfMaskPitch=halfsizeMask->bytePitch;

   unsigned char* fullMaskPtr=fullsizeMask->buffer;
   stride_t fullMaskPitch=fullsizeMask->bytePitch;

   int rowSize=halfsizeMask->width;

   // 0x80 in every byte: used both as the sign-flip value and as the
   // comparison threshold below.
   __m64 mm2=_mm_set1_pi8(-128/*(char)0x80*/);

   // One iteration per half-size row; the full-size pointer advances two rows.
   for (int rowsLeft=halfsizeMask->height;rowsLeft;halfMaskPtr+=halfMaskPitch,fullMaskPtr+=fullMaskPitch*2,rowsLeft--)
    {
     unsigned char *esi=fullMaskPtr;       // upper full-size row
     unsigned char *edx=esi+fullMaskPitch; // lower full-size row
     unsigned char *edi=halfMaskPtr;       // output row

     // Round the output row length up to a multiple of 8 bytes.
     int ecx=rowSize;
     ecx+=7;
     ecx&=-8;

     // Point all three pointers at the END of the region to process (the
     // inputs are twice as wide as the output) and count ecx up from
     // -length to 0, so one counter indexes all three rows.
     esi+=ecx;
     esi+=ecx;

     edx+=ecx;
     edx+=ecx;

     edi+=ecx;

     ecx=-ecx;

     for (;ecx;ecx+=8)
      {
       __m64 mm0,mm1;
       // Load 16 bytes of the upper row...
       movq (mm0,esi+2*ecx  );
       movq (mm1,esi+2*ecx+8);

       // ...and OR in the 16 bytes directly below them.
       por (mm0,edx+2*ecx  );
       por (mm1,edx+2*ecx+8);

       // Shift each 16-bit word (two horizontally adjacent mask bytes) right
       // by one bit.
       psrlw (mm0,1);
       psrlw (mm1,1);

       // Pack the eight words into eight bytes, then flip the sign bit and
       // compare against 0x80 as signed bytes: presumably this yields 0xff
       // for every packed byte that is non-zero and 0x00 otherwise.
       packuswb (mm0,mm1);
       pxor (mm0,mm2);
       pcmpgtb (mm0,mm2);

       // Store 8 bytes of the half-size mask.
       movq (edi+ecx,mm0);
      }
    }
    //_mm_empty();
  }

 template<int PART,bool HALFSIZE,bool OVERWRITE,bool TOPFIRST> static __forceinline void MotionMaskLine_YV12_2_MMX(int &bytesLeft,const unsigned char* &curSrcAPtr,const stride_t srcAPitch,const unsigned char* &curSrcBPtr/*,const int srcBPitch*/,unsigned char* &curMaskPtr,const stride_t maskPitch,int threshold)
  {
	//mov eax,080808080h
	//movd mm5,eax
	//punpcklbw mm5,mm5

	__m64 mm5=_mm_set1_pi8(-128/*(char)0x80*/);

	//mov eax,001010101h
	//movd mm6,eax
	//punpcklbw mm6,mm6

	__m64 mm6=_mm_set1_pi8(1);

        __m64 mm7=_mm_set1_pi8((char)threshold);
        /*
	mov eax,threshold
	movd mm7,eax
	punpcklbw mm7,mm7
	punpcklbw mm7,mm7
	punpcklbw mm7,mm7
	*/

	pxor (mm7,mm5);

	stride_t eax=srcAPitch;

	stride_t ebx=maskPitch;
	ebx+=ebx;

	const unsigned char *esi=curSrcAPtr;
	const unsigned char *edi=curSrcBPtr;
	unsigned char *edx=curMaskPtr;

	int ecx=bytesLeft;
	ecx+=7;
	ecx&=-8;

        for (;ecx!=0;esi+=8,edi+=8,ecx-=8)
        {
              __m64 mm0,mm1,mm2,mm3;

	      if (PART != 2 || TOPFIRST)
	      {
		      movq (mm0,esi);
		      movq (mm2,edi);

		      // convert unsigned to signed

		      pxor (mm0,mm5);
		      pxor (mm2,mm5);

		      // build a mask in mm3 of which bytes in mm2 are bigger
		      // than their counterparts in mm0

		      movq (mm3,mm2);
		      pcmpgtb (mm3,mm0);

		      // calculate the differences

		      psubb (mm0,mm2);

		      // flip sign of the bytes masked by mm3 (twos complement)

		      pxor (mm0,mm3);
		      pand (mm3,mm6);
		      paddb (mm0,mm3);

		      pxor (mm0,mm5);
		      pcmpgtb (mm0,mm7);

	      }else{
		      pxor (mm0,mm0);
	      }

	      if ((PART > 1) && ((PART < 4) || !(TOPFIRST)))
	      {

		      movq (mm1,esi+eax);
		      movq (mm2,edi+eax);

		      // convert unsigned to signed

		      pxor (mm1,mm5);
		      pxor (mm2,mm5);

		      // build a mask in mm3 of which bytes in mm2 are bigger
		      // than their counterparts in mm1

		      movq (mm3,mm2);
		      pcmpgtb (mm3,mm1);

		      // calculate the differences

		      psubb (mm1,mm2);

		      // flip sign of the bytes masked by mm3 (twos complement)

		      pxor (mm1,mm3);
		      pand (mm3,mm6);
		      paddb (mm1,mm3);

		      pxor (mm1,mm5);
		      pcmpgtb (mm1,mm7);

		      por (mm0,mm1);

	      }

	      if (HALFSIZE)
	      {

		      movq (mm1,mm0);

		      punpcklbw (mm0,mm0);
		      punpckhbw (mm1,mm1);

		      if (OVERWRITE){
			      movq (edx      ,mm0);
			      movq (edx+8    ,mm1);
			      movq (edx+ebx  ,mm0);
			      movq (edx+ebx+8,mm1);
		      }else{

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -