// ff_kerneldeint.cpp
/*
KernelDeint() deinterlacing plugin for Avisynth.
Based on the original KernelDeint plugin (c) 2003 Donald A. Graft
MMX optimizations + new motion mask code (c) 2004 Kurt B. Pruenner
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <stddef.h>
#include "../../mem_align.h"
#include <malloc.h>
#include <string.h>
#include <unknwn.h>
#include "../../inttypes.h"
#include "../../simd.h"
#include "ff_kernelDeint.h"
#include "../../compiler.h"
#include <assert.h>
#pragma warning(disable:4127)
#pragma warning(disable:4799)
#pragma warning(disable:963)
#if !defined(DEBUG) && (!defined(__INTEL_COMPILER) && !defined(__GNUC__))
#pragma message("Microsoft compilers are unable to produce optimized binary of ff_kernelDeint, use GCC or Intel C++ Compiler instead.")
#endif
// Packed 64-bit constants. Each name spells the pattern that is repeated across the
// quadword: a 16-bit word repeated four times, a byte repeated eight times, or a
// 32-bit mask repeated twice.
// NOTE(review): the code that consumes these is outside this excerpt. The 16-bit values
// look like fixed-point filter weights (e.g. 0x4354/32768 is about 0.526) — confirm
// against the deinterlacing kernels before changing any of them.
static const __int64 qword_4354h=0x4354435443544354LL;
static const __int64 qword_15c2h=0x15c215c215c215c2LL;
static const __int64 qword_0ed9h=0x0ed90ed90ed90ed9LL;
static const __int64 qword_0354h=0x0354035403540354LL;
static const __int64 qword_03f8h=0x03f803f803f803f8LL;
// Byte-wise patterns (same byte in all eight lanes).
static const __int64 qword_0fh=0x0f0f0f0f0f0f0f0fLL;
static const __int64 qword_14h=0x1414141414141414LL;
static const __int64 qword_10h=0x1010101010101010LL;
// Alternating bytes 0x14,0x0f in every 16-bit word.
static const __int64 qword_140fh=0x140f140f140f140fLL;
// The unsigned literal does not fit a signed __int64; only the bit pattern
// (high byte of every 16-bit word set) is meaningful here.
static const __int64 qword_ff00h=0xff00ff00ff00ff00ULL;
// Per-dword byte selectors: lowest byte, and third byte, of each 32-bit half.
static const __int64 qword_000000ffh=0x000000ff000000ffLL;
static const __int64 qword_00ff0000h=0x00ff000000ff0000LL;
/*
 * One field of a picture (every second scanline), copied into privately owned
 * buffers obtained from aligned_malloc().
 *
 * For YV12 three planes are stored (planes 1 and 2 at half width and half height);
 * for YUY2 only plane 0 is used and a row holds 2 bytes per pixel.
 *
 * The object owns ptr[] and frees it in the destructor, so it must not be copied:
 * the copy constructor and copy assignment are declared private and left undefined
 * (a copy would otherwise free the same buffers twice).
 */
struct TVideoFrame
{
private:
    int Bpp; // row-size multiplier: 1 for YV12 planes, 2 for packed YUY2

    // Non-copyable: declared but intentionally never defined.
    TVideoFrame(const TVideoFrame&);
    TVideoFrame& operator=(const TVideoFrame&);

    // Puts every member into the "empty frame" state (null planes, zero sizes).
    void init(void)
    {
        memset(ptr,0,sizeof(ptr));
        memset(dx,0,sizeof(dx));
        memset(dy,0,sizeof(dy));
        memset(stride,0,sizeof(stride));
        Bpp=0;
    }
public:
    // Creates an empty frame that owns no buffers.
    TVideoFrame(void)
    {
        init();
    }

    /*
     * Extracts one field from a source picture.
     *
     * yv12       true: three planes, 1 byte per sample; false: one packed plane, 2 bytes per pixel
     * Idx, Idy   picture width and full (both fields) height in pixels
     * src        source plane pointers
     * srcStride  source plane strides
     * Istride    stride used for every plane of the copy (chroma planes included)
     * field      index of the first source line to take (0 or 1); every second line follows
     * copy       plane copy routine, invoked as
     *            copy(dst,dstStride,src,srcStride,rowBytes,rows)
     *            NOTE(review): argument meaning inferred from this call site — confirm
     *            against the IkernelDeint::Tcopy declaration.
     */
    TVideoFrame(bool yv12,unsigned int Idx,unsigned int Idy,const unsigned char *src[3],stride_t srcStride[3],stride_t Istride,int field,IkernelDeint::Tcopy *copy)
    {
        init();
        Bpp=yv12?1:2;
        static const int shift[3]={0,1,1}; // planes 1 and 2 are halved in both directions
        const int planeCount=yv12?3:1;
        for (int i=0;i<planeCount;i++)
        {
            dx[i]=Idx>>shift[i];
            dy[i]=(Idy/2)>>shift[i]; // a field has half the lines of the picture
            stride[i]=Istride;
            ptr[i]=(unsigned char*)aligned_malloc(stride[i]*dy[i]);
            // Start at line `field` and step two source lines at a time.
            copy(ptr[i],stride[i],src[i]+field*srcStride[i],srcStride[i]*2,Bpp*dx[i],dy[i]);
        }
    }

    // Releases every plane buffer that was allocated.
    ~TVideoFrame(void)
    {
        for (int i=0;i<3;i++)
            if (ptr[i]) aligned_free(ptr[i]);
    }

    unsigned char* ptr[3];   // plane buffers (null when unused)
    unsigned int dx[3],dy[3]; // per-plane width and height of the stored field
    stride_t stride[3];      // per-plane stride of the stored field

    // Accessors; `plane` indexes ptr[]/dx[]/dy[]/stride[] and is not range-checked.
    const unsigned char* GetReadPtr(int plane=0) const {return ptr[plane];}
    unsigned char* GetWritePtr(int plane=0) {return ptr[plane];}
    stride_t GetPitch(int plane=0) const {return stride[plane];}
    unsigned int GetRowSize(int plane=0) const {return Bpp*dx[plane];}
    unsigned int GetHeight(int plane=0) const {return dy[plane];}
};
typedef TVideoFrame *PVideoFrame;
// Colorspace selectors, used as the bool template argument throughout this file.
static const bool YV12=true;
static const bool YUY2=false;

// Per-colorspace constants for the motion-mask display; only the two
// specializations below exist.
template<bool isYV12> struct TshowMotionMaskTraits;

// YV12: one byte per element.
template<> struct TshowMotionMaskTraits<YV12>
{
    typedef unsigned char Tpixel;
    static const Tpixel mask=0xff;
    static const Tpixel mask2=(Tpixel)~235;
};

// YUY2: one unsigned long per element; the masks select/clear bytes 0 and 2 of each
// 32-bit group.
template<> struct TshowMotionMaskTraits<YUY2>
{
    typedef unsigned long Tpixel;
    static const Tpixel mask=0x00ff00ff;
    static const Tpixel mask2=(Tpixel)~0x00eb00eb;
};
/*
 * A rectangular mask buffer allocated with aligned_malloc().
 *
 * width/height are in elements, `size` is the number of bytes per element.
 * Every row occupies bytePitch bytes (the row byte width rounded up to a multiple
 * of 16); `pitch` is the same distance expressed in elements.
 *
 * The object owns `buffer` and frees it in the destructor, so copying is disabled
 * (private, undefined copy constructor / assignment): a copy would free the same
 * buffer twice.
 */
struct KernelDeintMask
{
    // Mask with one byte per element.
    KernelDeintMask(int Iwidth,int Iheight):width(Iwidth),height(Iheight),size(1)
    {
        allocate();
    }
    // Mask with Isize bytes per element (Isize must be non-zero: it is used as a divisor).
    KernelDeintMask(int Iwidth,int Iheight,int Isize):width(Iwidth),height(Iheight),size(Isize)
    {
        allocate();
    }
    ~KernelDeintMask()
    {
        aligned_free(buffer);
    }
    // Fills the whole buffer, row padding included, with byte value b.
    void set(int b)
    {
        memset(buffer,b,bytePitch*height);
    }
    int width;          // row width in elements
    int byteWidth;      // row width in bytes (width*size)
    int height;         // number of rows
    stride_t bytePitch; // distance between rows in bytes, multiple of 16
    stride_t pitch;     // distance between rows in elements (bytePitch/size)
    int size;           // bytes per element
    unsigned char *buffer;
private:
    // Non-copyable: declared but intentionally never defined.
    KernelDeintMask(const KernelDeintMask&);
    KernelDeintMask& operator=(const KernelDeintMask&);

    // Derives the row layout from width/height/size and allocates the buffer.
    void allocate()
    {
        byteWidth=width*size;
        bytePitch=(byteWidth+15)&(-16);
        pitch=bytePitch/size;
        buffer=(unsigned char*)aligned_malloc(bytePitch*height);
    }
};
class TkernelDeint :public IkernelDeint
{
private:
bool isYV12;
int order,threshold;
unsigned int width,height;
bool sharp,twoway,linked,map,bob;
KernelDeintMask *fullsizeMask;
KernelDeintMask *halfsizeMask;
Tcopy *copy;
unsigned char *scratch;int scratchPitch;
void (TkernelDeint::*Deinterlace_0fc)(int plane,int n,int order,KernelDeintMask* mask,unsigned char *dst[3],stride_t dstStride[3]);
bool masksFilled;
public:
// Stores the filter settings, allocates the motion masks and the scratch buffer, and
// selects the Deinterlace_0 instantiation matching colorspace / sharp / twoway.
// The cpuflags argument is accepted but unused. `rowsize` only determines the
// scratch pitch (rounded up to a multiple of 16).
TkernelDeint(bool IisYV12,unsigned int Iwidth,unsigned int Iheight,unsigned int rowsize,int Iorder,int Ithreshold,bool Isharp,bool Itwoway,bool Ilinked,bool Imap,bool Ibob,int /*cpuflags*/,Tcopy *Icopy):
    isYV12(IisYV12),
    order(Iorder),
    threshold(Ithreshold),
    width(Iwidth),
    height(Iheight),
    sharp(Isharp),
    twoway(Itwoway),
    linked(Ilinked),
    map(Imap),
    bob(Ibob),
    copy(Icopy),
    masksFilled(false),
    fieldstart(0),
    fieldsptr(0)
{
    if (isYV12)
    {
        // One mask per field-sized luma plane, plus a quarter-area one.
        fullsizeMask=new KernelDeintMask(width,height/2);
        halfsizeMask=new KernelDeintMask(width/2,height/4);
        if (sharp)
        {
            if (twoway)
                Deinterlace_0fc=&TkernelDeint::Deinterlace_0<true,true,8,YV12>;
            else
                Deinterlace_0fc=&TkernelDeint::Deinterlace_0<true,false,8,YV12>;
        }
        else
        {
            if (twoway)
                Deinterlace_0fc=&TkernelDeint::Deinterlace_0<false,true,8,YV12>;
            else
                Deinterlace_0fc=&TkernelDeint::Deinterlace_0<false,false,8,YV12>;
        }
    }
    else
    {
        // YUY2: a single mask with two bytes per element, no half-size mask.
        fullsizeMask=new KernelDeintMask(width,height/2,2);
        halfsizeMask=NULL;
        if (sharp)
        {
            if (twoway)
                Deinterlace_0fc=&TkernelDeint::Deinterlace_0<true,true,8,YUY2>;
            else
                Deinterlace_0fc=&TkernelDeint::Deinterlace_0<true,false,8,YUY2>;
        }
        else
        {
            if (twoway)
                Deinterlace_0fc=&TkernelDeint::Deinterlace_0<false,true,8,YUY2>;
            else
                Deinterlace_0fc=&TkernelDeint::Deinterlace_0<false,false,8,YUY2>;
        }
    }

    scratchPitch=(rowsize+15)&(-16);
    scratch=(unsigned char*)aligned_malloc(scratchPitch*8*3);

    // Start with an empty field ring and frame counter.
    framenum=0;
    memset(fields,0,sizeof(fields));
}
// Releases the buffered fields, both masks and the scratch buffer.
~TkernelDeint()
{
    // delete on a null pointer is a no-op, so empty ring slots and the absent
    // half-size mask (YUY2) need no explicit guard.
    for (int slot=0;slot!=NFIELDS;++slot)
        delete fields[slot];
    delete fullsizeMask;
    delete halfsizeMask;
    aligned_free(scratch);
}
// Destroys this instance through `delete this`; the object must not be touched
// after the call returns.
// NOTE(review): this assumes the instance was created with `new` — confirm at the
// creation site, which is outside this excerpt.
STDMETHODIMP_(void) destroy(void)
{
delete this;
}
// Replaces the stored `order` setting. The parameter shadows the member of the same
// name, hence the explicit this->.
// Within this excerpt the member is read by GetField(), which XORs the ring index
// with (1-order).
STDMETHODIMP_(void) setOrder(int order)
{
this->order=order;
}
private:
enum {PLANAR_Y=0,PLANAR_U=1,PLANAR_V=2};
int framenum;
// Produces one output picture into dst. On bobframe==0 the source picture `cur` is
// first split into its two fields, which are pushed into the field ring; the
// deinterlacer is then run for field number framenum*2+bobframe.
// Field-order hinting is done by the caller.
STDMETHODIMP_(void) getFrame(const unsigned char *cur[3],stride_t srcStride[3],unsigned char *dst[3],stride_t dstStride[3],int bobframe) //hinting is done by caller
{
    if (bobframe==0)
    {
        // Store the field starting at line 0, then the one starting at line 1, each
        // replacing whatever occupied its ring slot.
        for (int parity=0;parity<2;parity++)
        {
            if (fields[fieldsptr])
                delete fields[fieldsptr];
            fields[fieldsptr]=new TVideoFrame(isYV12,width,height,cur,srcStride,scratchPitch,parity,copy);
            fieldsptr++;
        }
        fieldstart+=2;
        // Slots are consumed in pairs, so the write position wraps exactly at NFIELDS.
        if (fieldsptr==NFIELDS)
            fieldsptr=0;
    }

    //if (!bob) n*=2;
    const int fieldNumber=framenum*2+bobframe;
    if (isYV12)
        KernelDeint<YV12>(dst,dstStride,fieldNumber);
    else
        KernelDeint<YUY2>(dst,dstStride,fieldNumber);

    // When bobbing, the frame counter advances only after the second output of a pair.
    if (!bob || bobframe==1)
        framenum++;

    _mm_empty();
}
static const int NFIELDS=6;
PVideoFrame fields[NFIELDS];int fieldstart,fieldsptr;
// Returns the buffered field for field number n.
// Negative numbers are clamped to 0. The number is converted to a ring position
// relative to the write cursor (fieldsptr) and the running field count (fieldstart),
// wrapped once by NFIELDS if negative, and XORed with (1-order) to pick the slot.
// If that slot is empty the lookup is retried one step lower; note that the retry
// feeds the already converted ring position back in as the field number.
// The loop only ends when a non-null slot is found.
PVideoFrame GetField(int n)
{
    int pos=n;
    for (;;)
    {
        if (pos<0)
            pos=0;
        pos=fieldsptr-(fieldstart-pos);
        if (pos<0)
            pos+=NFIELDS;
        PVideoFrame candidate=fields[pos^(1-order)];
        if (candidate)
            return candidate;
        pos--;
    }
}
// Fills halfsizeMask from fullsizeMask: every output row is built from two
// consecutive full-size rows, and every group of 8 output bytes from 16 bytes of
// each of those rows.
// The half-size width is rounded up to a multiple of 8, so row padding is read and
// written as well.
// NOTE(review): movq/por/psrlw/packuswb/pxor/pcmpgtb are helpers declared outside
// this excerpt; presumably they mirror the MMX instructions of the same name. Under
// that assumption an output byte becomes 0xff when the OR-ed, halved and packed
// source pair is non-zero, and 0 otherwise.
// The MMX state is not cleared here (no _mm_empty call).
static void HalveMotionMask_YV12_MMX(KernelDeintMask* halfsizeMask, KernelDeintMask* fullsizeMask)
{
    const stride_t dstPitch=halfsizeMask->bytePitch;
    const stride_t srcPitch=fullsizeMask->bytePitch;
    const int paddedWidth=(halfsizeMask->width+7)&-8;
    __m64 signBit=_mm_set1_pi8(-128/*(char)0x80*/);

    unsigned char *dstRow=halfsizeMask->buffer;
    unsigned char *srcRow=fullsizeMask->buffer;
    for (int rowsLeft=halfsizeMask->height;rowsLeft;rowsLeft--)
    {
        unsigned char *upper=srcRow;
        unsigned char *lower=srcRow+srcPitch;
        for (int x=0;x<paddedWidth;x+=8)
        {
            __m64 first,second;
            // Combine the two source rows.
            movq (first,upper+2*x);
            movq (second,upper+2*x+8);
            por (first,lower+2*x);
            por (second,lower+2*x+8);
            // Halve each 16-bit pair and pack 16 source bytes down to 8.
            psrlw (first,1);
            psrlw (second,1);
            packuswb (first,second);
            // Flip the top bit, then compare against the flipped zero.
            pxor (first,signBit);
            pcmpgtb (first,signBit);
            movq (dstRow+x,first);
        }
        dstRow+=dstPitch;
        srcRow+=srcPitch*2;
    }
    //_mm_empty();
}
template<int PART,bool HALFSIZE,bool OVERWRITE,bool TOPFIRST> static __forceinline void MotionMaskLine_YV12_2_MMX(int &bytesLeft,const unsigned char* &curSrcAPtr,const stride_t srcAPitch,const unsigned char* &curSrcBPtr/*,const int srcBPitch*/,unsigned char* &curMaskPtr,const stride_t maskPitch,int threshold)
{
//mov eax,080808080h
//movd mm5,eax
//punpcklbw mm5,mm5
__m64 mm5=_mm_set1_pi8(-128/*(char)0x80*/);
//mov eax,001010101h
//movd mm6,eax
//punpcklbw mm6,mm6
__m64 mm6=_mm_set1_pi8(1);
__m64 mm7=_mm_set1_pi8((char)threshold);
/*
mov eax,threshold
movd mm7,eax
punpcklbw mm7,mm7
punpcklbw mm7,mm7
punpcklbw mm7,mm7
*/
pxor (mm7,mm5);
stride_t eax=srcAPitch;
stride_t ebx=maskPitch;
ebx+=ebx;
const unsigned char *esi=curSrcAPtr;
const unsigned char *edi=curSrcBPtr;
unsigned char *edx=curMaskPtr;
int ecx=bytesLeft;
ecx+=7;
ecx&=-8;
for (;ecx!=0;esi+=8,edi+=8,ecx-=8)
{
__m64 mm0,mm1,mm2,mm3;
if (PART != 2 || TOPFIRST)
{
movq (mm0,esi);
movq (mm2,edi);
// convert unsigned to signed
pxor (mm0,mm5);
pxor (mm2,mm5);
// build a mask in mm3 of which bytes in mm2 are bigger
// than their counterparts in mm0
movq (mm3,mm2);
pcmpgtb (mm3,mm0);
// calculate the differences
psubb (mm0,mm2);
// flip sign of the bytes masked by mm3 (twos complement)
pxor (mm0,mm3);
pand (mm3,mm6);
paddb (mm0,mm3);
pxor (mm0,mm5);
pcmpgtb (mm0,mm7);
}else{
pxor (mm0,mm0);
}
if ((PART > 1) && ((PART < 4) || !(TOPFIRST)))
{
movq (mm1,esi+eax);
movq (mm2,edi+eax);
// convert unsigned to signed
pxor (mm1,mm5);
pxor (mm2,mm5);
// build a mask in mm3 of which bytes in mm2 are bigger
// than their counterparts in mm1
movq (mm3,mm2);
pcmpgtb (mm3,mm1);
// calculate the differences
psubb (mm1,mm2);
// flip sign of the bytes masked by mm3 (twos complement)
pxor (mm1,mm3);
pand (mm3,mm6);
paddb (mm1,mm3);
pxor (mm1,mm5);
pcmpgtb (mm1,mm7);
por (mm0,mm1);
}
if (HALFSIZE)
{
movq (mm1,mm0);
punpcklbw (mm0,mm0);
punpckhbw (mm1,mm1);
if (OVERWRITE){
movq (edx ,mm0);
movq (edx+8 ,mm1);
movq (edx+ebx ,mm0);
movq (edx+ebx+8,mm1);
}else{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -