📄 ff_kerneldeint.cpp
字号:
//;punpckldq mm1,mm1
movq (mm1,qword_ff00h);
movq (mm2,mm1);
pand (mm1,mm0);
psrld (mm1,1);
pcmpgtd (mm1,mm4);
pand (mm2,mm1);
por (mm0,mm2);
}
if (PART == 1){
movq (edx,mm0);
}else{
movq (mm1,edx);
por (mm1,mm0);
movq (edx,mm1);
}
esi+=8;
edi+=8;
edx+=8;
ecx-=8;
if (ecx!=0)
goto ColLoop;
}
// Processes one mask row: picks the MotionMaskLine_YUY2_2_MMX instantiation
// that matches the runtime flags, runs it, then steps the caller's mask and
// source pointers down by one row.
//
// Template parameters:
//   COLORSPACE - colour space selector; the non-linked kernels are dispatched
//                only when it equals YUY2, the linked kernels for any value.
//   PART       - forwarded unchanged as the first template argument of the
//                row kernel.
// Parameters:
//   rowSize            - row length in bytes, handed to the kernel.
//   srcAPtr / srcBPtr  - current rows of the two sources; advanced by
//                        srcAPitch / srcBPitch on return.
//   maskPtr            - current mask row; advanced by maskPitch on return.
//   linked             - selects the second kernel template argument
//                        (LINKED in the original preprocessor version).
//   order              - order == 1 selects the third kernel template
//                        argument (TOPFIRST in the original version).
//   threshold          - forwarded to the kernel.
template<int COLORSPACE,int PART> static __forceinline void MotionMaskLine_YUY2_RGB_1(const int rowSize,const unsigned char* &srcAPtr,const stride_t srcAPitch,const unsigned char* &srcBPtr,const stride_t srcBPitch,unsigned char* &maskPtr,const stride_t maskPitch,bool linked,int order,int threshold)
{
    // The kernel receives working copies, so the caller's row pointers are
    // only changed by the explicit pitch steps at the bottom.
    int widthBytes=rowSize;
    const unsigned char *rowA=srcAPtr;
    const unsigned char *rowB=srcBPtr;
    unsigned char *rowMask=maskPtr;
    const bool topFirst=(order == 1);

    if (linked)
    {
        // Linked kernels run regardless of COLORSPACE.
        if (topFirst)
            MotionMaskLine_YUY2_2_MMX<PART,true,true>(widthBytes,rowA,srcAPitch,rowB,rowMask,maskPitch,threshold);
        else
            MotionMaskLine_YUY2_2_MMX<PART,true,false>(widthBytes,rowA,srcAPitch,rowB,rowMask,maskPitch,threshold);
    }
    else if (COLORSPACE == YUY2)
    {
        // Non-linked kernels exist for YUY2 only; other colour spaces do
        // no per-row work here and just advance the pointers.
        if (topFirst)
            MotionMaskLine_YUY2_2_MMX<PART,false,true>(widthBytes,rowA,srcAPitch,rowB,rowMask,maskPitch,threshold);
        else
            MotionMaskLine_YUY2_2_MMX<PART,false,false>(widthBytes,rowA,srcAPitch,rowB,rowMask,maskPitch,threshold);
    }

    // Step every row pointer to the next line.
    srcAPtr+=srcAPitch;
    srcBPtr+=srcBPitch;
    maskPtr+=maskPitch;
}
// Fills the motion mask for frame n on the YUY2/RGB path.
//
// The mask in mask->buffer is produced in two passes:
//   pass 1 - field n   against field n-2, PART 1 for every row;
//   pass 2 - field n+1 against field n-1, PART 2 for the first call,
//            PART 3 for the middle rows and PART 4 for the final call.
// Every MotionMaskLine_YUY2_RGB_1 call advances srcAPtr, srcBPtr and maskPtr
// by one row, which is why the loops below never step those pointers.
//
// Template parameters:
//   COLORSPACE - forwarded to the per-row helper.
//   PITCH_MOD  - pitch granularity; when the field pitch is not a multiple of
//                it, the trailing row(s) are copied into the scratch buffer
//                and processed from there.
//                NOTE(review): presumably this keeps the row kernels' 8-byte
//                accesses inside an allocation - confirm.
// Parameters:
//   mask  - destination mask (buffer and bytePitch are read).
//   order - field order; (1-order) rows are subtracted from the pass-2 start.
//   n     - index passed to GetField.
//
// GetField, linked, threshold, scratch and scratchPitch are not declared in
// this function; presumably they are members of the enclosing class.
template<int COLORSPACE,int PITCH_MOD> __forceinline void MotionMask_YUY2_RGB_0(KernelDeintMask* mask,
int order,
int n)
{
    stride_t maskPitch=mask->bytePitch;
    PVideoFrame srcA;
    PVideoFrame srcB;
    stride_t srcAPitch;
    stride_t srcBPitch;
    const unsigned char* srcAPtr;
    const unsigned char* srcBPtr;
    int rowSize;
    unsigned char* maskPtr;

    // ---- Pass 1: field n against field n-2 ----
    srcA=GetField(n  );
    srcB=GetField(n-2);
    srcAPitch=srcA->GetPitch();
    srcBPitch=srcB->GetPitch();
    srcAPtr=srcA->GetReadPtr();
    srcBPtr=srcB->GetReadPtr();
    rowSize=srcA->GetRowSize();
    bool oddPitch=(srcAPitch % PITCH_MOD) != 0;
    maskPtr=mask->buffer;
    int height=srcA->GetHeight();
    int rowsLeft=height;
    if (oddPitch) rowsLeft--; // the last row goes through the scratch buffer below
    while (rowsLeft > 0)
    {
        MotionMaskLine_YUY2_RGB_1<COLORSPACE,1>(rowSize,srcAPtr,srcAPitch,srcBPtr,srcBPitch,maskPtr,maskPitch,linked,order,threshold);
        rowsLeft--;
    }
    if (oddPitch)
    {
        // copy bottom parts of fields to scratch buffer
        stride_t safePitch=(scratchPitch >= srcAPitch ? srcAPitch : scratchPitch);
        memcpy(scratch,srcAPtr,safePitch);
        memcpy(scratch+scratchPitch,srcBPtr,safePitch);
        // change pointers to point to scratch buffer; A and B rows are
        // interleaved there, hence the doubled stride
        srcAPtr=scratch;
        srcBPtr=srcAPtr+scratchPitch;
        srcAPitch=srcBPitch=scratchPitch*2;
        MotionMaskLine_YUY2_RGB_1<COLORSPACE,1>(rowSize,srcAPtr,srcAPitch,srcBPtr,srcBPitch,maskPtr,maskPitch,linked,order,threshold);
    }

    // ---- Pass 2: field n+1 against field n-1 ----
    srcA=GetField(n+1);
    srcB=GetField(n-1);
    // Re-read the pitches from the new fields. The odd-pitch branch above
    // replaces srcAPitch/srcBPitch with the scratch stride, and using that
    // stride to offset and walk the real fields would address the wrong rows.
    srcAPitch=srcA->GetPitch();
    srcBPitch=srcB->GetPitch();
    srcAPtr=srcA->GetReadPtr()-(1-order)*srcAPitch;
    srcBPtr=srcB->GetReadPtr()-(1-order)*srcBPitch;
    maskPtr=mask->buffer;
    MotionMaskLine_YUY2_RGB_1<COLORSPACE,2>(rowSize,srcAPtr,srcAPitch,srcBPtr,srcBPitch,maskPtr,maskPitch,linked,order,threshold);
    rowsLeft=height-2; // first and last calls are PART 2 and PART 4
    if (oddPitch) rowsLeft--;
    while (rowsLeft > 0)
    {
        MotionMaskLine_YUY2_RGB_1<COLORSPACE,3>(rowSize,srcAPtr,srcAPitch,srcBPtr,srcBPitch,maskPtr,maskPitch,linked,order,threshold);
        rowsLeft--;
    }
    if (oddPitch)
    {
        // copy bottom parts of fields to scratch buffer
        stride_t safePitch=(scratchPitch >= srcAPitch ? srcAPitch : scratchPitch);
        unsigned char* scratchPtr=scratch;
        // two rows of each source, stored as A0,B0,A1,B1
        for (rowsLeft=2; rowsLeft > 0; rowsLeft--)
        {
            memcpy(scratchPtr,srcAPtr,safePitch);
            scratchPtr+=scratchPitch;
            memcpy(scratchPtr,srcBPtr,safePitch);
            scratchPtr+=scratchPitch;
            srcAPtr+=srcAPitch; srcBPtr+=srcBPitch;
        }
        // change pointers to point to scratch buffer (second copied row pair)
        srcAPtr=scratch+2*scratchPitch;
        srcBPtr=srcAPtr+scratchPitch;
        srcAPitch=srcBPitch=scratchPitch*2;
        MotionMaskLine_YUY2_RGB_1<COLORSPACE,3>(rowSize,srcAPtr,srcAPitch,srcBPtr,srcBPitch,maskPtr,maskPitch,linked,order,threshold);
    }
    MotionMaskLine_YUY2_RGB_1<COLORSPACE,4>(rowSize,srcAPtr,srcAPitch,srcBPtr,srcBPitch,maskPtr,maskPitch,linked,order,threshold);
}
// Builds the motion mask for frame n on the YUY2 path by instantiating the
// shared YUY2/RGB mask builder with COLORSPACE = YUY2 and PITCH_MOD = 8.
//
//   mask  - mask to fill.
//   order - field order, forwarded unchanged.
//   n     - frame index, forwarded unchanged.
void BuildMotionMask_YUY2_MMX(KernelDeintMask* mask,
int order,
int n)
{
    // Value handed to the builder as its PITCH_MOD template argument.
    static const int pitchModulus=8;

    MotionMask_YUY2_RGB_0<YUY2,pitchModulus>(mask,order,n);
    // No _mm_empty() is issued here (the call was already commented out);
    // NOTE(review): presumably the MMX state is cleared by a caller - confirm.
}
// Entry point for motion-mask construction.
//
//   YV12 == false : delegates to BuildMotionMask_YUY2_MMX, which fills
//                   fullsizeMask only (halfsizeMask and linked are unused).
//   YV12 == true  : builds the mask plane by plane.
//       linked    - Y, U and V all go into fullsizeMask, then halfsizeMask
//                   is derived from it with HalveMotionMask_YV12_MMX.
//       !linked   - Y goes into fullsizeMask, U and V into halfsizeMask.
//
// The fourth argument of BuildPlaneMotionMask_YV12_MMX mirrors `linked`; the
// fifth is true only for the first plane written into a given mask.
// NOTE(review): presumably "overwrite" versus "merge into existing mask" -
// confirm against BuildPlaneMotionMask_YV12_MMX.
template<bool YV12> void BuildMotionMask(KernelDeintMask* fullsizeMask,
KernelDeintMask* halfsizeMask,
int order,
bool linked,
int n)
{
    if (!YV12)
    {
        BuildMotionMask_YUY2_MMX(fullsizeMask,order,n);
        return;
    }

    if (!linked)
    {
        // Independent luma / chroma masks.
        BuildPlaneMotionMask_YV12_MMX(fullsizeMask,order,PLANAR_Y,false,true,n);
        BuildPlaneMotionMask_YV12_MMX(halfsizeMask,order,PLANAR_U,false,true,n);
        BuildPlaneMotionMask_YV12_MMX(halfsizeMask,order,PLANAR_V,false,false,n);
        return;
    }

    // Linked: all three planes feed the full-size mask, which is then halved.
    BuildPlaneMotionMask_YV12_MMX(fullsizeMask,order,PLANAR_Y,true,true,n);
    BuildPlaneMotionMask_YV12_MMX(fullsizeMask,order,PLANAR_U,true,false,n);
    BuildPlaneMotionMask_YV12_MMX(fullsizeMask,order,PLANAR_V,true,false,n);
    HalveMotionMask_YV12_MMX(halfsizeMask,fullsizeMask);
}
// Copies one row from srcPtr to dstPtr and advances the row pointers.
//
//   srcPtr / srcPitch - source row and stride; srcPtr is advanced only when
//                       order == 0.
//   dstPtr / dstPitch - destination row and stride; dstPtr is always advanced.
//   rowSize           - payload bytes in the row. As in the MMX original, the
//                       copy length is rounded up to a multiple of 8, so both
//                       buffers must have at least that many bytes per row.
//   order             - field order flag (see srcPtr above).
//
// The previous hand-written MMX version split the row into 32-byte blocks plus
// a tail of (rowSize+7)&24 bytes. That mask drops the carry into bit 5, so
// when rowSize % 32 was 25..31 the last 25..31 bytes were never copied. A
// single memcpy of the rounded-up length copies the whole row; rows whose
// length was already handled correctly copy exactly the same bytes as before.
static __forceinline void COPY_LINE(const unsigned char* &srcPtr,stride_t srcPitch,unsigned char* &dstPtr,stride_t dstPitch,int rowSize,int order)
{
    if (rowSize > 0) // a non-positive size must not turn into a huge size_t
    {
        const size_t paddedBytes=static_cast<size_t>((rowSize+7)&~7);
        memcpy(dstPtr,srcPtr,paddedBytes);
    }
    dstPtr+=dstPitch;
    if (order == 0) srcPtr+=srcPitch;
}
// Values for the SOURCE template argument of DeinterlaceLoad: they select
// which of its three pointer arguments the load reads from.
static const int SRC_PREV=1; // read through esi
static const int SRC_CUR =2; // read through ecx
static const int SRC_NEXT=3; // read through edi
// Loads 8 bytes into TARGET from one of three row pointers, optionally
// displaced by one or two rows.
//
// Template parameters:
//   DIRECTION - row displacement: +-1 uses one pitch, +-2 uses two pitches,
//               any other value reads the selected pointer undisplaced.
//   SOURCE    - SRC_PREV (esi), SRC_CUR (ecx) or SRC_NEXT (edi).
// Parameters:
//   eax          - row pitch. Its sign is kept in step with the sign of the
//                  last non-zero DIRECTION, so it is negated in place when
//                  the direction changes.
//   EAX_INVERTED - tracks whether eax currently holds the negated pitch;
//                  updated together with eax.
//   esi/ecx/edi  - candidate base pointers (not modified).
//   TARGET       - receives the loaded quadword.
template<int DIRECTION,int SOURCE> static __forceinline void DeinterlaceLoad(stride_t &eax,const unsigned char* &esi,const unsigned char* &ecx,const unsigned char* &edi,__m64 &TARGET,bool &EAX_INVERTED)
{
    // Flip the pitch only when its current sign disagrees with DIRECTION;
    // DIRECTION == 0 leaves it as it is.
    const bool needNegative=(DIRECTION < 0) && !EAX_INVERTED;
    const bool needPositive=(DIRECTION > 0) && EAX_INVERTED;
    if (needNegative || needPositive)
    {
        EAX_INVERTED=!EAX_INVERTED;
        eax=-eax;
    }

    // Pick the base pointer named by SOURCE.
    const unsigned char *base;
    switch (SOURCE)
    {
        case SRC_PREV: base=esi; break;
        case SRC_CUR:  base=ecx; break;
        case SRC_NEXT: base=edi; break;
    }

    // eax already carries the sign, so only the magnitude matters here.
    stride_t rowOffset=0;
    if ((DIRECTION == 2) || (DIRECTION == -2))
        rowOffset=2*eax;
    else if ((DIRECTION == 1) || (DIRECTION == -1))
        rowOffset=eax;

    movq (TARGET,base+rowOffset);
}
template<bool HALFSIZE,bool SHARP,bool TWOWAY,int COLORSPACE,int U2,int U1,int D1,int D2,int ASU2,int ASU1,int ASD1,int ASD2> static __forceinline void DeinterlaceLine_2(int &bytesLeft,const unsigned char* &curPrevPtr,const unsigned char* &curSrcPtr,const unsigned char* &curNextPtr,unsigned char* &curDstPtr,const unsigned char* &curMaskPtr,stride_t srcPitch)
{
bytesLeft+=7;
bytesLeft&=-8;
const unsigned char *esi=curPrevPtr;
const unsigned char *ecx=curSrcPtr;
const unsigned char *edi=curNextPtr;
unsigned char *edx=curDstPtr;
stride_t eax=srcPitch;
__m64 mm0,mm1,mm2,mm3,mm4,mm5,mm6,mm7;
ColLoop:
const unsigned char *ebx1=curMaskPtr;
movq (mm7,ebx1);
movq (mm6,mm7);
psrlw (mm6,1);
packuswb (mm6,mm6);
curMaskPtr+=8;
int ebx;movd(ebx,mm6);
if (ebx==0)
goto CopyThrough;
pxor (mm6,mm6);
if (!SHARP){
if (!TWOWAY){
// intVal=+8*( *(curSrcPtr +ASD1*srcPitch)
//#define SOURCE SRC_CUR
//#define TARGET mm0
//#define DIRECTION ASD1
bool EAX_INVERTED=false;
DeinterlaceLoad<ASD1,SRC_CUR>(eax,esi,ecx,edi,mm0,EAX_INVERTED);// #include "include\DeinterlaceLoad.cpp"
movq (mm1,mm0);
punpcklbw (mm0,mm6);
punpckhbw (mm1,mm6);
// +*(curSrcPtr +ASU1*srcPitch))
//#define SOURCE SRC_CUR
//#define TARGET mm4
//#define DIRECTION ASU1
DeinterlaceLoad<ASU1,SRC_CUR>(eax,esi,ecx,edi,mm4,EAX_INVERTED);// #include "include\DeinterlaceLoad.cpp"
movq (mm5,mm4);
punpcklbw (mm4,mm6);
punpckhbw (mm5,mm6);
paddsw (mm4,mm0);
paddsw (mm5,mm1);
psllw (mm4,3);
psllw (mm5,3);
//; +2*( *(curPrevPtr ))
//#define SOURCE SRC_PREV
//#define TARGET mm0
//#define DIRECTION 0
DeinterlaceLoad<0,SRC_PREV>(eax,esi,ecx,edi,mm0,EAX_INVERTED);// #include "include\DeinterlaceLoad.cpp"
movq (mm1,mm0);
punpcklbw (mm0,mm6);
punpckhbw (mm1,mm6);
psllw (mm0,1);
psllw (mm1,1);
paddsw (mm4,mm0);
paddsw (mm5,mm1);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -