📄 ff_kerneldeint.cpp
字号:
movq (mm2,edx );
movq (mm3,edx+8 );
por (mm2,mm0);
por (mm3,mm1);
movq (edx ,mm2);
movq (edx+8 ,mm3);
movq (mm2,edx+ebx );
movq (mm3,edx+ebx+8);
por (mm2,mm0);
por (mm3,mm1);
movq (edx+ebx ,mm2);
movq (edx+ebx+8,mm3);
}
edx+=16;
}else{
if (OVERWRITE){
movq (edx,mm0);
}else{
movq (mm1,edx);
por (mm1,mm0);
movq (edx,mm1);
}
edx+=8;
}
}
}
template<int PART> static __forceinline void MotionMaskLine_YV12_1(const int rowSize,const unsigned char* &srcAPtr,const stride_t srcAPitch,const unsigned char* &srcBPtr,const stride_t srcBPitch,unsigned char* &maskPtr,const stride_t maskPitch,bool halfsize,bool overwrite,int threshold,int order)
{
int bytesLeft=rowSize;
const unsigned char *curSrcAPtr=srcAPtr;
const unsigned char *curSrcBPtr=srcBPtr;
if (halfsize)
{
//#define HALFSIZE
unsigned char* curMaskPtr=maskPtr;
if (PART == 1)
{
if ( overwrite && (order == 1))
{
//#define OVERWRITE
//#define TOPFIRST
MotionMaskLine_YV12_2_MMX<PART,true,true,true>(bytesLeft,curSrcAPtr,srcAPitch,curSrcBPtr,curMaskPtr,maskPitch,threshold);
goto part1end1;
}
else if ( overwrite && (order != 1))
{
//#define OVERWRITE
//#undef TOPFIRST
MotionMaskLine_YV12_2_MMX<PART,true,true,false>(bytesLeft,curSrcAPtr,srcAPitch,curSrcBPtr,curMaskPtr,maskPitch,threshold);
goto part1end1;
}
//else
}
if (!overwrite && (order == 1))
{
//#undef OVERWRITE
//#define TOPFIRST
MotionMaskLine_YV12_2_MMX<PART,true,false,true>(bytesLeft,curSrcAPtr,srcAPitch,curSrcBPtr,curMaskPtr,maskPitch,threshold);
}
else if (!overwrite && (order != 1))
{
//#undef OVERWRITE
//#undef TOPFIRST
MotionMaskLine_YV12_2_MMX<PART,true,false,false>(bytesLeft,curSrcAPtr,srcAPitch,curSrcBPtr,curMaskPtr,maskPitch,threshold);
}
part1end1:
maskPtr+=2*2*maskPitch;
}
else
{
//#undef HALFSIZE
unsigned char* curMaskPtr=maskPtr;
if (PART == 1)
{
if ( overwrite && (order == 1))
{
//#define OVERWRITE
//#define TOPFIRST
MotionMaskLine_YV12_2_MMX<PART,false,true,true>(bytesLeft,curSrcAPtr,srcAPitch,curSrcBPtr,curMaskPtr,maskPitch,threshold);
goto part1end2;
}
else if ( overwrite && (order != 1))
{
//#define OVERWRITE
//#undef TOPFIRST
MotionMaskLine_YV12_2_MMX<PART,false,true,false>(bytesLeft,curSrcAPtr,srcAPitch,curSrcBPtr,curMaskPtr,maskPitch,threshold);
goto part1end2;
}
//else
}
if (!overwrite && (order == 1))
{
//#undef OVERWRITE
//#define TOPFIRST
MotionMaskLine_YV12_2_MMX<PART,false,false,true>(bytesLeft,curSrcAPtr,srcAPitch,curSrcBPtr,curMaskPtr,maskPitch,threshold);
}
else if (!overwrite && (order != 1))
{
//#undef OVERWRITE
//#undef TOPFIRST
MotionMaskLine_YV12_2_MMX<PART,false,false,false>(bytesLeft,curSrcAPtr,srcAPitch,curSrcBPtr,curMaskPtr,maskPitch,threshold);
}
part1end2:
maskPtr+=maskPitch;
}
srcAPtr+=srcAPitch;
srcBPtr+=srcBPitch;
}
template<int PITCH_MOD> __forceinline void MotionMask_YV12_0(KernelDeintMask* mask,
int order,int plane,
bool overwrite,bool linked,int n)
{
bool halfsize=(plane != PLANAR_Y) && linked;
stride_t maskPitch=mask->pitch;
if (halfsize) maskPitch/=2;
PVideoFrame srcA;
PVideoFrame srcB;
stride_t srcAPitch;
stride_t srcBPitch;
const unsigned char* srcAPtr;
const unsigned char* srcBPtr;
int rowSize;
unsigned char* maskPtr;
srcA=GetField(n+1);
srcB=GetField(n-1);
srcAPitch=srcA->GetPitch(plane);
srcBPitch=srcB->GetPitch(plane);
srcAPtr=srcA->GetReadPtr(plane);
srcBPtr=srcB->GetReadPtr(plane);
rowSize=srcA->GetRowSize(plane);
bool oddPitch=(srcAPitch % PITCH_MOD) != 0;
maskPtr=mask->buffer;
//unsigned short doubleThreshold=(unsigned short)(2*threshold);
int height=srcA->GetHeight(plane);
int rowsLeft=height;
if (oddPitch) rowsLeft--;
while (rowsLeft > 0)
{
MotionMaskLine_YV12_1<1>(rowSize,srcAPtr,srcAPitch,srcBPtr,srcBPitch,maskPtr,maskPitch,halfsize,overwrite,threshold,order);
rowsLeft--;
}
if (oddPitch)
{
// copy bottom parts of fields to scratch buffer
stride_t safePitch=(scratchPitch >= srcAPitch ? srcAPitch : scratchPitch);
//unsigned char* scratchPtr=scratch;
memcpy(scratch,srcAPtr,safePitch);
memcpy(scratch+scratchPitch,srcBPtr,safePitch);
// change pointers to point to scratch buffer
srcAPtr=scratch;
srcBPtr=srcAPtr+scratchPitch;
srcAPitch=srcBPitch=scratchPitch*2;
MotionMaskLine_YV12_1<1>(rowSize,srcAPtr,srcAPitch,srcBPtr,srcBPitch,maskPtr,maskPitch,halfsize,overwrite,threshold,order);
}
overwrite=false;
srcA=GetField(n );
srcB=GetField(n-2);
srcAPtr=srcA->GetReadPtr(plane)-(1-order)*srcAPitch;
srcBPtr=srcB->GetReadPtr(plane)-(1-order)*srcBPitch;;
maskPtr=mask->buffer;
MotionMaskLine_YV12_1<2>(rowSize,srcAPtr,srcAPitch,srcBPtr,srcBPitch,maskPtr,maskPitch,halfsize,overwrite,threshold,order);
rowsLeft=height-2;
if (oddPitch) rowsLeft--;
while (rowsLeft > 0)
{
MotionMaskLine_YV12_1<3>(rowSize,srcAPtr,srcAPitch,srcBPtr,srcBPitch,maskPtr,maskPitch,halfsize,overwrite,threshold,order);
rowsLeft--;
}
if (oddPitch)
{
// copy bottom parts of fields to scratch buffer
stride_t safePitch=(scratchPitch >= srcAPitch ? srcAPitch : scratchPitch);
unsigned char* scratchPtr=scratch;
for (rowsLeft=2; rowsLeft > 0; rowsLeft--)
{
memcpy(scratchPtr,srcAPtr,safePitch);
scratchPtr+=scratchPitch;
memcpy(scratchPtr,srcBPtr,safePitch);
scratchPtr+=scratchPitch;
srcAPtr+=srcAPitch; srcBPtr+=srcBPitch;
}
// change pointers to point to scratch buffer
srcAPtr=scratch;
srcBPtr=srcAPtr+scratchPitch;
srcAPitch=srcBPitch=scratchPitch*2;
MotionMaskLine_YV12_1<3>(rowSize,srcAPtr,srcAPitch,srcBPtr,srcBPitch,maskPtr,maskPitch,halfsize,overwrite,threshold,order);
}
MotionMaskLine_YV12_1<4>(rowSize,srcAPtr,srcAPitch,srcBPtr,srcBPitch,maskPtr,maskPitch,halfsize,overwrite,threshold,order);
}
__forceinline void BuildPlaneMotionMask_YV12_MMX(KernelDeintMask* mask,
int order,
int plane,
bool linked,
bool overwrite,int n)
{
//#define COLORSPACE COLORSPACE_YV12
//#define PITCH_MOD 8
//#define MOTIONMASK_STAGE1 "include/MotionMaskLine_YV12_1.cpp"
//#define MOTIONMASK_STAGE2 "include/MotionMaskLine_YV12_2_MMX.cpp"
MotionMask_YV12_0<8>(mask,order,plane,overwrite,linked,n);
//_mm_empty();
}
template<int PART,bool LINKED,bool TOPFIRST> static __forceinline void MotionMaskLine_YUY2_2_MMX(int &bytesLeft,const unsigned char* &curSrcAPtr,const stride_t srcAPitch,const unsigned char* &curSrcBPtr/*,const int srcBPitch*/,unsigned char* &curMaskPtr,const stride_t /*maskPitch*/,int threshold)
{
__m64 mm4,mm0,mm2,mm3,mm1;
pxor (mm4,mm4);
//;mov eax,080808080h
//;movd mm5,eax
//;punpcklbw mm5,mm5
__m64 mm5=_mm_set1_pi8(-128/*(char)0x80*/);
//mov eax,001010101h
//movd mm6,eax
//punpcklbw mm6,mm6
__m64 mm6=_mm_set1_pi8(1);
__m64 mm7=_mm_set1_pi8((char)threshold);
//mov eax,threshold
//movd mm7,eax
//punpcklbw mm7,mm7
//punpcklbw mm7,mm7
//punpcklbw mm7,mm7
pxor (mm7,mm5);
stride_t eax=srcAPitch;
const unsigned char *esi=curSrcAPtr;
const unsigned char *edi=curSrcBPtr;
unsigned char *edx=curMaskPtr;
int ecx=bytesLeft;
ecx+=7;
ecx&=-8;
ColLoop:
if ((PART != 2) || (TOPFIRST)){
movq (mm0,esi);
movq (mm2,edi);
//; convert unsigned to signed
pxor (mm0,mm5);
pxor (mm2,mm5);
//; build a mask in mm3 of which bytes in mm2 are bigger
//; than their counterparts in mm0
movq (mm3,mm2);
pcmpgtb (mm3,mm0);
//; calculate the differences
psubb (mm0,mm2);
//; flip sign of the bytes masked by mm3 (twos complement)
pxor (mm0,mm3);
pand (mm3,mm6);
paddb (mm0,mm3);
pxor (mm0,mm5);
pcmpgtb (mm0,mm7);
}else{
pxor (mm0,mm0);
}
if ((PART > 1) && ((PART < 4) || !(TOPFIRST))){
movq (mm1,esi+eax);
movq (mm2,edi+eax);
//; convert unsigned to signed
pxor (mm1,mm5);
pxor (mm2,mm5);
//; build a mask in mm3 of which bytes in mm2 are bigger
//; than their counterparts in mm1
movq (mm3,mm2);
pcmpgtb (mm3,mm1);
//; calculate the differences
psubb (mm1,mm2);
//; flip sign of the bytes masked by mm3 (twos complement)
pxor (mm1,mm3);
pand (mm3,mm6);
paddb (mm1,mm3);
pxor (mm1,mm5);
pcmpgtb (mm1,mm7);
por (mm0,mm1);
}
if (LINKED){
//;mov ebx,0ff00ff00h
//;movd mm1,ebx
//;punpckldq mm1,mm1
movq (mm1,qword_ff00h);
pand (mm1,mm0);
psrld (mm1,1);
pcmpgtd (mm1,mm4);
por (mm0,mm1);
//;mov ebx,0000000ffh
//;movd mm2,ebx
movq (mm2,qword_000000ffh);
//;shl ebx,16
//;movd mm3,ebx
movq (mm2,qword_00ff0000h);
//;punpckldq mm2,mm2
//;punpckldq mm3,mm3
movq (mm1,mm2);
pand (mm1,mm0);
psrld (mm1,1);
pcmpgtd (mm1,mm4);
psubusb (mm1,mm3);
por (mm0,mm1);
pand (mm3,mm0);
psrld (mm3,1);
pcmpgtd (mm3,mm4);
psubusb (mm3,mm2);
por (mm0,mm3);
}else{
//;mov ebx,0ff00ff00h
//;movd mm1,ebx
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -