📄 tomsmocomp.cpp
字号:
// simple bob last 8 bytes
edx= Last8;
esi=ebx+edx;
movq (mm0, esi);
//pavgb mm0, qword ptr[esi+ecx]
Tsimd::v_pavgb(mm0, esi+ecx, mm2, ShiftMask);
Tsimd::movntq(edi+edx, mm0);
}
// now loop and get the middle qwords
esi= pSrc;
edi= (unsigned char*)pSrcP;
edx= 8; // curr offset into all lines
LoopQ:
eax= (unsigned char*)pBobP;
edi+= 8;
esi+= 8;
ebx+= 8;
eax+= edx;
//weirdbob.inc
// First, get and save our possible Bob values
// Assume our pixels are laid out as follows with x the calc'd bob value
// and the other pixels are from the current field
//
// j a b c k current field
// x calculated line
// m d e f n current field
//
// we calc the bob value as:
// x2 = either avg(a,f), avg(c,d), avg(b,e), avg(j,n), or avg(k,m)
// selected for the smallest of abs(a,f), abs(c,d), or abs(b,e), etc.
// a,f
// 2003/06/17 Use BPP (bits per pixel) here
movq (mm0, ebx-BPP); // value a from top left
movq (mm1, ebx+ecx+BPP); // value f from bottom right
movq (mm6, mm0);
//pavgb mm6, mm1 // avg(a,f), also best so far
Tsimd::v_pavgb(mm6, mm1, mm7, ShiftMask); // avg(a,f), also best so far
movq (mm7, mm0);
psubusb (mm7, mm1);
psubusb (mm1, mm0);
por (mm7, mm1); // abs diff, also best so far
// c,d
movq (mm0, ebx+BPP); // value a from top left
movq (mm1, ebx+ecx-BPP); // value f from bottom right
movq (mm2, mm0);
//pavgb mm2, mm1 // avg(c,d)
Tsimd::v_pavgb(mm2, mm1, mm3, ShiftMask); // avg(c,d)
movq (mm3, mm0);
psubusb (mm3, mm1);
psubusb (mm1, mm0);
por (mm3, mm1); // abs(c,d)
movq (mm1, mm3); // keep copy
psubusb (mm3, mm7); // nonzero where new weights bigger, else 0
pxor (mm4, mm4);
pcmpeqb (mm3, mm4); // now ff where new better, else 00
pcmpeqb (mm4, mm3); // here ff where old better, else 00
pand (mm1, mm3); // keep only better new avg and abs
pand (mm2, mm3);
pand (mm6, mm4);
pand (mm7, mm4);
por (mm6, mm2); // and merge new & old vals keeping best
por (mm7, mm1);
if (!USE_YV12)
{
por (mm7, UVMask); // but we know YUY2 chroma is worthless so far
pand (mm5, YMask); // mask out chroma from here also
}
// j,n
movq (mm0, ebx-2*BPP); // value j from top left
movq (mm1, ebx+ecx+2*BPP); // value n from bottom right
movq (mm2, mm0);
// pavgb mm2, mm1 // avg(j,n)
Tsimd::v_pavgb(mm2, mm1, mm3, ShiftMask); // avg(j,n)
movq (mm3, mm0);
psubusb (mm3, mm1);
psubusb (mm1, mm0);
por (mm3, mm1); // abs(j-n)
movq (mm1, mm3); // keep copy
psubusb (mm3, mm7); // nonzero where new weights bigger, else 0
pxor (mm4, mm4);
pcmpeqb (mm3, mm4); // now ff where new better, else 00
pcmpeqb (mm4, mm3); // here ff where old better, else 00
pand (mm1, mm3); // keep only better new avg and abs
pand (mm3, mm2);
pand (mm6, mm4);
pand (mm7, mm4);
por (mm6, mm3); // and merge new & old vals keeping best
por (mm7, mm1); // "
// k, m
movq (mm0, ebx+2*BPP); // value k from top right
movq (mm1, ebx+ecx-2*BPP); // value n from bottom left
movq (mm4, mm0);
//pavgb mm4, mm1 // avg(k,m)
Tsimd::v_pavgb(mm4, mm1, mm3, ShiftMask); // avg(k,m)
movq (mm3, mm0);
psubusb (mm3, mm1);
psubusb (mm1, mm0);
por (mm3, mm1); // abs(k,m)
movq (mm1, mm3); // keep copy
movq (mm2, mm4); // avg(k,m)
psubusb (mm3, mm7); // nonzero where new weights bigger, else 0
pxor (mm4, mm4);
pcmpeqb (mm3, mm4); // now ff where new better, else 00
pcmpeqb (mm4, mm3); // here ff where old better, else 00
pand (mm1, mm3); // keep only better new avg and abs
pand (mm3, mm2);
pand (mm6, mm4);
pand (mm7, mm4);
por (mm6, mm3); // and merge new & old vals keeping best
por (mm7, mm1); // "
// b,e
movq (mm0,ebx); // value b from top
movq (mm1,ebx+ecx); // value e from bottom
// We will also calc here the max/min values to later limit comb
// so the max excursion will not exceed the Max_Comb constant
if (SKIP_SEARCH)
{
movq (mm2, mm0);
//pminub mm2, mm1
Tsimd::v_pminub(mm2, mm1, mm4);
//pmaxub mm6, mm2 // clip our current results so far to be above this
Tsimd::pmaxub(mm6, mm2);
movq (mm2, mm0);
Tsimd::pmaxub (mm2, mm1);
// pminub mm6, mm2 // clip our current results so far to be below this
Tsimd::v_pminub(mm6, mm2, mm4);
}
else
{
movq (mm2, mm0);
movq (mm4, eax);
psubusb (mm2, mm4);
psubusb (mm4, mm0);
por (mm4, mm2); // abs diff
movq (mm2, mm1);
movq (mm3, eax+ecx);
psubusb (mm2, mm3);
psubusb (mm3, mm1);
por (mm3, mm2); // abs diff
//pmaxub mm3, mm4 // top or bottom pixel moved most
Tsimd::pmaxub(mm3, mm4); // top or bottom pixel moved most
psubusb (mm3, Max_Mov); // moved more than allowed? or goes to 0?
pxor (mm4, mm4);
pcmpeqb (mm3, mm4); // now ff where low motion, else high motion
movq (mm2, mm0);
// pminub mm2, mm1
Tsimd::v_pminub(mm2, mm1, mm4);
// pmaxub mm6, mm2 // clip our current results so far to be above this
Tsimd::pmaxub(mm6, mm2);
psubusb (mm2, mm3); // maybe decrease it to 0000.. if no surround motion
movq (Min_Vals, mm2);
movq (mm2, mm0);
Tsimd::pmaxub (mm2, mm1);
// pminub mm6, mm2 // clip our current results so far to be below this
Tsimd::v_pminub (mm6, mm2, mm4);
paddusb (mm2, mm3); // maybe increase it to ffffff if no surround motion
movq (Max_Vals, mm2);
}
movq (mm2, mm0);
//pavgb mm2, mm1 // avg(b,e)
Tsimd::v_pavgb(mm2, mm1, mm3, ShiftMask); // avg(b,e)
movq (mm3, mm0);
psubusb (mm3, mm1);
psubusb (mm1, mm0);
por (mm3, mm1); // abs(c,d)
movq (mm1, mm3); // keep copy of diffs
pxor (mm4, mm4);
psubusb (mm3, mm7); // nonzero where new weights bigger, else 0
pcmpeqb (mm3, mm4); // now ff where new better, else 00
pcmpeqb (mm4, mm3); // here ff where old better, else 00
pand (mm1, mm3);
pand (mm2, mm3);
pand (mm6, mm4);
pand (mm7, mm4);
por (mm6, mm2); // our x2 value
por (mm7, mm1); // our x2 diffs
movq (mm4, mm7); // save as bob uncertainty indicator
// For non-SSE2:
// throughout most of the rest of this loop we will maintain
// mm4 our min bob value
// mm5 best weave pixels so far
// mm6 our max Bob value
// mm7 best weighted pixel ratings so far
// We will keep a slight bias to using the weave pixels
// from the current location, by rating them by the min distance
// from the Bob value instead of the avg distance from that value.
// our best and only rating so far
pcmpeqb(mm7, mm7); // ffff, say we didn't find anything good yet
}
// end of top
search<SearchRange>(ecx,esi,edi,mm0,mm1,mm2,mm3,mm5,mm7);
// bottom
if (IS_SSE2)
;//sse2 code deleted for now
else
{
// Version for non-SSE2
if (SKIP_SEARCH)
movq (mm0, mm6); // just use the results of our wierd bob
else
{
// Use the better of bob or weave
//pminub mm4, TENS // the most we care about
Tsimd::v_pminub (mm4, TENS, mm0); // the most we care about
psubusb (mm7, mm4 ); // foregive that much from weave est?
psubusb (mm7, FOURS); // bias it a bit toward weave
pxor (mm0, mm0);
pcmpeqb (mm7, mm0); // all ff where weave better, else 00
pcmpeqb (mm0, mm7); // all ff where bob better, else 00
pand (mm0, mm6); // use bob for these pixel values
pand (mm7, mm5); // use weave for these
por (mm0, mm7); // combine both
//pminub mm0, Max_Vals // but clip to catch the stray error
Tsimd::v_pminub(mm0, Max_Vals, mm1); // but clip to catch the stray error
//pmaxub mm0, Min_Vals
Tsimd::pmaxub (mm0, Min_Vals);
}
eax= pDest;
if (USE_VERTICAL_FILTER)
{
movq (mm1, mm0);
//pavgb mm0, qword ptr[ebx]
Tsimd::v_pavgb (mm0, ebx, mm2, ShiftMask);
//movntq qword ptr[eax+edx], mm0
Tsimd::movntq(eax+edx, mm0);
//pavgb mm1, qword ptr[ebx+ecx]
Tsimd::v_pavgb (mm1, ebx+ecx, mm2, ShiftMask);
eax+= dst_pitchw;
//movntq qword ptr[eax+edx], mm1
Tsimd::movntq(eax+edx, mm1);
}
else
{
// movntq qword ptr[eax+edx], mm0
movq (mm1, esi+ecx);
Tsimd::movntq(eax+edx, mm0);
}
edx=edx+8; // bump offset pointer
if (edx<Last8) // done with line?
goto LoopQ; // y
}
// adjust for next line
pSrc += src_pitch2;
pSrcP += src_pitch2;
pDest += dst_pitch2;
pBob += src_pitch2;
pBobP += src_pitch2;
}
_mm_empty();
}
};
public:
static TsearchLoopFc getProcess(int SearchEffort)
{
    // Select the precompiled TsearchLoop instantiation that matches the
    // requested SearchEffort. Higher effort maps to a larger third template
    // argument (NOTE(review): presumably the search range -- confirm against
    // the TsearchLoop declaration, which is outside this block).

    // The two cheapest settings are matched exactly.
    if (SearchEffort == -1)
        return TsearchLoop< true, true, 0>::SearchLoop;
    if (SearchEffort == 0)
        return TsearchLoop<false, true, 0>::SearchLoop;

    // Everything else is bucketed by upper bound, checked in ascending order.
    // Note that efforts below -1 also land in the first (<= 1) tier, and that
    // there is no dedicated tier for effort 2 (the original Search_Effort_2
    // call is disabled), so 2 shares the <= 3 tier.
    struct Tier
    {
        int           maxEffort;  // inclusive upper bound of this tier
        TsearchLoopFc loop;       // search loop used for this tier
    };
    static const Tier tiers[] =
    {
        {  1, TsearchLoop<false,false, 1>::SearchLoop },
        {  3, TsearchLoop<false,false, 3>::SearchLoop },
        {  5, TsearchLoop<false,false, 5>::SearchLoop },
        {  9, TsearchLoop<false,false, 9>::SearchLoop },
        { 11, TsearchLoop<false,false,11>::SearchLoop },
        { 13, TsearchLoop<false,false,13>::SearchLoop },
        { 15, TsearchLoop<false,false,15>::SearchLoop },
        { 19, TsearchLoop<false,false,19>::SearchLoop },
        { 21, TsearchLoop<false,false,21>::SearchLoop },
    };
    const Tier * const tiersEnd = tiers + sizeof(tiers) / sizeof(tiers[0]);
    for (const Tier *tier = tiers; tier != tiersEnd; ++tier)
    {
        if (SearchEffort <= tier->maxEffort)
            return tier->loop;
    }

    // Anything above the last tier gets the widest instantiation.
    return TsearchLoop<false,false,99>::SearchLoop;
}
};
template<class Tsimd> struct TdblResize
{
static __forceinline void DblResizeH_1(int rowsize, const BYTE* srcp, BYTE* dstp)
{
int ct = (rowsize-8) >> 3;
// Loop general reg usage
//
// ecx loop ctr
// edi - dest
// esi - src pixels
// now loop and get the middle qwords
unsigned char *edi= dstp;
const unsigned char *esi= srcp;
__m64 mm0,mm2,mm6,mm1,mm7;
for (int ecx=ct;ecx>0;ecx--,esi+=8,edi+=16)
{
// just use the current line, first luma
movq (mm0, esi); // orig luma, in register as VYUYVYUY
movq (mm2, mm0); // keep a copy while we got it
movq (mm6, esi+2); // luma 2 bytes to right
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -