//#define IS_SSE2
// Copyright (c) 2002 Tom Barry. All rights reserved.
// trbarry@trbarry.com
// Requires Avisynth source code to compile for Avisynth
// Avisynth Copyright 2000 Ben Rudiak-Gould.
// http://www.math.berkeley.edu/~benrg/avisynth.html
/////////////////////////////////////////////////////////////////////////////
//
// This file is subject to the terms of the GNU General Public License as
// published by the Free Software Foundation. A copy of this license is
// included with this software distribution in the file COPYING. If you
// do not have a copy, you may obtain a copy by writing to the Free
// Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
//
// This software is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details
//
// Also, this program is "Philanthropy-Ware". That is, if you like it and
// feel the need to reward or inspire the author then please feel free (but
// not obligated) to consider joining or donating to the Electronic Frontier
// Foundation. This will help keep cyber space free of barbed wire and bullsh*t.
//
/////////////////////////////////////////////////////////////////////////////
/*
From the Readme_TomsMoComp.txt file:
TomsMoComp - Motion Compensating Deinterlace
TomsMoComp.dll is an Avisynth filter that uses motion compensation and adaptive processing to deinterlace a video source.
Just unzip the contents into your Avisynth directory, or somewhere. As the script shows,
I made a subdirectory under Avisynth just to keep it separate.
Here is one of the .avs scripts I was testing:
LoadPlugin("d:\AVISynth\TomsMoComp\Debug\TomsMoComp.dll")
clip = AVISource("c:\vcr\bikes.avi")
return clip.TomsMoComp(...)
It specifies the file spec (change yours), turns TopFirst on, and sets SearchEffort=10.
So far I've tested it only with Avisynth/VirtualDub.
TomsMoComp Parm list:
return clip.TomsMoComp(TopFirst, SearchEffort, Vertical_Filter)
All the values are integer, 0=no, 1=yes:
TopFirst - assume the top field, lines 0,2,4,... should be displayed first.
The default is the supposedly more common BottomFirst (not for me).
SearchEffort - determines how much effort (CPU time) will be used to find
moved pixels. Currently accepts values from 0-45, with 0 being practically
just a smarter bob.
Known issues and limitations:
1) Assumes YUV (YUY2) frame-based input. Use an Avisynth function to convert first if
needed.
For now, see www.trbarry.com/TomsMoComp.zip
THIS PROGRAM IS STILL NEW AND EXPERIMENTAL!
Please send comments to Tom Barry (trbarry@trbarry.com)
*/
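// Example call implied by the readme above (illustrative values only: TopFirst on
// and SearchEffort=10, as the readme mentions; Vertical_Filter off):
//
//   return clip.TomsMoComp(1, 10, 0)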
#include <malloc.h>
#include <string.h>
#include <unknwn.h>
#include "TomsMoComp_ff.h"
#include "../../inttypes.h"
#ifdef __GNUC__
#define __forceinline __attribute__((__always_inline__)) inline
#endif
#include "../../simd.h"
#include "../../compiler.h"
#pragma warning(disable: 4305 4309 4799 4127 4701)
static const __int64 ShiftMask=0xfefffefffefffeffULL; // to avoid shifting chroma to luma
static const __int64 YMask =0x00ff00ff00ff00ffULL;
static const __int64 UVMask=0xff00ff00ff00ff00ULL;
static const __int64 Max_Mov=0x0404040404040404LL;
static const __int64 FOURS=0x0404040404040404LL;
static const __int64 TENS =0x0a0a0a0a0a0a0a0aLL;
static const __int64 ONES =0x0101010101010101LL;
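// The v_pavgb() helper used throughout this file comes from ../../simd.h; the sketch
// below only illustrates its assumed per-byte semantics and is not called anywhere.
// On plain MMX the byte average is presumably emulated with word shifts, which is why
// ShiftMask clears the lowest bit of every high (chroma) byte first -- otherwise that
// bit would be shifted into the neighbouring luma byte ("to avoid shifting chroma to
// luma" above).
static inline void scalar_pavgb_sketch(unsigned char dst[8], const unsigned char src[8])
{
for (int i = 0; i < 8; i++)
{
// per-byte average, rounded down (a real pavgb instruction rounds up)
dst[i] = (unsigned char)((dst[i] >> 1) + (src[i] >> 1));
}
}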
typedef void (*TsearchLoopFc)(stride_t _src_pitch, stride_t _dst_pitch,
int _rowsize, const uint8_t* _pWeaveSrc, const uint8_t* _pWeaveSrcP,
uint8_t* _pWeaveDest, int _TopFirst, const uint8_t* _pCopySrc,
const uint8_t* _pCopySrcP, int _FldHeight);
template<class Tsimd,bool IS_SSE2,int BPP,bool USE_YV12,bool USE_VERTICAL_FILTER> struct TtomsMoComp
{
private:
template<bool DBL_RESIZE,bool SKIP_SEARCH,int SearchRange> struct TsearchLoop
{
static __forceinline void merge4PIXavg(const unsigned char *PADDR1,const unsigned char *PADDR2,__m64 &mm0,__m64 &mm1,__m64 &mm2,__m64 &mm3,__m64 &mm5,__m64 &mm7)
{
movq (mm0, PADDR1); /* our 4 pixels */
movq (mm1, PADDR2); /* our pixel2 value */
movq (mm2, mm0); /* another copy of our pixel1 value */
movq (mm3, mm1); /* another copy of our pixel2 value */
psubusb (mm2, mm1);
psubusb (mm3, mm0);
por (mm2, mm3);
Tsimd::v_pavgb(mm0, mm1, mm3, ShiftMask); /* avg of 2 pixels */
movq (mm3, mm2); /* another copy of our weights */
pxor (mm1, mm1);
psubusb (mm3, mm7); /* nonzero where old weights lower, else 0 */
pcmpeqb (mm3, mm1); /* now ff where new better, else 00 */
pcmpeqb (mm1, mm3); /* here ff where old better, else 00 */
pand (mm0, mm3); /* keep only better new pixels */
pand (mm2, mm3); /* and weights */
pand (mm5, mm1); /* keep only better old pixels */
pand (mm7, mm1);
por (mm5, mm0); /* and merge new & old vals */
por (mm7, mm2);
}
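// Illustrative scalar model of merge4PIXavg() above for a single byte lane (an
// unused reference sketch, not called by the filter): the candidate value is the
// average of the two mirrored pixels, its weight is their absolute difference, and
// it replaces the running best value/weight (held in mm5/mm7) wherever its weight
// is not larger than the old one.
static void merge4PIXavg_scalar_sketch(unsigned char p1, unsigned char p2, unsigned char &bestVal, unsigned char &bestWeight)
{
unsigned char weight = (unsigned char)(p1 > p2 ? p1 - p2 : p2 - p1); // |p1 - p2|, what the psubusb/por pair computes
unsigned char value = (unsigned char)((p1 + p2) >> 1); // candidate value, what v_pavgb computes
if (weight <= bestWeight) // the "ff where new better" mask
{
bestVal = value;
bestWeight = weight;
}
}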
static __forceinline void merge4PIXavgH(const unsigned char *PADDR1A, const unsigned char *PADDR1B, const unsigned char *PADDR2A, const unsigned char *PADDR2B,__m64 &mm0,__m64 &mm1,__m64 &mm2,__m64 &mm3,__m64 &mm5,__m64 &mm7)
{
movq (mm0, PADDR1A); /* our 4 pixels */
movq (mm1, PADDR2A); /* our pixel2 value */
movq (mm2, PADDR1B); /* our 4 pixels */
movq (mm3, PADDR2B); /* our pixel2 value */
Tsimd::v_pavgb(mm0, mm2, mm2, ShiftMask);
Tsimd::v_pavgb(mm1, mm3, mm3, ShiftMask);
movq (mm2, mm0); /* another copy of our pixel1 value */
movq (mm3, mm1); /* another copy of our pixel2 value */
psubusb (mm2, mm1);
psubusb (mm3, mm0);
por (mm2, mm3);
Tsimd::v_pavgb(mm0, mm1, mm3, ShiftMask); /* avg of 2 pixels */
movq (mm3, mm2); /* another copy of our weights */
pxor (mm1, mm1);
psubusb (mm3, mm7); /* nonzero where old weights lower, else 0 */
pcmpeqb (mm3, mm1); /* now ff where new better, else 00 */
pcmpeqb (mm1, mm3); /* here ff where old better, else 00 */
pand (mm0, mm3); /* keep only better new pixels */
pand (mm2, mm3); /* and weights */
pand (mm5, mm1); /* keep only better old pixels */
pand (mm7, mm1);
por (mm5, mm0); /* and merge new & old vals */
por (mm7, mm2);
}
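// merge4PIXavgH() above does the same merge, except each candidate pixel is first
// the horizontal average of an address pair (PADDR1A/PADDR1B and PADDR2A/PADDR2B),
// giving half-pel candidates. Scalar model for one byte lane (unused sketch):
static void merge4PIXavgH_scalar_sketch(unsigned char p1a, unsigned char p1b, unsigned char p2a, unsigned char p2b, unsigned char &bestVal, unsigned char &bestWeight)
{
merge4PIXavg_scalar_sketch((unsigned char)((p1a + p1b) >> 1), (unsigned char)((p2a + p2b) >> 1), bestVal, bestWeight);
}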
template<int r> static __forceinline void search(stride_t &ecx,const unsigned char* const esi,const unsigned char* const edi,__m64 &mm0,__m64 &mm1,__m64 &mm2,__m64 &mm3,__m64 &mm5,__m64 &mm7)
{
if (r==3 || r==5)
{
merge4PIXavg(edi+ecx-BPP, esi+ecx+BPP,mm0,mm1,mm2,mm3,mm5,mm7); // left, right
merge4PIXavg(edi+ecx+BPP, esi+ecx-BPP,mm0,mm1,mm2,mm3,mm5,mm7); // right, left
}
if (r==5)
{
merge4PIXavgH(edi+ecx-BPP, edi+ecx, esi+ecx, esi+ecx+BPP,mm0,mm1,mm2,mm3,mm5,mm7); // left, right
merge4PIXavgH(edi+ecx+BPP, edi+ecx, esi+ecx, esi+ecx-BPP,mm0,mm1,mm2,mm3,mm5,mm7); // right, left
}
if (r==21 || r==99)
{
merge4PIXavg(edi-6, esi+2*ecx+6,mm0,mm1,mm2,mm3,mm5,mm7); // up left, down right
merge4PIXavg(edi+6, esi+2*ecx-6,mm0,mm1,mm2,mm3,mm5,mm7); // up right, down left
merge4PIXavg(edi+ecx-6, esi+ecx+6,mm0,mm1,mm2,mm3,mm5,mm7); // left, right
merge4PIXavg(edi+ecx+6, esi+ecx-6,mm0,mm1,mm2,mm3,mm5,mm7); // right, left
merge4PIXavg(edi+2*ecx-6, esi+6,mm0,mm1,mm2,mm3,mm5,mm7); // down left, up right
merge4PIXavg(edi+2*ecx+6, esi-6,mm0,mm1,mm2,mm3,mm5,mm7); // down right, up left
}
if (r==9 || r==11 || r==13 || r==15 || r==19 || r==21 || r==99)
{
merge4PIXavg(edi-BPP, esi+2*ecx+BPP,mm0,mm1,mm2,mm3,mm5,mm7); // up left, down right
merge4PIXavg(edi+BPP, esi+2*ecx-BPP,mm0,mm1,mm2,mm3,mm5,mm7); // up right, down left
merge4PIXavg(edi+2*ecx-BPP, esi+BPP,mm0,mm1,mm2,mm3,mm5,mm7); // down left, up right
merge4PIXavg(edi+2*ecx+BPP, esi-BPP,mm0,mm1,mm2,mm3,mm5,mm7); // down right, up left
merge4PIXavg(edi+ecx-BPP, esi+ecx+BPP,mm0,mm1,mm2,mm3,mm5,mm7); // left, right
merge4PIXavg(edi+ecx+BPP, esi+ecx-BPP,mm0,mm1,mm2,mm3,mm5,mm7); // right, left
}
if (r==11 || r==13 || r==19)
{
merge4PIXavgH(edi+ecx-BPP, edi+ecx, esi+ecx, esi+ecx+BPP,mm0,mm1,mm2,mm3,mm5,mm7); // left, right
merge4PIXavgH(edi+ecx+BPP, edi+ecx, esi+ecx, esi+ecx-BPP,mm0,mm1,mm2,mm3,mm5,mm7); // right, left
}
if (r!=0 && !USE_YV12)
por (mm7, UVMask);
if (r==13)
{
merge4PIXavgH(edi+2*ecx, edi+ecx, esi+ecx, esi,mm0,mm1,mm2,mm3,mm5,mm7); // down, up
merge4PIXavgH(edi, edi+ecx, esi+ecx, esi+2*ecx,mm0,mm1,mm2,mm3,mm5,mm7); // up, down
}
if (r==99)
{
merge4PIXavg(edi-8, esi+2*ecx+8,mm0,mm1,mm2,mm3,mm5,mm7); // up left, down right
merge4PIXavg(edi+8, esi+2*ecx-8,mm0,mm1,mm2,mm3,mm5,mm7); // up right, down left
merge4PIXavg(edi+ecx-8, esi+ecx+8,mm0,mm1,mm2,mm3,mm5,mm7); // left, right
merge4PIXavg(edi+ecx+8, esi+ecx-8,mm0,mm1,mm2,mm3,mm5,mm7); // right, left
merge4PIXavg(edi+2*ecx-8, esi+8,mm0,mm1,mm2,mm3,mm5,mm7); // down left, up right
merge4PIXavg(edi+2*ecx+8, esi-8,mm0,mm1,mm2,mm3,mm5,mm7); // down right, up left
}
if (r==15 || r==19 || r==21 || r==99)
{
merge4PIXavg(edi-2*BPP, esi+2*ecx+2*BPP,mm0,mm1,mm2,mm3,mm5,mm7); // up left, down right
merge4PIXavg(edi+2*BPP, esi+2*ecx-2*BPP,mm0,mm1,mm2,mm3,mm5,mm7); // up right, down left
merge4PIXavg(edi+ecx-2*BPP, esi+ecx+2*BPP,mm0,mm1,mm2,mm3,mm5,mm7); // left, right
merge4PIXavg(edi+ecx+2*BPP, esi+ecx-2*BPP,mm0,mm1,mm2,mm3,mm5,mm7); // right, left
merge4PIXavg(edi+2*ecx-2*BPP, esi+2*BPP,mm0,mm1,mm2,mm3,mm5,mm7); // down left, up right
merge4PIXavg(edi+2*ecx+2*BPP, esi-2*BPP,mm0,mm1,mm2,mm3,mm5,mm7); // down right, up left
}
if (r==19)
{
merge4PIXavgH(edi+2*ecx, edi+ecx, esi+ecx, esi,mm0,mm1,mm2,mm3,mm5,mm7); // down, up
merge4PIXavgH(edi, edi+ecx, esi+ecx, esi+2*ecx,mm0,mm1,mm2,mm3,mm5,mm7); // up, down
}
if (r==9 || r==11 || r==13 || r==15 || r==19 || r==21 || r==99)
{
merge4PIXavg(edi+2*ecx, esi,mm0,mm1,mm2,mm3,mm5,mm7); // down, up
merge4PIXavg(edi, esi+2*ecx,mm0,mm1,mm2,mm3,mm5,mm7); // up, down
}
if (r!=0)
{
paddusb (mm7, ONES); // bias toward no motion
merge4PIXavg(edi+ecx,esi+ecx,mm0,mm1,mm2,mm3,mm5,mm7); // center, in old and new
}
}
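// Worked one-lane example of the "bias toward no motion" step above (illustration
// only, never called): paddusb(mm7, ONES) adds 1 to every accumulated weight before
// the unmoved centre candidate is merged, so on an effective tie the static pixel
// wins over a moved one.
static void no_motion_bias_sketch()
{
unsigned char bestVal = 120, bestWeight = 3; // best moved candidate so far (mm5/mm7)
bestWeight = (unsigned char)(bestWeight + 1); // paddusb(mm7, ONES)
merge4PIXavg_scalar_sketch(121, 124, bestVal, bestWeight); // centre pair: weight 3, value 122
// bestVal is now 122 and bestWeight 3: the unmoved centre candidate won the tie
}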
static void SearchLoop( stride_t src_pitch, stride_t dst_pitch,
int rowsize, const uint8_t* pWeaveSrc, const uint8_t* pWeaveSrcP,
uint8_t* pWeaveDest, int TopFirst, const uint8_t* pCopySrc,
const uint8_t* pCopySrcP, int FldHeight)
{
uint8_t *pDest;
const uint8_t *pSrcP;
const uint8_t *pSrc;
const uint8_t *pBob;
const uint8_t *pBobP;
int LineCt;
stride_t src_pitch2;
if (DBL_RESIZE)
src_pitch2 = src_pitch; // even & odd lines are not interleaved in DScaler
else
src_pitch2 = 2 * src_pitch; // even & odd lines are interleaved in Avisynth
stride_t dst_pitch2 = 2 * dst_pitch;
int y;
int Last8 = (rowsize-8); // ofs to last 8 bytes in row
stride_t dst_pitchw = dst_pitch; // local stor so asm can ref
pSrc = pWeaveSrc; // points 1 weave line above
pSrcP = pWeaveSrcP; // "
if (DBL_RESIZE)
{
if (USE_VERTICAL_FILTER)
pDest = pWeaveDest; // but will appear 1 line lower than without VF
else
pDest = pWeaveDest;
pBob = pCopySrc;
pBobP = pCopySrcP; // not used
LineCt = FldHeight-1;
}
else
{
// Not DBL_RESIZE here
if (USE_VERTICAL_FILTER)
pDest = pWeaveDest + dst_pitch;
else
pDest = pWeaveDest + dst_pitch2;
if (TopFirst)
{
pBob = pCopySrc + src_pitch2; // remember one weave line just copied previously
pBobP = pCopySrcP + src_pitch2;
}
else
{
pBob = pCopySrc;
pBobP = pCopySrcP;
}
LineCt = FldHeight - 2;
}
__m64 Min_Vals,Max_Vals;
for (y=1; y <= LineCt; y++)
{
__m64 mm0,mm1,mm2,mm3,mm4,mm5,mm6,mm7;
// Loop general reg usage
//
// eax - pBobP, then pDest
// ebx - pBob
// ecx - src_pitch2
// edx - current offset
// edi - prev weave pixels, 1 line up
// esi - next weave pixels, 1 line up
const unsigned char *ebx= pBob;
stride_t ecx= src_pitch2,edx;
const unsigned char *esi;
unsigned char *edi,*eax;
if (IS_SSE2)
{
// sse2 code deleted for now
}
else
{
ebx= pBob;
ecx= src_pitch2;
// simple bob first 8 bytes
if (USE_VERTICAL_FILTER)
{
movq (mm0, ebx);
movq (mm1, ebx+ecx);
movq (mm2, mm0);
Tsimd::v_pavgb(mm2, mm1, mm3, ShiftMask); // halfway between
Tsimd::v_pavgb(mm0, mm2, mm3, ShiftMask); // 1/4 way
Tsimd::v_pavgb(mm1, mm2, mm3, ShiftMask); // 3/4 way
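// i.e. with the vertical filter the two output lines are interpolated 1/4 and 3/4
// of the way between the two source field lines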
edi= pDest;
stride_t eaxx= dst_pitchw;
Tsimd::movntq (edi, mm0);
Tsimd::movntq (edi+eaxx, mm1);
// simple bob last 8 bytes
edx= Last8;
esi= ebx+edx;
movq (mm0, esi);
movq (mm1, esi+ecx);
movq (mm2, mm0);
Tsimd::v_pavgb (mm2, mm1, mm3, ShiftMask); // halfway between
Tsimd::v_pavgb (mm0, mm2, mm3, ShiftMask); // 1/4 way
Tsimd::v_pavgb (mm1, mm2, mm3, ShiftMask); // 3/4 way
edi+= edx; // last 8 bytes of dest
Tsimd::movntq(edi, mm0);
Tsimd::movntq(edi+eaxx, mm1);
}
else
{
movq (mm0, ebx);
//pavgb mm0, qword ptr[ebx+ecx]
Tsimd::v_pavgb(mm0, ebx+ecx, mm2, ShiftMask);
edi= pDest;
Tsimd::movntq(edi, mm0);