📄 timgfiltersharpen.cpp
字号:
if (Ymax) aligned_free(Ymax);Ymax=NULL;
}
// Xsharpen kernel for one 8-bit plane, templated on the SIMD wrapper class _mm.
// Pixels whose value lies within cfg->xsharpThreshold of the minimum or maximum
// of their 3x3 neighbourhood are blended toward that extreme; all other pixels
// are passed through unchanged.
//   dx1,dy1       - width and height of the plane in pixels
//   cfg           - supplies xsharpStrength and xsharpThreshold
//   srcY,stride1  - source plane and its row pitch
//   dstY,stride2  - destination plane and its row pitch
// Uses the member scratch planes Ymin/Ymax (row pitch minStride); they are
// dereferenced here without any allocation, so they must already exist.
// NOTE(review): dy1 is unsigned, so (dy1-2) and (dy1-1) below wrap for dy1<2 -
// presumably callers never pass such small planes; confirm.
template<class _mm> void TimgFilterXsharp::xsharpen(unsigned int dx1,unsigned int dy1,const TsharpenSettings *cfg,const unsigned char *srcY,stride_t stride1,unsigned char *dstY,stride_t stride2)
{
// Width rounded down to a multiple of 8; only this part goes through the SIMD loops.
unsigned int dx8=(dx1/8)*8;
const unsigned char *src=srcY,*srcEnd;
unsigned char *min=Ymin,*max=Ymax;
// Pass 1 (every row): horizontal 3-tap minimum and maximum of the source,
// eight bytes at a time, stored into Ymin/Ymax starting at column 1.
for (srcEnd=src+dy1*stride1;src!=srcEnd;src+=stride1,min+=minStride,max+=minStride)
for (unsigned int x=1;x<dx8;x+=8)
{
*(__m64*)(min+x)=_mm::T64::min_pu8(_mm::T64::min_pu8(*(__m64*)(src+x-1),*(__m64*)(src+x)),*(__m64*)(src+x+1));
*(__m64*)(max+x)=_mm::T64::max_pu8(_mm::T64::max_pu8(*(__m64*)(src+x-1),*(__m64*)(src+x)),*(__m64*)(src+x+1));
}
// Pass 2 starts on row 1 so that the rows above and below are always available.
src=srcY+stride1;
min=Ymin+minStride;max=Ymax+minStride;
unsigned char *dst=dstY+stride2;
// Blend weights and threshold broadcast to 16-bit lanes.
typename _mm::__m strength64=_mm::set1_pi16((short)cfg->xsharpStrength);
typename _mm::__m strengthInv64=_mm::set1_pi16((short)(127-cfg->xsharpStrength));
typename _mm::__m thresh64=_mm::set1_pi16((short)cfg->xsharpThreshold);
typename _mm::__m m0=_mm::setzero_si64(),m255=_mm::set1_pi8(-1);
// Pass 2 (rows 1..dy1-2): combine three rows of Ymin/Ymax into the 3x3
// min/max and sharpen. Each iteration advances by _mm::size/2 pixels
// (presumably the number of bytes load2/store2 move - confirm in the wrapper).
// NOTE(review): the last vector of each row appears to reach column dx8, one
// past the dx8-1 limit; when dx1 is a multiple of 8 that byte lies outside the
// picture width and relies on row padding - confirm.
for (srcEnd=src+(dy1-2)*stride1;src!=srcEnd;src+=stride1,dst+=stride2,min+=minStride,max+=minStride)
for (unsigned int x=1;x<dx8;x+=_mm::size/2)
{
// mm1 = 3x3 minimum, mm3 = 3x3 maximum, mm0 = source pixel, all widened to 16 bits.
typename _mm::__m mm1=_mm::min_pi16(_mm::min_pi16(_mm::unpacklo_pi8(_mm::load2(min+x-minStride),m0),_mm::unpacklo_pi8(_mm::load2(min+x),m0)),_mm::unpacklo_pi8(_mm::load2(min+x+minStride),m0));
typename _mm::__m mm3=_mm::max_pi16(_mm::max_pi16(_mm::unpacklo_pi8(_mm::load2(max+x-minStride),m0),_mm::unpacklo_pi8(_mm::load2(max+x),m0)),_mm::unpacklo_pi8(_mm::load2(max+x+minStride),m0));
typename _mm::__m mm0=_mm::unpacklo_pi8(_mm::load2(src+x),m0);
// mm2 = distance from the minimum, mm4 = distance to the maximum.
typename _mm::__m mm2=_mm::sub_pi16(mm0,mm1),mm4=_mm::sub_pi16(mm3,mm0);
// mm5 = lanes where the pixel is nearer to the maximum than to the minimum.
typename _mm::__m mm5=_mm::cmpgt_pi16(mm2,mm4);
// mm6 = lanes nearer to the maximum AND closer to it than the threshold.
typename _mm::__m mm6=_mm::and_si64(_mm::cmpgt_pi16(thresh64,mm4),mm5);
// mm7 = the maximum in those lanes, zero elsewhere.
typename _mm::__m mm7=_mm::and_si64(mm6,mm3);
// mm4 = lanes NOT nearer to the maximum AND closer to the minimum than the threshold.
mm4=_mm::cmpgt_pi16(thresh64,mm2);
mm5=_mm::xor_si64(mm5,m255);
mm4=_mm::and_si64(mm4,mm5);
// mm1 = the minimum in those lanes, zero elsewhere.
mm1=_mm::and_si64(mm1,mm4);
// mm7 = chosen extreme * strength (the two masks are mutually exclusive).
mm7=_mm::mullo_pi16(_mm::or_si64(mm7,mm1),strength64);
// mm4 = mask of every lane that gets modified.
mm4=_mm::or_si64(mm4,mm6);
// mm6 = source * (127 - strength).
mm6=_mm::mullo_pi16(mm0,strengthInv64);
// Blended value (extreme*strength + source*(127-strength)) >> 7, kept only in modified lanes.
mm7=_mm::and_si64(_mm::srli_pi16(_mm::add_pi16(mm7,mm6),7),mm4);
// Unmodified lanes keep the source pixel; pack back to bytes and store.
_mm::store2(dst+x,_mm::packs_pu16(_mm::or_si64(_mm::and_si64(mm0,_mm::xor_si64(mm4,m255)),mm7),m0));
}
_mm::empty();
// Borders are not filtered: copy the first and last rows and the left-most
// column straight from the source.
src=srcY+stride1;dst=dstY+stride2;
memcpy(dstY,srcY,dx1);
memcpy(dstY+(dy1-1)*stride2,srcY+(dy1-1)*stride1,dx1);
for (unsigned int y=1;y<dy1-1;src+=stride1,dst+=stride2,y++)
*dst=*src;
// Columns beyond the multiple-of-8 width are copied unfiltered for all rows.
if (dx1&7)
TffPict::copy(dstY+dx8,stride2,srcY+dx8,stride1,dx1&7,dy1);
// Pass 1 always uses __m64 values, so reset the MMX state before returning.
_mm_empty();
}
bool TimgFilterXsharp::is(const TffPictBase &pict,const TfilterSettingsVideo *cfg0)
{
const TsharpenSettings *cfg=(const TsharpenSettings*)cfg0;
return super::is(pict,cfg) && cfg->xsharpStrength;
}
// Runs the xsharpen kernel on the first plane of the picture when the filter
// is active, then hands the picture to the next filter in the queue.
HRESULT TimgFilterXsharp::process(TfilterQueue::iterator it,TffPict &pict,const TfilterSettingsVideo *cfg0)
{
    // Filter inactive: pass the picture through untouched.
    if (!is(pict,cfg0))
        return parent->deliverSample(++it,pict);

    const TsharpenSettings *sharpenCfg=(const TsharpenSettings*)cfg0;
    init(pict,sharpenCfg->full,sharpenCfg->half);

    // The min/max scratch planes are created on first use only.
    // NOTE(review): Ymin is allocated twice as large as Ymax although
    // xsharpen() walks both with the same minStride - confirm this is intended.
    if (!Ymin)
    {
        minStride=(pictRect.dx/16+2)*16;
        Ymin=(unsigned char*)aligned_malloc(minStride*pictRect.dy*2);
        Ymax=(unsigned char*)aligned_malloc(minStride*pictRect.dy);
    }

    // Only the first plane pointer is requested from the source and destination.
    const unsigned char *srcPlane;
    unsigned char *dstPlane;
    getCur(FF_CSPS_MASK_YUV_PLANAR,pict,sharpenCfg->full,&srcPlane,NULL,NULL,NULL);
    getNext(csp1,pict,sharpenCfg->full,&dstPlane,NULL,NULL,NULL);

    // Dispatch through the member-function pointer to the selected kernel.
    (this->*xsharpenFc)(dx1[0],dy1[0],sharpenCfg,srcPlane,stride1[0],dstPlane,stride2[0]);

    return parent->deliverSample(++it,pict);
}
//==================================== TimgFilterMsharp =====================================
// Constructs the filter with no scratch buffers and no blur helper; process()
// creates them the first time it finds them null.
TimgFilterMsharp::TimgFilterMsharp(IffdshowBase *Ideci,Tfilters *Iparent):TimgFilter(Ideci,Iparent)
{
    blur=NULL;
    work=NULL;
    blur3x3=NULL;
}
// Releases the scratch buffers and the blur helper and resets the pointers to
// NULL, so the next process() call allocates them again.
void TimgFilterMsharp::done(void)
{
    if (blur)
    {
        aligned_free(blur);
        blur=NULL;
    }
    if (work)
    {
        aligned_free(work);
        work=NULL;
    }
    // delete on a null pointer does nothing, so no guard is required here.
    delete blur3x3;
    blur3x3=NULL;
}
bool TimgFilterMsharp::is(const TffPictBase &pict,const TfilterSettingsVideo *cfg0)
{
const TsharpenSettings *cfg=(const TsharpenSettings*)cfg0;
return super::is(pict,cfg) && cfg->msharpStrength;
}
// Msharpen filter for the first plane of the picture. When the filter is active it
//   1. builds a blurred copy of the source,
//   2. builds a 0/255 detail map in the destination by thresholding differences
//      between neighbouring blurred pixels,
//   3. clears a two-pixel border of that map,
//   4. unless cfg->msharpMask is set, replaces the map with the output picture:
//      marked pixels are sharpened, unmarked pixels are copied from the source.
// With cfg->msharpMask set the detail map itself is left in the destination.
// The picture is always passed on to the next filter in the queue.
// NOTE(review): dx1[0]/dy1[0] are used in unsigned loop bounds such as
// dy1[0]-1 and dy1[0]-2; very small planes would wrap - confirm callers.
HRESULT TimgFilterMsharp::process(TfilterQueue::iterator it,TffPict &pict,const TfilterSettingsVideo *cfg0)
{
if (is(pict,cfg0))
{
const TsharpenSettings *cfg=(const TsharpenSettings*)cfg0;
init(pict,cfg->full,cfg->half);
const unsigned char *src_;
unsigned char *dst_;
// Only the first plane pointer is requested from the source and destination.
getCur(FF_CSPS_MASK_YUV_PLANAR,pict,cfg->full,&src_,NULL,NULL,NULL);
getNext(csp1,pict,cfg->full,&dst_,NULL,NULL,NULL);
const unsigned char *src=src_;unsigned char *dst=dst_;
// Blend weights for the final mix; they sum to 255 and the result is shifted right by 8.
int strength=cfg->msharpStrength*2,invstrength=255-strength;
const unsigned char *srcp=src;
const unsigned char *srcp_saved=srcp;
// The blur helper and the two scratch planes are created on first use only.
if (!blur3x3) blur3x3=new T3x3blurSWS(deci,dx1[0],dy1[0]);
if (!blur)
{
stride=(dx1[0]/16+2)*16;
blur=(unsigned char*)aligned_malloc(stride*dy1[0]);
work=(unsigned char*)aligned_malloc(stride*dy1[0]);
}
unsigned char *blurp=blur;
const unsigned char *blurpn;
unsigned char *workp=work;
unsigned char *dstp=dst;
int b1,b2,b4;
// Presumably writes a 3x3-blurred copy of src into blur (row pitch 'stride').
blur3x3->process(src,stride1[0],blur,stride);
// blurp walks the current blurred row, blurpn the row below it.
blurp=blur;
blurpn=blurp+stride;
dstp=dst;
// Diagonal detail detection.
// Disabled scalar version, kept for reference; the MMX loop below is what runs.
/*
int b3;
b1=blurp[0];
b3=blurpn[0];
for (y=0;y<dy1[0]-1;dstp+=stride2[0],blurp+=m.stride,blurpn+=m.stride,y++)
for (x=0;x<dx1[0]-1;x++)
{
b2=blurp[x+1];
b4=blurpn[x+1];
if (abs(b1-b4)>=cfg->msharpThreshold || abs(b2-b3)>=cfg->msharpThreshold)
dstp[x]=255;
else
dstp[x]=0;
b1=b2;b3=b4;
}
*/
__m64 m255=_mm_set1_pi8(-1),m0=_mm_setzero_si64();
// Saturating-subtract threshold: a difference survives the subtraction only
// when it is greater than max(1,msharpThreshold-1).
__m64 threshold64=_mm_set1_pi8((char)std::max(1,cfg->msharpThreshold-1));
// MMX detail detection, eight pixels per step: each blurred pixel at column
// x+1 is compared with the pixels in the next row at columns x+1 and x-1.
// NOTE(review): at x=0 the blurpn+x-1 load starts one byte before the row,
// i.e. in the tail of the previous row of the blur buffer; stores may also run
// past dx1[0] into row padding - confirm the strides allow this.
for (unsigned int y=0;y<dy1[0]-1;blurp+=stride,blurpn+=stride,dstp+=stride2[0],y++)
for (unsigned int x=0;x<dx1[0]-1;x+=8)
{
__m64 mm1=*(__m64*)(blurp +x+1);
__m64 mm2=*(__m64*)(blurpn+x+1);
__m64 mm3=*(__m64*)(blurpn+x-1);
// _mm_absdif_u8 is a project helper, presumably the per-byte absolute difference.
mm2=_mm_subs_pu8(_mm_absdif_u8(mm2,mm1),threshold64);
mm3=_mm_subs_pu8(_mm_absdif_u8(mm3,mm1),threshold64);
// Bytes where either remainder is non-zero become 0xFF, all others 0x00.
mm2=_mm_xor_si64(_mm_cmpeq_pi8(_mm_or_si64(mm2,mm3),m0),m255);
*(__m64*)(dstp+x+1)=mm2;
}
if (cfg->msharpHQ)
{
// Vertical detail detection
// Column by column: mark a pixel when it differs from the blurred pixel below
// it by at least msharpThreshold. Only adds marks, never clears them.
for (unsigned int x=0;x<dx1[0];x++)
{
blurp=blur;
blurpn=blurp+stride;
dstp=dst;
b1=blurp[x];
for (unsigned int y=0;y<dy1[0]-1;dstp+=stride2[0],blurp+=stride,blurpn+=stride,y++)
{
b2=blurpn[x];
if (abs(b1-b2)>=cfg->msharpThreshold)
dstp[x]=255;
b1=b2;
}
}
// Horizontal detail detection
// Row by row: mark a pixel when it differs from the blurred pixel to its right
// by at least msharpThreshold.
blurp=blur;
dstp=dst;
for (unsigned int y=0;y<dy1[0];dstp+=stride2[0],blurp+=stride,y++)
{
b1=blurp[0];
for (unsigned int x=0;x<dx1[0]-1;x+=1)
{
b2=blurp[x+1];
if (abs(b1-b2)>=cfg->msharpThreshold)
dstp[x]=255;
b1=b2;
}
}
}
// Fix up detail map borders
// Clear the first two and last two rows, then the first two and last two
// columns of every row, so border pixels are never treated as detail.
dstp=dst;
memset(dstp,0,dx1[0]);
memset(dstp+stride2[0],0,dx1[0]);
memset(dstp+(dy1[0]-2)*stride2[0],0,dx1[0]);
memset(dstp+(dy1[0]-1)*stride2[0],0,dx1[0]);
for (unsigned int y=0;y<dy1[0];dstp+=stride2[0],y++)
{
dstp[0]=0;
dstp[1]=0;
dstp[dx1[0]-1]=0;
dstp[dx1[0]-2]=0;
}
if (!cfg->msharpMask)
{
// Fix up output frame borders
// Copies the source's first row, last row, first column and last column into 'work'.
// NOTE(review): 'work' is not read again anywhere in this function - this
// looks like leftover code; confirm nothing else consumes the buffer.
srcp=srcp_saved;
workp=work;
memcpy(workp,srcp,dx1[0]);
memcpy(workp+(dy1[0]-1)*stride,srcp+(dy1[0]-1)*stride1[0],dx1[0]);
for (unsigned int y=0;y<dy1[0];srcp+=stride1[0],workp+=stride,y++)
{
workp[0]=srcp[0];
workp[dx1[0]-1]=srcp[dx1[0]-1];
}
// Now sharpen the edge areas and we're done
// Interior rows only (1..dy1-2); dst still holds the detail map and is
// overwritten in place with the output pixels.
srcp=srcp_saved+stride1[0];
dstp=dst+stride2[0];
blurp=blur+stride;
for (unsigned int y=1;y<dy1[0]-1;srcp+=stride1[0],dstp+=stride2[0],blurp+=stride,y++)
{
for (unsigned int x=1;x<dx1[0]-1;)
{
// Fast path: four consecutive unmarked pixels are copied from the source in
// one 32-bit move.
// NOTE(review): near the right edge this touches bytes up to x+3, which can
// lie past dx1[0]-1; dstp[dx1[0]-1] is rewritten after the loop, anything
// further relies on row padding - confirm.
if (*(int*)(dstp+x)==0)
{
*(int*)(dstp+x)=*(int*)(srcp+x);
x+=4;
continue;
}
if (dstp[x])
{
// Marked pixel: 4*source - 3*blurred, clamped to 0..255, then mixed with the
// source using the strength/invstrength weights.
b4=4*int(srcp[x])-3*int(blurp[x]);
if (b4<0) b4=0; else if (b4>255) b4=255;
dstp[x]=(unsigned char)((strength*b4+invstrength*srcp[x])>>8);
}
else
dstp[x]=srcp[x];
x++;
}
// The first and last column always take the source pixel.
dstp[0]=srcp[0];
dstp[dx1[0]-1]=srcp[dx1[0]-1];
}
// The first and last row are copied from the source unchanged.
memcpy(dst,src,dx1[0]);
memcpy(dst+(dy1[0]-1)*stride2[0],src+(dy1[0]-1)*stride1[0],dx1[0]);
}
// Reset the MMX state after the __m64 code above.
_mm_empty();
}
return parent->deliverSample(++it,pict);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -