📄 loopf_asm.cpp
字号:
paddw (mm5,mm7);
paddw (mm5,eax + LFABS_OFFSET );//x += LoopFilterAdjustBeforeShift
psubw (mm0,mm1);
paddw (mm0,mm5);
pxor (mm6,mm6 );
movd (mm7,eax + TRANS_OFFSET + 8 );//xx xx xx xx 32 22 12 02
psraw (mm0,3 );//values to be clipped
movd (mm3,eax + TRANS_OFFSET + 4 );//xx xx xx xx 31 21 11 01
punpcklbw (mm7,mm6);
psubw (mm7,mm2 );//pms + y
punpcklbw (mm3,mm6);
paddw (mm3,mm2 );//p0 - y
packuswb (mm7,mm7 );//clamp pms + y
packuswb (mm3,mm3 );//clamp p0 - y
movq (mm1,mm0);
movq (mm2,eax + LIMIT_OFFSET );//get the limit value
psraw (mm0,15);
//values to write out
punpcklbw (mm3,mm7 );//32 31 22 21 12 11 02 01
movq (mm7,mm0 );//save sign
unsigned int ebp;
movd ((int&)ebp,mm3 );//12 11 02 01
pxor (mm1,mm0);
//xor bp,bp
*(uint16_t*)(ebx + 1)=ebp ;//02 01
psubsw (mm1,mm0 );//abs(i)
ebp>>=16;//shr ebp,16
movq (mm5,mm2);
*(uint16_t*)(ebx + ecx + 1)=ebp;
psrlq (mm3,32 );//xx xx xx xx 32 31 22 21
por (mm7,eax + FOURONES_OFFSET );//now have -1 or 1
psubw (mm5,mm1 );//limit - abs(i)
movd ((int&)ebp,mm3 );//32 31 22 21
movq (mm4,mm5);
*(uint16_t*)(ebx + ecx*2 + 1)=ebp;
psraw (mm5,15);
ebp>>=16;// shr ebp,16
pxor (mm4,mm5);
*(uint16_t*)(ebx + edx + 1)=ebp;
psubsw (mm4,mm5 );//abs(limit - abs(i))
movd (mm5,eax + TRANS_OFFSET + 24 );//xx xx xx xx 72 62 52 42
psubusw (mm2,mm4 );//limit - abs(limit - abs(i))
pmullw (mm2,mm7 );//new y
pxor (mm6,mm6);
movd (mm3,eax + TRANS_OFFSET + 20 );//xx xx xx xx 71 61 51 41
punpcklbw (mm5,mm6);
ebx=ebx + ecx*4;
punpcklbw (mm3,mm6);
paddw (mm3,mm2 );//pms + y
psubw (mm5,mm2 );//p0 - y
packuswb (mm3,mm3 );//clamp pms + y
//pop ebp
//-
//
//NOTE: optimize the following crap somehow
//
packuswb (mm5,mm5 );//clamp p0 - y
//-
punpcklbw (mm3,mm5 );//72 71 62 61 52 51 42 41
//-
unsigned int ax;
movd ((int&)ax,mm3 );//52 51 42 41
psrlq (mm3,32 );//xx xx xx xx 72 71 62 61
*(uint16_t*)(ebx + 1)=ax;
//-
ax>>=16;//shr eax,16
//-
*(uint16_t*)(ebx + ecx + 1)=ax;
//-
movd ((int&)ax,mm3);
//-
*(uint16_t*)(ebx + ecx*2 + 1)=ax;
//-
ax>>=16;//shr eax,16
//-
*(uint16_t*)(ebx + edx + 1)=ax;
//-
}
/****************************************************************************
*
* ROUTINE : FilterVert_MMX
*
* INPUTS : None
*
* OUTPUTS : None
*
* RETURNS : None
*
* FUNCTION : Applies a loop filter to a horizontal edge vertically
*
* SPECIAL NOTES :
*
*
* ERRORS : None.
*
****************************************************************************/
extern "C" void FilterVert_MMX(unsigned char * PixelPtr, ogg_int32_t LineLength, ogg_int32_t *BoundingValuePtr)
{
ogg_int32_t ms = -LineLength;
ogg_int32_t ms2 = ms + ms;
/* A somewhat optimized MMX version of the top edge filter.*/
/* Originally written for Tim's VP30 code. */
unsigned char *eax=(unsigned char*)BoundingValuePtr;
//-
unsigned char *ebx=(unsigned char*)PixelPtr;
int ecx=ms; //negative stride
__m64 mm0,mm1,mm2,mm3,mm4,mm5,mm6,mm7;
movd (mm1,ebx + 0 );//p0
pxor (mm4,mm4);
movd (mm0,ebx + ecx );//get row above -- pms
punpcklbw (mm1,mm4 );//convert to words
int edx=LineLength;
punpcklbw (mm0,mm4);
movd (mm6,ebx + ecx*2 );//pms2
psubw (mm1,mm0 );//x = p0 - pms
movq (mm2,ebx + edx );//pstride
movq (mm3,mm1);
punpcklbw (mm6,mm4);
paddw (mm3,mm1);
punpcklbw (mm2,mm4);
paddw (mm1,mm3);
paddw (mm1,eax + LFABS_OFFSET );//x += LoopFilterAdjustBeforeShift
psubw (mm6,mm2);
movq (mm2,eax + LIMIT_OFFSET );//get the limit value
paddw (mm6,mm1);
movd (mm5,ebx + 4 );//p0
psraw (mm6,3 );//values to be clipped
movq (mm1,mm6 );
psraw (mm6,15);
movd (mm7,ebx + ecx + 4 );//pms
pxor (mm1,mm6);
psubsw (mm1,mm6 );//abs(i)
pxor (mm0,mm0);
punpcklbw (mm5,mm0);
movq (mm3,mm2);
por (mm6,eax + FOURONES_OFFSET );//now have -1 or 1
punpcklbw (mm7,mm0);
psubw (mm3,mm1 );//limit - abs(i)
psubw (mm5,mm7 );//x = p0 - pms
movq (mm4,mm3);
psraw (mm3,15);
movd (mm0,ebx + ecx*2 + 4 );//pms2
pxor (mm4,mm3);
movd (mm1,ebx + edx +4 );//pstride
psubsw (mm4,mm3 );//abs(limit - abs(i))
pxor (mm3,mm3);
psubusw (mm2,mm4 );//limit - abs(limit - abs(i))
punpcklbw (mm0,mm3);
movq (mm7,mm5);
paddw (mm7,mm5);
pmullw (mm2,mm6 );//new y -- wait 3 cycles
punpcklbw (mm1,mm3);
paddw (mm5,mm7);
paddw (mm5,eax + LFABS_OFFSET );//x += LoopFilterAdjustBeforeShift
psubw (mm0,mm1);
paddw (mm0,mm5);
pxor (mm6,mm6 );
movd (mm7,ebx + 0 );//p0
psraw (mm0,3 );//values to be clipped
movd (mm3,ebx + ecx );//get row above -- pms
punpcklbw (mm7,mm6);
psubw (mm7,mm2 );//pms + y
punpcklbw (mm3,mm6);
paddw (mm3,mm2 );//p0 - y
packuswb (mm7,mm7 );//clamp pms + y
packuswb (mm3,mm3 );//clamp p0 - y
movq (mm1,mm0);
movd (ebx + 0,mm7 );//write p0
psraw (mm0,15);
movq (mm7,mm0 );//save sign
pxor (mm1,mm0);
//
//
movq (mm2,eax + LIMIT_OFFSET );//get the limit value
//
//
psubsw (mm1,mm0 );//abs(i)
movq (mm5,mm2);
por (mm7,eax + FOURONES_OFFSET );//now have -1 or 1
psubw (mm5,mm1 );//limit - abs(i)
movq (mm4,mm5);
psraw (mm5,15);
movd (ebx + ecx,mm3 );//write pms
pxor (mm4,mm5);
psubsw (mm4,mm5 );//abs(limit - abs(i))
pxor (mm6,mm6);
movd (mm5,ebx + 4 );//p0
psubusw (mm2,mm4 );//limit - abs(limit - abs(i))
movd (mm3,ebx + ecx + 4 );//pms
pmullw (mm2,mm7 );//new y
punpcklbw (mm5,mm6);
//-
punpcklbw (mm3,mm6);
//-
paddw (mm3,mm2 );//pms + y
psubw (mm5,mm2 );//p0 - y
packuswb (mm3,mm3 );//clamp pms + y
//-
packuswb (mm5,mm5 );//clamp p0 - y
//-
movd (ebx + ecx + 4,mm3 );//write pms
//
movd (ebx + 4,mm5 );//write p0
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -