// pictureb_sse.cpp
// From "H.263 encoder with CPU-instruction optimizations, Visual C++ version."
// Language: C++ (original listing: 104 lines).
/*
 * findbiSAD_sse
 *
 * Computes the sum of absolute differences (SAD) between a block of `curr`
 * and the rounded byte-wise average (PAVGB) of the co-located samples taken
 * from `prev` and `next`.
 *
 * Sampling of prev/next: each 8-byte load is ANDed with 0x00FF per 16-bit
 * word and packed, so only bytes 0, 2, 4, ... of every source row are used,
 * and consecutive rows are lx_ipol*2 bytes apart.
 * NOTE(review): this looks like addressing into 2x-interpolated reference
 * planes -- confirm against the callers.
 *
 * Parameters:
 *   prev, next  first sampled byte of the two reference blocks
 *   curr        first byte of the current block (read contiguously per row)
 *   lx_ipol     row stride in bytes of prev/next; advanced by 2*lx_ipol per row
 *   lx_curr     row stride in bytes of curr
 *   blocksize   16 selects the 16x16 path; any other value runs the 8x8 path
 *
 * Returns: the accumulated SAD over all rows of the block.
 *
 * Requirements: 32-bit MSVC inline assembly; the CPU must support MMX and
 * the PAVGB/PSADBW instructions on MMX registers. EMMS is executed before
 * returning so the x87 state is usable again.
 *
 * Register roles inside the asm blocks:
 *   esi = prev row, edi = next row, edx = curr row, ecx = rows remaining,
 *   eax = running SAD, ebx = per-row SAD scratch, mm6 = even-byte mask.
 * EBX is not pushed/popped by hand: MSVC saves and restores it for functions
 * whose __asm blocks use it.
 */
int findbiSAD_sse(unsigned char *prev, unsigned char *next, unsigned char *curr,
                  int lx_ipol, int lx_curr, int blocksize)
{
    int sad = 0;
    __int64 mask = 0x00ff00ff00ff00ff;   // keeps the low byte of each 16-bit word
    int distance = lx_ipol*2;            // prev/next advance two strides per row

    if (16 == blocksize)
    {
        __asm
        {
            mov         esi, [prev]
            mov         edi, [next]
            mov         edx, [curr]
            mov         ecx, 16             // 16 rows
            movq        mm6, [mask]         // loop-invariant: load the mask once
            xor         eax, eax            // running SAD = 0

        loop028:
            movq        mm0, [edx]          // curr pixels 0..7
            movq        mm2, [edx+8]        // curr pixels 8..15

            // Left half: even bytes of prev/next[0..15] -> 8 pixels each
            movq        mm1, [esi]
            movq        mm4, [esi+8]
            movq        mm5, [edi]
            movq        mm7, [edi+8]
            pand        mm1, mm6
            pand        mm4, mm6
            pand        mm5, mm6
            pand        mm7, mm6
            packuswb    mm1, mm4
            packuswb    mm5, mm7
            pavgb       mm1, mm5            // rounded average of prev and next

            // Right half: even bytes of prev/next[16..31] -> 8 pixels each
            movq        mm3, [esi+16]
            movq        mm4, [esi+24]
            movq        mm5, [edi+16]
            movq        mm7, [edi+24]
            pand        mm3, mm6
            pand        mm4, mm6
            pand        mm5, mm6
            pand        mm7, mm6
            packuswb    mm3, mm4
            packuswb    mm5, mm7
            pavgb       mm3, mm5

            psadbw      mm0, mm1            // SAD of left 8 pixels
            psadbw      mm2, mm3            // SAD of right 8 pixels
            paddw       mm0, mm2            // row SAD in the low word of mm0
            movd        ebx, mm0
            add         eax, ebx

            add         esi, [distance]
            add         edi, [distance]
            add         edx, [lx_curr]
            dec         ecx
            jnz         loop028

            mov         [sad], eax
            emms
        }
    }
    else
    {
        __asm
        {
            mov         esi, [prev]
            mov         edi, [next]
            mov         edx, [curr]
            mov         ecx, 8              // 8 rows
            movq        mm6, [mask]         // loop-invariant: load the mask once
            xor         eax, eax            // running SAD = 0 (was left uninitialised)

        loop029:
            movq        mm0, [edx]          // curr pixels 0..7

            // Even bytes of prev/next[0..15] -> 8 pixels each
            movq        mm1, [esi]
            movq        mm4, [esi+8]
            movq        mm5, [edi]
            movq        mm7, [edi+8]
            pand        mm1, mm6
            pand        mm4, mm6
            pand        mm5, mm6
            pand        mm7, mm6
            packuswb    mm1, mm4
            packuswb    mm5, mm7
            pavgb       mm1, mm5            // rounded average of prev and next

            psadbw      mm0, mm1            // row SAD in the low word of mm0
            movd        ebx, mm0
            add         eax, ebx

            add         esi, [distance]
            add         edi, [distance]
            add         edx, [lx_curr]
            dec         ecx
            jnz         loop029

            mov         [sad], eax
            emms
        }
    }
    return sad;
}
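// Keyboard shortcuts (from the hosting page):
//   Copy code: Ctrl + C
//   Search code: Ctrl + F
//   Full-screen mode: F11
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -
//   Show shortcuts: ?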