📄 motion.c
字号:
if (v>=0)
s+= v;
else
s-= v;
}
p1 = p1a;
p1a+= lx;
p2+= lx;
}
}
}
else
{
if(cpu_MMX)
{
_asm
{
mov esi, blk1 ;// esi = blk1
mov edi, blk2 ;// edi = blk2
mov ecx, h ;// ecx = h
mov edx, lx ;// edx = lx
mov eax, 0 ;// eax = s
dist1__l5:
movd mm0, [esi] ;// lower 4 bytes in mm0 = esi[0..3]
movd mm1, [esi+1] ;// lower 4 bytes in mm1 = esi[1..4]
movd mm2, [esi+edx] ;// lower 4 bytes in mm2 = (esi + edx)[0..3]
movd mm3, [esi+edx+1] ;// lower 4 bytes in mm3 = (esi + edx)[1..4]
movd mm4, [esi+4] ;// lower 4 bytes in mm4 = esi[4..7]
movd mm5, [esi+5] ;// lower 4 bytes in mm5 = esi[5..8]
movd mm6, [esi+edx+4] ;// lower 4 bytes in mm6 = (esi + edx)[4..7]
movd mm7, [esi+edx+5] ;// lower 4 bytes in mm7 = (esi + edx)[5..8]
punpcklbw mm0, PACKED_0 ;// unpack the lower 4 bytes into mm0
punpcklbw mm1, PACKED_0 ;// unpack the lower 4 bytes into mm1
punpcklbw mm2, PACKED_0 ;// unpack the lower 4 bytes into mm2
punpcklbw mm3, PACKED_0 ;// unpack the lower 4 bytes into mm3
punpcklbw mm4, PACKED_0 ;// unpack the lower 4 bytes into mm4
punpcklbw mm5, PACKED_0 ;// unpack the lower 4 bytes into mm5
punpcklbw mm6, PACKED_0 ;// unpack the lower 4 bytes into mm6
punpcklbw mm7, PACKED_0 ;// unpack the lower 4 bytes into mm7
paddw mm0, mm1
paddw mm2, mm3
paddw mm4, mm5
paddw mm6, mm7
paddw mm0, mm2 ;// mm0 += mm1 + mm2 + mm3
paddw mm4, mm6 ;// mm4 += mm5 + mm6 + mm7
paddw mm0, PACKED_2 ;// mm0 += (2, 2, 2, 2)
paddw mm4, PACKED_2 ;// mm4 += (2, 2, 2, 2)
psrlw mm0, 2 ;// mm0 >>= 2
psrlw mm4, 2 ;// mm0 >>= 2
movd mm1, [edi] ;// lower 4 bytes in mm1 = edi[0..3]
movd mm5, [edi+4] ;// lower 4 bytes in mm5 = edi[4..7]
punpcklbw mm1, PACKED_0 ;// unpack the lower 4 bytes into mm1
punpcklbw mm5, PACKED_0 ;// unpack the lower 4 bytes into mm5
movq mm3, mm0
movq mm7, mm4
psubusw mm0, mm1
psubusw mm4, mm5
psubusw mm1, mm3
psubusw mm5, mm7
por mm0, mm1 ;// mm0 = abs((esi[0..3] + esi[1..4] + (esi + edx)[0..3] + (esi + edx)[1..4]) >> 2) - edi[0..3])
por mm4, mm5 ;// mm0 = abs((esi[4..7] + esi[5..8] + (esi + edx)[4..7] + (esi + edx)[5..8]) >> 2) - edi[4..7])
movq mm2, mm0
movq mm6, mm4
punpcklwd mm0, PACKED_0 ;// unpack the lower 2 words into mm0
punpckhwd mm2, PACKED_0 ;// unpack the upper 2 words into mm2
punpcklwd mm4, PACKED_0 ;// unpack the lower 2 words into mm4
punpckhwd mm6, PACKED_0 ;// unpack the upper 2 words into mm6
paddd mm0, mm2
paddd mm4, mm6
paddd mm0, mm4 ;// mm0 += mm2 + mm4 + mm6
movd ebx, mm0 ;// load lower dword of mm0 into ebx
add eax, ebx ;// eax += ebx
psrlq mm0, 32 ;// shift mm0 to get upper dword
movd ebx, mm0 ;// load lower dword of mm0 into ebx
add eax, ebx ;// eax += ebx
movd mm0, [esi+8] ;// lower 4 bytes in mm0 = esi[8..11]
movd mm1, [esi+9] ;// lower 4 bytes in mm1 = esi[9..12]
movd mm2, [esi+edx+8] ;// lower 4 bytes in mm2 = (esi + edx)[8..11]
movd mm3, [esi+edx+9] ;// lower 4 bytes in mm3 = (esi + edx)[9..12]
movd mm4, [esi+12] ;// lower 4 bytes in mm4 = esi[12..15]
movd mm5, [esi+13] ;// lower 4 bytes in mm5 = esi[13..16]
movd mm6, [esi+edx+12] ;// lower 4 bytes in mm6 = (esi + edx)[12..15]
movd mm7, [esi+edx+13] ;// lower 4 bytes in mm7 = (esi + edx)[13..16]
punpcklbw mm0, PACKED_0 ;// unpack the lower 4 bytes into mm0
punpcklbw mm1, PACKED_0 ;// unpack the lower 4 bytes into mm1
punpcklbw mm2, PACKED_0 ;// unpack the lower 4 bytes into mm2
punpcklbw mm3, PACKED_0 ;// unpack the lower 4 bytes into mm3
punpcklbw mm4, PACKED_0 ;// unpack the lower 4 bytes into mm4
punpcklbw mm5, PACKED_0 ;// unpack the lower 4 bytes into mm5
punpcklbw mm6, PACKED_0 ;// unpack the lower 4 bytes into mm6
punpcklbw mm7, PACKED_0 ;// unpack the lower 4 bytes into mm7
paddw mm0, mm1
paddw mm2, mm3
paddw mm4, mm5
paddw mm6, mm7
paddw mm0, mm2 ;// mm0 += mm1 + mm2 + mm3
paddw mm4, mm6 ;// mm4 += mm5 + mm6 + mm7
paddw mm0, PACKED_2 ;// mm0 += (2, 2, 2, 2)
paddw mm4, PACKED_2 ;// mm4 += (2, 2, 2, 2)
psrlw mm0, 2 ;// mm0 >>= 2
psrlw mm4, 2 ;// mm0 >>= 2
movd mm1, [edi+8] ;// lower 4 bytes in mm1 = edi[8..11]
movd mm5, [edi+12] ;// lower 4 bytes in mm5 = edi[12..15]
punpcklbw mm1, PACKED_0 ;// unpack the lower 4 bytes into mm1
punpcklbw mm5, PACKED_0 ;// unpack the lower 4 bytes into mm5
movq mm3, mm0
movq mm7, mm4
psubusw mm0, mm1
psubusw mm4, mm5
psubusw mm1, mm3
psubusw mm5, mm7
por mm0, mm1 ;// mm0 = abs((esi[8..11] + esi[9..12] + (esi + edx)[8..11] + (esi + edx)[9..12]) >> 2) - edi[8..11])
por mm4, mm5 ;// mm0 = abs((esi[12..15] + esi[13..16] + (esi + edx)[12..15] + (esi + edx)[13..16]) >> 2) - edi[12..15])
movq mm2, mm0
movq mm6, mm4
punpcklwd mm0, PACKED_0 ;// unpack the lower 2 words into mm0
punpckhwd mm2, PACKED_0 ;// unpack the upper 2 words into mm2
punpcklwd mm4, PACKED_0 ;// unpack the lower 2 words into mm4
punpckhwd mm6, PACKED_0 ;// unpack the upper 2 words into mm6
paddd mm0, mm2
paddd mm4, mm6
paddd mm0, mm4 ;// mm0 += mm2 + mm4 + mm6
movd ebx, mm0 ;// load lower dword of mm0 into ebx
add eax, ebx ;// eax += ebx
psrlq mm0, 32 ;// shift mm0 to get upper dword
movd ebx, mm0 ;// load lower dword of mm0 into ebx
add eax, ebx ;// eax += ebx
add esi, edx ;// esi += edx
add edi, edx ;// edi += edx
dec ecx ;// decrement ecx
jnz dist1__l5 ;// loop while not zero
mov s, eax ;// s = eax
emms ;// empty MMX state
}
}
else
{
s = 0;
p1 = blk1;
p2 = blk2;
p1a = p1 + lx;
for (j=0; j<h; j++)
{
for (i=0; i<16; i++)
{
v = ((unsigned int)(p1[i]+p1[i+1]+p1a[i]+p1a[i+1]+2)>>2) - p2[i];
if (v>=0)
s+= v;
else
s-= v;
}
p1 = p1a;
p1a+= lx;
p2+= lx;
}
}
}
return s;
}
/*
* Squared error between two blocks (accumulated as a sum of squared pixel differences)
*/static int dist2(blk1,blk2,lx,hx,hy,h)unsigned char *blk1,*blk2;int lx,hx,hy,h;{ unsigned char *p1,*p1a,*p2; int i,j; int s,v;
if(fastMotionCompensationLevel)
{
lx <<= fastMotionCompensationLevel;
h >>= fastMotionCompensationLevel;
}
if (!hx && !hy)
{
if(cpu_MMX)
{
_asm
{
mov esi, blk1 ;// esi = blk1
mov edi, blk2 ;// edi = blk2
mov ecx, h ;// ecx = h
mov edx, lx ;// edx = lx
mov eax, 0 ;// eax = s
dist2__l1:
movd mm0, [esi] ;// lower 4 bytes in mm0 = esi[0..3]
movd mm1, [edi] ;// lower 4 bytes in mm1 = edi[0..3]
movd mm2, [esi+4] ;// lower 4 bytes in mm2 = esi[4..7]
movd mm3, [edi+4] ;// lower 4 bytes in mm3 = edi[4..7]
movd mm4, [esi+8] ;// lower 4 bytes in mm4 = esi[8..11]
movd mm5, [edi+8] ;// lower 4 bytes in mm5 = edi[8..11]
movd mm6, [esi+12] ;// lower 4 bytes in mm6 = esi[12..15]
movd mm7, [edi+12] ;// lower 4 bytes in mm7 = edi[12..15]
punpcklbw mm0, PACKED_0 ;// unpack the lower 4 bytes into mm0
punpcklbw mm1, PACKED_0 ;// unpack the lower 4 bytes into mm1
punpcklbw mm2, PACKED_0 ;// unpack the lower 4 bytes into mm2
punpcklbw mm3, PACKED_0 ;// unpack the lower 4 bytes into mm3
punpcklbw mm4, PACKED_0 ;// unpack the lower 4 bytes into mm4
punpcklbw mm5, PACKED_0 ;// unpack the lower 4 bytes into mm5
punpcklbw mm6, PACKED_0 ;// unpack the lower 4 bytes into mm6
punpcklbw mm7, PACKED_0 ;// unpack the lower 4 bytes into mm7
psubw mm0, mm1 ;// mm0 -= mm1
psubw mm2, mm3 ;// mm2 -= mm3
psubw mm4, mm5 ;// mm4 -= mm5
psubw mm6, mm7 ;// mm6 -= mm7
pmullw mm0, mm0 ;// mm0 = sqr(mm0)
pmullw mm2, mm2 ;// mm2 = sqr(mm2)
pmullw mm4, mm4 ;// mm4 = sqr(mm4)
pmullw mm6, mm6 ;// mm6 = sqr(mm6)
paddw mm0, mm2
paddw mm4, mm6
paddw mm0, mm4 ;// mm0 += mm2 + mm4 + mm6
movq mm1, mm0
punpcklwd mm0, PACKED_0 ;// unpack the lower 2 words into mm0
punpckhwd mm1, PACKED_0 ;// unpack the upper 2 words into mm1
paddd mm0, mm1 ;// mm0 += mm1
movd ebx, mm0 ;// load lower dword of mm0 into ebx
add eax, ebx ;// eax += ebx
psrlq mm0, 32 ;// shift mm0 to get upper dword
movd ebx, mm0 ;// load lower dword of mm0 into ebx
add eax, ebx ;// eax += ebx
add esi, edx ;// esi += edx
add edi, edx ;// edi += edx
dec ecx ;// decrement ecx
jnz dist2__l1 ;// loop while not zero
mov s, eax ;// s = eax
emms ;// empty MMX state
}
}
else
{
s = 0;
p1 = blk1;
p2 = blk2;
for (j=0; j<h; j++) { for (i=0; i<16; i++) { v = p1[i] - p2[i]; s+= v*v; } p1+= lx; p2+= lx; }
}
} else if (hx && !hy)
{
if(cpu_MMX)
{
_asm
{
mov esi, blk1 ;// esi = blk1
mov edi, blk2 ;// edi = blk2
mov edx, lx ;// edx = lx
mov eax, 0 ;// eax = s
mov ecx, h ;// ecx = h
dist2__l2:
movd mm0, [esi] ;// lower 4 bytes in mm0 = esi[0..3]
movd mm1, [esi+1] ;// lower 4 bytes in mm1 = esi[1..4]
movd mm2, [esi+4] ;// lower 4 bytes in mm2 = esi[4..7]
movd mm3, [esi+5] ;// lower 4 bytes in mm3 = esi[5..8]
movd mm4, [esi+8] ;// lower 4 bytes in mm4 = esi[8..11]
movd mm5, [esi+9] ;// lower 4 bytes in mm5 = esi[9..12]
movd mm6, [esi+12] ;// lower 4 bytes in mm6 = esi[12..15]
movd mm7, [esi+13] ;// lower 4 bytes in mm7 = esi[13..16]
punpcklbw mm0, PACKED_0 ;// unpack the lower 4 bytes into mm0
punpcklbw mm1, PACKED_0 ;// unpack the lower 4 bytes into mm1
punpcklbw mm2, PACKED_0 ;// unpack the lower 4 bytes into mm2
punpcklbw mm3, PACKED_0 ;// unpack the lower 4 bytes into mm3
punpcklbw mm4, PACKED_0 ;// unpack the lower 4 bytes into mm4
punpcklbw mm5, PACKED_0 ;// unpack the lower 4 bytes into mm5
punpcklbw mm6, PACKED_0 ;// unpack the lower 4 bytes into mm6
punpcklbw mm7, PACKED_0 ;// unpack the lower 4 bytes into mm7
paddw mm0, mm1 ;// mm0 += mm1
paddw mm2, mm3 ;// mm2 += mm3
paddw mm4, mm5 ;// mm4 += mm5
paddw mm6, mm7 ;// mm6 += mm7
paddw mm0, PACKED_1 ;// mm0 += (1, 1, 1, 1)
paddw mm2, PACKED_1 ;// mm2 += (1, 1, 1, 1)
paddw mm4, PACKED_1 ;// mm4 += (1, 1, 1, 1)
paddw mm6, PACKED_1 ;// mm6 += (1, 1, 1, 1)
psrlw mm0, 1 ;// mm0 >>= 1
psrlw mm2, 1 ;// mm2 >>= 1
psrlw mm4, 1 ;// mm4 >>= 1
psrlw mm6, 1 ;// mm6 >>= 1
movd mm1, [edi] ;// lower 4 bytes in mm1 = edi[0..3]
movd mm3, [edi+4] ;// lower 4 bytes in mm3 = edi[4..7]
movd mm5, [edi+8] ;// lower 4 bytes in mm5 = edi[8..11]
movd mm7, [edi+12] ;// lower 4 bytes in mm7 = edi[12..15]
punpcklbw mm1, PACKED_0 ;// unpack the lower 4 bytes into mm1
punpcklbw mm3, PACKED_0 ;// unpack the lower 4 bytes into mm3
punpcklbw mm5, PACKED_0 ;// unpack the lower 4 bytes into mm5
punpcklbw mm7, PACKED_0 ;// unpack the lower 4 bytes into mm7
psubw mm0, mm1 ;// mm0 -= mm1
psubw mm2, mm3 ;// mm2 -= mm3
psubw mm4, mm5 ;// mm4 -= mm5
psubw mm6, mm7 ;// mm6 -= mm7
pmullw mm0, mm0 ;// mm0 = sqr(mm0)
pmullw mm2, mm2 ;// mm2 = sqr(mm2)
pmullw mm4, mm4 ;// mm4 = sqr(mm4)
pmullw mm6, mm6 ;// mm6 = sqr(mm6)
paddw mm0, mm2
paddw mm4, mm6
paddw mm0, mm4 ;// mm0 += mm2 + mm4 + mm6
movq mm1, mm0
punpcklwd mm0, PACKED_0 ;// unpack the 4 lower words into mm0
punpckhwd mm1, PACKED_0 ;// unpack the 4 lower words into mm1
paddd mm0, mm1 ;// mm0 += mm1
movd ebx, mm0 ;// load lower dword of mm0 into ebx
add eax, ebx ;// eax += ebx
psrlq mm0, 32 ;// shift mm0 to get upper dword
movd ebx, mm0 ;// load lower dword of mm0 into ebx
add eax, ebx ;// eax += ebx
add esi, edx ;// esi += edx
add edi, edx ;// edi += edx
dec ecx ;// decrement ecx
jnz dist2__l2 ;// loop while not zero
mov s, eax ;// s = eax
emms ;// empty MMX state
}
}
else
{
s = 0;
p1 = blk1;
p2 = blk2;
for (j=0; j<h; j++) { for (i=0; i<16; i++) { v = ((unsigned int)(p1[i]+p1[i+1]+1)>>1) - p2[i]; s+= v*v; } p1+= lx; p2+= lx; }
}
} else if (!hx && hy) {
if(cpu_MMX)
{
// I do loop unrolling on the inner loop !
_asm
{
mov esi, blk1 ;// esi = blk1
mov edi, blk2 ;// edi = blk2
mov ecx, h ;// ecx = h
mov edx, lx ;// edx = lx
mov eax, 0 ;// eax = s
dist2__l3:
movd mm0, [esi] ;// lower 4 bytes in mm0 = esi[0..3]
movd mm1, [esi+edx] ;// lower 4 bytes in mm1 = (esi + edx)[0..3]
movd mm2, [esi+4] ;// lower 4 bytes in mm2 = esi[4..7]
movd mm3, [esi+edx+4] ;// lower 4 bytes in mm3 = (esi + edx)[4..7]
movd mm4, [esi+8] ;// lower 4 bytes in mm4 = esi[8..11]
movd mm5, [esi+edx+8] ;// lower 4 bytes in mm5 = (esi + edx)[8..11]
movd mm6, [esi+12] ;// lower 4 bytes in mm6 = esi[12..15]
movd mm7, [esi+edx+12] ;// lower 4 bytes in mm7 = (esi + edx)[12..15]
punpcklbw mm0, PACKED_0 ;// unpack the lower 4 bytes into mm0
punpcklbw mm1, PACKED_0 ;// unpack the lower 4 bytes into mm1
punpcklbw mm2, PACKED_0 ;// unpack the lower 4 bytes into mm2
punpcklbw mm3, PACKED_0 ;// unpack the lower 4 bytes into mm3
punpcklbw mm4, PACKED_0 ;// unpack the lower 4 bytes into mm4
punpcklbw mm5, PACKED_0 ;// unpack the lower 4 bytes into mm5
punpcklbw mm6, PACKED_0 ;// unpack the lower 4 bytes into mm6
punpcklbw mm7, PACKED_0 ;// unpack the lower 4 bytes into mm7
paddw mm0, mm1 ;// mm0 += mm1
paddw mm2, mm3 ;// mm2 += mm3
paddw mm4, mm5 ;// mm4 += mm5
paddw mm6, mm7 ;// mm6 += mm7
paddw mm0, PACKED_1 ;// mm0 += (1, 1, 1, 1)
paddw mm2, PACKED_1 ;// mm2 += (1, 1, 1, 1)
paddw mm4, PACKED_1 ;// mm4 += (1, 1, 1, 1)
paddw mm6, PACKED_1 ;// mm6 += (1, 1, 1, 1)
psrlw mm0, 1 ;// mm0 >>= 1
psrlw mm2, 1 ;// mm2 >>= 1
psrlw mm4, 1 ;// mm4 >>= 1
psrlw mm6, 1 ;// mm6 >>= 1
movd mm1, [edi] ;// lower 4 bytes in mm1 = edi[0..3]
movd mm3, [edi+4] ;// lower 4 bytes in mm3 = edi[4..7]
movd mm5, [edi+8] ;// lower 4 bytes in mm5 = edi[8..11]
movd mm7, [edi+12] ;// lower 4 bytes in mm7 = edi[12..15]
punpcklbw mm1, PACKED_0 ;// unpack the lower 4 bytes into mm1
punpcklbw mm3, PACKED_0 ;// unpack the lower 4 bytes into mm3
punpcklbw mm5, PACKED_0 ;// unpack the lower 4 bytes into mm5
punpcklbw mm7, PACKED_0 ;// unpack the lower 4 bytes into mm7
psubw mm0, mm1 ;// mm0 -= mm1
psubw mm2, mm3 ;// mm2 -= mm3
psubw mm4, mm5 ;// mm4 -= mm5
psubw mm6, mm7 ;// mm6 -= mm7
pmullw mm0, mm0 ;// mm0 = sqr(mm0)
pmullw mm2, mm2 ;// mm2 = sqr(mm2)
pmullw mm4, mm4 ;// mm4 = sqr(mm4)
pmullw mm6, mm6 ;// mm6 = sqr(mm6)
paddw mm0, mm2
paddw mm4, mm6
paddw mm0, mm4 ;// mm0 += mm2 + mm4 + mm6
movq mm1, mm0
punpcklwd mm0, PACKED_0 ;// unpack the 4 lower words into mm0
punpckhwd mm1, PACKED_0 ;// unpack the 4 lower words into mm1
paddd mm0, mm1 ;// mm0 += mm1
movd ebx, mm0 ;// load lower dword of mm0 into ebx
add eax, ebx ;// eax
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -