📄 predict.c
字号:
/*
 * MMX vertical half-pel average for 16-byte-wide rows, repeated for h rows:
 *   d[i] = (s[i] + s[i+lx] + 1) >> 1   for i = 0..15
 * Each movd fetches 4 bytes; the even registers (mm0/2/4/6) take the current
 * row and the odd registers (mm1/3/5/7) take the row lx bytes further on.
 * PACKED_0 and PACKED_1 are defined outside this excerpt -- presumably four
 * 16-bit words of 0 and of 1 respectively; confirm against their definitions.
 * NOTE(review): ecx is tested only after the first pass through the loop,
 * so this assumes h >= 1.
 */
_asm
{
mov esi, s ;// esi = s (read pointer)
mov edi, d ;// edi = d (write pointer)
mov ecx, h ;// ecx = h (rows left to process)
mov edx, lx ;// edx = lx (byte distance between rows)
pred_comp__l8:
movd mm0, [esi] ;// mm0 = esi[0..3]
movd mm1, [esi+edx] ;// mm1 = (esi + edx)[0..3], same columns one row on
movd mm2, [esi+4] ;// mm2 = esi[4..7]
movd mm3, [esi+edx+4] ;// mm3 = (esi + edx)[4..7]
movd mm4, [esi+8] ;// mm4 = esi[8..11]
movd mm5, [esi+edx+8] ;// mm5 = (esi + edx)[8..11]
movd mm6, [esi+12] ;// mm6 = esi[12..15]
movd mm7, [esi+edx+12] ;// mm7 = (esi + edx)[12..15]
punpcklbw mm0, PACKED_0 ;// widen the 4 bytes in mm0 to 4 words (high bytes come from PACKED_0)
punpcklbw mm1, PACKED_0 ;// widen the 4 bytes in mm1 to 4 words
punpcklbw mm2, PACKED_0 ;// widen the 4 bytes in mm2 to 4 words
punpcklbw mm3, PACKED_0 ;// widen the 4 bytes in mm3 to 4 words
punpcklbw mm4, PACKED_0 ;// widen the 4 bytes in mm4 to 4 words
punpcklbw mm5, PACKED_0 ;// widen the 4 bytes in mm5 to 4 words
punpcklbw mm6, PACKED_0 ;// widen the 4 bytes in mm6 to 4 words
punpcklbw mm7, PACKED_0 ;// widen the 4 bytes in mm7 to 4 words
paddw mm0, mm1
paddw mm2, mm3
paddw mm4, mm5
paddw mm6, mm7
paddw mm0, PACKED_1 ;// mm0 = row + next row + (1, 1, 1, 1), the rounding term
paddw mm2, PACKED_1 ;// mm2 = row + next row + (1, 1, 1, 1)
paddw mm4, PACKED_1 ;// mm4 = row + next row + (1, 1, 1, 1)
paddw mm6, PACKED_1 ;// mm6 = row + next row + (1, 1, 1, 1)
psrlw mm0, 1 ;// mm0 >>= 1 (halve each word: rounded average)
psrlw mm2, 1 ;// mm2 >>= 1
psrlw mm4, 1 ;// mm4 >>= 1
psrlw mm6, 1 ;// mm6 >>= 1
packuswb mm0, mm2 ;// narrow words of mm0 (low half) and mm2 (high half) to 8 bytes in mm0
packuswb mm4, mm6 ;// narrow words of mm4 (low half) and mm6 (high half) to 8 bytes in mm4
movq [edi], mm0 ;// store results for columns 0..7 into edi[0..7]
movq [edi+8], mm4 ;// store results for columns 8..15 into edi[8..15]
add esi, edx ;// esi += edx (next source row)
add edi, edx ;// edi += edx (next destination row)
dec ecx ;// one row done
jnz pred_comp__l8 ;// loop while rows remain
emms ;// empty MMX state before returning to C code
}
return;
}
}
/* Plain C path: for each of h rows, set the first w entries of d to the
   rounded average of s[i] and the sample lx entries after it (s[i+lx]),
   then advance both s and d by lx to the next row. */
for (j=0; j<h; j++) { for (i=0; i<w; i++) d[i] = (unsigned int)(s[i]+s[i+lx]+1)>>1;
s+= lx; d+= lx; }
} else if (xh && !yh) if (addflag)
{
if(cpu_MMX)
{
/*
 * MMX path for 8-byte-wide rows: horizontal half-pel average of s, then a
 * second rounded average with the bytes already stored in d:
 *   d[i] = (d[i] + ((s[i] + s[i+1] + 1) >> 1) + 1) >> 1   for i = 0..7
 * Reads s[0..8] (9 bytes) and rewrites d[0..7] on each of h rows, then
 * returns from the enclosing function.
 * PACKED_0 and PACKED_1 are defined outside this excerpt -- presumably four
 * 16-bit words of 0 and of 1 respectively; confirm against their definitions.
 * NOTE(review): ecx is tested only after the first pass through the loop,
 * so this assumes h >= 1.
 */
if(w == 8)
{
_asm
{
mov esi, s ;// esi = s (read pointer)
mov edi, d ;// edi = d (read-modify-write pointer)
mov edx, lx ;// edx = lx (byte distance between rows)
mov ecx, h ;// ecx = h (rows left to process)
pred_comp__l9:
movd mm0, [esi] ;// mm0 = esi[0..3]
movd mm1, [esi+1] ;// mm1 = esi[1..4], the right-hand neighbours of mm0
movd mm2, [esi+4] ;// mm2 = esi[4..7]
movd mm3, [esi+5] ;// mm3 = esi[5..8], the right-hand neighbours of mm2
punpcklbw mm0, PACKED_0 ;// widen the 4 bytes in mm0 to 4 words (high bytes come from PACKED_0)
punpcklbw mm1, PACKED_0 ;// widen the 4 bytes in mm1 to 4 words
punpcklbw mm2, PACKED_0 ;// widen the 4 bytes in mm2 to 4 words
punpcklbw mm3, PACKED_0 ;// widen the 4 bytes in mm3 to 4 words
paddw mm0, mm1
paddw mm2, mm3
paddw mm0, PACKED_1 ;// mm0 = sample + right neighbour + (1, 1, 1, 1)
paddw mm2, PACKED_1 ;// mm2 = sample + right neighbour + (1, 1, 1, 1)
movd mm1, [edi] ;// mm1 = edi[0..3]; mm1 is free again, its old value is already summed into mm0
movd mm3, [edi+4] ;// mm3 = edi[4..7]; mm3 is free again, its old value is already summed into mm2
psrlw mm0, 1 ;// mm0 >>= 1 (horizontal average for columns 0..3)
psrlw mm2, 1 ;// mm2 >>= 1 (horizontal average for columns 4..7)
punpcklbw mm1, PACKED_0 ;// widen the 4 destination bytes in mm1 to 4 words
punpcklbw mm3, PACKED_0 ;// widen the 4 destination bytes in mm3 to 4 words
paddw mm0, PACKED_1
paddw mm2, PACKED_1
paddw mm0, mm1 ;// mm0 = horizontal average + (1, 1, 1, 1) + edi[0..3]
paddw mm2, mm3 ;// mm2 = horizontal average + (1, 1, 1, 1) + edi[4..7]
psrlw mm0, 1 ;// mm0 >>= 1 (rounded average with the old destination)
psrlw mm2, 1 ;// mm2 >>= 1
packuswb mm0, mm2 ;// narrow words of mm0 (low half) and mm2 (high half) to 8 bytes in mm0
movq [edi], mm0 ;// store the 8 results into edi[0..7]
add esi, edx ;// esi += edx (next source row)
add edi, edx ;// edi += edx (next destination row)
dec ecx ;// one row done
jnz pred_comp__l9 ;// loop while rows remain
emms ;// empty MMX state before returning to C code
}
return;
}
/*
 * MMX path for 16-byte-wide rows: horizontal half-pel average of s, then a
 * second rounded average with the bytes already stored in d:
 *   d[i] = (d[i] + ((s[i] + s[i+1] + 1) >> 1) + 1) >> 1   for i = 0..15
 * Reads s[0..16] (17 bytes) and rewrites d[0..15] on each of h rows, then
 * returns from the enclosing function.
 * PACKED_0 and PACKED_1 are defined outside this excerpt -- presumably four
 * 16-bit words of 0 and of 1 respectively; confirm against their definitions.
 * NOTE(review): ecx is tested only after the first pass through the loop,
 * so this assumes h >= 1.
 */
if(w == 16)
{
_asm
{
mov esi, s ;// esi = s (read pointer)
mov edi, d ;// edi = d (read-modify-write pointer)
mov edx, lx ;// edx = lx (byte distance between rows)
mov ecx, h ;// ecx = h (rows left to process)
pred_comp__l10:
movd mm0, [esi] ;// mm0 = esi[0..3]
movd mm1, [esi+1] ;// mm1 = esi[1..4], the right-hand neighbours of mm0
movd mm2, [esi+4] ;// mm2 = esi[4..7]
movd mm3, [esi+5] ;// mm3 = esi[5..8], the right-hand neighbours of mm2
movd mm4, [esi+8] ;// mm4 = esi[8..11]
movd mm5, [esi+9] ;// mm5 = esi[9..12], the right-hand neighbours of mm4
movd mm6, [esi+12] ;// mm6 = esi[12..15]
movd mm7, [esi+13] ;// mm7 = esi[13..16], the right-hand neighbours of mm6
punpcklbw mm0, PACKED_0 ;// widen the 4 bytes in mm0 to 4 words (high bytes come from PACKED_0)
punpcklbw mm1, PACKED_0 ;// widen the 4 bytes in mm1 to 4 words
punpcklbw mm2, PACKED_0 ;// widen the 4 bytes in mm2 to 4 words
punpcklbw mm3, PACKED_0 ;// widen the 4 bytes in mm3 to 4 words
punpcklbw mm4, PACKED_0 ;// widen the 4 bytes in mm4 to 4 words
punpcklbw mm5, PACKED_0 ;// widen the 4 bytes in mm5 to 4 words
punpcklbw mm6, PACKED_0 ;// widen the 4 bytes in mm6 to 4 words
punpcklbw mm7, PACKED_0 ;// widen the 4 bytes in mm7 to 4 words
paddw mm0, mm1
paddw mm2, mm3
paddw mm4, mm5
paddw mm6, mm7
paddw mm0, PACKED_1 ;// mm0 = sample + right neighbour + (1, 1, 1, 1)
paddw mm2, PACKED_1 ;// mm2 = sample + right neighbour + (1, 1, 1, 1)
paddw mm4, PACKED_1 ;// mm4 = sample + right neighbour + (1, 1, 1, 1)
paddw mm6, PACKED_1 ;// mm6 = sample + right neighbour + (1, 1, 1, 1)
movd mm1, [edi] ;// mm1 = edi[0..3]; the odd registers are free again after the sums above
movd mm3, [edi+4] ;// mm3 = edi[4..7]
movd mm5, [edi+8] ;// mm5 = edi[8..11]
movd mm7, [edi+12] ;// mm7 = edi[12..15]
psrlw mm0, 1 ;// mm0 >>= 1 (horizontal average for columns 0..3)
psrlw mm2, 1 ;// mm2 >>= 1 (columns 4..7)
psrlw mm4, 1 ;// mm4 >>= 1 (columns 8..11)
psrlw mm6, 1 ;// mm6 >>= 1 (columns 12..15)
punpcklbw mm1, PACKED_0 ;// widen the 4 destination bytes in mm1 to 4 words
punpcklbw mm3, PACKED_0 ;// widen the 4 destination bytes in mm3 to 4 words
punpcklbw mm5, PACKED_0 ;// widen the 4 destination bytes in mm5 to 4 words
punpcklbw mm7, PACKED_0 ;// widen the 4 destination bytes in mm7 to 4 words
paddw mm0, PACKED_1
paddw mm2, PACKED_1
paddw mm4, PACKED_1
paddw mm6, PACKED_1
paddw mm0, mm1 ;// mm0 = horizontal average + (1, 1, 1, 1) + edi[0..3]
paddw mm2, mm3 ;// mm2 = horizontal average + (1, 1, 1, 1) + edi[4..7]
paddw mm4, mm5 ;// mm4 = horizontal average + (1, 1, 1, 1) + edi[8..11]
paddw mm6, mm7 ;// mm6 = horizontal average + (1, 1, 1, 1) + edi[12..15]
psrlw mm0, 1 ;// mm0 >>= 1 (rounded average with the old destination)
psrlw mm2, 1 ;// mm2 >>= 1
psrlw mm4, 1 ;// mm4 >>= 1
psrlw mm6, 1 ;// mm6 >>= 1
packuswb mm0, mm2 ;// narrow words of mm0 (low half) and mm2 (high half) to 8 bytes in mm0
packuswb mm4, mm6 ;// narrow words of mm4 (low half) and mm6 (high half) to 8 bytes in mm4
movq [edi], mm0 ;// store results for columns 0..7 into edi[0..7]
movq [edi+8], mm4 ;// store results for columns 8..15 into edi[8..15]
add esi, edx ;// esi += edx (next source row)
add edi, edx ;// edi += edx (next destination row)
dec ecx ;// one row done
jnz pred_comp__l10 ;// loop while rows remain
emms ;// empty MMX state before returning to C code
}
return;
}
}
/* Plain C path, reached when neither MMX branch above returned: for each of
   h rows, take the rounded average of s[i] and its right neighbour s[i+1],
   then replace d[i] with the rounded average of that value and the old d[i].
   Both s and d advance by lx to the next row. */
for (j=0; j<h; j++) { for (i=0; i<w; i++) d[i] = (d[i] + ((unsigned int)(s[i]+s[i+1]+1)>>1)+1)>>1;
s+= lx; d+= lx; }
} else
{
if(cpu_MMX)
{
/*
 * MMX path for 8-byte-wide rows: horizontal half-pel average, stored
 * straight to d (the old contents of d are not read):
 *   d[i] = (s[i] + s[i+1] + 1) >> 1   for i = 0..7
 * Reads s[0..8] (9 bytes) and writes d[0..7] on each of h rows, then
 * returns from the enclosing function.
 * PACKED_0 and PACKED_1 are defined outside this excerpt -- presumably four
 * 16-bit words of 0 and of 1 respectively; confirm against their definitions.
 * NOTE(review): ecx is tested only after the first pass through the loop,
 * so this assumes h >= 1.
 */
if(w == 8)
{
_asm
{
mov esi, s ;// esi = s (read pointer)
mov edi, d ;// edi = d (write pointer)
mov edx, lx ;// edx = lx (byte distance between rows)
mov ecx, h ;// ecx = h (rows left to process)
pred_comp__l11:
movd mm0, [esi] ;// mm0 = esi[0..3]
movd mm1, [esi+1] ;// mm1 = esi[1..4], the right-hand neighbours of mm0
movd mm2, [esi+4] ;// mm2 = esi[4..7]
movd mm3, [esi+5] ;// mm3 = esi[5..8], the right-hand neighbours of mm2
punpcklbw mm0, PACKED_0 ;// widen the 4 bytes in mm0 to 4 words (high bytes come from PACKED_0)
punpcklbw mm1, PACKED_0 ;// widen the 4 bytes in mm1 to 4 words
punpcklbw mm2, PACKED_0 ;// widen the 4 bytes in mm2 to 4 words
punpcklbw mm3, PACKED_0 ;// widen the 4 bytes in mm3 to 4 words
paddw mm0, mm1
paddw mm2, mm3
paddw mm0, PACKED_1 ;// mm0 = sample + right neighbour + (1, 1, 1, 1)
paddw mm2, PACKED_1 ;// mm2 = sample + right neighbour + (1, 1, 1, 1)
psrlw mm0, 1 ;// mm0 >>= 1 (rounded average for columns 0..3)
psrlw mm2, 1 ;// mm2 >>= 1 (rounded average for columns 4..7)
packuswb mm0, mm2 ;// narrow words of mm0 (low half) and mm2 (high half) to 8 bytes in mm0
movq [edi], mm0 ;// store the 8 results into edi[0..7]
add esi, edx ;// esi += edx (next source row)
add edi, edx ;// edi += edx (next destination row)
dec ecx ;// one row done
jnz pred_comp__l11 ;// loop while rows remain
emms ;// empty MMX state before returning to C code
}
return;
}
/*
 * MMX path for 16-byte-wide rows: horizontal half-pel average, stored
 * straight to d (the old contents of d are not read):
 *   d[i] = (s[i] + s[i+1] + 1) >> 1   for i = 0..15
 * Reads s[0..16] (17 bytes) and writes d[0..15] on each of h rows, then
 * returns from the enclosing function.
 * PACKED_0 and PACKED_1 are defined outside this excerpt -- presumably four
 * 16-bit words of 0 and of 1 respectively; confirm against their definitions.
 * NOTE(review): ecx is tested only after the first pass through the loop,
 * so this assumes h >= 1.
 */
if(w == 16)
{
_asm
{
mov esi, s ;// esi = s (read pointer)
mov edi, d ;// edi = d (write pointer)
mov edx, lx ;// edx = lx (byte distance between rows)
mov ecx, h ;// ecx = h (rows left to process)
pred_comp__l12:
movd mm0, [esi] ;// mm0 = esi[0..3]
movd mm1, [esi+1] ;// mm1 = esi[1..4], the right-hand neighbours of mm0
movd mm2, [esi+4] ;// mm2 = esi[4..7]
movd mm3, [esi+5] ;// mm3 = esi[5..8], the right-hand neighbours of mm2
movd mm4, [esi+8] ;// mm4 = esi[8..11]
movd mm5, [esi+9] ;// mm5 = esi[9..12], the right-hand neighbours of mm4
movd mm6, [esi+12] ;// mm6 = esi[12..15]
movd mm7, [esi+13] ;// mm7 = esi[13..16], the right-hand neighbours of mm6
punpcklbw mm0, PACKED_0 ;// widen the 4 bytes in mm0 to 4 words (high bytes come from PACKED_0)
punpcklbw mm1, PACKED_0 ;// widen the 4 bytes in mm1 to 4 words
punpcklbw mm2, PACKED_0 ;// widen the 4 bytes in mm2 to 4 words
punpcklbw mm3, PACKED_0 ;// widen the 4 bytes in mm3 to 4 words
punpcklbw mm4, PACKED_0 ;// widen the 4 bytes in mm4 to 4 words
punpcklbw mm5, PACKED_0 ;// widen the 4 bytes in mm5 to 4 words
punpcklbw mm6, PACKED_0 ;// widen the 4 bytes in mm6 to 4 words
punpcklbw mm7, PACKED_0 ;// widen the 4 bytes in mm7 to 4 words
paddw mm0, mm1
paddw mm2, mm3
paddw mm4, mm5
paddw mm6, mm7
paddw mm0, PACKED_1 ;// mm0 = sample + right neighbour + (1, 1, 1, 1)
paddw mm2, PACKED_1 ;// mm2 = sample + right neighbour + (1, 1, 1, 1)
paddw mm4, PACKED_1 ;// mm4 = sample + right neighbour + (1, 1, 1, 1)
paddw mm6, PACKED_1 ;// mm6 = sample + right neighbour + (1, 1, 1, 1)
psrlw mm0, 1 ;// mm0 >>= 1 (rounded average for columns 0..3)
psrlw mm2, 1 ;// mm2 >>= 1 (columns 4..7)
psrlw mm4, 1 ;// mm4 >>= 1 (columns 8..11)
psrlw mm6, 1 ;// mm6 >>= 1 (columns 12..15)
packuswb mm0, mm2 ;// narrow words of mm0 (low half) and mm2 (high half) to 8 bytes in mm0
packuswb mm4, mm6 ;// narrow words of mm4 (low half) and mm6 (high half) to 8 bytes in mm4
movq [edi], mm0 ;// store results for columns 0..7 into edi[0..7]
movq [edi+8], mm4 ;// store results for columns 8..15 into edi[8..15]
add esi, edx ;// esi += edx (next source row)
add edi, edx ;// edi += edx (next destination row)
dec ecx ;// one row done
jnz pred_comp__l12 ;// loop while rows remain
emms ;// empty MMX state before returning to C code
}
return;
}
}
/* Plain C path, reached when neither MMX branch above returned: for each of
   h rows, set the first w entries of d to the rounded average of s[i] and
   its right neighbour s[i+1]; the old contents of d are not read. Both s
   and d advance by lx to the next row. */
for (j=0; j<h; j++) { for (i=0; i<w; i++) d[i] = (unsigned int)(s[i]+s[i+1]+1)>>1;
s+= lx; d+= lx; }
} else /* if (xh && yh) */ if (addflag)
{
if(cpu_MMX)
{
if(w == 8)
{
_asm
{
mov esi, s ;// esi = s
mov edi, d ;// edi = d
mov edx, lx ;// edx = lx
mov ecx, h ;// ecx = h
pred_comp__l13:
movd mm0, [esi] ;// lower 4 bytes into mm0 = esi[0..3]
movd mm1, [esi+1] ;// lower 4 bytes into mm1 = esi[1..4]
movd mm2, [esi+edx] ;// lower 4 bytes into mm2 = (esi + edx)[0..3]
movd mm3, [esi+edx+1] ;// lower 4 bytes into mm3 = (esi + edx)[1..4]
movd mm4, [esi+4] ;// lower 4 bytes into mm4 = esi[4..7]
movd mm5, [esi+5] ;// lower 4 bytes into mm5 = esi[5..8]
movd mm6, [esi+edx+4] ;// lower 4 bytes into mm6 = (esi + edx)[4..7]
movd mm7, [esi+edx+5] ;// lower 4 bytes into mm7 = (esi + edx)[5..8]
punpcklbw mm0, PACKED_0 ;// unpack the lower 4 bytes into mm0
punpcklbw mm1, PACKED_0 ;// unpack the lower 4 bytes into mm1
punpcklbw mm2, PACKED_0 ;// unpack the lower 4 bytes into mm2
punpcklbw mm3, PACKED_0 ;// unpack the lower 4 bytes into mm3
punpcklbw mm4, PACKED_0 ;// unpack the lower 4 bytes into mm4
punpcklbw mm5, PACKED_0 ;// unpack the lower 4 bytes into mm5
punpcklbw mm6, PACKED_0 ;// unpack the lower 4 bytes into mm6
punpcklbw mm7, PACKED_0 ;// unpack the lower 4 bytes into mm7
paddw mm0, mm1
paddw mm2, mm3
paddw mm4, mm5
paddw mm6, mm7
paddw mm0, mm2 ;// mm0 += mm1 + mm2 + mm3
paddw mm4, mm6 ;// mm4 += mm5 + mm6 + mm7
movd mm1, [edi] ;// lower 4 bytes into mm1 = edi[0..3]
movd mm5, [edi+4] ;// lower 4 bytes into mm5 = edi[4..7]
paddw mm0, PACKED_2 ;// mm0 += (2, 2, 2, 2)
paddw mm4, PACKED_2 ;// mm4 += (2, 2, 2, 2)
punpcklbw mm1, PACKED_0 ;// unpack the lower 4 bytes into mm1
punpcklbw mm5, PACKED_0 ;// unpack the lower 4 bytes into mm5
psrlw mm0, 2
psrlw mm4, 2
paddw mm0, PACKED_1
paddw mm4, PACKED_1
paddw mm0, mm1
paddw mm4, mm5
psrlw mm0, 1 ;// mm0 = (mm0 >> 2) + (1, 1, 1, 1) + mm1
psrlw mm4, 1 ;// mm4 = (mm4 >> 2) + (1, 1, 1, 1) + mm5
packuswb mm0, mm4 ;// pack mm0 and mm4
movq [edi], mm0 ;// store mm0 into edi[0..7]
add esi, edx ;// esi += edx
add edi, edx ;// edi += edx
dec ecx ;// decrement ecx
jnz pred_comp__l13 ;// loop while not zero
emms ;// empty MMX state
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -