// File: readpic.c
// (code-viewer residue: "Font size:" control label)
paddw mm5, mm3 ;// mm5 += mm3
movq mm1, mm4
movq mm3, mm5
pmullw mm4, PACKED_228 ;// mm4 *= 228
pmullw mm5, PACKED_70 ;// mm5 *= 70
pmulhw mm1, PACKED_228 ;// mm1 *= 228
pmulhw mm3, PACKED_70 ;// mm3 *= 70
movq mm6, mm4
movq mm7, mm5
punpcklwd mm4, mm1 ;// unpack the two low words into two dwords
punpckhwd mm6, mm1 ;// unpack the two upper words into two dwords
punpcklwd mm5, mm3 ;// unpack the two low words into two dwords
punpckhwd mm7, mm3 ;// unpack the two upper words into two dwords
paddd mm4, mm5 ;// mm4 += mm5
paddd mm6, mm7 ;// mm6 += mm7
movq mm7, PACKED_255 ;// mm7 = (255, 255, 255, 255)
movq mm5, [esi-3] ;// mm5 = 4 words into esi[-3..4]
movq mm0, [esi-2] ;// mm0 = 4 words into esi[-2..5]
movq mm1, [esi+3] ;// mm1 = 4 words into esi[3..10]
movq mm3, [esi+4] ;// mm3 = 4 words into esi[4..11]
pand mm5, mm7 ;// keep only LSB of mm5
pand mm0, mm7 ;// keep only LSB of mm0
pand mm1, mm7 ;// keep only LSB of mm1
pand mm3, mm7 ;// keep only LSB of mm3
paddw mm0, mm1 ;// mm0 += mm1
paddw mm5, mm3 ;// mm5 += mm3
movq mm1, mm0
movq mm3, mm5
pmullw mm0, PACKED_MINUS37 ;// mm4 *= -37
pmullw mm5, PACKED_MINUS21 ;// mm5 *= -21
pmulhw mm1, PACKED_MINUS37 ;// mm1 *= -37
pmulhw mm3, PACKED_MINUS21 ;// mm3 *= -21
movq mm2, mm0
movq mm7, mm5
punpcklwd mm0, mm1 ;// unpack the two low words into two dwords
punpckhwd mm2, mm1 ;// unpack the two upper words into two dwords
punpcklwd mm5, mm3 ;// unpack the two low words into two dwords
punpckhwd mm7, mm3 ;// unpack the two upper words into two dwords
paddd mm0, mm5
paddd mm2, mm7
paddd mm0, mm4 ;// mm0 += mm4 + mm5
paddd mm2, mm6 ;// mm2 += mm6 + mm7
movq mm7, PACKED_255 ;// mm7 = (255, 255, 255, 255)
movq mm5, [esi-5] ;// mm5 = 4 words into esi[-5..2]
movq mm4, [esi-4] ;// mm4 = 4 words into esi[-4..3]
movq mm1, [esi+5] ;// mm1 = 4 words into esi[5..12]
movq mm3, [esi+6] ;// mm3 = 4 words into esi[6..13]
pand mm5, mm7 ;// keep only LSB of mm5
pand mm4, mm7 ;// keep only LSB of mm4
pand mm1, mm7 ;// keep only LSB of mm1
pand mm3, mm7 ;// keep only LSB of mm3
paddw mm4, mm1 ;// mm4 += mm1
paddw mm5, mm3 ;// mm5 += mm3
movq mm1, mm4
movq mm3, mm5
pmullw mm4, PACKED_11 ;// mm4 *= 11
pmullw mm5, PACKED_5 ;// mm5 *= 5
pmulhw mm1, PACKED_11 ;// mm1 *= 11
pmulhw mm3, PACKED_5 ;// mm3 *= 5
movq mm6, mm4
movq mm7, mm5
punpcklwd mm4, mm1 ;// unpack the two low words into two dwords
punpckhwd mm6, mm1 ;// unpack the two upper words into two dwords
punpcklwd mm5, mm3 ;// unpack the two low words into two dwords
punpckhwd mm7, mm3 ;// unpack the two upper words into two dwords
paddd mm4, mm5
paddd mm6, mm7
paddd mm0, mm4
paddd mm2, mm6
paddd mm0, PACKED_256 ;// mm0 += mm4 + mm5 + (256, 256, 256, 256)
paddd mm2, PACKED_256 ;// mm2 += mm6 + mm7 + (256, 256, 256, 256)
psrld mm0, 9 ;// mm0 >>= 9
psrld mm2, 9 ;// mm0 >>= 9
packssdw mm0, mm2 ;// pack mm0 and mm2
packuswb mm0, mm0 ;// pack words into bytes
movd [edi], mm0 ;// store the lower 4 bytes of mm0 into edi[0..3]
add esi, 8 ;// esi += 8
add edi, 4 ;// edi += 4
dec ecx ;// decrement ecx
jnz conv444to422__l1 ;// loop while not zero
mov src, esi ;// update src
mov dst, edi ;// update dst
emms ;// empty MMX state
}
// epilogue
*dst++ = clp[(int)(228*(src[0]+src[1])
+70*(src[-1]+src[2])
-37*(src[-2]+src[3])
-21*(src[-3]+src[4])
+11*(src[-4]+src[5])
+ 5*(src[-5]+src[6])+256)>>9];
*dst++ = v = clp[(int)(228*(src[2]+src[3])
+70*(src[1]+src[4])
-37*(src[0]+src[5])
-21*(src[-1]+src[6])
+11*(src[-2]+src[7])
+ 5*(src[-3]+src[8])+256)>>9];
*dst++ = clp[(int)(228*(src[4]+src[5])
+70*(src[3]+src[6])
-37*(src[2]+src[7])
-21*(src[1]+src[7])
+11*(src[0]+src[7])
+ 5*(src[-1]+src[7])+256)>>9];
*dst++ = clp[(int)(228*(src[6]+src[7])
+70*(src[5]+src[7])
-37*(src[4]+src[7])
-21*(src[3]+src[7])
+11*(src[2]+src[7])
+ 5*(src[1]+src[7])+256)>>9];
src += 8;
} while(--h);
}
else
{
for (j=0; j<height; j++)
{
for (i=0; i<width; i+=2)
{
im5 = (i<5) ? 0 : i-5;
im4 = (i<4) ? 0 : i-4;
im3 = (i<3) ? 0 : i-3;
im2 = (i<2) ? 0 : i-2;
im1 = (i<1) ? 0 : i-1;
ip1 = (i<width-1) ? i+1 : width-1;
ip2 = (i<width-2) ? i+2 : width-1;
ip3 = (i<width-3) ? i+3 : width-1;
ip4 = (i<width-4) ? i+4 : width-1;
ip5 = (i<width-5) ? i+5 : width-1;
ip6 = (i<width-5) ? i+6 : width-1;
dst[i>>1] = clp[(int)(228*(src[i]+src[ip1])
+70*(src[im1]+src[ip2])
-37*(src[im2]+src[ip3])
-21*(src[im3]+src[ip4])
+11*(src[im4]+src[ip5])
+ 5*(src[im5]+src[ip6])+256)>>9];
}
src+= width;
dst+= width>>1;
}
}
}
/*
 * conv422to420 - vertical 2:1 decimation of one chroma plane.
 *
 * src : input plane, read as `height` rows of w = width/2 bytes.
 * dst : output plane, written as height/2 rows of w bytes.
 *
 * Every output sample is a 12-tap symmetric FIR filter (half-sample phase
 * shift) applied down a column:
 *
 *   ( 228*(r0 +r1) + 70*(r-1+r2) - 37*(r-2+r3)
 *    - 21*(r-3+r4) + 11*(r-4+r5) +  5*(r-5+r6) + 256 ) >> 9
 *
 * passed through the clp[] lookup table.  Rows above the first / below the
 * last source row are replaced by the first / last row respectively.
 *
 * Relies on file-level state not declared in this function: width, height,
 * clp, cpu_MMX, v and the PACKED_* MMX constants.
 *
 * Two code paths produce the same result:
 *   - MMX path: four columns per pass; the first four and last four output
 *     rows are done in C (they need edge clamping), the rows in between in
 *     inline assembly.
 *   - Scalar path: plain C with per-tap clamped row indices.
 *
 * Fixes relative to the previous revision:
 *   - the +6 tap for source row height-6 read row `height` (one row past the
 *     plane) in both the scalar path and the MMX epilogue; it now clamps to
 *     row height-1;
 *   - the MMX loop used a logical shift (psrld) on signed sums, so negative
 *     sums saturated to 255 rather than 0; it now uses psrad, matching the
 *     signed >>9 of the C code;
 *   - the MMX path is only taken when w is a multiple of 4 (width & 7 == 0),
 *     since it processes whole groups of four columns;
 *   - removed a no-op debugging comparison that read dst before writing it.
 */
static void conv422to420(src,dst)
unsigned char *src, *dst;
{
  int w, i, j, k, jm5, jm4, jm3, jm2, jm1;
  int jp1, jp2, jp3, jp4, jp5, jp6;
  int height_div2 = height >> 1;
  unsigned char *s, *d;

  w = width>>1;

  /* The MMX path handles w>>2 groups of 4 columns, so w must be a multiple
     of 4.  height > 16 guarantees the assembly loop count (height/2 - 8)
     is at least 1. */
  if(cpu_MMX && width > 8 && (width & 7) == 0 && height > 16 && (height & 1) == 0)
  {
    i = w >> 2;   /* number of 4-column groups */
    do
    {
      // prologue: output rows 0..3, taps above row 0 clamp to row 0 (s[k])
      s = src;
      d = dst;
      for(k = 0; k < 4; k++)
      {
        d[k] = clp[(int)(228*(s[k]+s[k+w])
                         +70*(s[k]+s[k+w*2])
                         -37*(s[k]+s[k+w*3])
                         -21*(s[k]+s[k+w*4])
                         +11*(s[k]+s[k+w*5])
                         + 5*(s[k]+s[k+w*6])+256)>>9];
        d[k+w] = clp[(int)(228*(s[k+w*2]+s[k+w*3])
                           +70*(s[k+w]+s[k+w*4])
                           -37*(s[k]+s[k+w*5])
                           -21*(s[k]+s[k+w*6])
                           +11*(s[k]+s[k+w*7])
                           + 5*(s[k]+s[k+w*8])+256)>>9];
        d[k+2*w] = clp[(int)(228*(s[k+w*4]+s[k+w*5])
                             +70*(s[k+w*3]+s[k+w*6])
                             -37*(s[k+w*2]+s[k+w*7])
                             -21*(s[k+w]+s[k+w*8])
                             +11*(s[k]+s[k+w*9])
                             + 5*(s[k]+s[k+w*10])+256)>>9];
        d[k+3*w] = clp[(int)(228*(s[k+w*6]+s[k+w*7])
                             +70*(s[k+w*5]+s[k+w*8])
                             -37*(s[k+w*4]+s[k+w*9])
                             -21*(s[k+w*3]+s[k+w*10])
                             +11*(s[k+w*2]+s[k+w*11])
                             + 5*(s[k+w]+s[k+w*12])+256)>>9];
      }
      d += 4*w;              /* 4 output rows done            */
      s += 8*w;              /* = 8 source rows consumed      */
      j = height_div2 - 8;   /* rows left for the MMX loop    */

      _asm
      {
        mov esi, s                      ;// esi = current source row (tap r0)
        mov edi, d                      ;// edi = current output row
      conv422to420__l1:
        mov eax, w                      ;// eax = w
        mov ebx, eax
        neg ebx                         ;// ebx = -w
        pxor mm7, mm7                   ;// mm7 = 0 (for byte->word unpack)

        ;// taps 228*(r0+r1) and 70*(r-1+r2)
        movd mm2, [esi+ebx]             ;// mm2 = row -1
        movd mm0, [esi]                 ;// mm0 = row  0
        movd mm1, [esi+eax]             ;// mm1 = row +1
        movd mm3, [esi+2*eax]           ;// mm3 = row +2
        punpcklbw mm0, mm7              ;// widen 4 bytes to 4 words
        punpcklbw mm1, mm7
        punpcklbw mm2, mm7
        punpcklbw mm3, mm7
        paddw mm0, mm1                  ;// mm0 = r0  + r1
        paddw mm2, mm3                  ;// mm2 = r-1 + r2
        movq mm1, mm0
        movq mm3, mm2
        pmullw mm0, PACKED_228          ;// mm0 = low  words of 228*(r0+r1)
        pmulhw mm1, PACKED_228          ;// mm1 = high words of 228*(r0+r1)
        pmullw mm2, PACKED_70           ;// mm2 = low  words of 70*(r-1+r2)
        pmulhw mm3, PACKED_70           ;// mm3 = high words of 70*(r-1+r2)
        movq mm4, mm0
        movq mm5, mm2
        punpcklwd mm0, mm1              ;// mm0 = 32-bit products, columns 0..1
        punpckhwd mm4, mm1              ;// mm4 = 32-bit products, columns 2..3
        punpcklwd mm2, mm3
        punpckhwd mm5, mm3
        paddd mm0, mm2                  ;// accumulate: mm0 = cols 0..1
        paddd mm4, mm5                  ;//             mm4 = cols 2..3

        ;// taps -37*(r-2+r3) and -21*(r-3+r4)
        lea ecx, [2*ebx+ebx]            ;// ecx = -3*w
        lea edx, [2*eax+eax]            ;// edx =  3*w
        movd mm2, [esi+ecx]             ;// mm2 = row -3
        movd mm6, [esi+2*ebx]           ;// mm6 = row -2
        movd mm1, [esi+edx]             ;// mm1 = row +3
        movd mm3, [esi+4*eax]           ;// mm3 = row +4
        punpcklbw mm6, mm7
        punpcklbw mm1, mm7
        punpcklbw mm2, mm7
        punpcklbw mm3, mm7
        paddw mm6, mm1                  ;// mm6 = r-2 + r3
        paddw mm2, mm3                  ;// mm2 = r-3 + r4
        movq mm1, mm6
        movq mm3, mm2
        pmullw mm6, PACKED_MINUS37      ;// mm6 = low  words of -37*(r-2+r3)
        pmulhw mm1, PACKED_MINUS37      ;// mm1 = high words of -37*(r-2+r3)
        pmullw mm2, PACKED_MINUS21      ;// mm2 = low  words of -21*(r-3+r4)
        pmulhw mm3, PACKED_MINUS21      ;// mm3 = high words of -21*(r-3+r4)
        movq mm7, mm6                   ;// mm7 is reused as scratch from here
        movq mm5, mm2
        punpcklwd mm6, mm1
        punpckhwd mm7, mm1
        punpcklwd mm2, mm3
        punpckhwd mm5, mm3
        paddd mm6, mm2
        paddd mm7, mm5
        paddd mm0, mm6                  ;// accumulate cols 0..1
        paddd mm4, mm7                  ;// accumulate cols 2..3

        ;// taps 11*(r-4+r5) and 5*(r-5+r6)
        lea ecx, [4*ebx+ebx]            ;// ecx = -5*w
        lea edx, [4*eax+eax]            ;// edx =  5*w
        add eax, edx                    ;// eax =  6*w
        pxor mm7, mm7                   ;// mm7 = 0 again
        movd mm2, [esi+ecx]             ;// mm2 = row -5
        movd mm6, [esi+4*ebx]           ;// mm6 = row -4
        movd mm1, [esi+edx]             ;// mm1 = row +5
        movd mm3, [esi+eax]             ;// mm3 = row +6
        punpcklbw mm6, mm7
        punpcklbw mm1, mm7
        punpcklbw mm2, mm7
        punpcklbw mm3, mm7
        paddw mm6, mm1                  ;// mm6 = r-4 + r5
        paddw mm2, mm3                  ;// mm2 = r-5 + r6
        movq mm1, mm6
        movq mm3, mm2
        pmullw mm6, PACKED_11           ;// mm6 = low  words of 11*(r-4+r5)
        pmulhw mm1, PACKED_11           ;// mm1 = high words of 11*(r-4+r5)
        pmullw mm2, PACKED_5            ;// mm2 = low  words of 5*(r-5+r6)
        pmulhw mm3, PACKED_5            ;// mm3 = high words of 5*(r-5+r6)
        movq mm7, mm6
        movq mm5, mm2
        punpcklwd mm6, mm1
        punpckhwd mm7, mm1
        punpcklwd mm2, mm3
        punpckhwd mm5, mm3
        paddd mm6, mm2
        paddd mm7, mm5
        paddd mm0, mm6
        paddd mm4, mm7

        ;// round, scale and clamp to 0..255
        paddd mm0, PACKED_256           ;// + rounding constant
        paddd mm4, PACKED_256
        psrad mm0, 9                    ;// arithmetic shift: sums may be negative
        psrad mm4, 9
        packssdw mm0, mm4               ;// 4 dwords -> 4 signed words
        packuswb mm0, mm0               ;// signed words -> bytes saturated to 0..255
        movd [edi], mm0                 ;// store 4 output bytes

        add esi, width                  ;// advance two source rows (width = 2*w)
        add edi, w                      ;// advance one output row
        dec dword ptr j
        jnz conv422to420__l1            ;// loop while rows remain
        mov s, esi                      ;// hand positions back to the C code
        mov d, edi
        emms                            ;// leave MMX state
      }

      // epilogue: last 4 output rows; s is source row height-8, so the last
      // valid row is s[k+w*7] and every tap below it clamps to that row
      for(k = 0; k < 4; k++)
      {
        d[k] = clp[(int)(228*(s[k]+s[k+w])
                         +70*(s[k-w]+s[k+w*2])
                         -37*(s[k-w*2]+s[k+w*3])
                         -21*(s[k-w*3]+s[k+w*4])
                         +11*(s[k-w*4]+s[k+w*5])
                         + 5*(s[k-w*5]+s[k+w*6])+256)>>9];
        d[k+w] = clp[(int)(228*(s[k+w*2]+s[k+w*3])
                           +70*(s[k+w]+s[k+w*4])
                           -37*(s[k]+s[k+w*5])
                           -21*(s[k-w]+s[k+w*6])
                           +11*(s[k-w*2]+s[k+w*7])
                           + 5*(s[k-w*3]+s[k+w*7])+256)>>9];  /* was w*8: past the plane */
        d[k+2*w] = clp[(int)(228*(s[k+w*4]+s[k+w*5])
                             +70*(s[k+w*3]+s[k+w*6])
                             -37*(s[k+w*2]+s[k+w*7])
                             -21*(s[k+w]+s[k+w*7])
                             +11*(s[k]+s[k+w*7])
                             + 5*(s[k-w]+s[k+w*7])+256)>>9];
        d[k+3*w] = clp[(int)(228*(s[k+w*6]+s[k+w*7])
                             +70*(s[k+w*5]+s[k+w*7])
                             -37*(s[k+w*4]+s[k+w*7])
                             -21*(s[k+w*3]+s[k+w*7])
                             +11*(s[k+w*2]+s[k+w*7])
                             + 5*(s[k+w]+s[k+w*7])+256)>>9];
      }
      src+=4;   /* next group of 4 columns */
      dst+=4;
    } while(--i);
  }
  else
  {
    for (i=0; i<w; i++)
    {
      for (j=0; j<height; j+=2)
      {
        /* tap row indices, clamped to [0, height-1] */
        jm5 = (j<5) ? 0 : j-5;
        jm4 = (j<4) ? 0 : j-4;
        jm3 = (j<3) ? 0 : j-3;
        jm2 = (j<2) ? 0 : j-2;
        jm1 = (j<1) ? 0 : j-1;
        jp1 = (j<height-1) ? j+1 : height-1;
        jp2 = (j<height-2) ? j+2 : height-1;
        jp3 = (j<height-3) ? j+3 : height-1;
        jp4 = (j<height-4) ? j+4 : height-1;
        jp5 = (j<height-5) ? j+5 : height-1;
        jp6 = (j<height-6) ? j+6 : height-1;   /* was height-5: gave jp6 == height */

        // FIR filter with 0.5 sample interval phase shift
        v = clp[(int)(228*(src[w*j]+src[w*jp1])
                      +70*(src[w*jm1]+src[w*jp2])
                      -37*(src[w*jm2]+src[w*jp3])
                      -21*(src[w*jm3]+src[w*jp4])
                      +11*(src[w*jm4]+src[w*jp5])
                      + 5*(src[w*jm5]+src[w*jp6])+256)>>9];
        dst[w*(j>>1)] = v;
      }
      src++;   /* next column */
      dst++;
    }
  }
}
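/*
 * Code-viewer residue (not part of readpic.c) - keyboard shortcuts:
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Toggle theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */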