📄 deblockopt.cpp
字号:
/*
 * Finishes the variance test for the rows annotated S_5..S_1, then processes
 * rows S0..S4: each 8-pixel row is loaded, widened from bytes to 16-bit words
 * against mm7, and stored to the intermediate buffer at edi (16 bytes per
 * row: low four pixels first, high four pixels 8 bytes later).  In parallel
 * the sum of absolute differences between neighbouring rows is accumulated
 * in mm4 and compared against FLimit.
 *
 * The macros take the destination first: movq(edi+64, mm0) is a store and
 * movq(mm0, eax) is a load.
 * abs(a - b) is built as (a -sat b) | (b -sat a) using psubusb, because one of
 * the two unsigned saturating differences is always zero.
 *
 * Result: mm6 = per-pixel byte mask, all ones where both variance sums are
 * below FLimit and abs(S0 - S_1) is below QStep.
 *
 * Assumes mm7 == 0 and that mm0/mm1 (row S_1), mm4 (partial sum) and mm5/mm6
 * (saturated differences of S_2 and S_1) were set up by the preceding code
 * -- TODO confirm.  ecx is presumably Pitch and edx -Pitch -- TODO confirm.
 */
punpcklbw (mm0, mm7 );/* mm0 = low four pixels of S_1 widened to words */
por (mm5, mm6 );/* mm5 = abs(S_2 - S_1) */
movq ( edi+64, mm0 );/* store low half of row -1 */
movq (mm6, mm1 );/* mm6 = S_1 (kept as bytes for the next difference) */
punpckhbw (mm1, mm7 );/* mm1 = high four pixels of S_1 widened to words */
movq ( edi+72, mm1 );/* store high half of row -1 */
movq (mm0, eax );/* mm0 = row at Src (S0) */
paddusb ( mm4, mm5 );/* mm4 = abs(S_5-S_4)+abs(S_4-S_3)+abs(S_3-S_2)+abs(S_2-S_1), saturating */
movq ( Variance11, mm4 );/* save the first variance sum */
movq ( mm5, FLimitMmx );/* mm5 = FLimit threshold (presumably replicated per byte -- TODO confirm) */
/* pcmpgtb compares signed bytes, so the unsigned sum is biased by 128 first.
   Eight128c is presumably eight bytes of 128, and FLimitMmx presumably
   carries the same bias -- TODO confirm. */
psubb ( mm4, Eight128c );/* mm4 = sum - 128 */
pcmpgtb ( mm5, mm4 );/* mm5 = mask: first variance sum < FLimit */
movq (mm1, mm0 );/* mm1 = S0 */
punpcklbw( mm0, mm7 );/* mm0 = low half of S0 as words */
movq ( mm4, mm1 );/* mm4 = S0 */
movq( edi+80, mm0 );/* store low half of row 0 */
psubusb (mm4, mm6 );/* mm4 = S0 - S_1 (saturating) */
psubusb ( mm6, mm1 );/* mm6 = S_1 - S0 (saturating) */
movq( mm0, eax + ecx );/* mm0 = row at Src + Pitch (S1) */
movq ( mm3, QStepMmx );/* mm3 = QStep threshold (presumably replicated per byte -- TODO confirm) */
por ( mm4, mm6 );/* mm4 = abs(S0 - S_1) */
movq ( mm6, mm1 );/* mm6 = S0 */
/* NOTE(review): this compare is signed and, unlike the FLimit compares, mm4
   is not biased by 128 here; presumably the difference and QStep are expected
   to stay below 128 -- confirm. */
pcmpgtb ( mm3, mm4 );/* mm3 = mask: abs(S0 - S_1) < QStep */
punpckhbw( mm1, mm7 );/* mm1 = high half of S0 as words */
movq (mm4, mm0 );/* mm4 = S1 */
pand (mm5, mm3 );/* mm5 = (first variance sum < FLimit) && (abs(S0 - S_1) < QStep) */
movq (edi+88, mm1 );/* store high half of row 0 */
movq ( mm1, mm0 );/* mm1 = S1 */
psubusb ( mm4, mm6 );/* mm4 = S1 - S0 (saturating) */
punpcklbw ( mm0, mm7 );/* mm0 = low half of S1 as words */
psubusb ( mm6, mm1 );/* mm6 = S0 - S1 (saturating) */
movq ( edi+96, mm0 );/* store low half of row 1 */
por ( mm4, mm6 );/* mm4 = abs(S1 - S0): start of the second variance sum */
movq ( mm2, eax + ecx *2 );/* mm2 = row at Src + 2*Pitch (S2) */
movq ( mm6, mm1 );/* mm6 = S1 */
eax= eax + ecx *4;/* eax = Src + 4*Pitch */
punpckhbw ( mm1, mm7 );/* mm1 = high half of S1 as words */
movq ( mm0, mm2 );/* mm0 = S2 */
movq ( edi+104, mm1 );/* store high half of row 1 */
movq ( mm3, mm0 );/* mm3 = S2 */
movq ( mm1, eax + edx );/* mm1 = row at eax + edx (S3; Src + 3*Pitch if edx == -Pitch -- TODO confirm) */
punpcklbw ( mm2, mm7 );/* mm2 = low half of S2 as words */
psubusb ( mm3, mm6 );/* mm3 = S2 - S1 (saturating) */
psubusb ( mm6, mm0 );/* mm6 = S1 - S2 (saturating) */
por ( mm3, mm6 );/* mm3 = abs(S1 - S2) */
movq( edi+112, mm2 );/* store low half of row 2 */
movq (mm6, mm0 );/* mm6 = S2 */
punpckhbw( mm0, mm7 );/* mm0 = high half of S2 as words */
paddusb ( mm4, mm3 );/* mm4 = abs(S0-S1)+abs(S1-S2) */
movq ( mm2, mm1 );/* mm2 = S3 */
movq ( mm3, mm1 );/* mm3 = S3 */
movq ( edi+120, mm0 );/* store high half of row 2 */
punpcklbw( mm1, mm7 );/* mm1 = low half of S3 as words */
movq ( mm0, eax );/* mm0 = row at Src + 4*Pitch (S4) */
psubusb ( mm3, mm6 );/* mm3 = S3 - S2 (saturating) */
psubusb ( mm6, mm2 );/* mm6 = S2 - S3 (saturating) */
por ( mm3, mm6 );/* mm3 = abs(S2 - S3) */
movq( edi+128, mm1 );/* store low half of row 3 */
movq ( mm6, mm2 );/* mm6 = S3 */
punpckhbw( mm2, mm7 );/* mm2 = high half of S3 as words */
paddusb ( mm4, mm3 );/* mm4 = abs(S0-S1)+abs(S1-S2)+abs(S2-S3) */
movq( mm1, mm0 );/* mm1 = S4 */
movq ( mm3, mm0 );/* mm3 = S4 */
movq( edi+136, mm2 );/* store high half of row 3 */
punpcklbw( mm0, mm7 );/* mm0 = low half of S4 as words */
psubusb ( mm3, mm6 );/* mm3 = S4 - S3 (saturating) */
movq( edi+144, mm0 );/* store low half of row 4 */
psubusb ( mm6, mm1 );/* mm6 = S3 - S4 (saturating) */
por ( mm3, mm6 );/* mm3 = abs(S3 - S4) */
punpckhbw( mm1, mm7 );/* mm1 = high half of S4 as words */
paddusb ( mm4, mm3 );/* mm4 = abs(S0-S1)+abs(S1-S2)+abs(S2-S3)+abs(S3-S4) */
movq ( Variance21, mm4 );/* save the second variance sum */
movq ( mm6, FLimitMmx );/* mm6 = FLimit threshold */
psubb ( mm4, Eight128c );/* mm4 = sum - 128 (bias for the signed compare) */
movq ( edi+152, mm1 );/* store high half of row 4 */
pcmpgtb ( mm6, mm4 );/* mm6 = mask: second variance sum < FLimit */
pand (mm6, mm5 );/* mm6 = final flag: both variance tests and the QStep test passed */
/* All rows have been copied to the intermediate buffer at edi. */
/* On entry here mm6 = per-pixel byte flag mask.  mm7 is overwritten below and
   no longer holds zero afterwards. */
/* Interleaving the byte mask with itself turns every 0x00/0xFF byte into a
   0x0000/0xFFFF word, matching the 16-bit pixels used by the filter. */
movq ( mm0, mm6);/* mm0 = flag bytes */
movq ( mm7, mm6 );/* mm7 = flag bytes */
punpckhbw ( mm0, mm6);/* mm0 = word mask for the high four pixels */
punpcklbw ( mm7, mm6);/* mm7 = word mask for the low four pixels */
/* mm0 and mm7 now hold the masks and must be preserved */
/* Filtering starts here.  x0..x4 are the word rows at edi+0, +16, +32, +48 and
   +64, i.e. the low-half pixels of five consecutive buffer rows.  This block
   builds sum = 3*x0 + x1 + x2 + x3 + x4 + 4 in mm3 and keeps x0 in mm1. */
movq ( mm3, edi );/* mm3 = x0 (row -5) */
movq ( mm2, edi+144 );/* mm2 = row 4 (presumably x9, used by later steps -- TODO confirm) */
movq ( mm1, mm3 );/* mm1 = x0, kept for the later "sum -= x0" updates */
paddw ( mm3, mm3 );/* mm3 = x0 + x0 */
movq ( mm4, edi+16 );/* mm4 = x1 */
paddw ( mm3, mm1 );/* mm3 = x0+x0+x0 */
paddw ( mm3, edi+32 );/* mm3 = x0+x0+x0+x2 */
paddw ( mm4, edi+48 );/* mm4 = x1+x3 */
paddw ( mm3, edi+64 );/* mm3 = x0+x0+x0+x2+x4 */
paddw ( mm4, FourFours );/* mm4 = x1+x3+4 (FourFours is presumably four words of 4, the rounding term -- TODO confirm) */
paddw ( mm3, mm4 );/* mm3 = sum = x0+x0+x0+x1+x2+x3+x4+4 */
/*
 * Filter outputs for the low four pixels.  Every step follows one pattern:
 *   new = (sum + xN) >> 3
 *   out = ((new - xN) & mask) + xN     -- new where the mask is set, xN otherwise
 * with sum in mm3, x0 in mm1, the word mask in mm7, xN in mm5 and the result
 * in mm4.  Between steps the window slides by adding the next row and
 * subtracting the oldest term.  Buffer rows are 16 bytes apart at edi; results
 * are written 16 bytes apart at esi.
 */
/* Des[-4*Pitch] = (sum + x1) >> 3 */
movq ( mm4, mm3 );/* mm4 = sum */
movq ( mm5, edi+16 );/* mm5 = x1 */
paddw ( mm4, mm5 );/* mm4 = sum + x1 */
psraw ( mm4, 3 );/* mm4 = (sum + x1) >> 3 */
psubw ( mm4, mm5 );/* mm4 = new value - old value */
pand ( mm4, mm7 );/* keep the delta only where the flag is set */
paddw ( mm4, mm5 );/* add the old value back */
movq ( esi, mm4 );/* write new x1 */
/* sum += x5 - x0 */
/* Des[-3*Pitch] = (sum + x2) >> 3 */
movq ( mm5, edi+32 );/* mm5 = x2 */
psubw ( mm3, mm1 );/* sum = sum - x0 */
paddw ( mm3, edi+80 );/* sum = sum + x5 */
movq ( mm4, mm5 );/* mm4 = x2 */
paddw ( mm4, mm3 );/* mm4 = sum + x2 */
psraw ( mm4, 3 );/* mm4 = (sum + x2) >> 3 */
psubw ( mm4, mm5 );/* mm4 = new value - old value */
pand ( mm4, mm7 );/* keep the delta only where the flag is set */
paddw ( mm4, mm5 );/* add the old value back */
movq ( esi+16, mm4 );/* write new x2 */
/* sum += x6 - x0 */
/* Des[-2*Pitch] = (sum + x3) >> 3 */
movq ( mm5, edi+48 );/* mm5 = x3 */
psubw ( mm3, mm1 );/* sum = sum - x0 */
paddw ( mm3, edi+96 );/* sum = sum + x6 */
movq ( mm4, mm5 );/* mm4 = x3 */
paddw ( mm4, mm3 );/* mm4 = sum + x3 */
psraw ( mm4, 3 );/* mm4 = (sum + x3) >> 3 */
psubw ( mm4, mm5 );/* mm4 = new value - old value */
pand ( mm4, mm7 );/* keep the delta only where the flag is set */
paddw ( mm4, mm5 );/* add the old value back */
movq ( esi+32, mm4 );/* write new x3 */
/* sum += x7 - x0 */
/* Des[-Pitch] = (sum + x4) >> 3 */
movq ( mm5, edi+64 );/* mm5 = x4 */
psubw ( mm3, mm1 );/* sum = sum - x0 */
paddw ( mm3, edi+112 );/* sum = sum + x7 */
movq ( mm4, mm5 );/* mm4 = x4 */
paddw ( mm4, mm3 );/* mm4 = sum + x4 */
psraw ( mm4, 3 );/* mm4 = (sum + x4) >> 3 */
psubw ( mm4, mm5 );/* mm4 = new value - x4 */
pand ( mm4, mm7 );/* keep the delta only where the flag is set */
paddw ( mm4, mm5 );/* add x4 back */
movq ( esi+48, mm4 );/* write new x4 */
/* sum += x8 - x1 (from this step on the oldest term leaving the window is x1, not x0) */
/* Des[0] = (sum + x5) >> 3 */
movq ( mm5, edi+80 );/* mm5 = x5 */
psubw ( mm3, edi+16 );/* sum = sum - x1 */
paddw ( mm3, edi+128 );/* sum = sum + x8 */
movq ( mm4, mm5 );/* mm4 = x5 */
paddw ( mm4, mm3 );/* mm4 = sum + x5 */
psraw ( mm4, 3 );/* mm4 = (sum + x5) >> 3 */
psubw ( mm4, mm5 );/* mm4 = new value - x5 */
pand ( mm4, mm7 );/* keep the delta only where the flag is set */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -