📄 postprocess_template.c
字号:
src[l4]-= d; src[l5]+= d; } src++; }src-=8; for(x=0; x<8; x++) { int y; for(y=4; y<6; y++) { int d= src[x+y*stride] - tmp[x+(y-4)*8]; int ad= ABS(d); static int max=0; static int sum=0; static int num=0; static int bias=0; if(max<ad) max=ad; sum+= ad>3 ? 1 : 0; if(ad>3) { src[0] = src[7] = src[stride*7] = src[(stride+1)*7]=255; } if(y==4) bias+=d; num++; if(num%1000000 == 0) { printf(" %d %d %d %d\n", num, sum, max, bias); } } }}*/#elif defined (HAVE_MMX) src+= stride*4; asm volatile( "pxor %%mm7, %%mm7 \n\t" "leal -40(%%esp), %%ecx \n\t" // make space for 4 8-byte vars "andl $0xFFFFFFF8, %%ecx \n\t" // align// 0 1 2 3 4 5 6 7// %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 edx+%1 edx+2%1// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 "movq (%0), %%mm0 \n\t" "movq %%mm0, %%mm1 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" // low part of line 0 "punpckhbw %%mm7, %%mm1 \n\t" // high part of line 0 "movq (%0, %1), %%mm2 \n\t" "leal (%0, %1, 2), %%eax \n\t" "movq %%mm2, %%mm3 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" // low part of line 1 "punpckhbw %%mm7, %%mm3 \n\t" // high part of line 1 "movq (%%eax), %%mm4 \n\t" "movq %%mm4, %%mm5 \n\t" "punpcklbw %%mm7, %%mm4 \n\t" // low part of line 2 "punpckhbw %%mm7, %%mm5 \n\t" // high part of line 2 "paddw %%mm0, %%mm0 \n\t" // 2L0 "paddw %%mm1, %%mm1 \n\t" // 2H0 "psubw %%mm4, %%mm2 \n\t" // L1 - L2 "psubw %%mm5, %%mm3 \n\t" // H1 - H2 "psubw %%mm2, %%mm0 \n\t" // 2L0 - L1 + L2 "psubw %%mm3, %%mm1 \n\t" // 2H0 - H1 + H2 "psllw $2, %%mm2 \n\t" // 4L1 - 4L2 "psllw $2, %%mm3 \n\t" // 4H1 - 4H2 "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 "movq (%%eax, %1), %%mm2 \n\t" "movq %%mm2, %%mm3 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" // L3 "punpckhbw %%mm7, %%mm3 \n\t" // H3 "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 - L3 "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - H3 "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 "movq %%mm0, (%%ecx) \n\t" // 2L0 - 5L1 + 5L2 - 2L3 "movq %%mm1, 8(%%ecx) \n\t" // 2H0 - 5H1 + 5H2 - 2H3 "movq (%%eax, %1, 2), %%mm0 \n\t" "movq %%mm0, %%mm1 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" // L4 "punpckhbw %%mm7, %%mm1 \n\t" // H4 "psubw %%mm0, %%mm2 \n\t" // L3 - L4 "psubw %%mm1, %%mm3 \n\t" // H3 - H4 "movq %%mm2, 16(%%ecx) \n\t" // L3 - L4 "movq %%mm3, 24(%%ecx) \n\t" // H3 - H4 "paddw %%mm4, %%mm4 \n\t" // 2L2 "paddw %%mm5, %%mm5 \n\t" // 2H2 "psubw %%mm2, %%mm4 \n\t" // 2L2 - L3 + L4 "psubw %%mm3, %%mm5 \n\t" // 2H2 - H3 + H4 "leal (%%eax, %1), %0 \n\t" "psllw $2, %%mm2 \n\t" // 4L3 - 4L4 "psllw $2, %%mm3 \n\t" // 4H3 - 4H4 "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 "psubw %%mm3, %%mm5 \n\t" // 2H2 - 5H3 + 5H4//50 opcodes so far "movq (%0, %1, 2), %%mm2 \n\t" "movq %%mm2, %%mm3 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" // L5 "punpckhbw %%mm7, %%mm3 \n\t" // H5 "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 - L5 "psubw %%mm3, %%mm5 \n\t" // 2H2 - 5H3 + 5H4 - H5 "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 - 2L5 "psubw %%mm3, %%mm5 \n\t" // 2H2 - 5H3 + 5H4 - 2H5 "movq (%%eax, %1, 4), %%mm6 \n\t" "punpcklbw %%mm7, %%mm6 \n\t" // L6 "psubw %%mm6, %%mm2 \n\t" // L5 - L6 "movq (%%eax, %1, 4), %%mm6 \n\t" "punpckhbw %%mm7, %%mm6 \n\t" // H6 "psubw %%mm6, %%mm3 \n\t" // H5 - H6 "paddw %%mm0, %%mm0 \n\t" // 2L4 "paddw %%mm1, %%mm1 \n\t" // 2H4 "psubw %%mm2, %%mm0 \n\t" // 2L4 - L5 + L6 "psubw %%mm3, %%mm1 \n\t" // 2H4 - H5 + H6 "psllw $2, %%mm2 \n\t" // 4L5 - 4L6 "psllw $2, %%mm3 \n\t" // 4H5 - 4H6 "psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 "movq (%0, %1, 4), %%mm2 \n\t" "movq %%mm2, %%mm3 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" // L7 "punpckhbw %%mm7, %%mm3 \n\t" // H7 "paddw %%mm2, %%mm2 \n\t" // 2L7 "paddw %%mm3, %%mm3 \n\t" // 2H7 "psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 - 2L7 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7 "movq (%%ecx), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 "movq 8(%%ecx), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3#ifdef HAVE_MMX2 "movq %%mm7, %%mm6 \n\t" // 0 "psubw %%mm0, %%mm6 \n\t" "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| "movq %%mm7, %%mm6 \n\t" // 0 "psubw %%mm1, %%mm6 \n\t" "pmaxsw %%mm6, %%mm1 \n\t" // |2H4 - 5H5 + 5H6 - 2H7| "movq %%mm7, %%mm6 \n\t" // 0 "psubw %%mm2, %%mm6 \n\t" "pmaxsw %%mm6, %%mm2 \n\t" // |2L0 - 5L1 + 5L2 - 2L3| "movq %%mm7, %%mm6 \n\t" // 0 "psubw %%mm3, %%mm6 \n\t" "pmaxsw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3|#else "movq %%mm7, %%mm6 \n\t" // 0 "pcmpgtw %%mm0, %%mm6 \n\t" "pxor %%mm6, %%mm0 \n\t" "psubw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| "movq %%mm7, %%mm6 \n\t" // 0 "pcmpgtw %%mm1, %%mm6 \n\t" "pxor %%mm6, %%mm1 \n\t" "psubw %%mm6, %%mm1 \n\t" // |2H4 - 5H5 + 5H6 - 2H7| "movq %%mm7, %%mm6 \n\t" // 0 "pcmpgtw %%mm2, %%mm6 \n\t" "pxor %%mm6, %%mm2 \n\t" "psubw %%mm6, %%mm2 \n\t" // |2L0 - 5L1 + 5L2 - 2L3| "movq %%mm7, %%mm6 \n\t" // 0 "pcmpgtw %%mm3, %%mm6 \n\t" "pxor %%mm6, %%mm3 \n\t" "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3|#endif#ifdef HAVE_MMX2 "pminsw %%mm2, %%mm0 \n\t" "pminsw %%mm3, %%mm1 \n\t"#else "movq %%mm0, %%mm6 \n\t" "psubusw %%mm2, %%mm6 \n\t" "psubw %%mm6, %%mm0 \n\t" "movq %%mm1, %%mm6 \n\t" "psubusw %%mm3, %%mm6 \n\t" "psubw %%mm6, %%mm1 \n\t"#endif "movq %%mm7, %%mm6 \n\t" // 0 "pcmpgtw %%mm4, %%mm6 \n\t" // sign(2L2 - 5L3 + 5L4 - 2L5) "pxor %%mm6, %%mm4 \n\t" "psubw %%mm6, %%mm4 \n\t" // |2L2 - 5L3 + 5L4 - 2L5| "pcmpgtw %%mm5, %%mm7 \n\t" // sign(2H2 - 5H3 + 5H4 - 2H5) "pxor %%mm7, %%mm5 \n\t" "psubw %%mm7, %%mm5 \n\t" // |2H2 - 5H3 + 5H4 - 2H5|// 100 opcodes "movd %2, %%mm2 \n\t" // QP "psllw $3, %%mm2 \n\t" // 8QP "movq %%mm2, %%mm3 \n\t" // 8QP "pcmpgtw %%mm4, %%mm2 \n\t" "pcmpgtw %%mm5, %%mm3 \n\t" "pand %%mm2, %%mm4 \n\t" "pand %%mm3, %%mm5 \n\t" "psubusw %%mm0, %%mm4 \n\t" // hd "psubusw %%mm1, %%mm5 \n\t" // ld "movq "MANGLE(w05)", %%mm2 \n\t" // 5 "pmullw %%mm2, %%mm4 \n\t" "pmullw %%mm2, %%mm5 \n\t" "movq "MANGLE(w20)", %%mm2 \n\t" // 32 "paddw %%mm2, %%mm4 \n\t" "paddw %%mm2, %%mm5 \n\t" "psrlw $6, %%mm4 \n\t" "psrlw $6, %%mm5 \n\t" "movq 16(%%ecx), %%mm0 \n\t" // L3 - L4 "movq 24(%%ecx), %%mm1 \n\t" // H3 - H4 "pxor %%mm2, %%mm2 \n\t" "pxor %%mm3, %%mm3 \n\t" "pcmpgtw %%mm0, %%mm2 \n\t" // sign (L3-L4) "pcmpgtw %%mm1, %%mm3 \n\t" // sign (H3-H4) "pxor %%mm2, %%mm0 \n\t" "pxor %%mm3, %%mm1 \n\t" "psubw %%mm2, %%mm0 \n\t" // |L3-L4| "psubw %%mm3, %%mm1 \n\t" // |H3-H4| "psrlw $1, %%mm0 \n\t" // |L3 - L4|/2 "psrlw $1, %%mm1 \n\t" // |H3 - H4|/2 "pxor %%mm6, %%mm2 \n\t" "pxor %%mm7, %%mm3 \n\t" "pand %%mm2, %%mm4 \n\t" "pand %%mm3, %%mm5 \n\t"#ifdef HAVE_MMX2 "pminsw %%mm0, %%mm4 \n\t" "pminsw %%mm1, %%mm5 \n\t"#else "movq %%mm4, %%mm2 \n\t" "psubusw %%mm0, %%mm2 \n\t" "psubw %%mm2, %%mm4 \n\t" "movq %%mm5, %%mm2 \n\t" "psubusw %%mm1, %%mm2 \n\t" "psubw %%mm2, %%mm5 \n\t"#endif "pxor %%mm6, %%mm4 \n\t" "pxor %%mm7, %%mm5 \n\t" "psubw %%mm6, %%mm4 \n\t" "psubw %%mm7, %%mm5 \n\t" "packsswb %%mm5, %%mm4 \n\t" "movq (%0), %%mm0 \n\t" "paddb %%mm4, %%mm0 \n\t" "movq %%mm0, (%0) \n\t" "movq (%0, %1), %%mm0 \n\t" "psubb %%mm4, %%mm0 \n\t" "movq %%mm0, (%0, %1) \n\t" : "+r" (src) : "r" (stride), "m" (c->pQPb) : "%eax", "%ecx" );#else const int l1= stride; const int l2= stride + l1; const int l3= stride + l2; const int l4= stride + l3; const int l5= stride + l4; const int l6= stride + l5; const int l7= stride + l6; const int l8= stride + l7;// const int l9= stride + l8; int x; src+= stride*3; for(x=0; x<BLOCK_SIZE; x++) { const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]); if(ABS(middleEnergy) < 8*c->QP) { const int q=(src[l4] - src[l5])/2; const int leftEnergy= 5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]); const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]); int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); d= MAX(d, 0); d= (5*d + 32) >> 6; d*= SIGN(-middleEnergy); if(q>0) { d= d<0 ? 0 : d; d= d>q ? q : d; } else { d= d>0 ? 0 : d; d= d<q ? q : d; } src[l4]-= d; src[l5]+= d; } src++; }#endif}static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c){#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) asm volatile( "pxor %%mm6, %%mm6 \n\t" "pcmpeqb %%mm7, %%mm7 \n\t" "movq %2, %%mm0 \n\t" "punpcklbw %%mm6, %%mm0 \n\t" "psrlw $1, %%mm0 \n\t" "psubw %%mm7, %%mm0 \n\t" "packuswb %%mm0, %%mm0 \n\t" "movq %%mm0, %3 \n\t" "leal (%0, %1), %%eax \n\t" "leal (%%eax, %1, 4), %%edx \n\t" // 0 1 2 3 4 5 6 7 8 9// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1#undef FIND_MIN_MAX#ifdef HAVE_MMX2#define FIND_MIN_MAX(addr)\ "movq " #addr ", %%mm0 \n\t"\ "pminub %%mm0, %%mm7 \n\t"\ "pmaxub %%mm0, %%mm6 \n\t"#else#define FIND_MIN_MAX(addr)\ "movq " #addr ", %%mm0 \n\t"\ "movq %%mm7, %%mm1 \n\t"\ "psubusb %%mm0, %%mm6 \n\t"\ "paddb %%mm0, %%mm6 \n\t"\ "psubusb %%mm0, %%mm1 \n\t"\ "psubb %%mm1, %%mm7 \n\t"#endifFIND_MIN_MAX((%%eax))FIND_MIN_MAX((%%eax, %1))FIND_MIN_MAX((%%eax, %1, 2))FIND_MIN_MAX((%0, %1, 4))FIND_MIN_MAX((%%edx))FIND_MIN_MAX((%%edx, %1))FIND_MIN_MAX((%%edx, %1, 2))FIND_MIN_MAX((%0, %1, 8)) "movq %%mm7, %%mm4 \n\t" "psrlq $8, %%mm7 \n\t"#ifdef HAVE_MMX2 "pminub %%mm4, %%mm7 \n\t" // min of pixels "pshufw $0xF9, %%mm7, %%mm4 \n\t" "pminub %%mm4, %%mm7 \n\t" // min of pixels "pshufw $0xFE, %%mm7, %%mm4 \n\t" "pminub %%mm4, %%mm7 \n\t"#else "movq %%mm7, %%mm1 \n\t" "psubusb %%mm4, %%mm1 \n\t" "psubb %%mm1, %%mm7 \n\t" "movq %%mm7, %%mm4 \n\t" "psrlq $16, %%mm7 \n\t" "movq %%mm7, %%mm1 \n\t" "psubusb %%mm4, %%mm1 \n\t" "psubb %%mm1, %%mm7 \n\t" "movq %%mm7, %%mm4 \n\t" "psrlq $32, %%mm7 \n\t" "movq %%mm7, %%mm1 \n\t" "psubusb %%mm4, %%mm1 \n\t" "psubb %%mm1, %%mm7 \n\t"#endif "movq %%mm6, %%mm4 \n\t" "psrlq $8, %%mm6 \n\t"#ifdef HAVE_MMX2 "pmaxub %%mm4, %%mm6 \n\t" // max of pixels "pshufw $0xF9, %%mm6, %%mm4 \n\t" "pmaxub %%mm4, %%mm6 \n\t" "pshufw $0xFE, %%mm6, %%mm4 \n\t" "pmaxub %%mm4, %%mm6 \n\t"#else "psubusb %%mm4, %%mm6 \n\t" "paddb %%mm4, %%mm6 \n\t" "movq %%mm6, %%mm4 \n\t" "psrlq $16, %%mm6 \n\t" "psubusb %%mm4, %%mm6 \n\t" "paddb %%mm4, %%mm6 \n\t" "movq %%mm6, %%mm4 \n\t" "psrlq $32, %%mm6 \n\t" "psubusb %%mm4, %%mm6 \n\t" "paddb %%mm4, %%mm6 \n\t"#endif "movq %%mm6, %%mm0 \n\t" // max "psubb %%mm7, %%mm6 \n\t" // max - min "movd %%mm6, %%ecx \n\t" "cmpb "MANGLE(deringThreshold)", %%cl \n\t" " jb 1f \n\t" "leal -24(%%esp), %%ecx \n\t" "andl $0xFFFFFFF8, %%ecx \n\t" PAVGB(%%mm0, %%mm7) // a=(max + min)/2 "punpcklbw %%mm7, %%mm7 \n\t" "punpcklbw %%mm7, %%mm7 \n\t" "punpcklbw %%mm7, %%mm7 \n\t" "movq %%mm7, (%%ecx) \n\t" "movq (%0), %%mm0 \n\t" // L10 "movq %%mm0, %%mm1 \n\t" // L10 "movq %%mm0, %%mm2 \n\t" // L10 "psllq $8, %%mm1 \n\t" "psrlq $8, %%mm2 \n\t" "movd -4(%0), %%mm3 \n\t" "movd 8(%0), %%mm4 \n\t" "psrlq $24, %%mm3 \n\t" "psllq $56, %%mm4 \n\t"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -