📄 postprocess_template.c
字号:
"movq "MANGLE(b00)", %%mm5 \n\t" // 0 "psubb %%mm2, %%mm1 \n\t" // 128 - renergy/16 "psubb %%mm3, %%mm5 \n\t" // 128 - lenergy/16 PMAXUB(%%mm1, %%mm2) // 128 + |renergy/16| PMAXUB(%%mm5, %%mm3) // 128 + |lenergy/16| PMINUB(%%mm2, %%mm3, %%mm1) // 128 + MIN(|lenergy|,|renergy|)/16// mm0=128-q, mm3=128 + MIN(|lenergy|,|renergy|)/16, mm4= menergy/16 + 128 "movq "MANGLE(b00)", %%mm7 \n\t" // 0 "movq %2, %%mm2 \n\t" // QP PAVGB(%%mm6, %%mm2) // 128 + QP/2 "psubb %%mm6, %%mm2 \n\t" "movq %%mm4, %%mm1 \n\t" "pcmpgtb %%mm7, %%mm1 \n\t" // SIGN(menergy) "pxor %%mm1, %%mm4 \n\t" "psubb %%mm1, %%mm4 \n\t" // 128 + |menergy|/16 "pcmpgtb %%mm4, %%mm2 \n\t" // |menergy|/16 < QP/2 "psubusb %%mm3, %%mm4 \n\t" //d=|menergy|/16 - MIN(|lenergy|,|renergy|)/16// mm0=128-q, mm1= SIGN(menergy), mm2= |menergy|/16 < QP/2, mm4= d/16 "movq %%mm4, %%mm3 \n\t" // d "psubusb "MANGLE(b01)", %%mm4 \n\t" PAVGB(%%mm7, %%mm4) // d/32 PAVGB(%%mm7, %%mm4) // (d + 32)/64 "paddb %%mm3, %%mm4 \n\t" // 5d/64 "pand %%mm2, %%mm4 \n\t" "movq "MANGLE(b80)", %%mm5 \n\t" // 128 "psubb %%mm0, %%mm5 \n\t" // q "paddsb %%mm6, %%mm5 \n\t" // fix bad rounding "pcmpgtb %%mm5, %%mm7 \n\t" // SIGN(q) "pxor %%mm7, %%mm5 \n\t" PMINUB(%%mm5, %%mm4, %%mm3) // MIN(|q|, 5d/64) "pxor %%mm1, %%mm7 \n\t" // SIGN(d*q) "pand %%mm7, %%mm4 \n\t" "movq (%%"REG_a", %1, 2), %%mm0 \n\t" "movq (%0, %1, 4), %%mm2 \n\t" "pxor %%mm1, %%mm0 \n\t" "pxor %%mm1, %%mm2 \n\t" "paddb %%mm4, %%mm0 \n\t" "psubb %%mm4, %%mm2 \n\t" "pxor %%mm1, %%mm0 \n\t" "pxor %%mm1, %%mm2 \n\t" "movq %%mm0, (%%"REG_a", %1, 2) \n\t" "movq %%mm2, (%0, %1, 4) \n\t" : : "r" (src), "r" ((long)stride), "m" (c->pQPb) : "%"REG_a, "%"REG_c );/* { int x; src-= stride; for(x=0; x<BLOCK_SIZE; x++) { const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]); if(ABS(middleEnergy)< 8*QP) { const int q=(src[l4] - src[l5])/2; const int leftEnergy= 5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]); const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]); int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); d= MAX(d, 0); d= (5*d + 32) >> 6; d*= SIGN(-middleEnergy); if(q>0) { d= d<0 ? 0 : d; d= d>q ? q : d; } else { d= d>0 ? 0 : d; d= d<q ? q : d; } src[l4]-= d; src[l5]+= d; } src++; }src-=8; for(x=0; x<8; x++) { int y; for(y=4; y<6; y++) { int d= src[x+y*stride] - tmp[x+(y-4)*8]; int ad= ABS(d); static int max=0; static int sum=0; static int num=0; static int bias=0; if(max<ad) max=ad; sum+= ad>3 ? 1 : 0; if(ad>3) { src[0] = src[7] = src[stride*7] = src[(stride+1)*7]=255; } if(y==4) bias+=d; num++; if(num%1000000 == 0) { printf(" %d %d %d %d\n", num, sum, max, bias); } } }}*/#elif defined (HAVE_MMX) src+= stride*4; asm volatile( "pxor %%mm7, %%mm7 \n\t" "lea -40(%%"REG_SP"), %%"REG_c" \n\t" // make space for 4 8-byte vars "and "ALIGN_MASK", %%"REG_c" \n\t" // align// 0 1 2 3 4 5 6 7// %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 edx+%1 edx+2%1// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 "movq (%0), %%mm0 \n\t" "movq %%mm0, %%mm1 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" // low part of line 0 "punpckhbw %%mm7, %%mm1 \n\t" // high part of line 0 "movq (%0, %1), %%mm2 \n\t" "lea (%0, %1, 2), %%"REG_a" \n\t" "movq %%mm2, %%mm3 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" // low part of line 1 "punpckhbw %%mm7, %%mm3 \n\t" // high part of line 1 "movq (%%"REG_a"), %%mm4 \n\t" "movq %%mm4, %%mm5 \n\t" "punpcklbw %%mm7, %%mm4 \n\t" // low part of line 2 "punpckhbw %%mm7, %%mm5 \n\t" // high part of line 2 "paddw %%mm0, %%mm0 \n\t" // 2L0 "paddw %%mm1, %%mm1 \n\t" // 2H0 "psubw %%mm4, %%mm2 \n\t" // L1 - L2 "psubw %%mm5, %%mm3 \n\t" // H1 - H2 "psubw %%mm2, %%mm0 \n\t" // 2L0 - L1 + L2 "psubw %%mm3, %%mm1 \n\t" // 2H0 - H1 + H2 "psllw $2, %%mm2 \n\t" // 4L1 - 4L2 "psllw $2, %%mm3 \n\t" // 4H1 - 4H2 "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 "movq (%%"REG_a", %1), %%mm2 \n\t" "movq %%mm2, %%mm3 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" // L3 "punpckhbw %%mm7, %%mm3 \n\t" // H3 "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 - L3 "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - H3 "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 "movq %%mm0, (%%"REG_c") \n\t" // 2L0 - 5L1 + 5L2 - 2L3 "movq %%mm1, 8(%%"REG_c") \n\t" // 2H0 - 5H1 + 5H2 - 2H3 "movq (%%"REG_a", %1, 2), %%mm0 \n\t" "movq %%mm0, %%mm1 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" // L4 "punpckhbw %%mm7, %%mm1 \n\t" // H4 "psubw %%mm0, %%mm2 \n\t" // L3 - L4 "psubw %%mm1, %%mm3 \n\t" // H3 - H4 "movq %%mm2, 16(%%"REG_c") \n\t" // L3 - L4 "movq %%mm3, 24(%%"REG_c") \n\t" // H3 - H4 "paddw %%mm4, %%mm4 \n\t" // 2L2 "paddw %%mm5, %%mm5 \n\t" // 2H2 "psubw %%mm2, %%mm4 \n\t" // 2L2 - L3 + L4 "psubw %%mm3, %%mm5 \n\t" // 2H2 - H3 + H4 "lea (%%"REG_a", %1), %0 \n\t" "psllw $2, %%mm2 \n\t" // 4L3 - 4L4 "psllw $2, %%mm3 \n\t" // 4H3 - 4H4 "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 "psubw %%mm3, %%mm5 \n\t" // 2H2 - 5H3 + 5H4//50 opcodes so far "movq (%0, %1, 2), %%mm2 \n\t" "movq %%mm2, %%mm3 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" // L5 "punpckhbw %%mm7, %%mm3 \n\t" // H5 "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 - L5 "psubw %%mm3, %%mm5 \n\t" // 2H2 - 5H3 + 5H4 - H5 "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 - 2L5 "psubw %%mm3, %%mm5 \n\t" // 2H2 - 5H3 + 5H4 - 2H5 "movq (%%"REG_a", %1, 4), %%mm6 \n\t" "punpcklbw %%mm7, %%mm6 \n\t" // L6 "psubw %%mm6, %%mm2 \n\t" // L5 - L6 "movq (%%"REG_a", %1, 4), %%mm6 \n\t" "punpckhbw %%mm7, %%mm6 \n\t" // H6 "psubw %%mm6, %%mm3 \n\t" // H5 - H6 "paddw %%mm0, %%mm0 \n\t" // 2L4 "paddw %%mm1, %%mm1 \n\t" // 2H4 "psubw %%mm2, %%mm0 \n\t" // 2L4 - L5 + L6 "psubw %%mm3, %%mm1 \n\t" // 2H4 - H5 + H6 "psllw $2, %%mm2 \n\t" // 4L5 - 4L6 "psllw $2, %%mm3 \n\t" // 4H5 - 4H6 "psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 "movq (%0, %1, 4), %%mm2 \n\t" "movq %%mm2, %%mm3 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" // L7 "punpckhbw %%mm7, %%mm3 \n\t" // H7 "paddw %%mm2, %%mm2 \n\t" // 2L7 "paddw %%mm3, %%mm3 \n\t" // 2H7 "psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 - 2L7 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7 "movq (%%"REG_c"), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 "movq 8(%%"REG_c"), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3#ifdef HAVE_MMX2 "movq %%mm7, %%mm6 \n\t" // 0 "psubw %%mm0, %%mm6 \n\t" "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| "movq %%mm7, %%mm6 \n\t" // 0 "psubw %%mm1, %%mm6 \n\t" "pmaxsw %%mm6, %%mm1 \n\t" // |2H4 - 5H5 + 5H6 - 2H7| "movq %%mm7, %%mm6 \n\t" // 0 "psubw %%mm2, %%mm6 \n\t" "pmaxsw %%mm6, %%mm2 \n\t" // |2L0 - 5L1 + 5L2 - 2L3| "movq %%mm7, %%mm6 \n\t" // 0 "psubw %%mm3, %%mm6 \n\t" "pmaxsw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3|#else "movq %%mm7, %%mm6 \n\t" // 0 "pcmpgtw %%mm0, %%mm6 \n\t" "pxor %%mm6, %%mm0 \n\t" "psubw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| "movq %%mm7, %%mm6 \n\t" // 0 "pcmpgtw %%mm1, %%mm6 \n\t" "pxor %%mm6, %%mm1 \n\t" "psubw %%mm6, %%mm1 \n\t" // |2H4 - 5H5 + 5H6 - 2H7| "movq %%mm7, %%mm6 \n\t" // 0 "pcmpgtw %%mm2, %%mm6 \n\t" "pxor %%mm6, %%mm2 \n\t" "psubw %%mm6, %%mm2 \n\t" // |2L0 - 5L1 + 5L2 - 2L3| "movq %%mm7, %%mm6 \n\t" // 0 "pcmpgtw %%mm3, %%mm6 \n\t" "pxor %%mm6, %%mm3 \n\t" "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3|#endif#ifdef HAVE_MMX2
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -