📄 postprocess_template.c
字号:
const int l2= stride + l1; const int l3= stride + l2; const int l4= stride + l3; const int l5= stride + l4; const int l6= stride + l5;// const int l7= stride + l6;// const int l8= stride + l7;// const int l9= stride + l8; int x; const int QP15= QP + (QP>>2); src+= stride*3; for(x=0; x<BLOCK_SIZE; x++) { const int v = (src[x+l5] - src[x+l4]); if(ABS(v) < QP15) { src[x+l3] +=v>>3; src[x+l4] +=v>>1; src[x+l5] -=v>>1; src[x+l6] -=v>>3; } }#endif}#endif/** * Experimental Filter 1 * will not damage linear gradients * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter * can only smooth blocks at the expected locations (it cant smooth them if they did move) * MMX2 version does correct clipping C version doesnt */static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co){#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) src+= stride*3; asm volatile( "pxor %%mm7, %%mm7 \n\t" // 0 "leal (%0, %1), %%eax \n\t" "leal (%%eax, %1, 4), %%ecx \n\t"// 0 1 2 3 4 5 6 7 8 9// %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %0+8%1 ecx+4%1 "movq (%%eax, %1, 2), %%mm0 \n\t" // line 3 "movq (%0, %1, 4), %%mm1 \n\t" // line 4 "movq %%mm1, %%mm2 \n\t" // line 4 "psubusb %%mm0, %%mm1 \n\t" "psubusb %%mm2, %%mm0 \n\t" "por %%mm1, %%mm0 \n\t" // |l2 - l3| "movq (%%ecx), %%mm3 \n\t" // line 5 "movq (%%ecx, %1), %%mm4 \n\t" // line 6 "movq %%mm3, %%mm5 \n\t" // line 5 "psubusb %%mm4, %%mm3 \n\t" "psubusb %%mm5, %%mm4 \n\t" "por %%mm4, %%mm3 \n\t" // |l5 - l6| PAVGB(%%mm3, %%mm0) // (|l2 - l3| + |l5 - l6|)/2 "movq %%mm2, %%mm1 \n\t" // line 4 "psubusb %%mm5, %%mm2 \n\t" "movq %%mm2, %%mm4 \n\t" "pcmpeqb %%mm7, %%mm2 \n\t" // (l4 - l5) <= 0 ? -1 : 0 "psubusb %%mm1, %%mm5 \n\t" "por %%mm5, %%mm4 \n\t" // |l4 - l5| "psubusb %%mm0, %%mm4 \n\t" //d = MAX(0, |l4-l5| - (|l2-l3| + |l5-l6|)/2) "movq %%mm4, %%mm3 \n\t" // d "movq %2, %%mm0 \n\t" "paddusb %%mm0, %%mm0 \n\t" "psubusb %%mm0, %%mm4 \n\t" "pcmpeqb %%mm7, %%mm4 \n\t" // d <= QP ? -1 : 0 "psubusb "MANGLE(b01)", %%mm3 \n\t" "pand %%mm4, %%mm3 \n\t" // d <= QP ? d : 0 PAVGB(%%mm7, %%mm3) // d/2 "movq %%mm3, %%mm1 \n\t" // d/2 PAVGB(%%mm7, %%mm3) // d/4 PAVGB(%%mm1, %%mm3) // 3*d/8 "movq (%0, %1, 4), %%mm0 \n\t" // line 4 "pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l4-1 : l4 "psubusb %%mm3, %%mm0 \n\t" "pxor %%mm2, %%mm0 \n\t" "movq %%mm0, (%0, %1, 4) \n\t" // line 4 "movq (%%ecx), %%mm0 \n\t" // line 5 "pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l5-1 : l5 "paddusb %%mm3, %%mm0 \n\t" "pxor %%mm2, %%mm0 \n\t" "movq %%mm0, (%%ecx) \n\t" // line 5 PAVGB(%%mm7, %%mm1) // d/4 "movq (%%eax, %1, 2), %%mm0 \n\t" // line 3 "pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l4-1 : l4 "psubusb %%mm1, %%mm0 \n\t" "pxor %%mm2, %%mm0 \n\t" "movq %%mm0, (%%eax, %1, 2) \n\t" // line 3 "movq (%%ecx, %1), %%mm0 \n\t" // line 6 "pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l5-1 : l5 "paddusb %%mm1, %%mm0 \n\t" "pxor %%mm2, %%mm0 \n\t" "movq %%mm0, (%%ecx, %1) \n\t" // line 6 PAVGB(%%mm7, %%mm1) // d/8 "movq (%%eax, %1), %%mm0 \n\t" // line 2 "pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l2-1 : l2 "psubusb %%mm1, %%mm0 \n\t" "pxor %%mm2, %%mm0 \n\t" "movq %%mm0, (%%eax, %1) \n\t" // line 2 "movq (%%ecx, %1, 2), %%mm0 \n\t" // line 7 "pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l7-1 : l7 "paddusb %%mm1, %%mm0 \n\t" "pxor %%mm2, %%mm0 \n\t" "movq %%mm0, (%%ecx, %1, 2) \n\t" // line 7 : : "r" (src), "r" (stride), "m" (co->pQPb) : "%eax", "%ecx" );#else const int l1= stride; const int l2= stride + l1; const int l3= stride + l2; const int l4= stride + l3; const int l5= stride + l4; const int l6= stride + l5; const int l7= stride + l6;// const int l8= stride + l7;// const int l9= stride + l8; int x; src+= stride*3; for(x=0; x<BLOCK_SIZE; x++) { int a= src[l3] - src[l4]; int b= src[l4] - src[l5]; int c= src[l5] - src[l6]; int d= ABS(b) - ((ABS(a) + ABS(c))>>1); d= MAX(d, 0); if(d < co->QP*2) { int v = d * SIGN(-b); src[l2] +=v>>3; src[l3] +=v>>2; src[l4] +=(3*v)>>3; src[l5] -=(3*v)>>3; src[l6] -=v>>2; src[l7] -=v>>3; } src++; }#endif}static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c){#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)/* uint8_t tmp[16]; const int l1= stride; const int l2= stride + l1; const int l3= stride + l2; const int l4= (int)tmp - (int)src - stride*3; const int l5= (int)tmp - (int)src - stride*3 + 8; const int l6= stride*3 + l3; const int l7= stride + l6; const int l8= stride + l7; memcpy(tmp, src+stride*7, 8); memcpy(tmp+8, src+stride*8, 8);*/ src+= stride*4; asm volatile(#if 0 //sligtly more accurate and slightly slower "pxor %%mm7, %%mm7 \n\t" // 0 "leal (%0, %1), %%eax \n\t" "leal (%%eax, %1, 4), %%ecx \n\t"// 0 1 2 3 4 5 6 7// %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 ecx+%1 ecx+2%1// %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 "movq (%0, %1, 2), %%mm0 \n\t" // l2 "movq (%0), %%mm1 \n\t" // l0 "movq %%mm0, %%mm2 \n\t" // l2 PAVGB(%%mm7, %%mm0) // ~l2/2 PAVGB(%%mm1, %%mm0) // ~(l2 + 2l0)/4 PAVGB(%%mm2, %%mm0) // ~(5l2 + 2l0)/8 "movq (%%eax), %%mm1 \n\t" // l1 "movq (%%eax, %1, 2), %%mm3 \n\t" // l3 "movq %%mm1, %%mm4 \n\t" // l1 PAVGB(%%mm7, %%mm1) // ~l1/2 PAVGB(%%mm3, %%mm1) // ~(l1 + 2l3)/4 PAVGB(%%mm4, %%mm1) // ~(5l1 + 2l3)/8 "movq %%mm0, %%mm4 \n\t" // ~(5l2 + 2l0)/8 "psubusb %%mm1, %%mm0 \n\t" "psubusb %%mm4, %%mm1 \n\t" "por %%mm0, %%mm1 \n\t" // ~|2l0 - 5l1 + 5l2 - 2l3|/8// mm1= |lenergy|, mm2= l2, mm3= l3, mm7=0 "movq (%0, %1, 4), %%mm0 \n\t" // l4 "movq %%mm0, %%mm4 \n\t" // l4 PAVGB(%%mm7, %%mm0) // ~l4/2 PAVGB(%%mm2, %%mm0) // ~(l4 + 2l2)/4 PAVGB(%%mm4, %%mm0) // ~(5l4 + 2l2)/8 "movq (%%ecx), %%mm2 \n\t" // l5 "movq %%mm3, %%mm5 \n\t" // l3 PAVGB(%%mm7, %%mm3) // ~l3/2 PAVGB(%%mm2, %%mm3) // ~(l3 + 2l5)/4 PAVGB(%%mm5, %%mm3) // ~(5l3 + 2l5)/8 "movq %%mm0, %%mm6 \n\t" // ~(5l4 + 2l2)/8 "psubusb %%mm3, %%mm0 \n\t" "psubusb %%mm6, %%mm3 \n\t" "por %%mm0, %%mm3 \n\t" // ~|2l2 - 5l3 + 5l4 - 2l5|/8 "pcmpeqb %%mm7, %%mm0 \n\t" // SIGN(2l2 - 5l3 + 5l4 - 2l5)// mm0= SIGN(menergy), mm1= |lenergy|, mm2= l5, mm3= |menergy|, mm4=l4, mm5= l3, mm7=0 "movq (%%ecx, %1), %%mm6 \n\t" // l6 "movq %%mm6, %%mm5 \n\t" // l6 PAVGB(%%mm7, %%mm6) // ~l6/2 PAVGB(%%mm4, %%mm6) // ~(l6 + 2l4)/4 PAVGB(%%mm5, %%mm6) // ~(5l6 + 2l4)/8 "movq (%%ecx, %1, 2), %%mm5 \n\t" // l7 "movq %%mm2, %%mm4 \n\t" // l5 PAVGB(%%mm7, %%mm2) // ~l5/2 PAVGB(%%mm5, %%mm2) // ~(l5 + 2l7)/4 PAVGB(%%mm4, %%mm2) // ~(5l5 + 2l7)/8 "movq %%mm6, %%mm4 \n\t" // ~(5l6 + 2l4)/8 "psubusb %%mm2, %%mm6 \n\t" "psubusb %%mm4, %%mm2 \n\t" "por %%mm6, %%mm2 \n\t" // ~|2l4 - 5l5 + 5l6 - 2l7|/8// mm0= SIGN(menergy), mm1= |lenergy|/8, mm2= |renergy|/8, mm3= |menergy|/8, mm7=0 PMINUB(%%mm2, %%mm1, %%mm4) // MIN(|lenergy|,|renergy|)/8 "movq %2, %%mm4 \n\t" // QP //FIXME QP+1 ? "paddusb "MANGLE(b01)", %%mm4 \n\t" "pcmpgtb %%mm3, %%mm4 \n\t" // |menergy|/8 < QP "psubusb %%mm1, %%mm3 \n\t" // d=|menergy|/8-MIN(|lenergy|,|renergy|)/8 "pand %%mm4, %%mm3 \n\t" "movq %%mm3, %%mm1 \n\t"// "psubusb "MANGLE(b01)", %%mm3 \n\t" PAVGB(%%mm7, %%mm3) PAVGB(%%mm7, %%mm3) "paddusb %%mm1, %%mm3 \n\t"// "paddusb "MANGLE(b01)", %%mm3 \n\t" "movq (%%eax, %1, 2), %%mm6 \n\t" //l3 "movq (%0, %1, 4), %%mm5 \n\t" //l4 "movq (%0, %1, 4), %%mm4 \n\t" //l4 "psubusb %%mm6, %%mm5 \n\t" "psubusb %%mm4, %%mm6 \n\t" "por %%mm6, %%mm5 \n\t" // |l3-l4| "pcmpeqb %%mm7, %%mm6 \n\t" // SIGN(l3-l4) "pxor %%mm6, %%mm0 \n\t" "pand %%mm0, %%mm3 \n\t" PMINUB(%%mm5, %%mm3, %%mm0) "psubusb "MANGLE(b01)", %%mm3 \n\t" PAVGB(%%mm7, %%mm3) "movq (%%eax, %1, 2), %%mm0 \n\t" "movq (%0, %1, 4), %%mm2 \n\t" "pxor %%mm6, %%mm0 \n\t" "pxor %%mm6, %%mm2 \n\t" "psubb %%mm3, %%mm0 \n\t" "paddb %%mm3, %%mm2 \n\t" "pxor %%mm6, %%mm0 \n\t" "pxor %%mm6, %%mm2 \n\t" "movq %%mm0, (%%eax, %1, 2) \n\t" "movq %%mm2, (%0, %1, 4) \n\t"#endif "leal (%0, %1), %%eax \n\t" "pcmpeqb %%mm6, %%mm6 \n\t" // -1// 0 1 2 3 4 5 6 7// %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 ecx+%1 ecx+2%1// %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 "movq (%%eax, %1, 2), %%mm1 \n\t" // l3 "movq (%0, %1, 4), %%mm0 \n\t" // l4 "pxor %%mm6, %%mm1 \n\t" // -l3-1 PAVGB(%%mm1, %%mm0) // -q+128 = (l4-l3+256)/2// mm1=-l3-1, mm0=128-q "movq (%%eax, %1, 4), %%mm2 \n\t" // l5 "movq (%%eax, %1), %%mm3 \n\t" // l2 "pxor %%mm6, %%mm2 \n\t" // -l5-1 "movq %%mm2, %%mm5 \n\t" // -l5-1 "movq "MANGLE(b80)", %%mm4 \n\t" // 128 "leal (%%eax, %1, 4), %%ecx \n\t" PAVGB(%%mm3, %%mm2) // (l2-l5+256)/2 PAVGB(%%mm0, %%mm4) // ~(l4-l3)/4 + 128 PAVGB(%%mm2, %%mm4) // ~(l2-l5)/4 +(l4-l3)/8 + 128 PAVGB(%%mm0, %%mm4) // ~(l2-l5)/8 +5(l4-l3)/16 + 128// mm1=-l3-1, mm0=128-q, mm3=l2, mm4=menergy/16 + 128, mm5= -l5-1 "movq (%%eax), %%mm2 \n\t" // l1 "pxor %%mm6, %%mm2 \n\t" // -l1-1 PAVGB(%%mm3, %%mm2) // (l2-l1+256)/2 PAVGB((%0), %%mm1) // (l0-l3+256)/2 "movq "MANGLE(b80)", %%mm3 \n\t" // 128 PAVGB(%%mm2, %%mm3) // ~(l2-l1)/4 + 128 PAVGB(%%mm1, %%mm3) // ~(l0-l3)/4 +(l2-l1)/8 + 128 PAVGB(%%mm2, %%mm3) // ~(l0-l3)/8 +5(l2-l1)/16 + 128// mm0=128-q, mm3=lenergy/16 + 128, mm4= menergy/16 + 128, mm5= -l5-1 PAVGB((%%ecx, %1), %%mm5) // (l6-l5+256)/2 "movq (%%ecx, %1, 2), %%mm1 \n\t" // l7 "pxor %%mm6, %%mm1 \n\t" // -l7-1 PAVGB((%0, %1, 4), %%mm1) // (l4-l7+256)/2 "movq "MANGLE(b80)", %%mm2 \n\t" // 128 PAVGB(%%mm5, %%mm2) // ~(l6-l5)/4 + 128 PAVGB(%%mm1, %%mm2) // ~(l4-l7)/4 +(l6-l5)/8 + 128 PAVGB(%%mm5, %%mm2) // ~(l4-l7)/8 +5(l6-l5)/16 + 128// mm0=128-q, mm2=renergy/16 + 128, mm3=lenergy/16 + 128, mm4= menergy/16 + 128 "movq "MANGLE(b00)", %%mm1 \n\t" // 0 "movq "MANGLE(b00)", %%mm5 \n\t" // 0 "psubb %%mm2, %%mm1 \n\t" // 128 - renergy/16 "psubb %%mm3, %%mm5 \n\t" // 128 - lenergy/16 PMAXUB(%%mm1, %%mm2) // 128 + |renergy/16| PMAXUB(%%mm5, %%mm3) // 128 + |lenergy/16| PMINUB(%%mm2, %%mm3, %%mm1) // 128 + MIN(|lenergy|,|renergy|)/16// mm0=128-q, mm3=128 + MIN(|lenergy|,|renergy|)/16, mm4= menergy/16 + 128 "movq "MANGLE(b00)", %%mm7 \n\t" // 0 "movq %2, %%mm2 \n\t" // QP PAVGB(%%mm6, %%mm2) // 128 + QP/2 "psubb %%mm6, %%mm2 \n\t" "movq %%mm4, %%mm1 \n\t" "pcmpgtb %%mm7, %%mm1 \n\t" // SIGN(menergy) "pxor %%mm1, %%mm4 \n\t" "psubb %%mm1, %%mm4 \n\t" // 128 + |menergy|/16 "pcmpgtb %%mm4, %%mm2 \n\t" // |menergy|/16 < QP/2 "psubusb %%mm3, %%mm4 \n\t" //d=|menergy|/16 - MIN(|lenergy|,|renergy|)/16// mm0=128-q, mm1= SIGN(menergy), mm2= |menergy|/16 < QP/2, mm4= d/16 "movq %%mm4, %%mm3 \n\t" // d "psubusb "MANGLE(b01)", %%mm4 \n\t" PAVGB(%%mm7, %%mm4) // d/32 PAVGB(%%mm7, %%mm4) // (d + 32)/64 "paddb %%mm3, %%mm4 \n\t" // 5d/64 "pand %%mm2, %%mm4 \n\t" "movq "MANGLE(b80)", %%mm5 \n\t" // 128 "psubb %%mm0, %%mm5 \n\t" // q "paddsb %%mm6, %%mm5 \n\t" // fix bad rounding "pcmpgtb %%mm5, %%mm7 \n\t" // SIGN(q) "pxor %%mm7, %%mm5 \n\t" PMINUB(%%mm5, %%mm4, %%mm3) // MIN(|q|, 5d/64) "pxor %%mm1, %%mm7 \n\t" // SIGN(d*q) "pand %%mm7, %%mm4 \n\t" "movq (%%eax, %1, 2), %%mm0 \n\t" "movq (%0, %1, 4), %%mm2 \n\t" "pxor %%mm1, %%mm0 \n\t" "pxor %%mm1, %%mm2 \n\t" "paddb %%mm4, %%mm0 \n\t" "psubb %%mm4, %%mm2 \n\t" "pxor %%mm1, %%mm0 \n\t" "pxor %%mm1, %%mm2 \n\t" "movq %%mm0, (%%eax, %1, 2) \n\t" "movq %%mm2, (%0, %1, 4) \n\t" : : "r" (src), "r" (stride), "m" (c->pQPb) : "%eax", "%ecx" );/* { int x; src-= stride; for(x=0; x<BLOCK_SIZE; x++) { const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]); if(ABS(middleEnergy)< 8*QP) { const int q=(src[l4] - src[l5])/2; const int leftEnergy= 5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]); const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]); int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); d= MAX(d, 0); d= (5*d + 32) >> 6; d*= SIGN(-middleEnergy); if(q>0) { d= d<0 ? 0 : d; d= d>q ? q : d; } else { d= d>0 ? 0 : d; d= d<q ? q : d; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -