📄 postprocess_template.c
字号:
/* Copyright (C) 2001-2002 Michael Niedermayer (michaelni@gmx.at) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*//** * @file postprocess_template.c * mmx/mmx2/3dnow postprocess code. */#undef PAVGB#undef PMINUB#undef PMAXUB#ifdef HAVE_MMX2#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"#elif defined (HAVE_3DNOW)#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"#endif#ifdef HAVE_MMX2#define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t"#elif defined (HAVE_MMX)#define PMINUB(b,a,t) \ "movq " #a ", " #t " \n\t"\ "psubusb " #b ", " #t " \n\t"\ "psubb " #t ", " #a " \n\t"#endif#ifdef HAVE_MMX2#define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t"#elif defined (HAVE_MMX)#define PMAXUB(a,b) \ "psubusb " #a ", " #b " \n\t"\ "paddb " #a ", " #b " \n\t"#endif//FIXME? |255-0| = 1 (shouldnt be a problem ...)#ifdef HAVE_MMX/** * Check if the middle 8x8 Block in the given 8x16 block is flat */static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ int numEq= 0, dcOk; src+= stride*4; // src points to begin of the 8x8 Blockasm volatile( "movq %0, %%mm7 \n\t" "movq %1, %%mm6 \n\t" : : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP]) ); asm volatile( "leal (%2, %3), %%eax \n\t"// 0 1 2 3 4 5 6 7 8 9// %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2 "movq (%2), %%mm0 \n\t" "movq (%%eax), %%mm1 \n\t" "movq %%mm0, %%mm3 \n\t" "movq %%mm0, %%mm4 \n\t" PMAXUB(%%mm1, %%mm4) PMINUB(%%mm1, %%mm3, %%mm5) "psubb %%mm1, %%mm0 \n\t" // mm0 = differnece "paddb %%mm7, %%mm0 \n\t" "pcmpgtb %%mm6, %%mm0 \n\t" "movq (%%eax,%3), %%mm2 \n\t" PMAXUB(%%mm2, %%mm4) PMINUB(%%mm2, %%mm3, %%mm5) "psubb %%mm2, %%mm1 \n\t" "paddb %%mm7, %%mm1 \n\t" "pcmpgtb %%mm6, %%mm1 \n\t" "paddb %%mm1, %%mm0 \n\t" "movq (%%eax, %3, 2), %%mm1 \n\t" PMAXUB(%%mm1, %%mm4) PMINUB(%%mm1, %%mm3, %%mm5) "psubb %%mm1, %%mm2 \n\t" "paddb %%mm7, %%mm2 \n\t" "pcmpgtb %%mm6, %%mm2 \n\t" "paddb %%mm2, %%mm0 \n\t" "leal (%%eax, %3, 4), %%eax \n\t" "movq (%2, %3, 4), %%mm2 \n\t" PMAXUB(%%mm2, %%mm4) PMINUB(%%mm2, %%mm3, %%mm5) "psubb %%mm2, %%mm1 \n\t" "paddb %%mm7, %%mm1 \n\t" "pcmpgtb %%mm6, %%mm1 \n\t" "paddb %%mm1, %%mm0 \n\t" "movq (%%eax), %%mm1 \n\t" PMAXUB(%%mm1, %%mm4) PMINUB(%%mm1, %%mm3, %%mm5) "psubb %%mm1, %%mm2 \n\t" "paddb %%mm7, %%mm2 \n\t" "pcmpgtb %%mm6, %%mm2 \n\t" "paddb %%mm2, %%mm0 \n\t" "movq (%%eax, %3), %%mm2 \n\t" PMAXUB(%%mm2, %%mm4) PMINUB(%%mm2, %%mm3, %%mm5) "psubb %%mm2, %%mm1 \n\t" "paddb %%mm7, %%mm1 \n\t" "pcmpgtb %%mm6, %%mm1 \n\t" "paddb %%mm1, %%mm0 \n\t" "movq (%%eax, %3, 2), %%mm1 \n\t" PMAXUB(%%mm1, %%mm4) PMINUB(%%mm1, %%mm3, %%mm5) "psubb %%mm1, %%mm2 \n\t" "paddb %%mm7, %%mm2 \n\t" "pcmpgtb %%mm6, %%mm2 \n\t" "paddb %%mm2, %%mm0 \n\t" "psubusb %%mm3, %%mm4 \n\t" " \n\t"#ifdef HAVE_MMX2 "pxor %%mm7, %%mm7 \n\t" "psadbw %%mm7, %%mm0 \n\t"#else "movq %%mm0, %%mm1 \n\t" "psrlw $8, %%mm0 \n\t" "paddb %%mm1, %%mm0 \n\t" "movq %%mm0, %%mm1 \n\t" "psrlq $16, %%mm0 \n\t" "paddb %%mm1, %%mm0 \n\t" "movq %%mm0, %%mm1 \n\t" "psrlq $32, %%mm0 \n\t" "paddb %%mm1, %%mm0 \n\t"#endif "movq %4, %%mm7 \n\t" // QP,..., QP "paddusb %%mm7, %%mm7 \n\t" // 2QP ... 2QP "psubusb %%mm7, %%mm4 \n\t" // Diff <= 2QP -> 0 "packssdw %%mm4, %%mm4 \n\t" "movd %%mm0, %0 \n\t" "movd %%mm4, %1 \n\t" : "=r" (numEq), "=r" (dcOk) : "r" (src), "r" (stride), "m" (c->pQPb) : "%eax" ); numEq= (-numEq) &0xFF; if(numEq > c->ppMode.flatnessThreshold){ if(dcOk) return 0; else return 1; }else{ return 2; }}#endif/** * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 */static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c){#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) src+= stride*3; asm volatile( //"movv %0 %1 %2\n\t" "movq %2, %%mm0 \n\t" // QP,..., QP "pxor %%mm4, %%mm4 \n\t" "movq (%0), %%mm6 \n\t" "movq (%0, %1), %%mm5 \n\t" "movq %%mm5, %%mm1 \n\t" "movq %%mm6, %%mm2 \n\t" "psubusb %%mm6, %%mm5 \n\t" "psubusb %%mm1, %%mm2 \n\t" "por %%mm5, %%mm2 \n\t" // ABS Diff of lines "psubusb %%mm0, %%mm2 \n\t" // diff <= QP -> 0 "pcmpeqb %%mm4, %%mm2 \n\t" // diff <= QP -> FF "pand %%mm2, %%mm6 \n\t" "pandn %%mm1, %%mm2 \n\t" "por %%mm2, %%mm6 \n\t"// First Line to Filter "movq (%0, %1, 8), %%mm5 \n\t" "leal (%0, %1, 4), %%eax \n\t" "leal (%0, %1, 8), %%ecx \n\t" "subl %1, %%ecx \n\t" "addl %1, %0 \n\t" // %0 points to line 1 not 0 "movq (%0, %1, 8), %%mm7 \n\t" "movq %%mm5, %%mm1 \n\t" "movq %%mm7, %%mm2 \n\t" "psubusb %%mm7, %%mm5 \n\t" "psubusb %%mm1, %%mm2 \n\t" "por %%mm5, %%mm2 \n\t" // ABS Diff of lines "psubusb %%mm0, %%mm2 \n\t" // diff <= QP -> 0 "pcmpeqb %%mm4, %%mm2 \n\t" // diff <= QP -> FF "pand %%mm2, %%mm7 \n\t" "pandn %%mm1, %%mm2 \n\t" "por %%mm2, %%mm7 \n\t" // First Line to Filter // 1 2 3 4 5 6 7 8 // %0 %0+%1 %0+2%1 eax %0+4%1 eax+2%1 ecx eax+4%1 // 6 4 2 2 1 1 // 6 4 4 2 // 6 8 2 "movq (%0, %1), %%mm0 \n\t" // 1 "movq %%mm0, %%mm1 \n\t" // 1 PAVGB(%%mm6, %%mm0) //1 1 /2 PAVGB(%%mm6, %%mm0) //3 1 /4 "movq (%0, %1, 4), %%mm2 \n\t" // 1 "movq %%mm2, %%mm5 \n\t" // 1 PAVGB((%%eax), %%mm2) // 11 /2 PAVGB((%0, %1, 2), %%mm2) // 211 /4 "movq %%mm2, %%mm3 \n\t" // 211 /4 "movq (%0), %%mm4 \n\t" // 1 PAVGB(%%mm4, %%mm3) // 4 211 /8 PAVGB(%%mm0, %%mm3) //642211 /16 "movq %%mm3, (%0) \n\t" // X // mm1=2 mm2=3(211) mm4=1 mm5=5 mm6=0 mm7=9 "movq %%mm1, %%mm0 \n\t" // 1 PAVGB(%%mm6, %%mm0) //1 1 /2 "movq %%mm4, %%mm3 \n\t" // 1 PAVGB((%0,%1,2), %%mm3) // 1 1 /2 PAVGB((%%eax,%1,2), %%mm5) // 11 /2 PAVGB((%%eax), %%mm5) // 211 /4 PAVGB(%%mm5, %%mm3) // 2 2211 /8 PAVGB(%%mm0, %%mm3) //4242211 /16 "movq %%mm3, (%0,%1) \n\t" // X // mm1=2 mm2=3(211) mm4=1 mm5=4(211) mm6=0 mm7=9 PAVGB(%%mm4, %%mm6) //11 /2 "movq (%%ecx), %%mm0 \n\t" // 1 PAVGB((%%eax, %1, 2), %%mm0) // 11/2 "movq %%mm0, %%mm3 \n\t" // 11/2 PAVGB(%%mm1, %%mm0) // 2 11/4 PAVGB(%%mm6, %%mm0) //222 11/8 PAVGB(%%mm2, %%mm0) //22242211/16 "movq (%0, %1, 2), %%mm2 \n\t" // 1 "movq %%mm0, (%0, %1, 2) \n\t" // X // mm1=2 mm2=3 mm3=6(11) mm4=1 mm5=4(211) mm6=0(11) mm7=9 "movq (%%eax, %1, 4), %%mm0 \n\t" // 1 PAVGB((%%ecx), %%mm0) // 11 /2 PAVGB(%%mm0, %%mm6) //11 11 /4 PAVGB(%%mm1, %%mm4) // 11 /2 PAVGB(%%mm2, %%mm1) // 11 /2 PAVGB(%%mm1, %%mm6) //1122 11 /8 PAVGB(%%mm5, %%mm6) //112242211 /16 "movq (%%eax), %%mm5 \n\t" // 1 "movq %%mm6, (%%eax) \n\t" // X // mm0=7(11) mm1=2(11) mm2=3 mm3=6(11) mm4=1(11) mm5=4 mm7=9 "movq (%%eax, %1, 4), %%mm6 \n\t" // 1 PAVGB(%%mm7, %%mm6) // 11 /2 PAVGB(%%mm4, %%mm6) // 11 11 /4 PAVGB(%%mm3, %%mm6) // 11 2211 /8 PAVGB(%%mm5, %%mm2) // 11 /2 "movq (%0, %1, 4), %%mm4 \n\t" // 1 PAVGB(%%mm4, %%mm2) // 112 /4 PAVGB(%%mm2, %%mm6) // 112242211 /16 "movq %%mm6, (%0, %1, 4) \n\t" // X // mm0=7(11) mm1=2(11) mm2=3(112) mm3=6(11) mm4=5 mm5=4 mm7=9 PAVGB(%%mm7, %%mm1) // 11 2 /4 PAVGB(%%mm4, %%mm5) // 11 /2 PAVGB(%%mm5, %%mm0) // 11 11 /4 "movq (%%eax, %1, 2), %%mm6 \n\t" // 1 PAVGB(%%mm6, %%mm1) // 11 4 2 /8 PAVGB(%%mm0, %%mm1) // 11224222 /16 "movq %%mm1, (%%eax, %1, 2) \n\t" // X // mm2=3(112) mm3=6(11) mm4=5 mm5=4(11) mm6=6 mm7=9 PAVGB((%%ecx), %%mm2) // 112 4 /8 "movq (%%eax, %1, 4), %%mm0 \n\t" // 1 PAVGB(%%mm0, %%mm6) // 1 1 /2 PAVGB(%%mm7, %%mm6) // 1 12 /4 PAVGB(%%mm2, %%mm6) // 1122424 /4 "movq %%mm6, (%%ecx) \n\t" // X // mm0=8 mm3=6(11) mm4=5 mm5=4(11) mm7=9 PAVGB(%%mm7, %%mm5) // 11 2 /4 PAVGB(%%mm7, %%mm5) // 11 6 /8 PAVGB(%%mm3, %%mm0) // 112 /4 PAVGB(%%mm0, %%mm5) // 112246 /16 "movq %%mm5, (%%eax, %1, 4) \n\t" // X "subl %1, %0 \n\t" : : "r" (src), "r" (stride), "m" (c->pQPb) : "%eax", "%ecx" );#else const int l1= stride; const int l2= stride + l1; const int l3= stride + l2; const int l4= stride + l3; const int l5= stride + l4; const int l6= stride + l5; const int l7= stride + l6; const int l8= stride + l7; const int l9= stride + l8; int x; src+= stride*3; for(x=0; x<BLOCK_SIZE; x++) { const int first= ABS(src[0] - src[l1]) < c->QP ? src[0] : src[l1]; const int last= ABS(src[l8] - src[l9]) < c->QP ? src[l9] : src[l8]; int sums[9]; sums[0] = first + src[l1]; sums[1] = src[l1] + src[l2]; sums[2] = src[l2] + src[l3]; sums[3] = src[l3] + src[l4]; sums[4] = src[l4] + src[l5]; sums[5] = src[l5] + src[l6]; sums[6] = src[l6] + src[l7]; sums[7] = src[l7] + src[l8]; sums[8] = src[l8] + last; src[l1]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4; src[l2]= ((src[l2]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4; src[l3]= ((src[l3]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4; src[l4]= ((src[l4]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4; src[l5]= ((src[l5]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4; src[l6]= ((src[l6]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4; src[l7]= (((last + src[l7])<<2) + ((src[l8] + sums[5])<<1) + sums[3] + 8)>>4; src[l8]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4; src++; }#endif}#if 0/** * Experimental implementation of the filter (Algorithm 1) described in a paper from Ramkishor & Karandikar * values are correctly clipped (MMX2) * values are wraparound (C) * conclusion: its fast, but introduces ugly horizontal patterns if there is a continious gradient 0 8 16 24 x = 8 x/2 = 4 x/8 = 1 1 12 12 23 */static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP){#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) src+= stride*3;// FIXME rounding asm volatile( "pxor %%mm7, %%mm7 \n\t" // 0 "movq "MANGLE(b80)", %%mm6 \n\t" // MIN_SIGNED_BYTE "leal (%0, %1), %%eax \n\t" "leal (%%eax, %1, 4), %%ecx \n\t"// 0 1 2 3 4 5 6 7 8 9// %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %0+8%1 ecx+4%1 "movq "MANGLE(pQPb)", %%mm0 \n\t" // QP,..., QP "movq %%mm0, %%mm1 \n\t" // QP,..., QP "paddusb "MANGLE(b02)", %%mm0 \n\t" "psrlw $2, %%mm0 \n\t" "pand "MANGLE(b3F)", %%mm0 \n\t" // QP/4,..., QP/4 "paddusb %%mm1, %%mm0 \n\t" // QP*1.25 ... "movq (%0, %1, 4), %%mm2 \n\t" // line 4 "movq (%%ecx), %%mm3 \n\t" // line 5 "movq %%mm2, %%mm4 \n\t" // line 4 "pcmpeqb %%mm5, %%mm5 \n\t" // -1 "pxor %%mm2, %%mm5 \n\t" // -line 4 - 1 PAVGB(%%mm3, %%mm5) "paddb %%mm6, %%mm5 \n\t" // (l5-l4)/2 "psubusb %%mm3, %%mm4 \n\t" "psubusb %%mm2, %%mm3 \n\t" "por %%mm3, %%mm4 \n\t" // |l4 - l5| "psubusb %%mm0, %%mm4 \n\t" "pcmpeqb %%mm7, %%mm4 \n\t" "pand %%mm4, %%mm5 \n\t" // d/2// "paddb %%mm6, %%mm2 \n\t" // line 4 + 0x80 "paddb %%mm5, %%mm2 \n\t"// "psubb %%mm6, %%mm2 \n\t" "movq %%mm2, (%0,%1, 4) \n\t" "movq (%%ecx), %%mm2 \n\t"// "paddb %%mm6, %%mm2 \n\t" // line 5 + 0x80 "psubb %%mm5, %%mm2 \n\t"// "psubb %%mm6, %%mm2 \n\t" "movq %%mm2, (%%ecx) \n\t" "paddb %%mm6, %%mm5 \n\t" "psrlw $2, %%mm5 \n\t" "pand "MANGLE(b3F)", %%mm5 \n\t" "psubb "MANGLE(b20)", %%mm5 \n\t" // (l5-l4)/8 "movq (%%eax, %1, 2), %%mm2 \n\t" "paddb %%mm6, %%mm2 \n\t" // line 3 + 0x80 "paddsb %%mm5, %%mm2 \n\t" "psubb %%mm6, %%mm2 \n\t" "movq %%mm2, (%%eax, %1, 2) \n\t" "movq (%%ecx, %1), %%mm2 \n\t" "paddb %%mm6, %%mm2 \n\t" // line 6 + 0x80 "psubsb %%mm5, %%mm2 \n\t" "psubb %%mm6, %%mm2 \n\t" "movq %%mm2, (%%ecx, %1) \n\t" : : "r" (src), "r" (stride) : "%eax", "%ecx" );#else const int l1= stride;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -