📄 postprocess_template.c
字号:
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) src+= stride*4; asm volatile( "lea (%0, %1), %%"REG_a" \n\t" "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" "pxor %%mm7, %%mm7 \n\t" "movq (%2), %%mm0 \n\t"// 0 1 2 3 4 5 6 7 8 9 10// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 ecx#define REAL_DEINT_FF(a,b,c,d)\ "movq " #a ", %%mm1 \n\t"\ "movq " #b ", %%mm2 \n\t"\ "movq " #c ", %%mm3 \n\t"\ "movq " #d ", %%mm4 \n\t"\ PAVGB(%%mm3, %%mm1) \ PAVGB(%%mm4, %%mm0) \ "movq %%mm0, %%mm3 \n\t"\ "punpcklbw %%mm7, %%mm0 \n\t"\ "punpckhbw %%mm7, %%mm3 \n\t"\ "movq %%mm1, %%mm4 \n\t"\ "punpcklbw %%mm7, %%mm1 \n\t"\ "punpckhbw %%mm7, %%mm4 \n\t"\ "psllw $2, %%mm1 \n\t"\ "psllw $2, %%mm4 \n\t"\ "psubw %%mm0, %%mm1 \n\t"\ "psubw %%mm3, %%mm4 \n\t"\ "movq %%mm2, %%mm5 \n\t"\ "movq %%mm2, %%mm0 \n\t"\ "punpcklbw %%mm7, %%mm2 \n\t"\ "punpckhbw %%mm7, %%mm5 \n\t"\ "paddw %%mm2, %%mm1 \n\t"\ "paddw %%mm5, %%mm4 \n\t"\ "psraw $2, %%mm1 \n\t"\ "psraw $2, %%mm4 \n\t"\ "packuswb %%mm4, %%mm1 \n\t"\ "movq %%mm1, " #b " \n\t"\#define DEINT_FF(a,b,c,d) REAL_DEINT_FF(a,b,c,d)DEINT_FF((%0) , (%%REGa) , (%%REGa, %1), (%%REGa, %1, 2))DEINT_FF((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4), (%%REGd) )DEINT_FF((%0, %1, 4), (%%REGd) , (%%REGd, %1), (%%REGd, %1, 2))DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8), (%%REGd, %1, 4)) "movq %%mm0, (%2) \n\t" : : "r" (src), "r" ((long)stride), "r"(tmp) : "%"REG_a, "%"REG_d );#else int x; src+= stride*4; for(x=0; x<8; x++) { int t1= tmp[x]; int t2= src[stride*1]; src[stride*1]= CLIP((-t1 + 4*src[stride*0] + 2*t2 + 4*src[stride*2] - src[stride*3] + 4)>>3); t1= src[stride*4]; src[stride*3]= CLIP((-t2 + 4*src[stride*2] + 2*t1 + 4*src[stride*4] - src[stride*5] + 4)>>3); t2= src[stride*6]; src[stride*5]= CLIP((-t1 + 4*src[stride*4] + 2*t2 + 4*src[stride*6] - src[stride*7] + 4)>>3); t1= src[stride*8]; src[stride*7]= CLIP((-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3); tmp[x]= t1; src++; }#endif}/** * Deinterlaces the given block by filtering every line with a (-1 2 6 2 -1) filter. * will be called for every 8x8 block and can read & write from line 4-15 * lines 0-3 have been passed through the deblock / dering filters allready, but can be read too * lines 4-12 will be read into the deblocking filter and should be deinterlaced * this filter will read lines 4-13 and write 4-11 */static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2){#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) src+= stride*4; asm volatile( "lea (%0, %1), %%"REG_a" \n\t" "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" "pxor %%mm7, %%mm7 \n\t" "movq (%2), %%mm0 \n\t" "movq (%3), %%mm1 \n\t"// 0 1 2 3 4 5 6 7 8 9 10// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 ecx#define REAL_DEINT_L5(t1,t2,a,b,c)\ "movq " #a ", %%mm2 \n\t"\ "movq " #b ", %%mm3 \n\t"\ "movq " #c ", %%mm4 \n\t"\ PAVGB(t2, %%mm3) \ PAVGB(t1, %%mm4) \ "movq %%mm2, %%mm5 \n\t"\ "movq %%mm2, " #t1 " \n\t"\ "punpcklbw %%mm7, %%mm2 \n\t"\ "punpckhbw %%mm7, %%mm5 \n\t"\ "movq %%mm2, %%mm6 \n\t"\ "paddw %%mm2, %%mm2 \n\t"\ "paddw %%mm6, %%mm2 \n\t"\ "movq %%mm5, %%mm6 \n\t"\ "paddw %%mm5, %%mm5 \n\t"\ "paddw %%mm6, %%mm5 \n\t"\ "movq %%mm3, %%mm6 \n\t"\ "punpcklbw %%mm7, %%mm3 \n\t"\ "punpckhbw %%mm7, %%mm6 \n\t"\ "paddw %%mm3, %%mm3 \n\t"\ "paddw %%mm6, %%mm6 \n\t"\ "paddw %%mm3, %%mm2 \n\t"\ "paddw %%mm6, %%mm5 \n\t"\ "movq %%mm4, %%mm6 \n\t"\ "punpcklbw %%mm7, %%mm4 \n\t"\ "punpckhbw %%mm7, %%mm6 \n\t"\ "psubw %%mm4, %%mm2 \n\t"\ "psubw %%mm6, %%mm5 \n\t"\ "psraw $2, %%mm2 \n\t"\ "psraw $2, %%mm5 \n\t"\ "packuswb %%mm5, %%mm2 \n\t"\ "movq %%mm2, " #a " \n\t"\#define DEINT_L5(t1,t2,a,b,c) REAL_DEINT_L5(t1,t2,a,b,c)DEINT_L5(%%mm0, %%mm1, (%0) , (%%REGa) , (%%REGa, %1) )DEINT_L5(%%mm1, %%mm0, (%%REGa) , (%%REGa, %1) , (%%REGa, %1, 2))DEINT_L5(%%mm0, %%mm1, (%%REGa, %1) , (%%REGa, %1, 2), (%0, %1, 4) )DEINT_L5(%%mm1, %%mm0, (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd) )DEINT_L5(%%mm0, %%mm1, (%0, %1, 4) , (%%REGd) , (%%REGd, %1) ) DEINT_L5(%%mm1, %%mm0, (%%REGd) , (%%REGd, %1) , (%%REGd, %1, 2))DEINT_L5(%%mm0, %%mm1, (%%REGd, %1) , (%%REGd, %1, 2), (%0, %1, 8) )DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) "movq %%mm0, (%2) \n\t" "movq %%mm1, (%3) \n\t" : : "r" (src), "r" ((long)stride), "r"(tmp), "r"(tmp2) : "%"REG_a, "%"REG_d );#else int x; src+= stride*4; for(x=0; x<8; x++) { int t1= tmp[x]; int t2= tmp2[x]; int t3= src[0]; src[stride*0]= CLIP((-(t1 + src[stride*2]) + 2*(t2 + src[stride*1]) + 6*t3 + 4)>>3); t1= src[stride*1]; src[stride*1]= CLIP((-(t2 + src[stride*3]) + 2*(t3 + src[stride*2]) + 6*t1 + 4)>>3); t2= src[stride*2]; src[stride*2]= CLIP((-(t3 + src[stride*4]) + 2*(t1 + src[stride*3]) + 6*t2 + 4)>>3); t3= src[stride*3]; src[stride*3]= CLIP((-(t1 + src[stride*5]) + 2*(t2 + src[stride*4]) + 6*t3 + 4)>>3); t1= src[stride*4]; src[stride*4]= CLIP((-(t2 + src[stride*6]) + 2*(t3 + src[stride*5]) + 6*t1 + 4)>>3); t2= src[stride*5]; src[stride*5]= CLIP((-(t3 + src[stride*7]) + 2*(t1 + src[stride*6]) + 6*t2 + 4)>>3); t3= src[stride*6]; src[stride*6]= CLIP((-(t1 + src[stride*8]) + 2*(t2 + src[stride*7]) + 6*t3 + 4)>>3); t1= src[stride*7]; src[stride*7]= CLIP((-(t2 + src[stride*9]) + 2*(t3 + src[stride*8]) + 6*t1 + 4)>>3); tmp[x]= t3; tmp2[x]= t1; src++; }#endif}/** * Deinterlaces the given block by filtering all lines with a (1 2 1) filter. * will be called for every 8x8 block and can read & write from line 4-15 * lines 0-3 have been passed through the deblock / dering filters allready, but can be read too * lines 4-12 will be read into the deblocking filter and should be deinterlaced * this filter will read lines 4-13 and write 4-11 */static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp){#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) src+= 4*stride; asm volatile( "lea (%0, %1), %%"REG_a" \n\t" "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"// 0 1 2 3 4 5 6 7 8 9// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 "movq (%2), %%mm0 \n\t" // L0 "movq (%%"REG_a"), %%mm1 \n\t" // L2 PAVGB(%%mm1, %%mm0) // L0+L2 "movq (%0), %%mm2 \n\t" // L1 PAVGB(%%mm2, %%mm0) "movq %%mm0, (%0) \n\t" "movq (%%"REG_a", %1), %%mm0 \n\t" // L3 PAVGB(%%mm0, %%mm2) // L1+L3 PAVGB(%%mm1, %%mm2) // 2L2 + L1 + L3 "movq %%mm2, (%%"REG_a") \n\t" "movq (%%"REG_a", %1, 2), %%mm2 \n\t" // L4 PAVGB(%%mm2, %%mm1) // L2+L4 PAVGB(%%mm0, %%mm1) // 2L3 + L2 + L4 "movq %%mm1, (%%"REG_a", %1) \n\t" "movq (%0, %1, 4), %%mm1 \n\t" // L5 PAVGB(%%mm1, %%mm0) // L3+L5 PAVGB(%%mm2, %%mm0) // 2L4 + L3 + L5 "movq %%mm0, (%%"REG_a", %1, 2) \n\t" "movq (%%"REG_d"), %%mm0 \n\t" // L6 PAVGB(%%mm0, %%mm2) // L4+L6 PAVGB(%%mm1, %%mm2) // 2L5 + L4 + L6 "movq %%mm2, (%0, %1, 4) \n\t" "movq (%%"REG_d", %1), %%mm2 \n\t" // L7 PAVGB(%%mm2, %%mm1) // L5+L7 PAVGB(%%mm0, %%mm1) // 2L6 + L5 + L7 "movq %%mm1, (%%"REG_d") \n\t" "movq (%%"REG_d", %1, 2), %%mm1 \n\t" // L8 PAVGB(%%mm1, %%mm0) // L6+L8 PAVGB(%%mm2, %%mm0) // 2L7 + L6 + L8 "movq %%mm0, (%%"REG_d", %1) \n\t" "movq (%0, %1, 8), %%mm0 \n\t" // L9 PAVGB(%%mm0, %%mm2) // L7+L9 PAVGB(%%mm1, %%mm2) // 2L8 + L7 + L9 "movq %%mm2, (%%"REG_d", %1, 2) \n\t" "movq %%mm1, (%2) \n\t" : : "r" (src), "r" ((long)stride), "r" (tmp) : "%"REG_a, "%"REG_d );#else int a, b, c, x; src+= 4*stride; for(x=0; x<2; x++){ a= *(uint32_t*)&tmp[stride*0]; b= *(uint32_t*)&src[stride*0]; c= *(uint32_t*)&src[stride*1]; a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1); *(uint32_t*)&src[stride*0]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1); a= *(uint32_t*)&src[stride*2]; b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1); *(uint32_t*)&src[stride*1]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1); b= *(uint32_t*)&src[stride*3]; c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1); *(uint32_t*)&src[stride*2]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1); c= *(uint32_t*)&src[stride*4]; a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1); *(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1); a= *(uint32_t*)&src[stride*5]; b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1); *(uint32_t*)&src[stride*4]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1); b= *(uint32_t*)&src[stride*6]; c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1); *(uint32_t*)&src[stride*5]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1); c= *(uint32_t*)&src[stride*7]; a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1); *(uint32_t*)&src[stride*6]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1); a= *(uint32_t*)&src[stride*8]; b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1); *(uint32_t*)&src[stride*7]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1); *(uint32_t*)&tmp[stride*0]= c; src += 4; tmp += 4; }#endif}/** * Deinterlaces the given block by applying a median filter to every second line. * will be called for every 8x8 block and can read & write from line 4-15, * lines 0-3 have been passed through the deblock / dering filters allready, but can be read too * lines 4-12 will be read into the deblocking filter and should be deinterlaced */static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride){#ifdef HAVE_MMX src+= 4*stride;#ifdef HAVE_MMX2 asm volatile( "lea (%0, %1), %%"REG_a" \n\t" "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"// 0 1 2 3 4 5 6 7 8 9// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 "movq (%0), %%mm0 \n\t" // "movq (%%"REG_a", %1), %%mm2 \n\t" // "movq (%%"REG_a"), %%mm1 \n\t" // "movq %%mm0, %%mm3 \n\t" "pmaxub %%mm1, %%mm0 \n\t" // "pminub %%mm3, %%mm1 \n\t" // "pmaxub %%mm2, %%mm1 \n\t" // "pminub %%mm1, %%mm0 \n\t" "movq %%mm0, (%%"REG_a") \n\t" "movq (%0, %1, 4), %%mm0 \n\t" // "movq (%%"REG_a", %1, 2), %%mm1 \n\t" // "movq %%mm2, %%mm3 \n\t" "pmaxub %%mm1, %%mm2 \n\t" // "pminub %%mm3, %%mm1 \n\t" // "pmaxub %%mm0, %%mm1 \n\t" // "pminub %%mm1, %%mm2 \n\t" "movq %%mm2, (%%"REG_a", %1, 2) \n\t" "movq (%%"REG_d"), %%mm2 \n\t" // "movq (%%"REG_d", %1), %%mm1 \n\t" // "movq %%mm2, %%mm3 \n\t" "pmaxub %%mm0, %%mm2 \n\t" // "pminub %%mm3, %%mm0 \n\t" // "pmaxub %%mm1, %%mm0 \n\t" // "pminub %%mm0, %%mm2 \n\t" "movq %%mm2, (%%"REG_d") \n\t" "movq (%%"REG_d", %1, 2), %%mm2 \n\t" // "movq (%0, %1, 8), %%mm0 \n\t" // "movq %%mm2, %%mm3 \n\t" "pmaxub %%mm0, %%mm2 \n\t" // "pminub %%mm3, %%mm0 \n\t" // "pmaxub %%mm1, %%mm0 \n\t" // "pminub %%mm0, %%mm2 \n\t" "movq %%mm2, (%%"REG_d", %1, 2) \n\t" : : "r" (src), "r" ((long)stride) : "%"REG_a, "%"REG_d );#else // MMX without MMX2 asm volatile( "lea (%0, %1), %%"REG_a" \n\t" "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"// 0 1 2 3 4 5 6 7 8 9// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 "pxor %%mm7, %%mm7 \n\t"#define REAL_MEDIAN(a,b,c)\ "movq " #a ", %%mm0 \n\t"\ "movq " #b ", %%mm2 \n\t"\ "movq " #c ", %%mm1 \n\t"\ "movq %%mm0, %%mm3 \n\t"\ "movq %%mm1, %%mm4 \n\t"\ "movq %%mm2, %%mm5 \n\t"\ "psubusb %%mm1, %%mm3 \n\t"\ "psubusb %%mm2, %%mm4 \n\t"\ "psubusb %%mm0, %%mm5 \n\t"\ "pcmpeqb %%mm7, %%mm3 \n\t"\ "pcmpeqb %%mm7, %%mm4 \n\t"\ "pcmpeqb %%mm7, %%mm5 \n\t"\ "movq %%mm3, %%mm6 \n\t"\ "pxor %%mm4, %%mm3 \n\t"\ "pxor %%mm5, %%mm4 \n\t"\ "pxor %%mm6, %%mm5 \n\t"\ "por %%mm3, %%mm1 \n\t"\ "por %%mm4, %%mm2 \n\t"\ "por %%mm5, %%mm0 \n\t"\ "pand %%mm2, %%mm0 \n\t"\ "pand %%mm1, %%mm0 \n\t"\ "movq %%mm0, " #b " \n\t"#define MEDIAN(a,b,c) REAL_MEDIAN(a,b,c)MEDIAN((%0), (%%REGa), (%%REGa, %1))MEDIAN((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4))MEDIAN((%0, %1, 4), (%%REGd), (%%REGd, %1))MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8)) : : "r" (src), "r" ((long)stride) : "%"REG_a, "%"REG_d );#endif // MMX#else int x, y; src+= 4*stride; // FIXME - there should be a way to do a few columns in parallel like w/mmx for(x=0; x<8; x++) { uint8_t *colsrc = src; for (y=0; y<4; y++) { int a, b, c, d, e, f; a = colsrc[0 ]; b = colsrc[stride ]; c = colsrc[stride*2]; d = (a-b)>>31; e = (b-c)>>31; f = (c-a)>>31; colsrc[stride ] = (a|(d^f)) & (b|(d^e)) & (c|(e^f)); colsrc += stride*2; } src++; }#endif}#ifdef HAVE_MMX/** * transposes and shift the given 8x8 Block into dst1 and dst2 */static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride){ asm( "lea (%0, %1), %%"REG_a" \n\t"// 0 1 2 3 4 5 6 7 8 9// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 "movq (%0), %%mm0 \n\t" // 12345678 "movq (%%"REG_a"), %%mm1 \n\t" // abcdefgh "movq %%mm0, %%mm2 \n\t" // 12345678 "punpcklbw %%mm1, %%mm0 \n\t" // 1a2b3c4d "punpckhbw %%mm1, %%mm2 \n\
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -