📄 deinterlace.c
字号:
pxor_r2r( mm7, mm7 ); for( y = 0; y < 8; y += 2 ) { for( x = 0; x < 8; x +=4 ) { movd_m2r( src1[x], mm0 ); movd_r2m( mm0, dst[x] ); movd_m2r( src2[x], mm1 ); movd_m2r( src1[i_src1+x], mm2 ); punpcklbw_r2r( mm7, mm0 ); punpcklbw_r2r( mm7, mm1 ); punpcklbw_r2r( mm7, mm2 ); paddw_r2r( mm1, mm1 ); movq_r2r( mm1, mm3 ); paddw_r2r( mm3, mm3 ); paddw_r2r( mm2, mm0 ); paddw_r2r( mm3, mm1 ); paddw_m2r( m_4, mm1 ); paddw_r2r( mm1, mm0 ); psraw_i2r( 3, mm0 ); packuswb_r2r( mm7, mm0 ); movd_r2m( mm0, dst[i_dst+x] ); } dst += 2*i_dst; src1 += i_src1; src2 += i_src2; }}#endif/* For debug */static inline void XDeint8x8Set( uint8_t *dst, int i_dst, uint8_t v ){ int y; for( y = 0; y < 8; y++ ) memset( &dst[y*i_dst], v, 8 );}/* XDeint8x8FieldE: Stupid deinterlacing (1,0,1) for block that miss a * neighbour * (Use 8x9 pixels) * TODO: a better one for the inner part. */static inline void XDeint8x8FieldEC( uint8_t *dst, int i_dst, uint8_t *src, int i_src ){ int y, x; /* Interlaced */ for( y = 0; y < 8; y += 2 ) { memcpy( dst, src, 8 ); dst += i_dst; for( x = 0; x < 8; x++ ) dst[x] = (src[x] + src[2*i_src+x] ) >> 1; dst += 1*i_dst; src += 2*i_src; }}#ifdef CAN_COMPILE_MMXEXTstatic inline void XDeint8x8FieldEMMXEXT( uint8_t *dst, int i_dst, uint8_t *src, int i_src ){ int y; /* Interlaced */ for( y = 0; y < 8; y += 2 ) { movq_m2r( src[0], mm0 ); movq_r2m( mm0, dst[0] ); dst += i_dst; movq_m2r( src[2*i_src], mm1 ); pavgb_r2r( mm1, mm0 ); movq_r2m( mm0, dst[0] ); dst += 1*i_dst; src += 2*i_src; }}#endif/* XDeint8x8Field: Edge oriented interpolation * (Need -4 and +5 pixels H, +1 line) */static inline void XDeint8x8FieldC( uint8_t *dst, int i_dst, uint8_t *src, int i_src ){ int y, x; /* Interlaced */ for( y = 0; y < 8; y += 2 ) { memcpy( dst, src, 8 ); dst += i_dst; for( x = 0; x < 8; x++ ) { uint8_t *src2 = &src[2*i_src]; /* I use 8 pixels just to match the MMX version, but it's overkill * 5 would be enough (less isn't good) */ const int c0 = abs(src[x-4]-src2[x-2]) + abs(src[x-3]-src2[x-1]) + abs(src[x-2]-src2[x+0]) + abs(src[x-1]-src2[x+1]) + abs(src[x+0]-src2[x+2]) + abs(src[x+1]-src2[x+3]) + abs(src[x+2]-src2[x+4]) + abs(src[x+3]-src2[x+5]); const int c1 = abs(src[x-3]-src2[x-3]) + abs(src[x-2]-src2[x-2]) + abs(src[x-1]-src2[x-1]) + abs(src[x+0]-src2[x+0]) + abs(src[x+1]-src2[x+1]) + abs(src[x+2]-src2[x+2]) + abs(src[x+3]-src2[x+3]) + abs(src[x+4]-src2[x+4]); const int c2 = abs(src[x-2]-src2[x-4]) + abs(src[x-1]-src2[x-3]) + abs(src[x+0]-src2[x-2]) + abs(src[x+1]-src2[x-1]) + abs(src[x+2]-src2[x+0]) + abs(src[x+3]-src2[x+1]) + abs(src[x+4]-src2[x+2]) + abs(src[x+5]-src2[x+3]); if( c0 < c1 && c1 <= c2 ) dst[x] = (src[x-1] + src2[x+1]) >> 1; else if( c2 < c1 && c1 <= c0 ) dst[x] = (src[x+1] + src2[x-1]) >> 1; else dst[x] = (src[x+0] + src2[x+0]) >> 1; } dst += 1*i_dst; src += 2*i_src; }}#ifdef CAN_COMPILE_MMXEXTstatic inline void XDeint8x8FieldMMXEXT( uint8_t *dst, int i_dst, uint8_t *src, int i_src ){ int y, x; /* Interlaced */ for( y = 0; y < 8; y += 2 ) { memcpy( dst, src, 8 ); dst += i_dst; for( x = 0; x < 8; x++ ) { uint8_t *src2 = &src[2*i_src]; int32_t c0, c1, c2; movq_m2r( src[x-2], mm0 ); movq_m2r( src[x-3], mm1 ); movq_m2r( src[x-4], mm2 ); psadbw_m2r( src2[x-4], mm0 ); psadbw_m2r( src2[x-3], mm1 ); psadbw_m2r( src2[x-2], mm2 ); movd_r2m( mm0, c2 ); movd_r2m( mm1, c1 ); movd_r2m( mm2, c0 ); if( c0 < c1 && c1 <= c2 ) dst[x] = (src[x-1] + src2[x+1]) >> 1; else if( c2 < c1 && c1 <= c0 ) dst[x] = (src[x+1] + src2[x-1]) >> 1; else dst[x] = (src[x+0] + src2[x+0]) >> 1; } dst += 1*i_dst; src += 2*i_src; }}#endif#if 0static inline int XDeint8x8SsdC( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 ){ int y, x; int s = 0; for( y = 0; y < 8; y++ ) for( x = 0; x < 8; x++ ) s += ssd( pix1[y*i_pix1+x] - pix2[y*i_pix2+x] ); return s;}#ifdef CAN_COMPILE_MMXEXTstatic inline int XDeint8x8SsdMMXEXT( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 ){ int y; int32_t s; pxor_r2r( mm7, mm7 ); pxor_r2r( mm6, mm6 ); for( y = 0; y < 8; y++ ) { movq_m2r( pix1[0], mm0 ); movq_m2r( pix2[0], mm1 ); movq_r2r( mm0, mm2 ); movq_r2r( mm1, mm3 ); punpcklbw_r2r( mm7, mm0 ); punpckhbw_r2r( mm7, mm2 ); punpcklbw_r2r( mm7, mm1 ); punpckhbw_r2r( mm7, mm3 ); psubw_r2r( mm1, mm0 ); psubw_r2r( mm3, mm2 ); pmaddwd_r2r( mm0, mm0 ); pmaddwd_r2r( mm2, mm2 ); paddd_r2r( mm2, mm0 ); paddd_r2r( mm0, mm6 ); pix1 += i_pix1; pix2 += i_pix2; } movq_r2r( mm6, mm7 ); psrlq_i2r( 32, mm7 ); paddd_r2r( mm6, mm7 ); movd_r2m( mm7, s ); return s;}#endif#endif#if 0/* A little try with motion, but doesn't work better that pure intra (and slow) */#ifdef CAN_COMPILE_MMXEXT/* XDeintMC: * Bilinear MC QPel * TODO: mmx version (easier in sse2) */static inline void XDeintMC( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int mvx, int mvy, int i_width, int i_height ){ const int d4x = mvx&0x03; const int d4y = mvy&0x03; const int cA = (4-d4x)*(4-d4y); const int cB = d4x *(4-d4y); const int cC = (4-d4x)*d4y; const int cD = d4x *d4y; int y, x; uint8_t *srcp; src += (mvy >> 2) * i_src + (mvx >> 2); srcp = &src[i_src]; for( y = 0; y < i_height; y++ ) { for( x = 0; x < i_width; x++ ) { dst[x] = ( cA*src[x] + cB*src[x+1] + cC*srcp[x] + cD*srcp[x+1] + 8 ) >> 4; } dst += i_dst; src = srcp; srcp += i_src; }}static int XDeint8x4SadMMXEXT( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 ){ int32_t s; movq_m2r( pix1[0*i_pix1], mm0 ); movq_m2r( pix1[1*i_pix1], mm1 ); psadbw_m2r( pix2[0*i_pix2], mm0 ); psadbw_m2r( pix2[1*i_pix2], mm1 ); movq_m2r( pix1[2*i_pix1], mm2 ); movq_m2r( pix1[3*i_pix1], mm3 ); psadbw_m2r( pix2[2*i_pix2], mm2 ); psadbw_m2r( pix2[3*i_pix2], mm3 ); paddd_r2r( mm1, mm0 ); paddd_r2r( mm3, mm2 ); paddd_r2r( mm2, mm0 ); movd_r2m( mm0, s ); return s;}static inline int XDeint8x4TestQpel( uint8_t *src, int i_src, uint8_t *ref, int i_stride, int mx, int my, int xmax, int ymax ){ uint8_t buffer[8*4]; if( abs(mx) >= 4*xmax || abs(my) >= 4*ymax ) return 255*255*255; XDeintMC( buffer, 8, ref, i_stride, mx, my, 8, 4 ); return XDeint8x4SadMMXEXT( src, i_src, buffer, 8 );}static inline int XDeint8x4TestInt( uint8_t *src, int i_src, uint8_t *ref, int i_stride, int mx, int my, int xmax, int ymax ){ if( abs(mx) >= xmax || abs(my) >= ymax ) return 255*255*255; return XDeint8x4SadMMXEXT( src, i_src, &ref[my*i_stride+mx], i_stride );}static inline void XDeint8x8FieldMotion( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int *mpx, int *mpy, int xmax, int ymax ){ static const int dx[8] = { 0, 0, -1, 1, -1, -1, 1, 1 }; static const int dy[8] = {-1, 1, 0, 0, -1, 1, -1, 1 }; uint8_t *next = &src[i_src]; const int i_src2 = 2*i_src; int mvx, mvy; int mvs, s; int i_step; uint8_t *rec = &dst[i_dst]; /* We construct with intra method the missing field */ XDeint8x8FieldMMXEXT( dst, i_dst, src, i_src ); /* Now we will try to find a match with ME with the other field */ /* ME: A small/partial EPZS * We search only for small MV (with high motion intra will be perfect */ if( xmax > 4 ) xmax = 4; if( ymax > 4 ) ymax = 4; /* Init with NULL Mv */ mvx = mvy = 0; mvs = XDeint8x4SadMMXEXT( rec, i_src2, next, i_src2 ); /* Try predicted Mv */ if( (s=XDeint8x4TestInt( rec, i_src2, next, i_src2, *mpx, *mpy, xmax, ymax)) < mvs ) { mvs = s; mvx = *mpx; mvy = *mpy; } /* Search interger pel (small mv) */ for( i_step = 0; i_step < 4; i_step++ ) { int c = 4; int s; int i; for( i = 0; i < 4; i++ ) { s = XDeint8x4TestInt( rec, i_src2, next, i_src2, mvx+dx[i], mvy+dy[i], xmax, ymax ); if( s < mvs ) { mvs = s; c = i; } } if( c == 4 ) break; mvx += dx[c]; mvy += dy[c]; } *mpx = mvx; *mpy = mvy; mvx <<= 2; mvy <<= 2; if( mvs > 4 && mvs < 256 ) { /* Search Qpel */ /* XXX: for now only HPEL (too slow) */ for( i_step = 0; i_step < 4; i_step++ ) { int c = 8; int s; int i; for( i = 0; i < 8; i++ ) { s = XDeint8x4TestQpel( rec, i_src2, next, i_src2, mvx+dx[i], mvy+dy[i], xmax, ymax ); if( s < mvs ) { mvs = s; c = i; } } if( c == 8 ) break; mvx += dx[c]; mvy += dy[c]; } } if( mvs < 128 ) { uint8_t buffer[8*4]; XDeintMC( buffer, 8, next, i_src2, mvx, mvy, 8, 4 ); XDeint8x8MergeMMXEXT( dst, i_dst, src, 2*i_src, buffer, 8 ); //XDeint8x8Set( dst, i_dst, 0 ); }}#endif#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -