speedy.c
来自「linux下的MPEG1」· C语言 代码 · 共 2,101 行 · 第 1/5 页
C
2,101 行
pxor_r2r( mm5, mm5 ); // Temporal noise. pxor_r2r( mm6, mm6 ); // Current spacial noise. // First loop to measure first four columns oldp = old; newp = new; for( i = 4; i; --i ) { movq_m2r( oldp[0], mm0 ); movq_m2r( oldp[os], mm1 ); pand_m2r( ymask, mm0 ); pand_m2r( ymask, mm1 ); oldp += (os*2); movq_m2r( newp[0], mm2 ); movq_m2r( newp[ns], mm3 ); pand_m2r( ymask, mm2 ); pand_m2r( ymask, mm3 ); newp += (ns*2); paddw_r2r( mm1, mm4 ); paddw_r2r( mm1, mm5 ); paddw_r2r( mm3, mm6 ); psubw_r2r( mm0, mm4 ); psubw_r2r( mm2, mm5 ); psubw_r2r( mm2, mm6 ); } movq_r2m( mm4, outdata[0] ); movq_r2m( mm5, outdata[16] ); movq_r2m( mm6, outdata[32] ); pxor_r2r( mm4, mm4 ); pxor_r2r( mm5, mm5 ); pxor_r2r( mm6, mm6 ); // Second loop for the last four columns oldp = old; newp = new; for( i = 4; i; --i ) { movq_m2r( oldp[8], mm0 ); movq_m2r( oldp[os+8], mm1 ); pand_m2r( ymask, mm0 ); pand_m2r( ymask, mm1 ); oldp += (os*2); movq_m2r( newp[8], mm2 ); movq_m2r( newp[ns+8], mm3 ); pand_m2r( ymask, mm2 ); pand_m2r( ymask, mm3 ); newp += (ns*2); paddw_r2r( mm1, mm4 ); paddw_r2r( mm1, mm5 ); paddw_r2r( mm3, mm6 ); psubw_r2r( mm0, mm4 ); psubw_r2r( mm2, mm5 ); psubw_r2r( mm2, mm6 ); } movq_r2m( mm4, outdata[8] ); movq_r2m( mm5, outdata[24] ); movq_r2m( mm6, outdata[40] ); m->p = m->t = m->s = 0; for (i=0; i<8; i++) { // FIXME: move abs() into the mmx code! m->p += ABS(out[i]); m->t += ABS(out[8+i]); m->s += ABS(out[16+i]); } emms();}#endifstatic void diff_packed422_block8x8_c( pulldown_metrics_t *m, uint8_t *old, uint8_t *new, int os, int ns ){ int x, y, e=0, o=0, s=0, p=0, t=0; uint8_t *oldp, *newp; m->s = m->p = m->t = 0; for (x = 8; x; x--) { oldp = old; old += 2; newp = new; new += 2; s = p = t = 0; for (y = 4; y; y--) { e += ABS(newp[0] - oldp[0]); o += ABS(newp[ns] - oldp[os]); s += newp[ns]-newp[0]; p += oldp[os]-oldp[0]; t += oldp[os]-newp[0]; oldp += os<<1; newp += ns<<1; } m->s += ABS(s); m->p += ABS(p); m->t += ABS(t); } m->e = e; m->o = o; m->d = e+o;}static void packed444_to_packed422_scanline_c( uint8_t *output, uint8_t *input, int width ){ width /= 2; while( width-- ) { output[ 0 ] = input[ 0 ]; output[ 1 ] = input[ 1 ]; output[ 2 ] = input[ 3 ]; output[ 3 ] = input[ 2 ]; output += 4; input += 6; }}static void packed422_to_packed444_scanline_c( uint8_t *output, uint8_t *input, int width ){ width /= 2; while( width-- ) { output[ 0 ] = input[ 0 ]; output[ 1 ] = input[ 1 ]; output[ 2 ] = input[ 3 ]; output[ 3 ] = input[ 2 ]; output[ 4 ] = input[ 1 ]; output[ 5 ] = input[ 3 ]; output += 6; input += 4; }}/** * For the middle pixels, the filter kernel is: * * [-1 3 -6 12 -24 80 80 -24 12 -6 3 -1] */static void packed422_to_packed444_rec601_scanline_c( uint8_t *dest, uint8_t *src, int width ){ int i; /* Process two input pixels at a time. Input is [Y'][Cb][Y'][Cr]. */ for( i = 0; i < width / 2; i++ ) { dest[ (i*6) + 0 ] = src[ (i*4) + 0 ]; dest[ (i*6) + 1 ] = src[ (i*4) + 1 ]; dest[ (i*6) + 2 ] = src[ (i*4) + 3 ]; dest[ (i*6) + 3 ] = src[ (i*4) + 2 ]; if( i > (5*2) && i < ((width/2) - (6*2)) ) { dest[ (i*6) + 4 ] = clip255( (( (80*(src[ (i*4) + 1 ] + src[ (i*4) + 5 ])) - (24*(src[ (i*4) - 3 ] + src[ (i*4) + 9 ])) + (12*(src[ (i*4) - 7 ] + src[ (i*4) + 13])) - ( 6*(src[ (i*4) - 11] + src[ (i*4) + 17])) + ( 3*(src[ (i*4) - 15] + src[ (i*4) + 21])) - ( (src[ (i*4) - 19] + src[ (i*4) + 25]))) + 64) >> 7 ); dest[ (i*6) + 5 ] = clip255( (( (80*(src[ (i*4) + 3 ] + src[ (i*4) + 7 ])) - (24*(src[ (i*4) - 1 ] + src[ (i*4) + 11])) + (12*(src[ (i*4) - 5 ] + src[ (i*4) + 15])) - ( 6*(src[ (i*4) - 9 ] + src[ (i*4) + 19])) + ( 3*(src[ (i*4) - 13] + src[ (i*4) + 23])) - ( (src[ (i*4) - 17] + src[ (i*4) + 27]))) + 64) >> 7 ); } else if( i < ((width/2) - 1) ) { dest[ (i*6) + 4 ] = (src[ (i*4) + 1 ] + src[ (i*4) + 5 ] + 1) >> 1; dest[ (i*6) + 5 ] = (src[ (i*4) + 3 ] + src[ (i*4) + 7 ] + 1) >> 1; } else { dest[ (i*6) + 4 ] = src[ (i*4) + 1 ]; dest[ (i*6) + 5 ] = src[ (i*4) + 3 ]; } }}#if defined(ARCH_X86) || defined(ARCH_X86_64)static void vfilter_chroma_121_packed422_scanline_mmx( uint8_t *output, int width, uint8_t *m, uint8_t *t, uint8_t *b ){ int i; const mmx_t ymask = { 0x00ff00ff00ff00ffULL }; const mmx_t cmask = { 0xff00ff00ff00ff00ULL }; // Get width in bytes. width *= 2; i = width / 8; width -= i * 8; movq_m2r( ymask, mm7 ); movq_m2r( cmask, mm6 ); while( i-- ) { movq_m2r( *t, mm0 ); movq_m2r( *b, mm1 ); movq_m2r( *m, mm2 ); movq_r2r ( mm2, mm3 ); pand_r2r ( mm7, mm3 ); pand_r2r ( mm6, mm0 ); pand_r2r ( mm6, mm1 ); pand_r2r ( mm6, mm2 ); psrlq_i2r( 8, mm0 ); psrlq_i2r( 8, mm1 ); psrlq_i2r( 7, mm2 ); paddw_r2r( mm0, mm2 ); paddw_r2r( mm1, mm2 ); psllw_i2r( 6, mm2 ); pand_r2r( mm6, mm2 ); por_r2r ( mm3, mm2 ); movq_r2m( mm2, *output ); output += 8; t += 8; b += 8; m += 8; } output++; t++; b++; m++; while( width-- ) { *output = (*t + *b + (*m << 1)) >> 2; output+=2; t+=2; b+=2; m+=2; } emms();}#endifstatic void vfilter_chroma_121_packed422_scanline_c( uint8_t *output, int width, uint8_t *m, uint8_t *t, uint8_t *b ){ output++; t++; b++; m++; while( width-- ) { *output = (*t + *b + (*m << 1)) >> 2; output +=2; t+=2; b+=2; m+=2; }}#if defined(ARCH_X86) || defined(ARCH_X86_64)static void vfilter_chroma_332_packed422_scanline_mmx( uint8_t *output, int width, uint8_t *m, uint8_t *t, uint8_t *b ){ int i; const mmx_t ymask = { 0x00ff00ff00ff00ffULL }; const mmx_t cmask = { 0xff00ff00ff00ff00ULL }; // Get width in bytes. width *= 2; i = width / 8; width -= i * 8; movq_m2r( ymask, mm7 ); movq_m2r( cmask, mm6 ); while( i-- ) { movq_m2r( *t, mm0 ); movq_m2r( *b, mm1 ); movq_m2r( *m, mm2 ); movq_r2r ( mm2, mm3 ); pand_r2r ( mm7, mm3 ); pand_r2r ( mm6, mm0 ); pand_r2r ( mm6, mm1 ); pand_r2r ( mm6, mm2 ); psrlq_i2r( 8, mm0 ); psrlq_i2r( 7, mm1 ); psrlq_i2r( 8, mm2 ); movq_r2r ( mm0, mm4 ); psllw_i2r( 1, mm4 ); paddw_r2r( mm4, mm0 ); movq_r2r ( mm2, mm4 ); psllw_i2r( 1, mm4 ); paddw_r2r( mm4, mm2 ); paddw_r2r( mm0, mm2 ); paddw_r2r( mm1, mm2 ); psllw_i2r( 5, mm2 ); pand_r2r( mm6, mm2 ); por_r2r ( mm3, mm2 ); movq_r2m( mm2, *output ); output += 8; t += 8; b += 8; m += 8; } output++; t++; b++; m++; while( width-- ) { *output = (3 * *t + 3 * *m + 2 * *b) >> 3; output +=2; t+=2; b+=2; m+=2; } emms();}#endifstatic void vfilter_chroma_332_packed422_scanline_c( uint8_t *output, int width, uint8_t *m, uint8_t *t, uint8_t *b ){ output++; t++; b++; m++; while( width-- ) { *output = (3 * *t + 3 * *m + 2 * *b) >> 3; output +=2; t+=2; b+=2; m+=2; }}#if defined(ARCH_X86) || defined(ARCH_X86_64)static void kill_chroma_packed422_inplace_scanline_mmx( uint8_t *data, int width ){ const mmx_t ymask = { 0x00ff00ff00ff00ffULL }; const mmx_t nullchroma = { 0x8000800080008000ULL }; movq_m2r( ymask, mm7 ); movq_m2r( nullchroma, mm6 ); for(; width > 4; width -= 4 ) { movq_m2r( *data, mm0 ); pand_r2r( mm7, mm0 ); paddb_r2r( mm6, mm0 ); movq_r2m( mm0, *data ); data += 8; } emms(); while( width-- ) { data[ 1 ] = 128; data += 2; }}#endifstatic void kill_chroma_packed422_inplace_scanline_c( uint8_t *data, int width ){ while( width-- ) { data[ 1 ] = 128; data += 2; }}#if defined(ARCH_X86) || defined(ARCH_X86_64)static void invert_colour_packed422_inplace_scanline_mmx( uint8_t *data, int width ){ const mmx_t allones = { 0xffffffffffffffffULL }; movq_m2r( allones, mm1 ); for(; width > 4; width -= 4 ) { movq_r2r( mm1, mm2 ); movq_m2r( *data, mm0 ); psubb_r2r( mm0, mm2 ); movq_r2m( mm2, *data ); data += 8; } emms(); width *= 2; while( width-- ) { *data = 255 - *data; data++; }}#endifstatic void invert_colour_packed422_inplace_scanline_c( uint8_t *data, int width ){ width *= 2; while( width-- ) { *data = 255 - *data; data++; }}/*// this duplicates alternate lines in alternate frames to highlight or mute// the effects of chroma crawl. it is not a solution or proper filter. it's// only for testing.void testing_packed422_inplace_scanline_c( uint8_t *data, int width, int scanline ){ volatile static int topbottom = 0; static uint8_t scanbuffer[2048]; if( scanline <= 1 ) { topbottom = scanline; memcpy(scanbuffer, data, width*2); } if ( scanline < 10 ) { printf("scanline: %d %d\n", scanline, topbottom); } if ( ((scanline-topbottom)/2)%2 && scanline > 1 ) { memcpy(data, scanbuffer, width*2); } else { memcpy(scanbuffer, data, width*2); }}*/static void mirror_packed422_inplace_scanline_c( uint8_t *data, int width ){ int x, tmp1, tmp2; int width2 = width*2; for( x = 0; x < width; x += 2 ) { tmp1 = data[ x ]; tmp2 = data[ x+1 ]; data[ x ] = data[ width2 - x ]; data[ x+1 ] = data[ width2 - x + 1 ]; data[ width2 - x ] = tmp1; data[ width2 - x + 1 ] = tmp2; }}static void halfmirror_packed422_inplace_scanline_c( uint8_t *data, int width ){ int x; for( x = 0; x < width; x += 2 ) { data[ width + x ] = data[ width - x ]; data[ width + x + 1 ] = data[ width - x + 1 ]; }}static void filter_luma_121_packed422_inplace_scanline_c( uint8_t *data, int width )
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?