speedy.c

来自「linux下的MPEG1」· C语言 代码 · 共 2,101 行 · 第 1/5 页

C
2,101
字号
    pxor_r2r( mm5, mm5 );  // Temporal noise.    pxor_r2r( mm6, mm6 );  // Current spacial noise.    // First loop to measure first four columns    oldp = old; newp = new;    for( i = 4; i; --i ) {        movq_m2r( oldp[0], mm0 );        movq_m2r( oldp[os], mm1 );        pand_m2r( ymask, mm0 );        pand_m2r( ymask, mm1 );        oldp += (os*2);        movq_m2r( newp[0], mm2 );        movq_m2r( newp[ns], mm3 );        pand_m2r( ymask, mm2 );        pand_m2r( ymask, mm3 );        newp += (ns*2);        paddw_r2r( mm1, mm4 );        paddw_r2r( mm1, mm5 );        paddw_r2r( mm3, mm6 );        psubw_r2r( mm0, mm4 );        psubw_r2r( mm2, mm5 );        psubw_r2r( mm2, mm6 );    }    movq_r2m( mm4, outdata[0] );    movq_r2m( mm5, outdata[16] );    movq_r2m( mm6, outdata[32] );    pxor_r2r( mm4, mm4 );    pxor_r2r( mm5, mm5 );    pxor_r2r( mm6, mm6 );    // Second loop for the last four columns    oldp = old; newp = new;    for( i = 4; i; --i ) {        movq_m2r( oldp[8], mm0 );        movq_m2r( oldp[os+8], mm1 );        pand_m2r( ymask, mm0 );        pand_m2r( ymask, mm1 );        oldp += (os*2);        movq_m2r( newp[8], mm2 );        movq_m2r( newp[ns+8], mm3 );        pand_m2r( ymask, mm2 );        pand_m2r( ymask, mm3 );        newp += (ns*2);        paddw_r2r( mm1, mm4 );        paddw_r2r( mm1, mm5 );        paddw_r2r( mm3, mm6 );        psubw_r2r( mm0, mm4 );        psubw_r2r( mm2, mm5 );        psubw_r2r( mm2, mm6 );    }    movq_r2m( mm4, outdata[8] );    movq_r2m( mm5, outdata[24] );    movq_r2m( mm6, outdata[40] );    m->p = m->t = m->s = 0;    for (i=0; i<8; i++) {        // FIXME: move abs() into the mmx code!        m->p += ABS(out[i]);        m->t += ABS(out[8+i]);        m->s += ABS(out[16+i]);    }    emms();}#endifstatic void diff_packed422_block8x8_c( pulldown_metrics_t *m, uint8_t *old,                                       uint8_t *new, int os, int ns ){    int x, y, e=0, o=0, s=0, p=0, t=0;    uint8_t *oldp, *newp;    m->s = m->p = m->t = 0;    for (x = 8; x; x--) {        oldp = old; old += 2;        newp = new; new += 2;        s = p = t = 0;        for (y = 4; y; y--) {            e += ABS(newp[0] - oldp[0]);            o += ABS(newp[ns] - oldp[os]);            s += newp[ns]-newp[0];            p += oldp[os]-oldp[0];            t += oldp[os]-newp[0];            oldp += os<<1;            newp += ns<<1;        }        m->s += ABS(s);        m->p += ABS(p);        m->t += ABS(t);    }    m->e = e;    m->o = o;    m->d = e+o;}static void packed444_to_packed422_scanline_c( uint8_t *output, uint8_t *input, int width ){    width /= 2;    while( width-- ) {        output[ 0 ] = input[ 0 ];        output[ 1 ] = input[ 1 ];        output[ 2 ] = input[ 3 ];        output[ 3 ] = input[ 2 ];        output += 4;        input += 6;    }}static void packed422_to_packed444_scanline_c( uint8_t *output, uint8_t *input, int width ){    width /= 2;    while( width-- ) {        output[ 0 ] = input[ 0 ];        output[ 1 ] = input[ 1 ];        output[ 2 ] = input[ 3 ];        output[ 3 ] = input[ 2 ];        output[ 4 ] = input[ 1 ];        output[ 5 ] = input[ 3 ];        output += 6;        input += 4;    }}/** * For the middle pixels, the filter kernel is: * * [-1 3 -6 12 -24 80 80 -24 12 -6 3 -1] */static void packed422_to_packed444_rec601_scanline_c( uint8_t *dest, uint8_t *src, int width ){    int i;    /* Process two input pixels at a time.  Input is [Y'][Cb][Y'][Cr]. */    for( i = 0; i < width / 2; i++ ) {        dest[ (i*6) + 0 ] = src[ (i*4) + 0 ];        dest[ (i*6) + 1 ] = src[ (i*4) + 1 ];        dest[ (i*6) + 2 ] = src[ (i*4) + 3 ];        dest[ (i*6) + 3 ] = src[ (i*4) + 2 ];        if( i > (5*2) && i < ((width/2) - (6*2)) ) {            dest[ (i*6) + 4 ] = clip255( ((  (80*(src[ (i*4) + 1 ] + src[ (i*4) + 5 ]))                                           - (24*(src[ (i*4) - 3 ] + src[ (i*4) + 9 ]))                                           + (12*(src[ (i*4) - 7 ] + src[ (i*4) + 13]))                                           - ( 6*(src[ (i*4) - 11] + src[ (i*4) + 17]))                                           + ( 3*(src[ (i*4) - 15] + src[ (i*4) + 21]))                                           - (   (src[ (i*4) - 19] + src[ (i*4) + 25]))) + 64) >> 7 );            dest[ (i*6) + 5 ] = clip255( ((  (80*(src[ (i*4) + 3 ] + src[ (i*4) + 7 ]))                                           - (24*(src[ (i*4) - 1 ] + src[ (i*4) + 11]))                                           + (12*(src[ (i*4) - 5 ] + src[ (i*4) + 15]))                                           - ( 6*(src[ (i*4) - 9 ] + src[ (i*4) + 19]))                                           + ( 3*(src[ (i*4) - 13] + src[ (i*4) + 23]))                                           - (   (src[ (i*4) - 17] + src[ (i*4) + 27]))) + 64) >> 7 );        } else if( i < ((width/2) - 1) ) {            dest[ (i*6) + 4 ] = (src[ (i*4) + 1 ] + src[ (i*4) + 5 ] + 1) >> 1;            dest[ (i*6) + 5 ] = (src[ (i*4) + 3 ] + src[ (i*4) + 7 ] + 1) >> 1;        } else {            dest[ (i*6) + 4 ] = src[ (i*4) + 1 ];            dest[ (i*6) + 5 ] = src[ (i*4) + 3 ];        }    }}#if defined(ARCH_X86) || defined(ARCH_X86_64)static void vfilter_chroma_121_packed422_scanline_mmx( uint8_t *output, int width,                                                       uint8_t *m, uint8_t *t, uint8_t *b ){    int i;    const mmx_t ymask = { 0x00ff00ff00ff00ffULL };    const mmx_t cmask = { 0xff00ff00ff00ff00ULL };    // Get width in bytes.    width *= 2;    i = width / 8;    width -= i * 8;    movq_m2r( ymask, mm7 );    movq_m2r( cmask, mm6 );    while( i-- ) {        movq_m2r( *t, mm0 );        movq_m2r( *b, mm1 );        movq_m2r( *m, mm2 );        movq_r2r ( mm2, mm3 );        pand_r2r ( mm7, mm3 );        pand_r2r ( mm6, mm0 );        pand_r2r ( mm6, mm1 );        pand_r2r ( mm6, mm2 );        psrlq_i2r( 8, mm0 );        psrlq_i2r( 8, mm1 );        psrlq_i2r( 7, mm2 );        paddw_r2r( mm0, mm2 );        paddw_r2r( mm1, mm2 );        psllw_i2r( 6, mm2 );        pand_r2r( mm6, mm2 );         por_r2r ( mm3, mm2 );        movq_r2m( mm2, *output );        output += 8;        t += 8;        b += 8;        m += 8;    }    output++; t++; b++; m++;    while( width-- ) {        *output = (*t + *b + (*m << 1)) >> 2;        output+=2; t+=2; b+=2; m+=2;    }        emms();}#endifstatic void vfilter_chroma_121_packed422_scanline_c( uint8_t *output, int width,                                                     uint8_t *m, uint8_t *t, uint8_t *b ){    output++; t++; b++; m++;    while( width-- ) {        *output = (*t + *b + (*m << 1)) >> 2;        output +=2; t+=2; b+=2; m+=2;    }}#if defined(ARCH_X86) || defined(ARCH_X86_64)static void vfilter_chroma_332_packed422_scanline_mmx( uint8_t *output, int width,                                                       uint8_t *m, uint8_t *t, uint8_t *b ){    int i;    const mmx_t ymask = { 0x00ff00ff00ff00ffULL };    const mmx_t cmask = { 0xff00ff00ff00ff00ULL };    // Get width in bytes.    width *= 2;     i = width / 8;    width -= i * 8;    movq_m2r( ymask, mm7 );    movq_m2r( cmask, mm6 );    while( i-- ) {        movq_m2r( *t, mm0 );        movq_m2r( *b, mm1 );        movq_m2r( *m, mm2 );        movq_r2r ( mm2, mm3 );        pand_r2r ( mm7, mm3 );        pand_r2r ( mm6, mm0 );        pand_r2r ( mm6, mm1 );        pand_r2r ( mm6, mm2 );        psrlq_i2r( 8, mm0 );        psrlq_i2r( 7, mm1 );        psrlq_i2r( 8, mm2 );        movq_r2r ( mm0, mm4 );        psllw_i2r( 1, mm4 );        paddw_r2r( mm4, mm0 );        movq_r2r ( mm2, mm4 );        psllw_i2r( 1, mm4 );        paddw_r2r( mm4, mm2 );        paddw_r2r( mm0, mm2 );        paddw_r2r( mm1, mm2 );        psllw_i2r( 5, mm2 );        pand_r2r( mm6, mm2 );        por_r2r ( mm3, mm2 );        movq_r2m( mm2, *output );        output += 8;        t += 8;        b += 8;        m += 8;    }    output++; t++; b++; m++;    while( width-- ) {        *output = (3 * *t + 3 * *m + 2 * *b) >> 3;        output +=2; t+=2; b+=2; m+=2;    }    emms();}#endifstatic void vfilter_chroma_332_packed422_scanline_c( uint8_t *output, int width,                                                     uint8_t *m, uint8_t *t, uint8_t *b ){    output++; t++; b++; m++;    while( width-- ) {        *output = (3 * *t + 3 * *m + 2 * *b) >> 3;        output +=2; t+=2; b+=2; m+=2;    }}#if defined(ARCH_X86) || defined(ARCH_X86_64)static void kill_chroma_packed422_inplace_scanline_mmx( uint8_t *data, int width ){    const mmx_t ymask = { 0x00ff00ff00ff00ffULL };    const mmx_t nullchroma = { 0x8000800080008000ULL };    movq_m2r( ymask, mm7 );    movq_m2r( nullchroma, mm6 );    for(; width > 4; width -= 4 ) {        movq_m2r( *data, mm0 );        pand_r2r( mm7, mm0 );        paddb_r2r( mm6, mm0 );        movq_r2m( mm0, *data );        data += 8;    }    emms();    while( width-- ) {        data[ 1 ] = 128;        data += 2;    }}#endifstatic void kill_chroma_packed422_inplace_scanline_c( uint8_t *data, int width ){    while( width-- ) {        data[ 1 ] = 128;        data += 2;    }}#if defined(ARCH_X86) || defined(ARCH_X86_64)static void invert_colour_packed422_inplace_scanline_mmx( uint8_t *data, int width ){    const mmx_t allones = { 0xffffffffffffffffULL };    movq_m2r( allones, mm1 );    for(; width > 4; width -= 4 ) {        movq_r2r( mm1, mm2 );        movq_m2r( *data, mm0 );        psubb_r2r( mm0, mm2 );        movq_r2m( mm2, *data );        data += 8;    }    emms();    width *= 2;    while( width-- ) {        *data = 255 - *data;        data++;    }}#endifstatic void invert_colour_packed422_inplace_scanline_c( uint8_t *data, int width ){    width *= 2;    while( width-- ) {        *data = 255 - *data;        data++;    }}/*// this duplicates alternate lines in alternate frames to highlight or mute// the effects of chroma crawl. it is not a solution or proper filter. it's// only for testing.void testing_packed422_inplace_scanline_c( uint8_t *data, int width, int scanline ){    volatile static int topbottom = 0;    static uint8_t scanbuffer[2048];    if( scanline <= 1 ) {        topbottom = scanline;        memcpy(scanbuffer, data, width*2);    }    if ( scanline < 10 ) {        printf("scanline: %d %d\n", scanline, topbottom);    }    if ( ((scanline-topbottom)/2)%2 && scanline > 1 ) {        memcpy(data, scanbuffer, width*2);    } else {        memcpy(scanbuffer, data, width*2);    }}*/static void mirror_packed422_inplace_scanline_c( uint8_t *data, int width ){    int x, tmp1, tmp2;    int width2 = width*2;    for( x = 0; x < width; x += 2 ) {        tmp1 = data[ x   ];        tmp2 = data[ x+1 ];        data[ x   ] = data[ width2 - x     ];        data[ x+1 ] = data[ width2 - x + 1 ];        data[ width2 - x     ] = tmp1;        data[ width2 - x + 1 ] = tmp2;    }}static void halfmirror_packed422_inplace_scanline_c( uint8_t *data, int width ){    int x;    for( x = 0; x < width; x += 2 ) {        data[ width + x     ] = data[ width - x     ];        data[ width + x + 1 ] = data[ width - x + 1 ];    }}static void filter_luma_121_packed422_inplace_scanline_c( uint8_t *data, int width )

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?