📄 mc.c

📁 从服务器上下的x264编码器源码……希望对大家有帮助……这个是09年4月1日的
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
        dstv_8B  = vec_u16_to_u8( dstv_16B );

        vec_ste( vec_splat( (vec_u32_t) dstv_8A, 0 ), 0, (uint32_t*) dst );
        dst += i_dst_stride;
        vec_ste( vec_splat( (vec_u32_t) dstv_8B, 0 ), 0, (uint32_t*) dst );
        dst += i_dst_stride;
    }
}

/*
 * DO_PROCESS_W8( a ): accumulate tap number `a` for two output rows.
 *
 * Widens the byte vectors src<a>v_8A / src<a>v_8B to 16-bit lanes and adds
 * src * coeff<a>v into the running sums dstv_16A / dstv_16B with vec_mladd.
 * The macro relies on those names existing in the caller's scope and has no
 * trailing semicolon, so the call site supplies it.
 */
#define DO_PROCESS_W8( a )  \
    src##a##v_16A = vec_u8_to_u16( src##a##v_8A );  \
    src##a##v_16B = vec_u8_to_u16( src##a##v_8B );  \
    dstv_16A = vec_mladd( src##a##v_16A, coeff##a##v, dstv_16A );   \
    dstv_16B = vec_mladd( src##a##v_16B, coeff##a##v, dstv_16B )

/*
 * Chroma motion compensation for the width-8 case (see mc_chroma_altivec).
 *
 * mvx / mvy carry the motion vector: the bits above the low three select the
 * integer source offset, the low three bits (d8x, d8y) select the fractional
 * position. Each output sample is a weighted sum of four source samples
 * (current row, current row shifted by one byte, next row, next row shifted
 * by one byte) using the weights coeff[0..3], which always add up to 64;
 * 32 is added before the right shift by 6 for rounding.
 *
 * Two rows are produced per loop iteration (y advances by 2), so the loop
 * writes an even number of rows.
 *
 * dst, i_dst_stride : destination pointer and its row stride
 * src, i_src_stride : reference pointer and its row stride
 * mvx, mvy          : motion vector (3 fractional bits)
 * i_height          : number of rows to produce
 */
static void mc_chroma_altivec_8xh( uint8_t *dst, int i_dst_stride,
                                   uint8_t *src, int i_src_stride,
                                   int mvx, int mvy,
                                   int i_height )
{
    uint8_t *srcp;
    int y;
    int d8x = mvx & 0x07;
    int d8y = mvy & 0x07;

    /* Bilinear weights for the four neighbouring samples. */
    DECLARE_ALIGNED_16( uint16_t coeff[4] );
    coeff[0] = (8-d8x)*(8-d8y);
    coeff[1] = d8x    *(8-d8y);
    coeff[2] = (8-d8x)*d8y;
    coeff[3] = d8x    *d8y;

    /* Move to the integer sample position; srcp tracks the row below it. */
    src  += (mvy >> 3) * i_src_stride + (mvx >> 3);
    srcp  = &src[i_src_stride];

    LOAD_ZERO;
    PREP_LOAD;
    PREP_LOAD_SRC( src );
    PREP_STORE8;
    vec_u16_t   coeff0v, coeff1v, coeff2v, coeff3v;
    vec_u8_t    src0v_8A, src1v_8A, src2v_8A, src3v_8A, dstv_8A;
    vec_u8_t    src0v_8B, src1v_8B, src2v_8B, src3v_8B, dstv_8B;
    vec_u16_t   src0v_16A, src1v_16A, src2v_16A, src3v_16A, dstv_16A;
    vec_u16_t   src0v_16B, src1v_16B, src2v_16B, src3v_16B, dstv_16B;
    vec_u16_t   shiftv, k32v;

    /* NOTE(review): vec_ld fetches a whole 16-byte vector while coeff[] only
     * declares 4 x uint16_t (8 bytes); only lanes 0..3 are splatted below.
     * Confirm the over-read is acceptable for DECLARE_ALIGNED_16 storage. */
    coeff0v = vec_ld( 0, coeff );
    /* coeff0v is splatted last because it is the source of the other splats. */
    coeff3v = vec_splat( coeff0v, 3 );
    coeff2v = vec_splat( coeff0v, 2 );
    coeff1v = vec_splat( coeff0v, 1 );
    coeff0v = vec_splat( coeff0v, 0 );
    k32v    = vec_sl( vec_splat_u16( 1 ), vec_splat_u16( 5 ) ); /* 1 << 5 = 32 */
    shiftv  = vec_splat_u16( 6 );

    /* Prime the "previous row" registers with the first source row. */
    VEC_LOAD( src, src2v_8B, 9, vec_u8_t, src );
    src3v_8B = vec_sld( src2v_8B, src2v_8B, 1 );

    for( y = 0; y < i_height; y+=2 )
    {
        /* Row A uses the last row loaded by the previous iteration as its top row. */
        src0v_8A = src2v_8B;
        src1v_8A = src3v_8B;

        VEC_LOAD_G( srcp, src2v_8A, 9, vec_u8_t );
        srcp += i_src_stride;
        VEC_LOAD_G( srcp, src2v_8B, 9, vec_u8_t );
        srcp += i_src_stride;

        /* One-byte shifted copies provide the right-hand neighbours. */
        src3v_8A = vec_sld( src2v_8A, src2v_8A, 1 );
        src3v_8B = vec_sld( src2v_8B, src2v_8B, 1 );

        /* Row B's top row is row A's bottom row. */
        src0v_8B = src2v_8A;
        src1v_8B = src3v_8A;

        /* Start both accumulators at the rounding constant. */
        dstv_16A = dstv_16B = k32v;

        DO_PROCESS_W8( 0 );
        DO_PROCESS_W8( 1 );
        DO_PROCESS_W8( 2 );
        DO_PROCESS_W8( 3 );

        dstv_16A = vec_sr( dstv_16A, shiftv );
        dstv_16B = vec_sr( dstv_16B, shiftv );
        dstv_8A  = vec_u16_to_u8( dstv_16A );
        dstv_8B  = vec_u16_to_u8( dstv_16B );

        VEC_STORE8( dstv_8A, dst );
        dst += i_dst_stride;
        VEC_STORE8( dstv_8B, dst );
        dst += i_dst_stride;
    }
}

/*
 * Chroma motion compensation entry point: dispatches on i_width.
 * Width 8 and width 4 go to the AltiVec kernels; every other width falls
 * through to mc_chroma_2xh. All remaining arguments are forwarded unchanged.
 */
static void mc_chroma_altivec( uint8_t *dst, int i_dst_stride,
                               uint8_t *src, int i_src_stride,
                               int mvx, int mvy,
                               int i_width, int i_height )
{
    if( i_width == 8 )
    {
        mc_chroma_altivec_8xh( dst, i_dst_stride, src, i_src_stride,
                               mvx, mvy, i_height );
    }
    else if( i_width == 4 )
    {
        mc_chroma_altivec_4xh( dst, i_dst_stride, src, i_src_stride,
                               mvx, mvy, i_height );
    }
    else
    {
        mc_chroma_2xh( dst, i_dst_stride, src, i_src_stride,
                       mvx, mvy, i_height );
    }
}

/*
 * HPEL_FILTER_1: six-tap filter on 16-bit lanes, result left in t1v.
 *
 * With a = t1v+t6v, b = t2v+t5v, c = t3v+t4v the macro computes
 * a - 5*b + 20*c without normalisation. t1v, t2v and t3v are overwritten.
 * Requires twov and fourv (shift counts) in the caller's scope.
 */
#define HPEL_FILTER_1( t1v, t2v, t3v, t4v, t5v, t6v ) \
{                                                     \
    t1v = vec_add( t1v, t6v );                        \
    t2v = vec_add( t2v, t5v );                        \
    t3v = vec_add( t3v, t4v );                        \
                                                      \
    t1v = vec_sub( t1v, t2v );   /* (a-b) */          \
    t2v = vec_sub( t2v, t3v );   /* (b-c) */          \
    t2v = vec_sl(  t2v, twov );  /* (b-c)*4 */        \
    t1v = vec_sub( t1v, t2v );   /* a-5*b+4*c */      \
    t3v = vec_sl(  t3v, fourv ); /* 16*c */           \
    t1v = vec_add( t1v, t3v );   /* a-5*b+20*c */     \
}

/*
 * HPEL_FILTER_2: same six-tap filter as HPEL_FILTER_1 but already divided
 * by 16, evaluated with interleaved arithmetic right shifts instead of a
 * single wide sum. Result is left in t1v; t1v, t2v and t3v are overwritten.
 * Requires twov in the caller's scope.
 */
#define HPEL_FILTER_2( t1v, t2v, t3v, t4v, t5v, t6v ) \
{                                                     \
    t1v = vec_add( t1v, t6v );                        \
    t2v = vec_add( t2v, t5v );                        \
    t3v = vec_add( t3v, t4v );                        \
                                                      \
    t1v = vec_sub( t1v, t2v );  /* (a-b) */           \
    t1v = vec_sra( t1v, twov ); /* (a-b)/4 */         \
    t1v = vec_sub( t1v, t2v );  /* (a-b)/4-b */       \
    t1v = vec_add( t1v, t3v );  /* (a-b)/4-b+c */     \
    t1v = vec_sra( t1v, twov ); /* ((a-b)/4-b+c)/4 */ \
    t1v = vec_add( t1v, t3v );  /* ((a-b)/4-b+c)/4+c = (a-5*b+20*c)/16 */ \
}

/*
 * HPEL_FILTER_HORIZONTAL: horizontal half-pel filter for 16 samples of row y
 * starting at column x; writes dsth[x+i_stride*y].
 *
 * Loads 16 bytes starting two samples left of x plus the following 16 bytes,
 * builds the six tap inputs with vec_sld (byte shifts 0..5), runs
 * HPEL_FILTER_1 on each widened half, then adds 16, shifts right by 5 and
 * packs with unsigned saturation. src6v is rewritten last because it is an
 * input of all the other vec_sld calls.
 * Uses x, y, src, dsth, i_stride and the temp/dest/constant vectors of the
 * enclosing function.
 */
#define HPEL_FILTER_HORIZONTAL()                             \
{                                                            \
    VEC_LOAD_G( &src[x- 2+i_stride*y], src1v, 16, vec_u8_t); \
    VEC_LOAD_G( &src[x+14+i_stride*y], src6v, 16, vec_u8_t); \
                                                             \
    src2v = vec_sld( src1v, src6v,  1 );                     \
    src3v = vec_sld( src1v, src6v,  2 );                     \
    src4v = vec_sld( src1v, src6v,  3 );                     \
    src5v = vec_sld( src1v, src6v,  4 );                     \
    src6v = vec_sld( src1v, src6v,  5 );                     \
                                                             \
    temp1v = vec_u8_to_s16_h( src1v );                       \
    temp2v = vec_u8_to_s16_h( src2v );                       \
    temp3v = vec_u8_to_s16_h( src3v );                       \
    temp4v = vec_u8_to_s16_h( src4v );                       \
    temp5v = vec_u8_to_s16_h( src5v );                       \
    temp6v = vec_u8_to_s16_h( src6v );                       \
                                                             \
    HPEL_FILTER_1( temp1v, temp2v, temp3v,                   \
                   temp4v, temp5v, temp6v );                 \
                                                             \
    dest1v = vec_add( temp1v, sixteenv );                    \
    dest1v = vec_sra( dest1v, fivev );                       \
                                                             \
    temp1v = vec_u8_to_s16_l( src1v );                       \
    temp2v = vec_u8_to_s16_l( src2v );                       \
    temp3v = vec_u8_to_s16_l( src3v );                       \
    temp4v = vec_u8_to_s16_l( src4v );                       \
    temp5v = vec_u8_to_s16_l( src5v );                       \
    temp6v = vec_u8_to_s16_l( src6v );                       \
                                                             \
    HPEL_FILTER_1( temp1v, temp2v, temp3v,                   \
                   temp4v, temp5v, temp6v );                 \
                                                             \
    dest2v = vec_add( temp1v, sixteenv );                    \
    dest2v = vec_sra( dest2v, fivev );                       \
                                                             \
    destv = vec_packsu( dest1v, dest2v );                    \
                                                             \
    VEC_STORE16( destv, &dsth[x+i_stride*y], dsth );         \
}

/*
 * HPEL_FILTER_VERTICAL: vertical half-pel filter for 16 samples of row y
 * starting at column x; writes dstv[x+i_stride*y].
 *
 * Loads rows y-2 .. y+3, runs HPEL_FILTER_1 on each widened half, then adds
 * 16, shifts right by 5 and packs with unsigned saturation.
 * On exit temp1v and temp4v still hold the un-normalised 16-bit filter sums
 * of the two halves; the enclosing function feeds them to the central
 * filter, so they must not be clobbered in between.
 *
 * NOTE(review): the store passes dsth as the third VEC_STORE16 argument
 * although the target is dstv, and only PREP_STORE16_DST( dsth ) is issued
 * by the caller - confirm the macro's use of that argument is valid here.
 */
#define HPEL_FILTER_VERTICAL()                                    \
{                                                                 \
    VEC_LOAD( &src[x+i_stride*(y-2)], src1v, 16, vec_u8_t, src ); \
    VEC_LOAD( &src[x+i_stride*(y-1)], src2v, 16, vec_u8_t, src ); \
    VEC_LOAD( &src[x+i_stride*(y-0)], src3v, 16, vec_u8_t, src ); \
    VEC_LOAD( &src[x+i_stride*(y+1)], src4v, 16, vec_u8_t, src ); \
    VEC_LOAD( &src[x+i_stride*(y+2)], src5v, 16, vec_u8_t, src ); \
    VEC_LOAD( &src[x+i_stride*(y+3)], src6v, 16, vec_u8_t, src ); \
                                                                  \
    temp1v = vec_u8_to_s16_h( src1v );                            \
    temp2v = vec_u8_to_s16_h( src2v );                            \
    temp3v = vec_u8_to_s16_h( src3v );                            \
    temp4v = vec_u8_to_s16_h( src4v );                            \
    temp5v = vec_u8_to_s16_h( src5v );                            \
    temp6v = vec_u8_to_s16_h( src6v );                            \
                                                                  \
    HPEL_FILTER_1( temp1v, temp2v, temp3v,                        \
                   temp4v, temp5v, temp6v );                      \
                                                                  \
    dest1v = vec_add( temp1v, sixteenv );                         \
    dest1v = vec_sra( dest1v, fivev );                            \
                                                                  \
    temp4v = vec_u8_to_s16_l( src1v );                            \
    temp5v = vec_u8_to_s16_l( src2v );                            \
    temp6v = vec_u8_to_s16_l( src3v );                            \
    temp7v = vec_u8_to_s16_l( src4v );                            \
    temp8v = vec_u8_to_s16_l( src5v );                            \
    temp9v = vec_u8_to_s16_l( src6v );                            \
                                                                  \
    HPEL_FILTER_1( temp4v, temp5v, temp6v,                        \
                   temp7v, temp8v, temp9v );                      \
                                                                  \
    dest2v = vec_add( temp4v, sixteenv );                         \
    dest2v = vec_sra( dest2v, fivev );                            \
                                                                  \
    destv = vec_packsu( dest1v, dest2v );                         \
                                                                  \
    VEC_STORE16( destv, &dstv[x+i_stride*y], dsth );              \
}

/*
 * HPEL_FILTER_CENTRAL: horizontal six-tap pass over the 16-bit vertical
 * sums held in tempav..tempdv; writes 16 samples to dstc[x-16+i_stride*y],
 * i.e. the block one step behind the current x.
 *
 * The first half is centred on tempbv, the second on tempcv; neighbours are
 * taken from the adjacent vectors with vec_sld (byte offsets 12/14 and
 * 2/4/6, i.e. -2/-1 and +1/+2/+3 16-bit lanes). Each half goes through
 * HPEL_FILTER_2, then 32 is added, the sum is shifted right by 6 and the
 * halves are packed with unsigned saturation.
 * Clobbers temp1v..temp6v.
 *
 * NOTE(review): as in HPEL_FILTER_VERTICAL, dsth is passed as the third
 * VEC_STORE16 argument although the target is dstc - confirm.
 */
#define HPEL_FILTER_CENTRAL()                           \
{                                                       \
    temp1v = vec_sld( tempav, tempbv, 12 );             \
    temp2v = vec_sld( tempav, tempbv, 14 );             \
    temp3v = tempbv;                                    \
    temp4v = vec_sld( tempbv, tempcv,  2 );             \
    temp5v = vec_sld( tempbv, tempcv,  4 );             \
    temp6v = vec_sld( tempbv, tempcv,  6 );             \
                                                        \
    HPEL_FILTER_2( temp1v, temp2v, temp3v,              \
                   temp4v, temp5v, temp6v );            \
                                                        \
    dest1v = vec_add( temp1v, thirtytwov );             \
    dest1v = vec_sra( dest1v, sixv );                   \
                                                        \
    temp1v = vec_sld( tempbv, tempcv, 12 );             \
    temp2v = vec_sld( tempbv, tempcv, 14 );             \
    temp3v = tempcv;                                    \
    temp4v = vec_sld( tempcv, tempdv,  2 );             \
    temp5v = vec_sld( tempcv, tempdv,  4 );             \
    temp6v = vec_sld( tempcv, tempdv,  6 );             \
                                                        \
    HPEL_FILTER_2( temp1v, temp2v, temp3v,              \
                   temp4v, temp5v, temp6v );            \
                                                        \
    dest2v = vec_add( temp1v, thirtytwov );             \
    dest2v = vec_sra( dest2v, sixv );                   \
                                                        \
    destv = vec_packsu( dest1v, dest2v );               \
                                                        \
    VEC_STORE16( destv, &dstc[x-16+i_stride*y], dsth ); \
}

/*
 * Half-pel interpolation of a plane: for every row, writes the horizontally
 * filtered samples to dsth, the vertically filtered samples to dstv and the
 * samples filtered in both directions to dstc.
 *
 * Columns are processed in steps of 16. The central filter needs the
 * vertical sums of the neighbouring blocks, so it runs one block behind the
 * horizontal/vertical filters and a final partial vertical pass after the
 * column loop supplies the right-hand neighbour for the last block.
 *
 * dsth, dstv, dstc : output planes (horizontal, vertical, central)
 * src              : source plane; rows y-2 .. y+3 and columns x-2 onward
 *                    are read, so the caller must provide that margin
 * i_stride         : row stride shared by src and all three outputs
 * i_width/i_height : area to filter
 * buf              : not referenced in this implementation
 */
void x264_hpel_filter_altivec( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
                               int i_stride, int i_width, int i_height, int16_t *buf )
{
    int x, y;

    vec_u8_t destv;
    vec_u8_t src1v, src2v, src3v, src4v, src5v, src6v;
    vec_s16_t dest1v, dest2v;
    vec_s16_t temp1v, temp2v, temp3v, temp4v, temp5v, temp6v, temp7v, temp8v, temp9v;
    vec_s16_t tempav, tempbv, tempcv, tempdv, tempev;

    PREP_LOAD;
    PREP_LOAD_SRC( src);
    PREP_STORE16;
    PREP_STORE16_DST( dsth );
    LOAD_ZERO;

    vec_u16_t twov, fourv, fivev, sixv;
    vec_s16_t sixteenv, thirtytwov;
    vec_u16_u temp_u;

    /* Build the splatted constants by writing lane 0 of a union and
     * broadcasting it: shift counts 2/4/5/6 and rounding terms 16/32. */
    temp_u.s[0]=2;
    twov = vec_splat( temp_u.v, 0 );
    temp_u.s[0]=4;
    fourv = vec_splat( temp_u.v, 0 );
    temp_u.s[0]=5;
    fivev = vec_splat( temp_u.v, 0 );
    temp_u.s[0]=6;
    sixv = vec_splat( temp_u.v, 0 );
    temp_u.s[0]=16;
    sixteenv = (vec_s16_t)vec_splat( temp_u.v, 0 );
    temp_u.s[0]=32;
    thirtytwov = (vec_s16_t)vec_splat( temp_u.v, 0 );

    for( y = 0; y < i_height; y++ )
    {
        x = 0;

        /* horizontal_filter */
        HPEL_FILTER_HORIZONTAL();

        /* vertical_filter */
        HPEL_FILTER_VERTICAL();

        /* central_filter */
        /* No central output yet for the first block: only the sliding window
         * of vertical sums is primed.
         * NOTE(review): the tempav/tempbv copies below look dead - both are
         * reassigned before HPEL_FILTER_CENTRAL reads them, and on the first
         * row tempcv/tempdv have not been assigned yet. */
        tempav = tempcv;
        tempbv = tempdv;
        tempcv = vec_splat( temp1v, 0 ); /* first only */
        tempdv = temp1v;
        tempev = temp4v;

        for( x = 16; x < i_width; x+=16 )
        {
            /* horizontal_filter */
            HPEL_FILTER_HORIZONTAL();

            /* vertical_filter */
            HPEL_FILTER_VERTICAL();

            /* central_filter */
            /* Slide the window; temp1v/temp4v must be captured before
             * HPEL_FILTER_CENTRAL overwrites them. */
            tempav = tempcv;
            tempbv = tempdv;
            tempcv = tempev;
            tempdv = temp1v;
            tempev = temp4v;

            HPEL_FILTER_CENTRAL();
        }

        /* Partial vertical filter */
        /* Only the first half of the block past the last column is needed,
         * as the right-hand neighbour of the final central block. */
        VEC_LOAD_PARTIAL( &src[x+i_stride*(y-2)], src1v, 16, vec_u8_t, src );
        VEC_LOAD_PARTIAL( &src[x+i_stride*(y-1)], src2v, 16, vec_u8_t, src );
        VEC_LOAD_PARTIAL( &src[x+i_stride*(y-0)], src3v, 16, vec_u8_t, src );
        VEC_LOAD_PARTIAL( &src[x+i_stride*(y+1)], src4v, 16, vec_u8_t, src );
        VEC_LOAD_PARTIAL( &src[x+i_stride*(y+2)], src5v, 16, vec_u8_t, src );
        VEC_LOAD_PARTIAL( &src[x+i_stride*(y+3)], src6v, 16, vec_u8_t, src );

        temp1v = vec_u8_to_s16_h( src1v );
        temp2v = vec_u8_to_s16_h( src2v );
        temp3v = vec_u8_to_s16_h( src3v );
        temp4v = vec_u8_to_s16_h( src4v );
        temp5v = vec_u8_to_s16_h( src5v );
        temp6v = vec_u8_to_s16_h( src6v );

        HPEL_FILTER_1( temp1v, temp2v, temp3v,
                       temp4v, temp5v, temp6v );

        /* central_filter */
        tempav = tempcv;
        tempbv = tempdv;
        tempcv = tempev;
        tempdv = temp1v;
        /* tempev is not used */

        HPEL_FILTER_CENTRAL();
    }
}

/*
 * Installs the AltiVec implementations into the motion-compensation
 * function table pf: luma MC, reference fetch, chroma MC, the 16x16 copy
 * routines (unaligned and aligned) and the half-pel filter.
 */
void x264_mc_altivec_init( x264_mc_functions_t *pf )
{
    pf->mc_luma   = mc_luma_altivec;
    pf->get_ref   = get_ref_altivec;
    pf->mc_chroma = mc_chroma_altivec;

    pf->copy_16x16_unaligned = x264_mc_copy_w16_altivec;
    pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_altivec;

    pf->hpel_filter = x264_hpel_filter_altivec;
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -