📄 h264.c

📁 This is the source release kit for the following system configuration(s): - AMD Alchemy(TM) DBAu1200(
💻 C
📖 第 1 页 / 共 5 页
字号:
    int16_t     (*mvd_table[2])[2];
    DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
    uint8_t     *direct_table;
    uint8_t     direct_cache[5*8];

    uint8_t zigzag_scan[16];
    uint8_t zigzag_scan8x8[64];
    uint8_t zigzag_scan8x8_cavlc[64];
    uint8_t field_scan[16];
    uint8_t field_scan8x8[64];
    uint8_t field_scan8x8_cavlc[64];
    const uint8_t *zigzag_scan_q0;
    const uint8_t *zigzag_scan8x8_q0;
    const uint8_t *zigzag_scan8x8_cavlc_q0;
    const uint8_t *field_scan_q0;
    const uint8_t *field_scan8x8_q0;
    const uint8_t *field_scan8x8_cavlc_q0;

    int x264_build;
} H264Context;

/*
 * 24 offsets into an 8-entries-per-row layout, each written as row*8 + column.
 * NOTE(review): the 16 + 2*4 size suggests 16 luma blocks followed by two
 * groups of 4 chroma blocks - confirm against the users of this table.
 */
const uint8_t scan8[16 + 2*4] =
{
    /* entries 0..15: columns 4..7 of rows 1..4 */
    1*8+4, 1*8+5, 2*8+4, 2*8+5,
    1*8+6, 1*8+7, 2*8+6, 2*8+7,
    3*8+4, 3*8+5, 4*8+4, 4*8+5,
    3*8+6, 3*8+7, 4*8+6, 4*8+7,
    /* entries 16..19: columns 1..2 of rows 1..2 */
    1*8+1, 1*8+2, 2*8+1, 2*8+2,
    /* entries 20..23: columns 1..2 of rows 4..5 */
    4*8+1, 4*8+2, 5*8+1, 5*8+2,
};

/*
 * Zigzag scan order for a 4x4 block: entry i is the raster offset
 * (row*4 + column) of the i-th coefficient in scan order.
 */
const uint8_t zigzag_scan[16] =
{
    0*4+0, 0*4+1, 1*4+0, 2*4+0,
    1*4+1, 0*4+2, 0*4+3, 1*4+2,
    2*4+1, 3*4+0, 3*4+1, 2*4+2,
    1*4+3, 2*4+3, 3*4+2, 3*4+3,
};

/*
 * Field scan order for a 4x4 block: entry i is the raster offset
 * (row*4 + column) of the i-th coefficient in scan order.
 */
const uint8_t field_scan[16] =
{
    0*4+0, 1*4+0, 0*4+1, 2*4+0,
    3*4+0, 1*4+1, 2*4+1, 3*4+1,
    0*4+2, 1*4+2, 2*4+2, 3*4+2,
    0*4+3, 1*4+3, 2*4+3, 3*4+3,
};

/*
 * Scan table for the luma DC coefficients (zigzag variant).
 * Every entry is (a + b*4)*16, i.e. a position in a 4x4 grid scaled by 16.
 * NOTE(review): the factor 16 presumably steps over the 16 coefficients of
 * one 4x4 block - confirm against the code that indexes with this table.
 */
const uint8_t luma_dc_zigzag_scan[16] =
{
    (0+0*4)*16, (1+0*4)*16, (2+0*4)*16, (0+2*4)*16,
    (3+0*4)*16, (0+1*4)*16, (1+1*4)*16, (2+1*4)*16,
    (1+2*4)*16, (2+2*4)*16, (3+2*4)*16, (0+3*4)*16,
    (3+1*4)*16, (1+3*4)*16, (2+3*4)*16, (3+3*4)*16,
};

/*
 * Scan table for the luma DC coefficients (field variant).
 * Every entry is (a + b*4)*16, i.e. a position in a 4x4 grid scaled by 16.
 * NOTE(review): the factor 16 presumably steps over the 16 coefficients of
 * one 4x4 block - confirm against the code that indexes with this table.
 */
const uint8_t luma_dc_field_scan[16] =
{
    (0+0*4)*16, (2+0*4)*16, (1+0*4)*16, (0+2*4)*16,
    (2+2*4)*16, (3+0*4)*16, (1+2*4)*16, (3+2*4)*16,
    (0+1*4)*16, (2+1*4)*16, (0+3*4)*16, (2+3*4)*16,
    (1+1*4)*16, (3+1*4)*16, (1+3*4)*16, (3+3*4)*16,
};

/*
 * Scan table for the four chroma DC coefficients: positions 0..3 of a 2x2
 * grid in raster order, each scaled by 16.
 * FIXME: marker carried over from the original table; the concern behind it
 * is not stated anywhere in the visible code.
 */
const uint8_t chroma_dc_scan[4] =
{
    0*16, 1*16, 2*16, 3*16,
};

/*
 * Zigzag scan order for an 8x8 block: entry i is the raster offset, written
 * as x + y*8, of the i-th coefficient in scan order. Each source line below
 * holds eight consecutive scan positions.
 */
const uint8_t zigzag_scan8x8[64] =
{
    0+0*8, 1+0*8, 0+1*8, 0+2*8, 1+1*8, 2+0*8, 3+0*8, 2+1*8, /*  0.. 7 */
    1+2*8, 0+3*8, 0+4*8, 1+3*8, 2+2*8, 3+1*8, 4+0*8, 5+0*8, /*  8..15 */
    4+1*8, 3+2*8, 2+3*8, 1+4*8, 0+5*8, 0+6*8, 1+5*8, 2+4*8, /* 16..23 */
    3+3*8, 4+2*8, 5+1*8, 6+0*8, 7+0*8, 6+1*8, 5+2*8, 4+3*8, /* 24..31 */
    3+4*8, 2+5*8, 1+6*8, 0+7*8, 1+7*8, 2+6*8, 3+5*8, 4+4*8, /* 32..39 */
    5+3*8, 6+2*8, 7+1*8, 7+2*8, 6+3*8, 5+4*8, 4+5*8, 3+6*8, /* 40..47 */
    2+7*8, 3+7*8, 4+6*8, 5+5*8, 6+4*8, 7+3*8, 7+4*8, 6+5*8, /* 48..55 */
    5+6*8, 4+7*8, 5+7*8, 6+6*8, 7+5*8, 7+6*8, 6+7*8, 7+7*8, /* 56..63 */
};

/*
 * 8x8 zigzag scan split into four interleaved groups of 16 positions:
 *   zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[4*(i%16) + i/16]
 * or, read the other way round,
 *   zigzag_scan8x8[i] = zigzag_scan8x8_cavlc[(i/4) + 16*(i%4)]
 * Entries are raster offsets written as x + y*8.
 */
const uint8_t zigzag_scan8x8_cavlc[64] =
{
    /* i =  0..15: zigzag positions 0, 4, 8, ..., 60 */
    0+0*8, 1+1*8, 1+2*8, 2+2*8, 4+1*8, 0+5*8, 3+3*8, 7+0*8,
    3+4*8, 1+7*8, 5+3*8, 6+3*8, 2+7*8, 6+4*8, 5+6*8, 7+5*8,
    /* i = 16..31: zigzag positions 1, 5, 9, ..., 61 */
    1+0*8, 2+0*8, 0+3*8, 3+1*8, 3+2*8, 0+6*8, 4+2*8, 6+1*8,
    2+5*8, 2+6*8, 6+2*8, 5+4*8, 3+7*8, 7+3*8, 4+7*8, 7+6*8,
    /* i = 32..47: zigzag positions 2, 6, 10, ..., 62 */
    0+1*8, 3+0*8, 0+4*8, 4+0*8, 2+3*8, 1+5*8, 5+1*8, 5+2*8,
    1+6*8, 3+5*8, 7+1*8, 4+5*8, 4+6*8, 7+4*8, 5+7*8, 6+7*8,
    /* i = 48..63: zigzag positions 3, 7, 11, ..., 63 */
    0+2*8, 2+1*8, 1+3*8, 5+0*8, 1+4*8, 2+4*8, 6+0*8, 4+3*8,
    0+7*8, 4+4*8, 7+2*8, 3+6*8, 5+5*8, 6+5*8, 6+6*8, 7+7*8,
};

/*
 * Field scan order for an 8x8 block: entry i is the raster offset, written
 * as x + y*8, of the i-th coefficient in scan order. Each source line below
 * holds eight consecutive scan positions.
 */
const uint8_t field_scan8x8[64] =
{
    0+0*8, 0+1*8, 0+2*8, 1+0*8, 1+1*8, 0+3*8, 0+4*8, 1+2*8, /*  0.. 7 */
    2+0*8, 1+3*8, 0+5*8, 0+6*8, 0+7*8, 1+4*8, 2+1*8, 3+0*8, /*  8..15 */
    2+2*8, 1+5*8, 1+6*8, 1+7*8, 2+3*8, 3+1*8, 4+0*8, 3+2*8, /* 16..23 */
    2+4*8, 2+5*8, 2+6*8, 2+7*8, 3+3*8, 4+1*8, 5+0*8, 4+2*8, /* 24..31 */
    3+4*8, 3+5*8, 3+6*8, 3+7*8, 4+3*8, 5+1*8, 6+0*8, 5+2*8, /* 32..39 */
    4+4*8, 4+5*8, 4+6*8, 4+7*8, 5+3*8, 6+1*8, 6+2*8, 5+4*8, /* 40..47 */
    5+5*8, 5+6*8, 5+7*8, 6+3*8, 7+0*8, 7+1*8, 6+4*8, 6+5*8, /* 48..55 */
    6+6*8, 6+7*8, 7+2*8, 7+3*8, 7+4*8, 7+5*8, 7+6*8, 7+7*8, /* 56..63 */
};

/*
 * 8x8 field scan split into four interleaved groups of 16 positions:
 *   field_scan8x8_cavlc[i] = field_scan8x8[4*(i%16) + i/16]
 * Entries are raster offsets written as x + y*8.
 */
const uint8_t field_scan8x8_cavlc[64] =
{
    /* i =  0..15: field-scan positions 0, 4, 8, ..., 60 */
    0+0*8, 1+1*8, 2+0*8, 0+7*8, 2+2*8, 2+3*8, 2+4*8, 3+3*8,
    3+4*8, 4+3*8, 4+4*8, 5+3*8, 5+5*8, 7+0*8, 6+6*8, 7+4*8,
    /* i = 16..31: field-scan positions 1, 5, 9, ..., 61 */
    0+1*8, 0+3*8, 1+3*8, 1+4*8, 1+5*8, 3+1*8, 2+5*8, 4+1*8,
    3+5*8, 5+1*8, 4+5*8, 6+1*8, 5+6*8, 7+1*8, 6+7*8, 7+5*8,
    /* i = 32..47: field-scan positions 2, 6, 10, ..., 62 */
    0+2*8, 0+4*8, 0+5*8, 2+1*8, 1+6*8, 4+0*8, 2+6*8, 5+0*8,
    3+6*8, 6+0*8, 4+6*8, 6+2*8, 5+7*8, 6+4*8, 7+2*8, 7+6*8,
    /* i = 48..63: field-scan positions 3, 7, 11, ..., 63 */
    1+0*8, 1+2*8, 0+6*8, 3+0*8, 1+7*8, 3+2*8, 2+7*8, 4+2*8,
    3+7*8, 5+2*8, 4+7*8, 5+4*8, 6+3*8, 6+5*8, 7+3*8, 7+7*8,
};


/*
 * File-scope VLC objects. Nothing in this part of the file initialises or
 * reads them.
 * NOTE(review): the names match the CAVLC syntax elements coeff_token,
 * total_zeros and run_before - presumably these hold the corresponding
 * decoding tables; confirm against the initialisation code.
 */
VLC coeff_token_vlc[4];
VLC chroma_dc_coeff_token_vlc;      /* NOTE(review): presumably the chroma-DC variant of the above */

VLC total_zeros_vlc[15];
VLC chroma_dc_total_zeros_vlc[3];   /* NOTE(review): presumably the chroma-DC variant of the above */

VLC run_vlc[6];
VLC run7_vlc;                       /* NOTE(review): kept separate from run_vlc[]; reason not visible here */

/*
 * Forward declarations of two per-macroblock filter routines that take the
 * same argument list; their definitions are not part of this excerpt.
 * NOTE(review): presumably the deblocking filter and a faster variant of it,
 * with mb_x/mb_y the macroblock position, img_y/img_cb/img_cr the three
 * picture planes and linesize/uvlinesize their row pitches - confirm against
 * the definitions.
 */
void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);

/*
 * Pack two 16-bit quantities into one uint32_t so that, once the word is
 * stored to memory, a occupies the lower-addressed half and b the
 * higher-addressed half regardless of byte order.
 *
 * Both operands are converted to uint32_t before masking and shifting:
 * left-shifting a negative (or too large) signed int is undefined behaviour,
 * and callers may pass negative values. The result is the same bit pattern
 * the signed arithmetic produced on two's-complement targets.
 */
#ifdef WORDS_BIGENDIAN
#define pack16to32(a, b) ((uint32_t)(((uint32_t)(b) & 0xFFFF) + ((uint32_t)(a) << 16)))
#else
#define pack16to32(a, b) ((uint32_t)(((uint32_t)(a) & 0xFFFF) + ((uint32_t)(b) << 16)))
#endif

/**
 * Fill a small rectangle of 1- or 4-byte elements with one value.
 *
 * The rows are written with 16- or 32-bit stores, so the row width in bytes
 * (w*size) must be 2, 4, 8 or 16; any other width ends in assert(0).
 * Heights of 1, 2 and 4 rows are handled for every supported width; any
 * other height writes four rows and trips the final assert.
 *
 * @param vp     first element of the rectangle; must be aligned to
 *               min(w*size, STRIDE_ALIGN) bytes (asserted)
 * @param w      width of the rectangle in elements (at most 4), should be a constant
 * @param h      height of the rectangle in rows, should be a constant
 * @param stride distance between two rows in elements; in bytes it must be a
 *               multiple of the row width in bytes (asserted)
 * @param val    fill value; when a 2- or 4-byte row is filled with size==1 it
 *               is multiplied by 0x0101 / 0x01010101 to replicate it into
 *               every byte, so it is expected to fit in 8 bits
 * @param size   the size of val in bytes (1 or 4), should be a constant
 */
STATIC_FUNC always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size)
{
    uint8_t *p= (uint8_t*)vp;
    int stride2, stride3;
    assert(size==1 || size==4);
    assert(w<=4);

    /* from here on w and stride are measured in bytes */
    stride *= size;
    w      *= size;
    stride2 = (stride<<1);
    stride3 = stride*3;

    /* only the low address bits are inspected, so the cast to long is
       harmless even where long is narrower than a pointer */
    assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
    assert((stride&(w-1))==0);

    if (w==2)
    {
        /* a 2-byte row cannot come from size==4, so val is always a single
           byte that has to be replicated */
        const uint16_t v= val*0x0101;
        *(uint16_t*)(p) = v;
        if (h==1) return;
        *(uint16_t*)(p + stride)= v;
        if (h==2) return;
        *(uint16_t*)(p + stride2)=
        *(uint16_t*)(p + stride3)= v;
    }

    else if (w==4)
    {
        /* either one 4-byte element or four 1-byte elements */
        const uint32_t v= size==4 ? val : val*0x01010101;
        *(uint32_t*)(p)= v;
        if (h==1) return;
        *(uint32_t*)(p + stride)= v;
        if (h==2) return;
        *(uint32_t*)(p + stride2)=
        *(uint32_t*)(p + stride3)= v;
    }

    else if (w==8)
    {
        *(uint32_t*)(p    )=
        *(uint32_t*)(p + 4)= val;
        if (h==1) return;
        *(uint32_t*)(p +   stride)=
        *(uint32_t*)(p + 4+stride)= val;
        if (h==2) return;
        *(uint32_t*)(p +   stride2)=
        *(uint32_t*)(p + 4+stride2)=
        *(uint32_t*)(p +   stride3)=
        *(uint32_t*)(p + 4+stride3)= val;
    }

    else if (w==16)
    {
        *(uint32_t*)(p    )=
        *(uint32_t*)(p + 4)=
        *(uint32_t*)(p + 8)=
        *(uint32_t*)(p +12)= val;
        /* same single-row exit as the narrower cases; without it a one-row
           request would overwrite the three rows below */
        if (h==1) return;
        *(uint32_t*)(p +   stride)=
        *(uint32_t*)(p + 4+stride)=
        *(uint32_t*)(p + 8+stride)=
        *(uint32_t*)(p +12+stride)= val;
        if (h==2) return;
        *(uint32_t*)(p +   stride2)=
        *(uint32_t*)(p + 4+stride2)=
        *(uint32_t*)(p + 8+stride2)=
        *(uint32_t*)(p +12+stride2)=
        *(uint32_t*)(p +   stride3)=
        *(uint32_t*)(p + 4+stride3)=
        *(uint32_t*)(p + 8+stride3)=
        *(uint32_t*)(p +12+stride3)= val;
    }

    else
        assert(0);

    /* every early return above covered h==1 or h==2; four rows were written */
    assert(h==4);
}

void fill_caches_interlaced(H264Context *h, int mb_type, int for_deblock)
{
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];
    int left_block[8];
    int i;

    //FIXME deblocking could skip the intra and nnz parts.
    if (for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
        return;

    //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
    top_xy     = mb_xy  - s->mb_stride;
    topleft_xy = top_xy - 1;
    topright_xy= top_xy + 1;
    left_xy[1] = left_xy[0] = mb_xy-1;
    left_block[0]= 0;
    left_block[1]= 1;
    left_block[2]= 2;
    left_block[3]= 3;
    left_block[4]= 7;
    left_block[5]= 10;
    left_block[6]= 8;
    left_block[7]= 11;
    if (FRAME_MBAFF)
    {
        const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
        const int top_pair_xy      = pair_xy     - s->mb_stride;
        const int topleft_pair_xy  = top_pair_xy - 1;
        const int topright_pair_xy = top_pair_xy + 1;
        const int topleft_mb_frame_flag  = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
        const int top_mb_frame_flag      = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
        const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
        const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
        const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
        const int bottom = (s->mb_y & 1);
       
        if (bottom
                ? !curr_mb_frame_flag // bottom macroblock
                : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
                ) {
            top_xy -= s->mb_stride;
        }
        if (bottom
                ? !curr_mb_frame_flag // bottom macroblock
                : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
                ) {
            topleft_xy -= s->mb_stride;
        }
        if (bottom
                ? !curr_mb_frame_flag // bottom macroblock
                : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
                ) {
            topright_xy -= s->mb_stride;
        }
        if (left_mb_frame_flag != curr_mb_frame_flag) {
            left_xy[1] = left_xy[0] = pair_xy - 1;
            if (curr_mb_frame_flag) {
                if (bottom) {
                    left_block[0]= 2;
                    left_block[1]= 2;
                    left_block[2]= 3;
                    left_block[3]= 3;
                    left_block[4]= 8;
                    left_block[5]= 11;
                    left_block[6]= 8;
                    left_block[7]= 11;
                } else {
                    left_block[0]= 0;
                    left_block[1]= 0;
                    left_block[2]= 1;
                    left_block[3]= 1;
                    left_block[4]= 7;
                    left_block[5]= 10;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -