📄 h264.c
字号:
/** * num_ref_idx_l0/1_active_minus1 + 1 */ unsigned int ref_count[2]; ///< counts frames or fields, depending on current mb mode Picture *short_ref[32]; Picture *long_ref[32]; Picture default_ref_list[2][32]; Picture ref_list[2][48]; ///< 0..15: frame refs, 16..47: mbaff field refs Picture *delayed_pic[18]; //FIXME size? Picture *delayed_output_pic; /** * memory management control operations buffer. */ MMCO mmco[MAX_MMCO_COUNT]; int mmco_index; int long_ref_count; ///< number of actual long term references int short_ref_count; ///< number of actual short term references //data partitioning GetBitContext intra_gb; GetBitContext inter_gb; GetBitContext *intra_gb_ptr; GetBitContext *inter_gb_ptr; DECLARE_ALIGNED_8(DCTELEM, mb[16*24]); DCTELEM mb_padding[256]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not to large or ensure that there is some unused stuff after mb /** * Cabac */ CABACContext cabac; uint8_t cabac_state[460]; int cabac_init_idc; /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */ uint16_t *cbp_table; int cbp; int top_cbp; int left_cbp; /* chroma_pred_mode for i4x4 or i16x16, else 0 */ uint8_t *chroma_pred_mode_table; int last_qscale_diff; int16_t (*mvd_table[2])[2]; DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]); uint8_t *direct_table; uint8_t direct_cache[5*8]; uint8_t zigzag_scan[16]; uint8_t zigzag_scan8x8[64]; uint8_t zigzag_scan8x8_cavlc[64]; uint8_t field_scan[16]; uint8_t field_scan8x8[64]; uint8_t field_scan8x8_cavlc[64]; const uint8_t *zigzag_scan_q0; const uint8_t *zigzag_scan8x8_q0; const uint8_t *zigzag_scan8x8_cavlc_q0; const uint8_t *field_scan_q0; const uint8_t *field_scan8x8_q0; const uint8_t *field_scan8x8_cavlc_q0; int x264_build;}H264Context;static VLC coeff_token_vlc[4];static VLC chroma_dc_coeff_token_vlc;static VLC total_zeros_vlc[15];static VLC chroma_dc_total_zeros_vlc[3];static VLC run_vlc[6];static VLC run7_vlc;static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);static av_always_inline uint32_t pack16to32(int a, int b){#ifdef WORDS_BIGENDIAN return (b&0xFFFF) + (a<<16);#else return (a&0xFFFF) + (b<<16);#endif}const uint8_t ff_rem6[52]={0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,};const uint8_t ff_div6[52]={0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,};/** * fill a rectangle. * @param h height of the rectangle, should be a constant * @param w width of the rectangle, should be a constant * @param size the size of val (1 or 4), should be a constant */static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ uint8_t *p= (uint8_t*)vp; assert(size==1 || size==4); assert(w<=4); w *= size; stride *= size; assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0); assert((stride&(w-1))==0); if(w==2){ const uint16_t v= size==4 ? val : val*0x0101; *(uint16_t*)(p + 0*stride)= v; if(h==1) return; *(uint16_t*)(p + 1*stride)= v; if(h==2) return; *(uint16_t*)(p + 2*stride)= *(uint16_t*)(p + 3*stride)= v; }else if(w==4){ const uint32_t v= size==4 ? val : val*0x01010101; *(uint32_t*)(p + 0*stride)= v; if(h==1) return; *(uint32_t*)(p + 1*stride)= v; if(h==2) return; *(uint32_t*)(p + 2*stride)= *(uint32_t*)(p + 3*stride)= v; }else if(w==8){ //gcc can't optimize 64bit math on x86_32#if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64) const uint64_t v= val*0x0100000001ULL; *(uint64_t*)(p + 0*stride)= v; if(h==1) return; *(uint64_t*)(p + 1*stride)= v; if(h==2) return; *(uint64_t*)(p + 2*stride)= *(uint64_t*)(p + 3*stride)= v; }else if(w==16){ const uint64_t v= val*0x0100000001ULL; *(uint64_t*)(p + 0+0*stride)= *(uint64_t*)(p + 8+0*stride)= *(uint64_t*)(p + 0+1*stride)= *(uint64_t*)(p + 8+1*stride)= v; if(h==2) return; *(uint64_t*)(p + 0+2*stride)= *(uint64_t*)(p + 8+2*stride)= *(uint64_t*)(p + 0+3*stride)= *(uint64_t*)(p + 8+3*stride)= v;#else *(uint32_t*)(p + 0+0*stride)= *(uint32_t*)(p + 4+0*stride)= val; if(h==1) return; *(uint32_t*)(p + 0+1*stride)= *(uint32_t*)(p + 4+1*stride)= val; if(h==2) return; *(uint32_t*)(p + 0+2*stride)= *(uint32_t*)(p + 4+2*stride)= *(uint32_t*)(p + 0+3*stride)= *(uint32_t*)(p + 4+3*stride)= val; }else if(w==16){ *(uint32_t*)(p + 0+0*stride)= *(uint32_t*)(p + 4+0*stride)= *(uint32_t*)(p + 8+0*stride)= *(uint32_t*)(p +12+0*stride)= *(uint32_t*)(p + 0+1*stride)= *(uint32_t*)(p + 4+1*stride)= *(uint32_t*)(p + 8+1*stride)= *(uint32_t*)(p +12+1*stride)= val; if(h==2) return; *(uint32_t*)(p + 0+2*stride)= *(uint32_t*)(p + 4+2*stride)= *(uint32_t*)(p + 8+2*stride)= *(uint32_t*)(p +12+2*stride)= *(uint32_t*)(p + 0+3*stride)= *(uint32_t*)(p + 4+3*stride)= *(uint32_t*)(p + 8+3*stride)= *(uint32_t*)(p +12+3*stride)= val;#endif }else assert(0); assert(h==4);}static void fill_caches(H264Context *h, int mb_type, int for_deblock){ MpegEncContext * const s = &h->s; const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; int topleft_xy, top_xy, topright_xy, left_xy[2]; int topleft_type, top_type, topright_type, left_type[2]; int left_block[8]; int i; //FIXME deblocking could skip the intra and nnz parts. if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF) return; //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it top_xy = mb_xy - s->mb_stride; topleft_xy = top_xy - 1; topright_xy= top_xy + 1; left_xy[1] = left_xy[0] = mb_xy-1; left_block[0]= 0; left_block[1]= 1; left_block[2]= 2; left_block[3]= 3; left_block[4]= 7; left_block[5]= 10; left_block[6]= 8; left_block[7]= 11; if(FRAME_MBAFF){ const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride; const int top_pair_xy = pair_xy - s->mb_stride; const int topleft_pair_xy = top_pair_xy - 1; const int topright_pair_xy = top_pair_xy + 1; const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]); const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]); const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]); const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]); const int curr_mb_frame_flag = !IS_INTERLACED(mb_type); const int bottom = (s->mb_y & 1); tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag); if (bottom ? !curr_mb_frame_flag // bottom macroblock : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock ) { top_xy -= s->mb_stride; } if (bottom ? !curr_mb_frame_flag // bottom macroblock : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock ) { topleft_xy -= s->mb_stride; } if (bottom ? !curr_mb_frame_flag // bottom macroblock : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock ) { topright_xy -= s->mb_stride; } if (left_mb_frame_flag != curr_mb_frame_flag) { left_xy[1] = left_xy[0] = pair_xy - 1; if (curr_mb_frame_flag) { if (bottom) { left_block[0]= 2; left_block[1]= 2; left_block[2]= 3; left_block[3]= 3; left_block[4]= 8; left_block[5]= 11; left_block[6]= 8; left_block[7]= 11; } else { left_block[0]= 0; left_block[1]= 0; left_block[2]= 1; left_block[3]= 1; left_block[4]= 7; left_block[5]= 10; left_block[6]= 7; left_block[7]= 10; } } else { left_xy[1] += s->mb_stride; //left_block[0]= 0; left_block[1]= 2; left_block[2]= 0; left_block[3]= 2; //left_block[4]= 7; left_block[5]= 10; left_block[6]= 7; left_block[7]= 10; } } } h->top_mb_xy = top_xy; h->left_mb_xy[0] = left_xy[0]; h->left_mb_xy[1] = left_xy[1]; if(for_deblock){ topleft_type = 0; topright_type = 0; top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0; left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0; left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0; if(FRAME_MBAFF && !IS_INTRA(mb_type)){ int list; int v = *(uint16_t*)&h->non_zero_count[mb_xy][14]; for(i=0; i<16; i++) h->non_zero_count_cache[scan8[i]] = (v>>i)&1; for(list=0; list<1+(h->slice_type==B_TYPE); list++){ if(USES_LIST(mb_type,list)){ uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]]; uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]]; int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]]; for(i=0; i<4; i++, dst+=8, src+=h->b_stride){ dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3]; } *(uint32_t*)&h->ref_cache[list][scan8[ 0]] = *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101; ref += h->b8_stride; *(uint32_t*)&h->ref_cache[list][scan8[ 8]] = *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101; }else{ fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4); fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); } } } }else{ topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0; top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0; topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0; left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0; left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0; } if(IS_INTRA(mb_type)){ h->topleft_samples_available= h->top_samples_available=
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -