📄 h264.c
字号:
GetBitContext *inter_gb_ptr; DECLARE_ALIGNED_8(DCTELEM, mb[16*24]); /** * Cabac */ CABACContext cabac; uint8_t cabac_state[460]; int cabac_init_idc; /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */ uint16_t *cbp_table; int top_cbp; int left_cbp; /* chroma_pred_mode for i4x4 or i16x16, else 0 */ uint8_t *chroma_pred_mode_table; int last_qscale_diff; int16_t (*mvd_table[2])[2]; DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]); uint8_t *direct_table; uint8_t direct_cache[5*8]; uint8_t zigzag_scan[16]; uint8_t field_scan[16]; uint8_t zigzag_scan8x8[64]; uint8_t zigzag_scan8x8_cavlc[64]; const uint8_t *zigzag_scan_q0; const uint8_t *field_scan_q0; const uint8_t *zigzag_scan8x8_q0; const uint8_t *zigzag_scan8x8_cavlc_q0; int x264_build;}H264Context;static VLC coeff_token_vlc[4];static VLC chroma_dc_coeff_token_vlc;static VLC total_zeros_vlc[15];static VLC chroma_dc_total_zeros_vlc[3];static VLC run_vlc[6];static VLC run7_vlc;static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);static always_inline uint32_t pack16to32(int a, int b){#ifdef WORDS_BIGENDIAN return (b&0xFFFF) + (a<<16);#else return (a&0xFFFF) + (b<<16);#endif}/** * fill a rectangle. * @param h height of the rectangle, should be a constant * @param w width of the rectangle, should be a constant * @param size the size of val (1 or 4), should be a constant */static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ uint8_t *p= (uint8_t*)vp; assert(size==1 || size==4); w *= size; stride *= size; assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0); assert((stride&(w-1))==0);//FIXME check what gcc generates for 64 bit on x86 and possibly write a 32 bit ver of it if(w==2 && h==2){ *(uint16_t*)(p + 0)= *(uint16_t*)(p + stride)= size==4 ? val : val*0x0101; }else if(w==2 && h==4){ *(uint16_t*)(p + 0*stride)= *(uint16_t*)(p + 1*stride)= *(uint16_t*)(p + 2*stride)= *(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101; }else if(w==4 && h==1){ *(uint32_t*)(p + 0*stride)= size==4 ? val : val*0x01010101; }else if(w==4 && h==2){ *(uint32_t*)(p + 0*stride)= *(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101; }else if(w==4 && h==4){ *(uint32_t*)(p + 0*stride)= *(uint32_t*)(p + 1*stride)= *(uint32_t*)(p + 2*stride)= *(uint32_t*)(p + 3*stride)= size==4 ? val : val*0x01010101; }else if(w==8 && h==1){ *(uint32_t*)(p + 0)= *(uint32_t*)(p + 4)= size==4 ? val : val*0x01010101; }else if(w==8 && h==2){ *(uint32_t*)(p + 0 + 0*stride)= *(uint32_t*)(p + 4 + 0*stride)= *(uint32_t*)(p + 0 + 1*stride)= *(uint32_t*)(p + 4 + 1*stride)= size==4 ? val : val*0x01010101; }else if(w==8 && h==4){ *(uint64_t*)(p + 0*stride)= *(uint64_t*)(p + 1*stride)= *(uint64_t*)(p + 2*stride)= *(uint64_t*)(p + 3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL; }else if(w==16 && h==2){ *(uint64_t*)(p + 0+0*stride)= *(uint64_t*)(p + 8+0*stride)= *(uint64_t*)(p + 0+1*stride)= *(uint64_t*)(p + 8+1*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL; }else if(w==16 && h==4){ *(uint64_t*)(p + 0+0*stride)= *(uint64_t*)(p + 8+0*stride)= *(uint64_t*)(p + 0+1*stride)= *(uint64_t*)(p + 8+1*stride)= *(uint64_t*)(p + 0+2*stride)= *(uint64_t*)(p + 8+2*stride)= *(uint64_t*)(p + 0+3*stride)= *(uint64_t*)(p + 8+3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL; }else assert(0);}static void fill_caches(H264Context *h, int mb_type, int for_deblock){ MpegEncContext * const s = &h->s; const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; int topleft_xy, top_xy, topright_xy, left_xy[2]; int topleft_type, top_type, topright_type, left_type[2]; int left_block[8]; int i; //FIXME deblocking can skip fill_caches much of the time with multiple slices too. // the actual condition is whether we're on the edge of a slice, // and even then the intra and nnz parts are unnecessary. if(for_deblock && h->slice_num == 1) return; //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it top_xy = mb_xy - s->mb_stride; topleft_xy = top_xy - 1; topright_xy= top_xy + 1; left_xy[1] = left_xy[0] = mb_xy-1; left_block[0]= 0; left_block[1]= 1; left_block[2]= 2; left_block[3]= 3; left_block[4]= 7; left_block[5]= 10; left_block[6]= 8; left_block[7]= 11; if(h->mb_aff_frame){ const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride; const int top_pair_xy = pair_xy - s->mb_stride; const int topleft_pair_xy = top_pair_xy - 1; const int topright_pair_xy = top_pair_xy + 1; const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]); const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]); const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]); const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]); const int curr_mb_frame_flag = !IS_INTERLACED(mb_type); const int bottom = (s->mb_y & 1); tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag); if (bottom ? !curr_mb_frame_flag // bottom macroblock : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock ) { top_xy -= s->mb_stride; } if (bottom ? !curr_mb_frame_flag // bottom macroblock : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock ) { topleft_xy -= s->mb_stride; } if (bottom ? !curr_mb_frame_flag // bottom macroblock : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock ) { topright_xy -= s->mb_stride; } if (left_mb_frame_flag != curr_mb_frame_flag) { left_xy[1] = left_xy[0] = pair_xy - 1; if (curr_mb_frame_flag) { if (bottom) { left_block[0]= 2; left_block[1]= 2; left_block[2]= 3; left_block[3]= 3; left_block[4]= 8; left_block[5]= 11; left_block[6]= 8; left_block[7]= 11; } else { left_block[0]= 0; left_block[1]= 0; left_block[2]= 1; left_block[3]= 1; left_block[4]= 7; left_block[5]= 10; left_block[6]= 7; left_block[7]= 10; } } else { left_xy[1] += s->mb_stride; //left_block[0]= 0; left_block[1]= 2; left_block[2]= 0; left_block[3]= 2; //left_block[4]= 7; left_block[5]= 10; left_block[6]= 7; left_block[7]= 10; } } } h->top_mb_xy = top_xy; h->left_mb_xy[0] = left_xy[0]; h->left_mb_xy[1] = left_xy[1]; if(for_deblock){ topleft_type = h->slice_table[topleft_xy ] < 255 ? s->current_picture.mb_type[topleft_xy] : 0; top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0; topright_type= h->slice_table[topright_xy] < 255 ? s->current_picture.mb_type[topright_xy]: 0; left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0; left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0; }else{ topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0; top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0; topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0; left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0; left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0; } if(IS_INTRA(mb_type)){ h->topleft_samples_available= h->top_samples_available= h->left_samples_available= 0xFFFF; h->topright_samples_available= 0xEEEA; if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){ h->topleft_samples_available= 0xB3FF; h->top_samples_available= 0x33FF; h->topright_samples_available= 0x26EA; } for(i=0; i<2; i++){ if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){ h->topleft_samples_available&= 0xDF5F; h->left_samples_available&= 0x5F5F; } } if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred)) h->topleft_samples_available&= 0x7FFF; if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred)) h->topright_samples_available&= 0xFBFF; if(IS_INTRA4x4(mb_type)){ if(IS_INTRA4x4(top_type)){ h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4]; h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5]; h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6]; h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3]; }else{ int pred; if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred)) pred= -1; else{ pred= 2; } h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode_cache[7+8*0]= pred; } for(i=0; i<2; i++){ if(IS_INTRA4x4(left_type[i])){ h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]]; h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]]; }else{ int pred; if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred)) pred= -1; else{ pred= 2; } h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred; } } } }/*0 . T T. T T T T1 L . .L . . . .2 L . .L . . . .3 . T TL . . . .4 L . .L . . . .5 L . .. . . . .*///FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec) if(top_type){ h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4]; h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5]; h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6]; h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3]; h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9]; h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8]; h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12]; h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11]; }else{ h->non_zero_count_cache[4+8*0]= h->non_zero_count_cache[5+8*0]= h->non_zero_count_cache[6+8*0]= h->non_zero_count_cache[7+8*0]= h->non_zero_count_cache[1+8*0]= h->non_zero_count_cache[2+8*0]= h->non_zero_count_cache[1+8*3]= h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64; } for (i=0; i<2; i++) { if(left_type[i]){ h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]]; h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]]; h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]]; h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]]; }else{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -