📄 h264.c
字号:
/*
* H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file h264.c
* H.264 / AVC / MPEG4 part10 codec.
* @author Michael Niedermayer <michaelni@gmx.at>
*/
#include "dsputil.h"
#include "avcodec.h"
#include "mpegvideo.h"
#include "h264.h"
#include "h264data.h"
#include "h264_parser.h"
#include "golomb.h"
#include "cabac.h"
//#undef NDEBUG
#include <assert.h>
/**
* Value of Picture.reference when Picture is not a reference picture, but
* is held for delayed output.
*/
#define DELAYED_PIC_REF 4
static VLC coeff_token_vlc[4];
static VLC chroma_dc_coeff_token_vlc;
static VLC total_zeros_vlc[15];
static VLC chroma_dc_total_zeros_vlc[3];
static VLC run_vlc[6];
static VLC run7_vlc;
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static av_always_inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
return (b&0xFFFF) + (a<<16);
#else
return (a&0xFFFF) + (b<<16);
#endif
}
const uint8_t ff_rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
const uint8_t ff_div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
/**
* fill a rectangle.
* @param h height of the rectangle, should be a constant
* @param w width of the rectangle, should be a constant
* @param size the size of val (1 or 4), should be a constant
*/
static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
uint8_t *p= (uint8_t*)vp;
assert(size==1 || size==4);
assert(w<=4);
w *= size;
stride *= size;
assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
assert((stride&(w-1))==0);
if(w==2){
const uint16_t v= size==4 ? val : val*0x0101;
*(uint16_t*)(p + 0*stride)= v;
if(h==1) return;
*(uint16_t*)(p + 1*stride)= v;
if(h==2) return;
*(uint16_t*)(p + 2*stride)= v;
*(uint16_t*)(p + 3*stride)= v;
}else if(w==4){
const uint32_t v= size==4 ? val : val*0x01010101;
*(uint32_t*)(p + 0*stride)= v;
if(h==1) return;
*(uint32_t*)(p + 1*stride)= v;
if(h==2) return;
*(uint32_t*)(p + 2*stride)= v;
*(uint32_t*)(p + 3*stride)= v;
}else if(w==8){
//gcc can't optimize 64bit math on x86_32
#if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
const uint64_t v= val*0x0100000001ULL;
*(uint64_t*)(p + 0*stride)= v;
if(h==1) return;
*(uint64_t*)(p + 1*stride)= v;
if(h==2) return;
*(uint64_t*)(p + 2*stride)= v;
*(uint64_t*)(p + 3*stride)= v;
}else if(w==16){
const uint64_t v= val*0x0100000001ULL;
*(uint64_t*)(p + 0+0*stride)= v;
*(uint64_t*)(p + 8+0*stride)= v;
*(uint64_t*)(p + 0+1*stride)= v;
*(uint64_t*)(p + 8+1*stride)= v;
if(h==2) return;
*(uint64_t*)(p + 0+2*stride)= v;
*(uint64_t*)(p + 8+2*stride)= v;
*(uint64_t*)(p + 0+3*stride)= v;
*(uint64_t*)(p + 8+3*stride)= v;
#else
*(uint32_t*)(p + 0+0*stride)= val;
*(uint32_t*)(p + 4+0*stride)= val;
if(h==1) return;
*(uint32_t*)(p + 0+1*stride)= val;
*(uint32_t*)(p + 4+1*stride)= val;
if(h==2) return;
*(uint32_t*)(p + 0+2*stride)= val;
*(uint32_t*)(p + 4+2*stride)= val;
*(uint32_t*)(p + 0+3*stride)= val;
*(uint32_t*)(p + 4+3*stride)= val;
}else if(w==16){
*(uint32_t*)(p + 0+0*stride)= val;
*(uint32_t*)(p + 4+0*stride)= val;
*(uint32_t*)(p + 8+0*stride)= val;
*(uint32_t*)(p +12+0*stride)= val;
*(uint32_t*)(p + 0+1*stride)= val;
*(uint32_t*)(p + 4+1*stride)= val;
*(uint32_t*)(p + 8+1*stride)= val;
*(uint32_t*)(p +12+1*stride)= val;
if(h==2) return;
*(uint32_t*)(p + 0+2*stride)= val;
*(uint32_t*)(p + 4+2*stride)= val;
*(uint32_t*)(p + 8+2*stride)= val;
*(uint32_t*)(p +12+2*stride)= val;
*(uint32_t*)(p + 0+3*stride)= val;
*(uint32_t*)(p + 4+3*stride)= val;
*(uint32_t*)(p + 8+3*stride)= val;
*(uint32_t*)(p +12+3*stride)= val;
#endif
}else
assert(0);
assert(h==4);
}
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
MpegEncContext * const s = &h->s;
const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
int topleft_xy, top_xy, topright_xy, left_xy[2];
int topleft_type, top_type, topright_type, left_type[2];
int left_block[8];
int i;
//FIXME deblocking could skip the intra and nnz parts.
if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
return;
//wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
topleft_xy = top_xy - 1;
topright_xy= top_xy + 1;
left_xy[1] = left_xy[0] = mb_xy-1;
left_block[0]= 0;
left_block[1]= 1;
left_block[2]= 2;
left_block[3]= 3;
left_block[4]= 7;
left_block[5]= 10;
left_block[6]= 8;
left_block[7]= 11;
if(FRAME_MBAFF){
const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
const int top_pair_xy = pair_xy - s->mb_stride;
const int topleft_pair_xy = top_pair_xy - 1;
const int topright_pair_xy = top_pair_xy + 1;
const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
const int bottom = (s->mb_y & 1);
tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
if (bottom
? !curr_mb_frame_flag // bottom macroblock
: (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
) {
top_xy -= s->mb_stride;
}
if (bottom
? !curr_mb_frame_flag // bottom macroblock
: (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
) {
topleft_xy -= s->mb_stride;
}
if (bottom
? !curr_mb_frame_flag // bottom macroblock
: (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
) {
topright_xy -= s->mb_stride;
}
if (left_mb_frame_flag != curr_mb_frame_flag) {
left_xy[1] = left_xy[0] = pair_xy - 1;
if (curr_mb_frame_flag) {
if (bottom) {
left_block[0]= 2;
left_block[1]= 2;
left_block[2]= 3;
left_block[3]= 3;
left_block[4]= 8;
left_block[5]= 11;
left_block[6]= 8;
left_block[7]= 11;
} else {
left_block[0]= 0;
left_block[1]= 0;
left_block[2]= 1;
left_block[3]= 1;
left_block[4]= 7;
left_block[5]= 10;
left_block[6]= 7;
left_block[7]= 10;
}
} else {
left_xy[1] += s->mb_stride;
//left_block[0]= 0;
left_block[1]= 2;
left_block[2]= 0;
left_block[3]= 2;
//left_block[4]= 7;
left_block[5]= 10;
left_block[6]= 7;
left_block[7]= 10;
}
}
}
h->top_mb_xy = top_xy;
h->left_mb_xy[0] = left_xy[0];
h->left_mb_xy[1] = left_xy[1];
if(for_deblock){
topleft_type = 0;
topright_type = 0;
top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
if(FRAME_MBAFF && !IS_INTRA(mb_type)){
int list;
int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
for(i=0; i<16; i++)
h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
for(list=0; list<h->list_count; list++){
if(USES_LIST(mb_type,list)){
uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
dst[0] = src[0];
dst[1] = src[1];
dst[2] = src[2];
dst[3] = src[3];
}
*(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
*(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
ref += h->b8_stride;
*(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
*(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
}else{
fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
}
}
}
}else{
topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
}
if(IS_INTRA(mb_type)){
h->topleft_samples_available=
h->top_samples_available=
h->left_samples_available= 0xFFFF;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -