📄 rdo.c
字号:
/***************************************************************************** * rdo.c: h264 encoder library (rate-distortion optimization) ***************************************************************************** * Copyright (C) 2005 x264 project * * Authors: Loren Merritt <lorenm@u.washington.edu> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. *****************************************************************************//* duplicate all the writer functions, just calculating bit cost * instead of writing the bitstream. * TODO: use these for fast 1st pass too. */#define RDO_SKIP_BS/* CAVLC: produces exactly the same bit count as a normal encode *//* this probably still leaves some unnecessary computations */#define bs_write1(s,v) ((s)->i_bits_encoded += 1)#define bs_write(s,n,v) ((s)->i_bits_encoded += (n))#define bs_write_ue(s,v) ((s)->i_bits_encoded += bs_size_ue(v))#define bs_write_se(s,v) ((s)->i_bits_encoded += bs_size_se(v))#define bs_write_te(s,v,l) ((s)->i_bits_encoded += bs_size_te(v,l))#define x264_macroblock_write_cavlc x264_macroblock_size_cavlc#include "cavlc.c"/* CABAC: not exactly the same. x264_cabac_size_decision() keeps track of * fractional bits, but only finite precision. */#define x264_cabac_encode_decision(c,x,v) x264_cabac_size_decision(c,x,v)#define x264_cabac_encode_terminal(c,v) x264_cabac_size_decision(c,276,v)#define x264_cabac_encode_bypass(c,v) ((c)->f8_bits_encoded += 256)#define x264_cabac_encode_flush(c)#define x264_macroblock_write_cabac x264_macroblock_size_cabac#define x264_cabac_mb_skip x264_cabac_mb_size_skip_unused#include "cabac.c"static int x264_rd_cost_mb( x264_t *h, int i_lambda2 ){ int b_transform_bak = h->mb.b_transform_8x8; int i_ssd; int i_bits; x264_macroblock_encode( h ); i_ssd = h->pixf.ssd[PIXEL_16x16]( h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0], h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] ) + h->pixf.ssd[PIXEL_8x8]( h->mb.pic.p_fenc[1], h->mb.pic.i_stride[1], h->mb.pic.p_fdec[1], h->mb.pic.i_stride[1] ) + h->pixf.ssd[PIXEL_8x8]( h->mb.pic.p_fenc[2], h->mb.pic.i_stride[2], h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2] ); if( IS_SKIP( h->mb.i_type ) ) { i_bits = 1 * i_lambda2; } else if( h->param.b_cabac ) { x264_cabac_t cabac_tmp = h->cabac; cabac_tmp.f8_bits_encoded = 0; x264_macroblock_size_cabac( h, &cabac_tmp ); i_bits = ( cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8; } else { bs_t bs_tmp = h->out.bs; bs_tmp.i_bits_encoded = 0; x264_macroblock_size_cavlc( h, &bs_tmp ); i_bits = bs_tmp.i_bits_encoded * i_lambda2; } h->mb.b_transform_8x8 = b_transform_bak; return i_ssd + i_bits;}/**************************************************************************** * Trellis RD quantization ****************************************************************************/#define TRELLIS_SCORE_MAX (1ULL<<50)#define CABAC_SIZE_BITS 8#define SSD_WEIGHT_BITS 5#define LAMBDA_BITS 4/* precalculate the cost of coding abs_level_m1 */static int cabac_prefix_transition[15][128];static int cabac_prefix_size[15][128];void x264_rdo_init( ){ int i_prefix; int i_ctx; for( i_prefix = 0; i_prefix < 15; i_prefix++ ) { for( i_ctx = 0; i_ctx < 128; i_ctx++ ) { int f8_bits = 0; uint8_t ctx = i_ctx; int i; for( i = 1; i < i_prefix; i++ ) f8_bits += x264_cabac_size_decision2( &ctx, 1 ); if( i_prefix > 0 && i_prefix < 14 ) f8_bits += x264_cabac_size_decision2( &ctx, 0 ); f8_bits += 1 << CABAC_SIZE_BITS; //sign cabac_prefix_size[i_prefix][i_ctx] = f8_bits; cabac_prefix_transition[i_prefix][i_ctx] = ctx; } }}// node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).// 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter)./* map node ctx => cabac ctx for level=1 */static const int coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };/* map node ctx => cabac ctx for level>1 */static const int coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };static const int coeff_abs_level_transition[2][8] = {/* update node.ctx after coding a level=1 */ { 1, 2, 3, 3, 4, 5, 6, 7 },/* update node.ctx after coding a level>1 */ { 4, 4, 4, 4, 5, 6, 7, 7 }};static const int lambda2_tab[6] = { 1024, 1290, 1625, 2048, 2580, 3251 };typedef struct { uint64_t score; int level_idx; // index into level_tree[] uint8_t cabac_state[10]; //just the contexts relevant to coding abs_level_m1} trellis_node_t;// TODO:// support chroma and i16x16 DC// save cabac state between blocks?// use trellis' RD score instead of x264_mb_decimate_score?// code 8x8 sig/last flags forwards with deadzone and save the contexts at// each position?// change weights when using CQMs?// possible optimizations:// make scores fit in 32bit// save quantized coefs during rd, to avoid a duplicate trellis in the final encode// if trellissing all MBRD modes, finish SSD calculation so we can skip all of// the normal dequant/idct/ssd/cabac// the unquant_mf here is not the same as dequant_mf:// in normal operation (dct->quant->dequant->idct) the dct and idct are not// normalized. quant/dequant absorb those scaling factors.// in this function, we just do (quant->unquant) and want the output to be// comparable to the input. so unquant is the direct inverse of quant,// and uses the dct scaling factors, not the idct ones.static void quant_trellis_cabac( x264_t *h, int16_t *dct, const int *quant_mf, const int *unquant_mf, const int *coef_weight, const int *zigzag, int i_ctxBlockCat, int i_qbits, int i_lambda2, int b_ac, int i_coefs ){ int abs_coefs[64], signs[64]; trellis_node_t nodes[2][8]; trellis_node_t *nodes_cur = nodes[0]; trellis_node_t *nodes_prev = nodes[1]; trellis_node_t *bnode; uint8_t cabac_state_sig[64]; uint8_t cabac_state_last[64]; const int f = 1 << (i_qbits-1); // no deadzone int i_last_nnz = -1; int i, j; // (# of coefs) * (# of ctx) * (# of levels tried) = 1024 // we don't need to keep all of those: (# of coefs) * (# of ctx) would be enough, // but it takes more time to remove dead states than you gain in reduced memory. struct { uint16_t abs_level; uint16_t next; } level_tree[64*8*2]; int i_levels_used = 1; /* init coefs */ for( i = b_ac; i < i_coefs; i++ ) { int coef = dct[zigzag[i]]; abs_coefs[i] = abs(coef); signs[i] = coef < 0 ? -1 : 1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -