📄 t264enc.c
字号:
T264_encode_intra_uv(t);
t->stat.i_block_num[t->mb.mb_mode] ++;
}
}
// No-op implementation of the t->emms hook.
// T264_init_cpu installs it as the default and replaces it with
// T264_emms_mmx when the T264_CPU_MMX flag is set.
// Declared with (void) so the definition is a real prototype rather than an
// old-style "unspecified arguments" declarator.
static void
T264_emms_c(void)
{
}
// Fill the encoder's function-pointer tables.
// Every entry is first bound to its portable C routine; then, unless the build
// defines CHIP_DM642, selected entries are overwritten with MMX / SSE / SSE2
// routines according to the bits in t->param.cpu.
// The SIMD blocks are tested in the order MMX, SSE, SSE2 and each one simply
// reassigns pointers, so a later block wins over an earlier one for the same slot.
void
T264_init_cpu(T264_t* t)
{
#ifndef CHIP_DM642
// Unless every T264_CPU_FORCE bit is set in param.cpu, replace the caller's
// value with whatever T264_detect_cpu() reports.
if ((t->param.cpu & T264_CPU_FORCE) != T264_CPU_FORCE)
{
t->param.cpu = T264_detect_cpu();
}
#endif
// intra 16x16 predictors
t->pred16x16[Intra_16x16_TOP] = T264_predict_16x16_mode_0_c;
t->pred16x16[Intra_16x16_LEFT] = T264_predict_16x16_mode_1_c;
t->pred16x16[Intra_16x16_DC] = T264_predict_16x16_mode_2_c;
t->pred16x16[Intra_16x16_PLANE] = T264_predict_16x16_mode_3_c;
t->pred16x16[Intra_16x16_DCTOP] = T264_predict_16x16_mode_20_c;
t->pred16x16[Intra_16x16_DCLEFT] = T264_predict_16x16_mode_21_c;
t->pred16x16[Intra_16x16_DC128] = T264_predict_16x16_mode_22_c;
// intra 8x8 predictors
t->pred8x8[Intra_8x8_TOP] = T264_predict_8x8_mode_0_c;
t->pred8x8[Intra_8x8_LEFT] = T264_predict_8x8_mode_1_c;
t->pred8x8[Intra_8x8_DC] = T264_predict_8x8_mode_2_c;
t->pred8x8[Intra_8x8_PLANE] = T264_predict_8x8_mode_3_c;
t->pred8x8[Intra_8x8_DCTOP] = T264_predict_8x8_mode_20_c;
t->pred8x8[Intra_8x8_DCLEFT] = T264_predict_8x8_mode_21_c;
t->pred8x8[Intra_8x8_DC128] = T264_predict_8x8_mode_22_c;
// intra 4x4 predictors
t->pred4x4[Intra_4x4_TOP] = T264_predict_4x4_mode_0_c;
t->pred4x4[Intra_4x4_LEFT] = T264_predict_4x4_mode_1_c;
t->pred4x4[Intra_4x4_DC] = T264_predict_4x4_mode_2_c;
t->pred4x4[Intra_4x4_DCTOP] = T264_predict_4x4_mode_20_c;
t->pred4x4[Intra_4x4_DCLEFT] = T264_predict_4x4_mode_21_c;
t->pred4x4[Intra_4x4_DC128] = T264_predict_4x4_mode_22_c;
t->pred4x4[Intra_4x4_DIAGONAL_DOWNLEFT] = T264_predict_4x4_mode_3_c;
t->pred4x4[Intra_4x4_DIAGONAL_DOWNRIGHT] = T264_predict_4x4_mode_4_c;
t->pred4x4[Intra_4x4_VERTICAL_RIGHT] = T264_predict_4x4_mode_5_c;
t->pred4x4[Intra_4x4_HORIZONTAL_DOWN] = T264_predict_4x4_mode_6_c;
t->pred4x4[Intra_4x4_VERTICAL_LEFT] = T264_predict_4x4_mode_7_c;
t->pred4x4[Intra_4x4_HORIZONTAL_UP] = T264_predict_4x4_mode_8_c;
// t->cmp is the configurable block-compare table: SAD routines when USE_SAD
// is set in t->flags, SATD routines otherwise.
if (t->flags & USE_SAD)
{
t->cmp[MB_16x16] = T264_sad_u_16x16_c;
t->cmp[MB_16x8] = T264_sad_u_16x8_c;
t->cmp[MB_8x16] = T264_sad_u_8x16_c;
t->cmp[MB_8x8] = T264_sad_u_8x8_c;
t->cmp[MB_8x4] = T264_sad_u_8x4_c;
t->cmp[MB_4x8] = T264_sad_u_4x8_c;
t->cmp[MB_4x4] = T264_sad_u_4x4_c;
}
else
{
t->cmp[MB_16x16] = T264_satd_u_16x16_c;
t->cmp[MB_16x8] = T264_satd_u_16x8_c;
t->cmp[MB_8x16] = T264_satd_u_8x16_c;
t->cmp[MB_8x8] = T264_satd_u_8x8_c;
t->cmp[MB_8x4] = T264_satd_u_8x4_c;
t->cmp[MB_4x8] = T264_satd_u_4x8_c;
t->cmp[MB_4x4] = T264_satd_u_4x4_c;
}
// t->sad always holds the SAD routines, independent of USE_SAD
t->sad[MB_16x16] = T264_sad_u_16x16_c;
t->sad[MB_16x8] = T264_sad_u_16x8_c;
t->sad[MB_8x16] = T264_sad_u_8x16_c;
t->sad[MB_8x8] = T264_sad_u_8x8_c;
t->sad[MB_8x4] = T264_sad_u_8x4_c;
t->sad[MB_4x8] = T264_sad_u_4x8_c;
t->sad[MB_4x4] = T264_sad_u_4x4_c;
// forward / inverse transforms
t->fdct4x4 = dct4x4_c;
t->fdct4x4dc = dct4x4dc_c;
t->fdct2x2dc = dct2x2dc_c;
t->idct4x4 = idct4x4_c;
t->idct4x4dc = idct4x4dc_c;
t->idct2x2dc = idct2x2dc_c;
// quantisation / inverse quantisation
t->quant4x4 = quant4x4_c;
t->quant4x4dc = quant4x4dc_c;
t->quant2x2dc = quant2x2dc_c;
t->iquant4x4 = iquant4x4_c;
t->iquant4x4dc = iquant4x4dc_c;
t->iquant2x2dc = iquant2x2dc_c;
// 8 <-> 16 bit sample helpers and strided copy
t->expand8to16 = expand8to16_c;
t->contract16to8 = contract16to8_c;
t->contract16to8add = contract16to8add_c;
t->expand8to16sub = expand8to16sub_c;
t->memcpy_stride_u = memcpy_stride_u_c;
// sub-pixel motion compensation and half-pel interpolation
t->eighth_pixel_mc_u = T264_eighth_pixel_mc_u_c;
t->interpolate_halfpel_h = interpolate_halfpel_h_c;
t->interpolate_halfpel_v = interpolate_halfpel_v_c;
t->interpolate_halfpel_hv = interpolate_halfpel_hv_c;
//t->pixel_avg = T264_pixel_avg_c; //modify by wushangyun for pia optimization
// per-partition-size pia table (takes the place of the single pixel_avg hook above)
t->pia[MB_16x16] = T264_pia_u_16x16_c;
t->pia[MB_16x8] = T264_pia_u_16x8_c;
t->pia[MB_8x16] = T264_pia_u_8x16_c;
t->pia[MB_8x8] = T264_pia_u_8x8_c;
t->pia[MB_8x4] = T264_pia_u_8x4_c;
t->pia[MB_4x8] = T264_pia_u_4x8_c;
t->pia[MB_4x4] = T264_pia_u_4x4_c;
t->pia[MB_2x2] = T264_pia_u_2x2_c;
t->T264_satd_16x16_u = T264_satd_i16x16_u_c;
// default emms hook is the empty C function; replaced below when MMX is enabled
t->emms = T264_emms_c;
// Motion search selected by flags: USE_FULLSEARCH is tested first, then
// USE_DIAMONDSEACH; with neither flag set T264_search_full is used.
if (t->flags & USE_FULLSEARCH)
t->search = T264_spiral_search_full;
else if (t->flags & USE_DIAMONDSEACH)
t->search = T264_search;
else
t->search = T264_search_full;
#ifndef CHIP_DM642
// MMX overrides: emms, 4x4 transforms, residual helpers and the two smallest pia sizes
if (t->param.cpu & T264_CPU_MMX)
{
t->emms = T264_emms_mmx;
t->fdct4x4 = dct4x4_mmx;
t->fdct4x4dc = dct4x4dc_mmx;
t->idct4x4 = idct4x4_mmx;
t->idct4x4dc = idct4x4dc_mmx;
t->contract16to8add = contract16to8add_mmx;
t->expand8to16sub = expand8to16sub_mmx;
t->pia[MB_4x8] = T264_pia_u_4x8_mmx;
t->pia[MB_4x4] = T264_pia_u_4x4_mmx;
}
// SSE overrides: SAD for partitions up to 8 wide, pia for 8x4 and larger.
if (t->param.cpu & T264_CPU_SSE)
{
// t->cmp is only redirected when it was bound to SAD above; the SATD
// selection keeps its C routines.
if (t->flags & USE_SAD)
{
t->cmp[MB_8x16] = T264_sad_u_8x16_sse;
t->cmp[MB_8x8] = T264_sad_u_8x8_sse;
t->cmp[MB_8x4] = T264_sad_u_8x4_sse;
t->cmp[MB_4x8] = T264_sad_u_4x8_sse;
t->cmp[MB_4x4] = T264_sad_u_4x4_sse;
}
t->pia[MB_16x16] = T264_pia_u_16x16_sse;
t->pia[MB_16x8] = T264_pia_u_16x8_sse;
t->pia[MB_8x16] = T264_pia_u_8x16_sse;
t->pia[MB_8x8] = T264_pia_u_8x8_sse;
t->pia[MB_8x4] = T264_pia_u_8x4_sse;
t->sad[MB_8x16] = T264_sad_u_8x16_sse;
t->sad[MB_8x8] = T264_sad_u_8x8_sse;
t->sad[MB_8x4] = T264_sad_u_8x4_sse;
t->sad[MB_4x8] = T264_sad_u_4x8_sse;
t->sad[MB_4x4] = T264_sad_u_4x4_sse;
}
// SSE2 overrides: 4x4 quantisers, 16-wide SAD, h/v half-pel interpolation.
// The 16x16 and 16x8 pia entries written by the SSE block are overwritten here.
if (t->param.cpu & T264_CPU_SSE2)
{
t->quant4x4 = quant4x4_sse2;
t->iquant4x4 = iquant4x4_sse2;
if (t->flags & USE_SAD)
{
t->cmp[MB_16x16] = T264_sad_u_16x16_sse2;
t->cmp[MB_16x8] = T264_sad_u_16x8_sse2;
}
t->sad[MB_16x16] = T264_sad_u_16x16_sse2;
t->sad[MB_16x8] = T264_sad_u_16x8_sse2;
t->interpolate_halfpel_h = interpolate_halfpel_h_sse2;
t->interpolate_halfpel_v = interpolate_halfpel_v_sse2;
t->pia[MB_16x16] = T264_pia_u_16x16_sse2;
t->pia[MB_16x8] = T264_pia_u_16x8_sse2;
}
#endif
}
// Aim frame `f` at the planes stored in the caller's picture buffer `src` and
// stamp it with `poc`. Nothing is copied: Y[0] is `src` itself, U begins
// width * height elements after Y[0], and V begins (width * height) >> 2
// elements after U.
static void __inline
T264_init_frame(T264_t* t, uint8_t* src, T264_frame_t* f, int32_t poc)
{
    f->poc = poc;

    f->Y[0] = src;
    f->U = &f->Y[0][t->width * t->height];
    f->V = &f->U[t->width * t->height >> 2];
}
// Queue one source picture in the next free entry of t->pending_bframes.
// The entry's Y[0] buffer receives width * height * 3 / 2 bytes copied from
// `src` (written as size + (size >> 1)), the entry is tagged with `poc`, and
// t->pending_bframes_num is advanced by one.
static void __inline
T264_pending_bframe(T264_t* t, uint8_t* src, int32_t poc)
{
    T264_frame_t* slot = t->pending_bframes + t->pending_bframes_num;

    t->pending_bframes_num ++;
    memcpy(slot->Y[0], src, t->height * t->width + (t->height * t->width >> 1));
    slot->poc = poc;
}
// Post-encode bookkeeping for the current macroblock:
//   - non-zero coefficient counts of every luma and chroma block
//     (t->mb.nnz plus the t->mb.nnz_ref cache),
//   - coded block patterns (cbp_y, cbp_c and the combined t->mb.cbp),
//   - the final P_SKIP / B_SKIP decision,
//   - refresh of the intra 4x4 mode data and the motion vector cache.
void
T264_mb_encode_post(T264_t* t)
{
    int32_t blk, list;
    int32_t plane, row, col;
    int32_t dc_nz0, dc_nz1;
    // for CABAC: bit 0 = luma DC (I_16x16 only), bit 1 / bit 2 = the two chroma DC blocks
    int32_t cbp_dc = 0;

    // ---- luma ----
    t->mb.cbp_y = 0;
    if (t->mb.mb_mode != I_16x16)
    {
        // all 16 coefficients of a block are counted; every run of four
        // consecutive blocks shares one bit of cbp_y
        for(blk = 0 ; blk < 16 ; blk ++)
        {
            const int32_t nz = array_non_zero_count(t->mb.dct_y_z[blk], 16);

            col = luma_inverse_x[blk];
            row = luma_inverse_y[blk];
            t->mb.nnz[luma_index[blk]] = nz;
            t->mb.nnz_ref[NNZ_LUMA + row * 8 + col] = nz;
            if (nz > 0)
                t->mb.cbp_y |= 1 << (blk / 4);
        }
    }
    else
    {
        int32_t dc_nz;

        // coefficient 0 of each block is left out (15 entries counted from
        // index 1); any non-zero block marks the whole macroblock as coded
        for(blk = 0 ; blk < 16 ; blk ++)
        {
            const int32_t nz = array_non_zero_count(&(t->mb.dct_y_z[blk][1]), 15);

            col = luma_inverse_x[blk];
            row = luma_inverse_y[blk];
            t->mb.nnz[luma_index[blk]] = nz;
            t->mb.nnz_ref[NNZ_LUMA + row * 8 + col] = nz;
            if (nz > 0)
                t->mb.cbp_y = 0x0f;
        }

        // for CABAC, record whether the 16 entries of dc4x4_z hold anything
        dc_nz = array_non_zero_count(&(t->mb.dc4x4_z[0]), 16);
        if (dc_nz != 0)
            cbp_dc = 1;
    }

    // ---- chroma: two planes of four blocks, coefficient 0 left out again ----
    t->mb.cbp_c = 0;
    for(plane = 0 ; plane < 2 ; plane ++)
    {
        for(blk = 0 ; blk < 4 ; blk ++)
        {
            const int nz = array_non_zero_count(&(t->mb.dct_uv_z[plane][blk][1]), 15);

            col = blk % 2;
            row = blk / 2;
            t->mb.nnz[plane * 4 + blk + 16] = nz;
            if (plane == 0)
                t->mb.nnz_ref[NNZ_CHROMA0 + row * 8 + col] = nz;
            else
                t->mb.nnz_ref[NNZ_CHROMA1 + row * 8 + col] = nz;
            if (nz > 0)
                t->mb.cbp_c = 0x02; /* dc+ac */
        }
    }

    // for CABAC, chroma dc pattern
    dc_nz0 = array_non_zero_count(t->mb.dc2x2_z[0], 4) > 0;
    dc_nz1 = array_non_zero_count(t->mb.dc2x2_z[1], 4) > 0;
    if ((dc_nz0 || dc_nz1) && t->mb.cbp_c == 0x00)
        t->mb.cbp_c = 0x01; /* dc only */
    cbp_dc |= dc_nz0 ? 0x02 : 0;
    cbp_dc |= dc_nz1 ? 0x04 : 0;

    // ---- final skip decision ----
    if (t->slice_type == SLICE_P)
    {
        // a 16x16 partition with no coded residual and reference 0 becomes
        // P_SKIP only when its vector equals the predicted skip vector
        if (t->mb.mb_part == MB_16x16 && t->mb.cbp_y == 0 && t->mb.cbp_c == 0 && t->mb.vec[0][0].refno == 0)
        {
            T264_vector_t skip_mv;

            T264_predict_mv_skip(t, 0, &skip_mv);
            if (skip_mv.x == t->mb.vec[0][0].x && skip_mv.y == t->mb.vec[0][0].y)
            {
                t->mb.mb_part = MB_16x16;
                t->mb.mb_mode = P_SKIP;
            }
        }
    }
    else if (t->slice_type == SLICE_B)
    {
        if (t->mb.is_copy && t->mb.cbp_y == 0 && t->mb.cbp_c == 0)
            t->mb.mb_mode = B_SKIP;
    }

    // ---- intra 4x4 prediction modes ----
    if (t->mb.mb_mode != I_4x4)
    {
        // every other mode resets the sixteen per-block modes to Intra_4x4_DC
        memset(t->mb.mode_i4x4, Intra_4x4_DC, 16 * sizeof(uint8_t));
    }
    else
    {
        int8_t* pred_cache = t->mb.i4x4_pred_mode_ref;

        for(blk = 0 ; blk < 16 ; blk ++)
        {
            col = luma_inverse_x[blk];
            row = luma_inverse_y[blk];
            pred_cache[IPM_LUMA + row * 8 + col] = t->mb.mode_i4x4[blk];
        }
    }

    // ---- motion data ----
    if (t->mb.mb_mode == I_4x4 || t->mb.mb_mode == I_16x16)
    {
        // intra macroblock: mark partitions and both vector lists as invalid
        memset(t->mb.submb_part, -1, sizeof(uint8_t) * 16);
        t->mb.mb_part = -1;
        for(list = 0 ; list < 2 ; list ++)
        {
            for(blk = 0 ; blk < 16 ; blk ++)
            {
                t->mb.vec[list][blk].refno = -1;
                t->mb.vec[list][blk].y = 0;
                t->mb.vec[list][blk].x = 0;
            }
        }
    }
    else
    {
        // inter macroblock: publish the 4x4 grid of vectors and partition
        // info into the cache, whose rows are 8 entries apart
        for(row = 0 ; row < 4 ; row ++)
        {
            for(col = 0 ; col < 4 ; col ++)
            {
                const int32_t from = row * 4 + col;
                const int32_t to = VEC_LUMA + row * 8 + col;

                t->mb.vec_ref[to].vec[0] = t->mb.vec[0][from];
                t->mb.vec_ref[to].vec[1] = t->mb.vec[1][from];
                t->mb.vec_ref[to].part = t->mb.mb_part;
                t->mb.vec_ref[to].subpart = t->mb.submb_part[from];
            }
        }
    }

    //for CABAC, cbp
    t->mb.cbp = t->mb.cbp_y | (t->mb.cbp_c<<4) | (cbp_dc << 8);
}
// Copy the NAL units stored back to back in `src` into `dst`, inserting
// emulation-prevention bytes on the way.
//
//   src      - input units; the first 4 bytes of each (the 00 00 00 01 start
//              code) are copied verbatim.
//   nal_pos  - nal_num offsets; unit i is nal_pos[i + 1] - nal_pos[i] bytes
//              long, so nal_num - 1 units are processed.
//   nal_num  - number of entries in nal_pos.
//   dst      - output buffer.
//   dst_size - capacity of dst in bytes.
//              NOTE(review): inferred from the name; the previous code never
//              read this parameter - confirm the caller passes the real size.
//
// Inside a unit, when a second consecutive zero byte is followed by a byte
// <= 3, a 0x03 byte is inserted after that zero. The last byte of a unit is
// always copied as is.
//
// Returns the number of bytes stored in dst. Only complete units are counted:
// if a unit does not fit in the remaining space, writing stops and the length
// up to the end of the previous unit is returned, so dst is never written past
// dst_size. A negative unit length (corrupt offsets) also stops processing.
// Units shorter than 5 bytes are copied verbatim without reading past their end.
static uint32_t
write_dst(uint8_t* src, int32_t nal_pos[4], int32_t nal_num, uint8_t* dst, int32_t dst_size)
{
    int32_t i, j;
    int32_t n = 0;      // bytes committed to dst (whole units only)

    for(i = 0 ; i < nal_num - 1 ; i ++)
    {
        const int32_t nal_len = nal_pos[i + 1] - nal_pos[i];
        const int32_t head_len = nal_len < 4 ? nal_len : 4;
        int32_t out = n;    // tentative write position inside the current unit
        int32_t count = 0;  // consecutive zero bytes seen in the payload

        if (nal_len < 0)
            break;

        // start code 00 00 00 01
        if (dst_size - out < head_len)
            return (uint32_t)n;
        for(j = 0 ; j < head_len ; j ++)
            dst[out ++] = src[j];

        for(j = 4 ; j < nal_len ; j ++)
        {
            int32_t escape = 0;

            // the final byte is never examined for escaping
            if (j < nal_len - 1)
            {
                if (src[j] == 0)
                {
                    count ++;
                    if (count >= 2 && src[j + 1] <= 3)
                    {
                        escape = 1;
                        count = 0;
                    }
                }
                else
                {
                    count = 0;
                }
            }

            if (dst_size - out < 1 + escape)
                return (uint32_t)n;     // unit does not fit: drop it entirely

            dst[out ++] = src[j];       // equals 0 whenever escape is set
            if (escape)
                dst[out ++] = 3;
        }

        n = out;
        src += nal_len;
    }
    return (uint32_t)n;
}
///////////////////////////////////////////////////////////
// interface
T264_t*
T264_open(T264_param_t* para)
{
T264_t* t;
int32_t i;
//
// TODO: here check the input param if it is valid
//
if (para->flags & USE_FORCEBLOCKSIZE)
para->flags |= USE_SUBBLOCK;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -