// inter.c
    }
    // rate control will reuse this SAD value
    t->mb.sad = t->cmp[MB_16x16](t->mb.src_y, t->stride, ref, 16);
    if (t->mb.sad < sad_t)
    {
        return FALSE;
    }
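    // Luma residual test: transform and quantize the 16x16 difference against the
    // skip prediction. Any coefficient with |level| > 1, or a coefficient cost above
    // the thresholds below, disqualifies the macroblock from being coded as P_SKIP.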
    {
        // measured on foreman.cif at qp = 30: 35497 wasted encodes out of 80266 in total
        DECLARE_ALIGNED_MATRIX(dct, 16, 16, int16_t, 16);
        int32_t qp = t->qp_y;
        int32_t i, j;
        int16_t* curdct;
        // count the coefficient cost, adapted from JM 8.0
        int32_t run, k;
        int32_t coeff_cost, total_cost;
        total_cost = 0;
        t->expand8to16sub(ref, 16 / 4, 16 / 4, dct, t->mb.src_y, t->stride);
        for(i = 0 ; i < 4 ; i ++)
        {
            coeff_cost = 0;
            for(j = 0 ; j < 4 ; j ++)
            {
                int32_t idx = 4 * i + j;
                int32_t idx_r = luma_index[idx];
                curdct = dct + 16 * idx_r;
                t->fdct4x4(curdct);
                t->quant4x4(curdct, qp, FALSE);
                scan_zig_4x4(t->mb.dct_y_z[idx], curdct);
                {
                    run = -1;
                    for(k = 0 ; k < 16 ; k ++)
                    {
                        run ++;
                        if (t->mb.dct_y_z[idx][k] != 0)
                        {
                            if (ABS(t->mb.dct_y_z[idx][k]) > 1)
                            {
                                return FALSE;
                            }
                            else
                            {
                                coeff_cost += COEFF_COST[run];
                                run = -1;
                            }
                        }
                    }
                }
            }
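            // a group of four 4x4 blocks with negligible total cost is zeroed out
            // entirely; otherwise its cost accumulates, and a running total above 5
            // rules out P_SKIP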
            if (coeff_cost <= t->param.luma_coeff_cost)
            {
                int32_t idx_r = luma_index[4 * i];
                memset(t->mb.dct_y_z[4 * i], 0, 16 * sizeof(int16_t));
                memset(dct + 16 * idx_r, 0, 16 * sizeof(int16_t));
                idx_r = luma_index[4 * i + 1];
                memset(t->mb.dct_y_z[4 * i + 1], 0, 16 * sizeof(int16_t));
                memset(dct + 16 * idx_r, 0, 16 * sizeof(int16_t));
                idx_r = luma_index[4 * i + 2];
                memset(t->mb.dct_y_z[4 * i + 2], 0, 16 * sizeof(int16_t));
                memset(dct + 16 * idx_r, 0, 16 * sizeof(int16_t));
                idx_r = luma_index[4 * i + 3];
                memset(t->mb.dct_y_z[4 * i + 3], 0, 16 * sizeof(int16_t));
                memset(dct + 16 * idx_r, 0, 16 * sizeof(int16_t));
                coeff_cost = 0;
            }
            else
            {
                total_cost += coeff_cost;
                if (total_cost > 5)
                    return FALSE;
            }
        }
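        // P_SKIP carries no residual: clear the remaining coefficients and copy the
        // prediction into the luma reconstruction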
        memset(dct, 0, 16 * 16 * sizeof(int16_t));
        memset(t->mb.dct_y_z, 0, sizeof(int16_t) * 16 * 16);
        t->contract16to8add(dct, 16 / 4, 16 / 4, ref, t->mb.dst_y, t->edged_stride);
    }
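    // Chroma residual test: motion-compensate both chroma planes with the skip
    // vector and check that their residual also quantizes to (almost) zero.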
    {
        DECLARE_ALIGNED_MATRIX(pred_u, 8, 8, uint8_t, CACHE_SIZE);
        DECLARE_ALIGNED_MATRIX(pred_v, 8, 8, uint8_t, CACHE_SIZE);
        DECLARE_ALIGNED_MATRIX(dct, 10, 8, int16_t, CACHE_SIZE);
        int32_t qp = t->qp_uv;
        int16_t* curdct;
        uint8_t* start;
        uint8_t* dst;
        uint8_t* src;
        // count the coefficient cost, adapted from JM 8.0
        int32_t run, k;
        int32_t coeff_cost;
        src = t->ref[0][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3);
        t->eighth_pixel_mc_u(src, t->edged_stride_uv, pred_u, vec.x, vec.y, 8, 8);
        src = t->ref[0][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3);
        t->eighth_pixel_mc_u(src, t->edged_stride_uv, pred_v, vec.x, vec.y, 8, 8);
        start = pred_u;
        src = t->mb.src_u;
        dst = t->mb.dst_u;
        for(j = 0 ; j < 2 ; j ++)
        {
            coeff_cost = 0;
            t->expand8to16sub(start, 8 / 4, 8 / 4, dct, src, t->stride_uv);
            curdct = dct;
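            // transform/quantize the four 4x4 chroma blocks; the DC terms are kept
            // aside in dct[64..67] for the separate 2x2 DC transform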
            for(i = 0 ; i < 4 ; i ++)
            {
                run = -1;
                t->fdct4x4(curdct);
                dct[64 + i] = curdct[0];
                t->quant4x4(curdct, qp, FALSE);
                scan_zig_4x4(t->mb.dct_uv_z[j][i], curdct);
                {
                    for(k = 1 ; k < 16 ; k ++)
                    {
                        run ++;
                        if (t->mb.dct_uv_z[j][i][k] != 0)
                        {
                            if (ABS(t->mb.dct_uv_z[j][i][k]) > 1)
                            {
                                coeff_cost += 16 * 16 * 256;
                                return FALSE;
                            }
                            else
                            {
                                coeff_cost += COEFF_COST[run];
                                run = -1;
                            }
                        }
                    }
                }
                curdct += 16;
            }
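            // a negligible chroma residual is dropped; anything at or above
            // CHROMA_COEFF_COST rules out P_SKIP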
            if (coeff_cost < CHROMA_COEFF_COST)
            {
                memset(&t->mb.dct_uv_z[j][0][0], 0, 4 * 16 * sizeof(int16_t));
                memset(dct, 0, 8 * 8 * sizeof(int16_t));
            }
            else
            {
                return FALSE;
            }
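            // chroma DC: 2x2 DC transform and quantization; P_SKIP additionally
            // requires every DC level to quantize to zero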
            t->fdct2x2dc(curdct);
            t->quant2x2dc(curdct, qp, FALSE);
            scan_zig_2x2(t->mb.dc2x2_z[j], curdct);
            if (array_non_zero_count(t->mb.dc2x2_z[j], 4) != 0)
            {
                return FALSE;
            }
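            // every coefficient is zero at this point, so the inverse transforms
            // below simply rebuild the prediction into the reconstruction buffer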
            t->iquant2x2dc(curdct, qp);
            t->idct2x2dc(curdct);
            curdct = dct;
            for(i = 0 ; i < 4 ; i ++)
            {
                curdct[0] = dct[64 + i];
                t->idct4x4(curdct);
                curdct += 16;
            }
            t->contract16to8add(dct, 8 / 4, 8 / 4, start, dst, t->edged_stride_uv);
            // switch to the V plane for the second pass
            start = pred_v;
            dst = t->mb.dst_v;
            src = t->mb.src_v;
        }
    }
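    // both the luma and the chroma residuals vanish under quantization:
    // code this macroblock as P_SKIP with the predicted motion vector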
    t->mb.mb_mode = P_SKIP;
    t->mb.mb_part = MB_16x16;
    memcpy(t->mb.pred_p16x16, ref, sizeof(uint8_t) * 16 * 16);
    copy_nvec(&vec, &t->mb.vec[0][0], 4, 4, 4);
    return TRUE;
    }
    return FALSE;
}
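// Inter mode decision for a P macroblock: optionally try early P_SKIP detection,
// search the large partitions (16x16, 16x8, 8x16), optionally refine each 8x8
// quadrant with smaller sub-partitions, and finally let the best intra mode
// compete when intra-in-inter is enabled.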
uint32_t
T264_mode_decision_interp_y(_RW T264_t* t)
{
    uint32_t sad;
    uint32_t sad_min = -1;
    uint8_t best_mode;
    uint8_t sub_part[4];
    uint8_t part;
    int32_t i, n;
    int32_t preds[9];
    int32_t modes;
    search_data_t s0;
    subpart_search_data_t s1;
    typedef uint32_t (*p16x16_function_t)(T264_t*, search_data_t* s);
    static const p16x16_function_t p16x16_function[] =
    {
        T264_mode_decision_inter_16x16p,
        T264_mode_decision_inter_16x8p,
        T264_mode_decision_inter_8x16p
    };
    // xxx
#ifdef USE_PREV_DETECT
    uint32_t sad_median; // for skip detect
    sad_median = T264_predict_sad(t, 0);
    // p skip detection
    if (T264_detect_pskip(t, sad_median))
        return t->mb.sad;
#endif
    T264_inter_p16x16_mode_available(t, preds, &modes);
    best_mode = P_MODE;
    s0.list_index = 0;
    s1.list_index = 0;
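    // integer-pel search: try every available large-partition mode and keep the
    // one with the smallest SAD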
    for(n = 0 ; n < modes ; n ++)
    {
        int32_t mode = preds[n];
        sad = p16x16_function[mode](t, &s0);
        if (sad < sad_min)
        {
            part = mode;
            sad_min = sad;
        }
    }
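    // with USE_SUBBLOCK enabled, each 8x8 quadrant may additionally be split into
    // 8x8/8x4/4x8/4x4 sub-partitions; a winning candidate keeps its vectors in
    // vec_ref, a losing one is rolled back from vec_bak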
    if (t->flags & USE_SUBBLOCK)
    {
        uint32_t sub_sad_all = 0;
        typedef uint32_t (*p8x8_function_t)(T264_t*, int32_t, subpart_search_data_t* s);
        static const p8x8_function_t p8x8_function[] =
        {
            T264_mode_decision_inter_8x8p,
            T264_mode_decision_inter_8x8p,
            T264_mode_decision_inter_8x4p,
            T264_mode_decision_inter_4x8p,
            T264_mode_decision_inter_4x4p
        };
        s1.vec[0][0].refno = s0.vec[0].refno;
        for(i = 0 ; i < 4 ; i ++)
        {
            uint32_t sub_sad;
            uint32_t sub_sad_min = -1;
            T264_inter_p8x8_mode_available(t, preds, &modes, i);
            for(n = 0 ; n < modes ; n ++)
            {
                int32_t mode = preds[n];
                T264_vector_t vec_bak[4];
                vec_bak[0] = t->mb.vec_ref[VEC_LUMA + i / 2 * 16 + i % 2 * 2 + 0].vec[0];
                vec_bak[1] = t->mb.vec_ref[VEC_LUMA + i / 2 * 16 + i % 2 * 2 + 1].vec[0];
                vec_bak[2] = t->mb.vec_ref[VEC_LUMA + i / 2 * 16 + i % 2 * 2 + 8].vec[0];
                vec_bak[3] = t->mb.vec_ref[VEC_LUMA + i / 2 * 16 + i % 2 * 2 + 9].vec[0];
                sub_sad = p8x8_function[mode - MB_8x8](t, i, &s1);
                if (sub_sad < sub_sad_min)
                {
                    sub_part[i] = mode;
                    sub_sad_min = sub_sad;
                }
                else
                {
                    // candidate is not better: restore the vectors of the best mode so far
                    t->mb.vec_ref[VEC_LUMA + i / 2 * 16 + i % 2 * 2 + 0].vec[0] = vec_bak[0];
                    t->mb.vec_ref[VEC_LUMA + i / 2 * 16 + i % 2 * 2 + 1].vec[0] = vec_bak[1];
                    t->mb.vec_ref[VEC_LUMA + i / 2 * 16 + i % 2 * 2 + 8].vec[0] = vec_bak[2];
                    t->mb.vec_ref[VEC_LUMA + i / 2 * 16 + i % 2 * 2 + 9].vec[0] = vec_bak[3];
                }
            }
            sub_sad_all += sub_sad_min;
        }
        if (sub_sad_all < sad_min)
        {
            part = MB_8x8;
            sad_min = sub_sad_all;
        }
    }
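    // for 16x8, 8x16 and 8x8 partitionings, run quarter-pel refinement, fan the
    // refined vectors out into the macroblock's 4x4 vector grid, and charge each
    // sub-partition choice with its Exp-Golomb code size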
    switch (part)
    {
    case MB_16x8:
        sad_min = T264_quarter_pixel_search(t, 0, s0.src[1], s0.ref[1], s0.offset[1], &s0.vec[1], &s0.vec_median[1], s0.sad[1], 16, 8, t->mb.pred_p16x16, MB_16x8);
        sad_min += T264_quarter_pixel_search(t, 0, s0.src[2], s0.ref[2], s0.offset[2], &s0.vec[2], &s0.vec_median[2], s0.sad[2], 16, 8, t->mb.pred_p16x16 + 16 * 8, MB_16x8);
        copy_nvec(&s0.vec[1], &t->mb.vec[0][0], 4, 2, 4);
        copy_nvec(&s0.vec[2], &t->mb.vec[0][8], 4, 2, 4);
        break;
    case MB_8x16:
        sad_min = T264_quarter_pixel_search(t, 0, s0.src[3], s0.ref[3], s0.offset[3], &s0.vec[3], &s0.vec_median[3], s0.sad[3], 8, 16, t->mb.pred_p16x16, MB_8x16);
        sad_min += T264_quarter_pixel_search(t, 0, s0.src[4], s0.ref[4], s0.offset[4], &s0.vec[4], &s0.vec_median[4], s0.sad[4], 8, 16, t->mb.pred_p16x16 + 8, MB_8x16);
        copy_nvec(&s0.vec[3], &t->mb.vec[0][0], 2, 4, 4);
        copy_nvec(&s0.vec[4], &t->mb.vec[0][2], 2, 4, 4);
        break;
    case MB_8x8:
    case MB_8x8ref0:
        sad_min = 0;
        for(i = 0 ; i < 4 ; i ++)
        {
            switch(sub_part[i])
            {
            case MB_8x8:
                sad_min += T264_quarter_pixel_search(t, 0, s1.src[i][0], s1.ref[i][0], s1.offset[i][0], &s1.vec[i][0], &s1.vec_median[i][0], s1.sad[i][0], 8, 8, t->mb.pred_p16x16 + i / 2 * 16 * 8 + i % 2 * 8, MB_8x8);
                t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 0] = s1.vec[i][0];
                t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 1] = s1.vec[i][0];
                t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 4] = s1.vec[i][0];
                t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 5] = s1.vec[i][0];
                sad_min += eg_size_ue(t->bs, MB_8x8);
                break;
            case MB_8x4:
                sad_min += T264_quarter_pixel_search(t, 0, s1.src[i][1], s1.ref[i][1], s1.offset[i][1], &s1.vec[i][1], &s1.vec_median[i][1], s1.sad[i][1], 8, 4, t->mb.pred_p16x16 + i / 2 * 16 * 8 + i % 2 * 8, MB_8x4);
                sad_min += T264_quarter_pixel_search(t, 0, s1.src[i][2], s1.ref[i][2], s1.offset[i][2], &s1.vec[i][2], &s1.vec_median[i][2], s1.sad[i][2], 8, 4, t->mb.pred_p16x16 + i / 2 * 16 * 8 + i % 2 * 8 + 16 * 4, MB_8x4);
                t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 0] = s1.vec[i][1];
                t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 1] = s1.vec[i][1];
                t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 4] = s1.vec[i][2];
                t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 5] = s1.vec[i][2];
                sad_min += eg_size_ue(t->bs, MB_8x4);
                break;
            case MB_4x8:
                sad_min += T264_quarter_pixel_search(t, 0, s1.src[i][3], s1.ref[i][3], s1.offset[i][3], &s1.vec[i][3], &s1.vec[i][3], s1.sad[i][3], 4, 8, t->mb.pred_p16x16 + i / 2 * 16 * 8 + i % 2 * 8, MB_4x8);
                sad_min += T264_quarter_pixel_search(t, 0, s1.src[i][4], s1.ref[i][4], s1.offset[i][4], &s1.vec[i][4], &s1.vec[i][4], s1.sad[i][4], 4, 8, t->mb.pred_p16x16 + i / 2 * 16 * 8 + i % 2 * 8 + 4, MB_4x8);
                t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 0] = s1.vec[i][3];
                t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 1] = s1.vec[i][4];
                t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 4] = s1.vec[i][3];
                t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 5] = s1.vec[i][4];
                sad_min += eg_size_ue(t->bs, MB_4x8);
                break;
            case MB_4x4:
                sad_min += T264_quarter_pixel_search(t, 0, s1.src[i][5], s1.ref[i][5], s1.offset[i][5], &s1.vec[i][5], &s1.vec[i][5], s1.sad[i][5], 4, 4, t->mb.pred_p16x16 + i / 2 * 16 * 8 + i % 2 * 8, MB_4x4);
                sad_min += T264_quarter_pixel_search(t, 0, s1.src[i][6], s1.ref[i][6], s1.offset[i][6], &s1.vec[i][6], &s1.vec[i][6], s1.sad[i][6], 4, 4, t->mb.pred_p16x16 + i / 2 * 16 * 8 + i % 2 * 8 + 4, MB_4x4);
                sad_min += T264_quarter_pixel_search(t, 0, s1.src[i][7], s1.ref[i][7], s1.offset[i][7], &s1.vec[i][7], &s1.vec[i][7], s1.sad[i][7], 4, 4, t->mb.pred_p16x16 + i / 2 * 16 * 8 + i % 2 * 8 + 16 * 4, MB_4x4);
                sad_min += T264_quarter_pixel_search(t, 0, s1.src[i][8], s1.ref[i][8], s1.offset[i][8], &s1.vec[i][8], &s1.vec[i][8], s1.sad[i][8], 4, 4, t->mb.pred_p16x16 + i / 2 * 16 * 8 + i % 2 * 8 + 16 * 4 + 4, MB_4x4);
                t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 0] = s1.vec[i][5];
                t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 1] = s1.vec[i][6];
                t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 4] = s1.vec[i][7];
                t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 5] = s1.vec[i][8];
                sad_min += eg_size_ue(t->bs, MB_4x4);
                break;
            default:
                break;
            }
            t->mb.submb_part[i / 2 * 8 + i % 2 * 2 + 0] = sub_part[i];
            t->mb.submb_part[i / 2 * 8 + i % 2 * 2 + 1] = sub_part[i];
            t->mb.submb_part[i / 2 * 8 + i % 2 * 2 + 4] = sub_part[i];
            t->mb.submb_part[i / 2 * 8 + i % 2 * 2 + 5] = sub_part[i];
        }
        break;
    default:
        break;
    }
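    // when intra-in-inter is enabled, the best intra luma mode competes on equal
    // footing; otherwise the inter result stands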
    // thanks to chenm
    if (t->flags & USE_INTRAININTER)
        sad = T264_mode_decision_intra_y(t);
    else
        sad = -1;
    if (sad <= sad_min)
    {
        best_mode = t->mb.mb_mode;
        sad_min = sad;
    }
    else