/* inter.c */
s->ref[i][5] = t->ref[context.list_index][s->vec[i][5].refno];
s->vec_median[i][5] = vec[0];
t->mb.vec_ref[VEC_LUMA + i / 2 * 16 + i % 2 * 2 + 0].vec[0] = s->vec[i][5];
get_pmv(t, 0, vec, MB_4x4, luma_index[4 * i + 1], 1, &num);
s->src[i][6] = s->src[i][5] + 4;
context.offset += 4;
s->sad[i][6] = t->search(t, &context);
s->sad[i][6]+= REFCOST(context.vec_best.refno);
s->vec[i][6] = context.vec_best;
s->offset[i][6] = context.offset;
s->ref[i][6] = t->ref[context.list_index][s->vec[i][6].refno];
s->vec_median[i][6] = vec[0];
t->mb.vec_ref[VEC_LUMA + i / 2 * 16 + i % 2 * 2 + 1].vec[0] = s->vec[i][6];
get_pmv(t, 0, vec, MB_4x4, luma_index[4 * i + 2], 1, &num);
s->src[i][7] = s->src[i][5] + 4 * t->stride;
context.offset += 4 * t->edged_stride - 4;
s->sad[i][7] = t->search(t, &context);
s->sad[i][7]+= REFCOST(context.vec_best.refno);
s->vec[i][7] = context.vec_best;
s->offset[i][7] = context.offset;
s->ref[i][7] = t->ref[context.list_index][s->vec[i][7].refno];
s->vec_median[i][7] = vec[0];
t->mb.vec_ref[VEC_LUMA + i / 2 * 16 + i % 2 * 2 + 8].vec[0] = s->vec[i][7];
get_pmv(t, 0, vec, MB_4x4, luma_index[4 * i + 3], 1, &num);
s->src[i][8] = s->src[i][7] + 4;
context.offset += 4;
s->sad[i][8] = t->search(t, &context);
s->sad[i][8]+= REFCOST(context.vec_best.refno);
s->vec[i][8] = context.vec_best;
s->offset[i][8] = context.offset;
s->ref[i][8] = t->ref[context.list_index][s->vec[i][8].refno];
s->vec_median[i][8] = vec[0];
t->mb.vec_ref[VEC_LUMA + i / 2 * 16 + i % 2 * 2 + 9].vec[0] = s->vec[i][8];
return s->sad[i][5] + s->sad[i][6] + s->sad[i][7] + s->sad[i][8] + eg_size_ue(t->bs, MB_4x4);
}
/*
 * Transform, quantize and reconstruct the luma residual of an inter MB
 * coded with a single 16x16 prediction.
 *
 * Per 4x4 block: residual (expand8to16sub) -> 4x4 DCT -> quant ->
 * zig-zag scan into t->mb.dct_y_z -> inverse quant/DCT; finally
 * contract16to8add adds the reconstructed residual back onto the
 * prediction and stores the result at t->mb.dst_y.
 *
 * A coefficient-cost heuristic (from jm80) discards the coefficients of
 * an 8x8 group when its cost is <= t->param.luma_coeff_cost, and of the
 * whole MB when the total cost is <= 5.
 *
 * t     encoder context (reads qp_y, mb.src_y; writes mb.dct_y_z, mb.dst_y)
 * pred  16x16 luma prediction (input; reconstruction is added onto it
 *       into t->mb.dst_y by contract16to8add)
 */
void
T264_encode_inter_16x16p(_RW T264_t* t, uint8_t* pred)
{
    DECLARE_ALIGNED_MATRIX(dct, 16, 16, int16_t, 16);
    int32_t qp = t->qp_y;
    int32_t i, j;
    int16_t* curdct;
    // we will count coeff cost, from jm80
    int32_t run, k;
    int32_t coeff_cost, total_cost;

    total_cost = 0;
    t->expand8to16sub(pred, 16 / 4, 16 / 4, dct, t->mb.src_y, t->stride);
    for(i = 0 ; i < 4 ; i ++)       // i: 8x8 group of four 4x4 blocks
    {
        coeff_cost = 0;
        for(j = 0 ; j < 4 ; j ++)   // j: 4x4 block within the group
        {
            int32_t idx = 4 * i + j;
            int32_t idx_r = luma_index[idx];    // raster position of the block
            curdct = dct + 16 * idx_r;
            t->fdct4x4(curdct);
            t->quant4x4(curdct, qp, FALSE);
            scan_zig_4x4(t->mb.dct_y_z[idx], curdct);
            if (coeff_cost <= 5)
            {
                // run-length cost of the scanned levels; any level with
                // magnitude > 1 forces the group to be coded
                run = -1;
                for(k = 0 ; k < 16 ; k ++)
                {
                    run ++;
                    if (t->mb.dct_y_z[idx][k] != 0)
                    {
                        if (ABS(t->mb.dct_y_z[idx][k]) > 1)
                        {
                            coeff_cost += 16 * 16 * 256; // big enough number
                            break;
                        }
                        else
                        {
                            coeff_cost += COEFF_COST[run];
                            run = -1;
                        }
                    }
                }
            }
            else
            {
                // cost already past the counting threshold: clamp to the
                // "must keep" sentinel and skip further counting
                coeff_cost = 16 * 16 * 256;
            }
            t->iquant4x4(curdct, qp);
            t->idct4x4(curdct);
        }
        if (coeff_cost <= t->param.luma_coeff_cost)
        {
            // group too cheap to be worth coding: zero all four 4x4
            // blocks (scanned levels and residual) — replaces the four
            // copy-pasted memset pairs of the original
            for(j = 0 ; j < 4 ; j ++)
            {
                int32_t idx = 4 * i + j;
                memset(t->mb.dct_y_z[idx], 0, 16 * sizeof(int16_t));
                memset(dct + 16 * luma_index[idx], 0, 16 * sizeof(int16_t));
            }
            coeff_cost = 0;
        }
        total_cost += coeff_cost;
    }
    if (total_cost <= 5)
    {
        // whole MB is negligible: drop every coefficient
        memset(dct, 0, 16 * 16 * sizeof(int16_t));
        memset(t->mb.dct_y_z, 0, sizeof(int16_t) * 16 * 16);
    }
    t->contract16to8add(dct, 16 / 4, 16 / 4, pred, t->mb.dst_y, t->edged_stride);
}
/*
 * Encode the luma plane of an inter macroblock.
 *
 * Thin wrapper: forwards the 16x16 prediction buffer held in
 * t->mb.pred_p16x16 to the shared residual-coding routine.
 */
void
T264_encode_inter_y(_RW T264_t* t)
{
    uint8_t* luma_pred = t->mb.pred_p16x16;
    T264_encode_inter_16x16p(t, luma_pred);
}
// NOTE: this routine will merge with T264_encode_intra_uv
/*
 * Transform, quantize and reconstruct the inter chroma residual.
 *
 * The loop body runs twice: the first pass codes U, then the
 * assignments at the bottom of the loop swap start/dst/src over to the
 * V plane for the second pass.
 *
 * Per 8x8 plane:
 *   1. expand8to16sub forms the 16-bit residual from src and the
 *      prediction in start.
 *   2. Each of the four 4x4 blocks is DCT'd; its pre-quant DC
 *      coefficient is stashed in dct[64 + i] for the separate DC stage.
 *   3. AC levels are quantized and zig-zag scanned into
 *      t->mb.dct_uv_z[j]; a run-length coefficient cost (jm80 scheme)
 *      is accumulated, and if it stays below CHROMA_COEFF_COST all AC
 *      of the plane is dropped.
 *   4. The stashed DCs (curdct now points at dct + 64) go through the
 *      2x2 DC transform/quant into t->mb.dc2x2_z[j] and back.
 *   5. Reconstructed DCs are copied back into each 4x4 block before the
 *      inverse DCT; contract16to8add combines residual and prediction
 *      into dst.
 *
 * t       encoder context; writes t->mb.dct_uv_z, dc2x2_z, dst_u, dst_v
 * pred_u  8x8 U prediction (input)
 * pred_v  8x8 V prediction (input)
 */
void
T264_transform_inter_uv(_RW T264_t* t, uint8_t* pred_u, uint8_t* pred_v)
{
    // 10x8 int16 buffer: first 64 entries hold the four 4x4 coefficient
    // blocks, dct[64..67] is scratch for the DC terms.
    DECLARE_ALIGNED_MATRIX(dct, 10, 8, int16_t, CACHE_SIZE);
    int32_t qp = t->qp_uv;
    int32_t i, j;
    int16_t* curdct;
    uint8_t* start;
    uint8_t* dst;
    uint8_t* src;

    start = pred_u;
    src = t->mb.src_u;
    dst = t->mb.dst_u;
    for(j = 0 ; j < 2 ; j ++)   // j == 0: U plane, j == 1: V plane
    {
        // we will count coeff cost, from jm80
        int32_t run, k;
        int32_t coeff_cost;

        coeff_cost = 0;
        t->expand8to16sub(start, 8 / 4, 8 / 4, dct, src, t->stride_uv);
        curdct = dct;
        for(i = 0 ; i < 4 ; i ++)
        {
            run = -1;
            t->fdct4x4(curdct);
            dct[64 + i] = curdct[0];    // stash pre-quant DC for the 2x2 DC stage
            t->quant4x4(curdct, qp, FALSE);
            scan_zig_4x4(t->mb.dct_uv_z[j][i], curdct);
            {
                // AC-only cost: k starts at 1, the DC is coded separately below
                for(k = 1 ; k < 16 ; k ++)
                {
                    run ++;
                    if (t->mb.dct_uv_z[j][i][k] != 0)
                    {
                        if (ABS(t->mb.dct_uv_z[j][i][k]) > 1)
                        {
                            // level > 1: force the plane to be coded
                            coeff_cost += 16 * 16 * 256;
                            break;
                        }
                        else
                        {
                            coeff_cost += COEFF_COST[run];
                            run = -1;
                        }
                    }
                }
            }
            t->iquant4x4(curdct, qp);
            curdct += 16;
        }
        if (coeff_cost < CHROMA_COEFF_COST)
        {
            // AC too cheap to code: zero scanned levels and the residual
            // blocks (only dct[0..63]; the stashed DCs at dct[64..] survive)
            memset(&t->mb.dct_uv_z[j][0][0], 0, 4 * 16 * sizeof(int16_t));
            memset(dct, 0, 8 * 8 * sizeof(int16_t));
        }
        // curdct == dct + 64 here: the four stashed DC terms
        t->fdct2x2dc(curdct);
        t->quant2x2dc(curdct, qp, FALSE);
        scan_zig_2x2(t->mb.dc2x2_z[j], curdct);
        t->iquant2x2dc(curdct, qp);
        t->idct2x2dc(curdct);
        curdct = dct;
        for(i = 0 ; i < 4 ; i ++)
        {
            curdct[0] = dct[64 + i];    // restore reconstructed DC into the block
            t->idct4x4(curdct);
            curdct += 16;
        }
        t->contract16to8add(dct, 8 / 4, 8 / 4, start, dst, t->edged_stride_uv);
        //
        // change to v
        //
        start = pred_v;
        dst = t->mb.dst_v;
        src = t->mb.src_v;
    }
}
/*
 * Motion-compensate the chroma (U/V) prediction of the current inter
 * macroblock and hand it to T264_transform_inter_uv for residual coding.
 *
 * The 8x8 chroma prediction is assembled in the local pred_u / pred_v
 * buffers (row stride 8) according to the macroblock partition:
 *   MB_16x16            one 8x8 chroma MC per plane
 *   MB_16x8 / MB_8x16   two 8x4 / 4x8 chroma MCs per plane
 *   MB_8x8(ref0)        per 8x8 sub-block, dispatch on submb_part down
 *                       to four 2x2 chroma MCs for MB_4x4
 *
 * Addressing: (t->mb.mb_x << 3, t->mb.mb_y << 3) is this MB's top-left
 * in the chroma plane; vec.y >> 3 / vec.x >> 3 gives the integer-pel
 * displacement, while the raw vec.x / vec.y are passed on so
 * t->eighth_pixel_mc_u can resolve the sub-pel fraction.
 * Only reference list 0 is used here (list_index == 0).
 */
void
T264_encode_inter_uv(_RW T264_t* t)
{
    DECLARE_ALIGNED_MATRIX(pred_u, 8, 8, uint8_t, CACHE_SIZE);
    DECLARE_ALIGNED_MATRIX(pred_v, 8, 8, uint8_t, CACHE_SIZE);
    T264_vector_t vec;
    uint8_t* src, *dst;
    uint8_t* src_u, *dst_u;
    int32_t i;
    int32_t list_index = 0;

    switch (t->mb.mb_part)
    {
    case MB_16x16:
        // one vector for the whole MB: single 8x8 MC per plane
        vec = t->mb.vec[0][0];
        src = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3);
        dst = pred_u;
        t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 8, 8);
        src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3);
        dst = pred_v;
        t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 8, 8);
        break;
    case MB_16x8:
        // top 16x8 partition -> top 8x4 chroma half
        vec = t->mb.vec[0][0];
        src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3);
        dst_u = pred_u;
        t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 8, 4);
        src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3);
        dst = pred_v;
        t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 8, 4);
        // bottom 16x8 partition -> bottom 8x4 chroma half (4 rows down)
        vec = t->mb.vec[0][luma_index[8]];
        src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) +
            4 * t->edged_stride_uv;
        dst_u += 4 * 8;     // 4 rows of the 8-wide pred buffer
        t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 8, 4);
        src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) +
            4 * t->edged_stride_uv;
        dst += 4 * 8;
        t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 8, 4);
        break;
    case MB_8x16:
        // left 8x16 partition -> left 4x8 chroma half
        vec = t->mb.vec[0][0];
        src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3);
        dst_u = pred_u;
        t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 4, 8);
        src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3);
        dst = pred_v;
        t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 4, 8);
        // right 8x16 partition -> right 4x8 chroma half (4 columns over)
        vec = t->mb.vec[0][luma_index[4]];
        src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + 4;
        dst_u += 4;
        t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 4, 8);
        src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + 4;
        dst += 4;
        t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 4, 8);
        break;
    case MB_8x8:
    case MB_8x8ref0:
        // each 8x8 sub-block maps to a 4x4 chroma quadrant at
        // (i / 2 * 4, i % 2 * 4); dispatch on its sub-partition
        for(i = 0 ; i < 4 ; i ++)
        {
            switch(t->mb.submb_part[luma_index[4 * i]])
            {
            case MB_8x8:
                // one 4x4 chroma MC for the quadrant
                vec = t->mb.vec[0][luma_index[4 * i]];
                src = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4);
                dst = pred_u + i / 2 * 32 + i % 2 * 4;
                t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 4, 4);
                src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4);
                dst = pred_v + i / 2 * 32 + i % 2 * 4;
                t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 4, 4);
                break;
            case MB_8x4:
                // two stacked 4x2 chroma MCs (second is 2 rows down)
                vec = t->mb.vec[0][luma_index[4 * i]];
                src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4);
                dst_u = pred_u + i / 2 * 32 + i % 2 * 4;
                t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 4, 2);
                src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4);
                dst = pred_v + i / 2 * 32 + i % 2 * 4;
                t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 4, 2);
                vec = t->mb.vec[0][luma_index[4 * i + 2]];
                src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) +
                    2 * t->edged_stride_uv;
                dst_u += 2 * 8;     // 2 rows down in the 8-wide pred buffer
                t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 4, 2);
                src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) +
                    2 * t->edged_stride_uv;
                dst += 2 * 8;
                t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 4, 2);
                break;
            case MB_4x8:
                // two side-by-side 2x4 chroma MCs (second is 2 columns over)
                vec = t->mb.vec[0][luma_index[4 * i]];
                src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4);
                dst_u = pred_u + i / 2 * 32 + i % 2 * 4;
                t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 2, 4);
                src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4);
                dst = pred_v + i / 2 * 32 + i % 2 * 4;
                t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 2, 4);
                vec = t->mb.vec[0][luma_index[4 * i + 1]];
                src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) + 2;
                dst_u += 2;
                t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 2, 4);
                src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) + 2;
                dst += 2;
                t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 2, 4);
                break;
            case MB_4x4:
                // four 2x2 chroma MCs, walked left-to-right, top-to-bottom
                vec = t->mb.vec[0][luma_index[4 * i]];
                src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4);
                dst_u = pred_u + i / 2 * 32 + i % 2 * 4;
                t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 2, 2);
                src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4);
                dst = pred_v + i / 2 * 32 + i % 2 * 4;
                t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 2, 2);
                vec = t->mb.vec[0][luma_index[4 * i + 1]];
                src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) + 2;
                dst_u += 2;
                t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 2, 2);
                src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) + 2;
                dst += 2;
                t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 2, 2);
                vec = t->mb.vec[0][luma_index[4 * i + 2]];
                src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) +
                    2 * t->edged_stride_uv;
                dst_u += 2 * 8 - 2;     // down 2 rows, back 2 columns
                t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 2, 2);
                src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) +
                    2 * t->edged_stride_uv;
                dst += 2 * 8 - 2;
                t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 2, 2);
                vec = t->mb.vec[0][luma_index[4 * i + 3]];
                src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) +
                    2 * t->edged_stride_uv + 2;
                dst_u += 2;
                t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 2, 2);
                src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) +
                    2 * t->edged_stride_uv + 2;
                dst += 2;
                t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 2, 2);
                break;
            default:
                break;
            }
        }
        break;
    default:
        break;
    }
    T264_transform_inter_uv(t, pred_u, pred_v);
}