📄 inter_test.c
字号:
s->offset[i][2] = context.offset;
s->ref[i][2] = t->refl0[s->vec[i][2].refno];
s->vec_median[i][2] = vec[0];
s->sad[i][2] = T264_quarter_pixel_search(t, s->src[i][2], s->ref[i][2], s->offset[i][2], &s->vec[i][2], &s->vec_median[i][2], s->sad[i][2], 8, 4, t->mb.pred_p8x8 + i / 2 * 16 * 8 + i % 2 * 8 + 16 * 4);
t->mb.vec_ref[VEC_LUMA + i / 2 * 16 + i % 2 * 2 + 8].vec =
t->mb.vec_ref[VEC_LUMA + i / 2 * 16 + i % 2 * 2 + 9].vec = s->vec[i][2];
t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 4] = s->vec[i][2];
t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 5] = s->vec[i][2];
return s->sad[i][1] + s->sad[i][2];
}
uint32_t
T264_mode_decision_inter_4x8p(_RW T264_t * t, int32_t i, subpart_search_data_t* s)
{
    /* Mode decision for the two 4x8 sub-partitions of 8x8 block i (0..3).
     * For each half: integer-pel search from the PMV candidate list, then
     * quarter-pel refinement writing the prediction into pred_p8x8, then
     * the winning vector is broadcast into the MB-level vector maps.
     * Returns the combined SAD of both 4x8 halves.
     * Sub-partition slots 3/4 of s->{src,sad,vec,offset,ref,vec_median}
     * are dedicated to the 4x8 shape. */
    T264_vector_t vec[5 + 10]; // NOTE: max 10 refs
    T264_search_context_t context;
    int32_t num;

    /* ---- left 4x8 half ---- */
    get_pmv(t, vec, MB_4x8, luma_index[4 * i + 0], 1, &num);
    context.height = 8;
    context.width  = 4;
    context.limit_x= t->param.search_x;
    context.limit_y= t->param.search_y;
    context.vec    = vec;
    context.vec_num= num;
    /* 8x8 block i sits at (i%2*8, i/2*8) inside the 16x16 MB */
    context.offset = ((t->mb.mb_y << 4) + i / 2 * 8) * t->edged_stride + (t->mb.mb_x << 4) + i % 2 * 8;
    s->src[i][3] = t->mb.src_y + (i / 2 * 8) * t->stride + i % 2 * 8;
    s->sad[i][3] = t->search(t, &context);
    s->vec[i][3] = context.vec_best;
    s->offset[i][3] = context.offset;
    s->ref[i][3] = t->refl0[s->vec[i][3].refno];
    s->vec_median[i][3] = vec[0];
    /* BUGFIX: pass the median predictor (&s->vec_median[i][3]) rather than
     * the search result twice — consistent with the 8x4 path, which uses
     * &s->vec_median[i][2]; previously s->vec_median[i][3] was dead. */
    s->sad[i][3] = T264_quarter_pixel_search(t, s->src[i][3], s->ref[i][3], s->offset[i][3], &s->vec[i][3], &s->vec_median[i][3], s->sad[i][3], 4, 8, t->mb.pred_p8x8 + i / 2 * 16 * 8 + i % 2 * 8);
    /* propagate the winner to the 4x4-granularity vector maps (2 cells tall) */
    t->mb.vec_ref[VEC_LUMA + i / 2 * 16 + i % 2 * 2 + 0].vec =
    t->mb.vec_ref[VEC_LUMA + i / 2 * 16 + i % 2 * 2 + 8].vec = s->vec[i][3];
    t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 0] = s->vec[i][3];
    t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 4] = s->vec[i][3];

    /* ---- right 4x8 half ---- */
    get_pmv(t, vec, MB_4x8, luma_index[4 * i + 1], 1, &num);
    context.vec_num= num;   /* BUGFIX: refresh count for the refilled candidate list */
    s->src[i][4] = s->src[i][3] + 4;
    context.offset += 4;
    s->sad[i][4] = t->search(t, &context);
    s->vec[i][4] = context.vec_best;
    s->offset[i][4] = context.offset;
    s->ref[i][4] = t->refl0[s->vec[i][4].refno];
    s->vec_median[i][4] = vec[0];
    /* BUGFIX: median predictor, as above */
    s->sad[i][4] = T264_quarter_pixel_search(t, s->src[i][4], s->ref[i][4], s->offset[i][4], &s->vec[i][4], &s->vec_median[i][4], s->sad[i][4], 4, 8, t->mb.pred_p8x8 + i / 2 * 16 * 8 + i % 2 * 8 + 4);
    t->mb.vec_ref[VEC_LUMA + i / 2 * 16 + i % 2 * 2 + 1].vec =
    t->mb.vec_ref[VEC_LUMA + i / 2 * 16 + i % 2 * 2 + 9].vec = s->vec[i][4];
    t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 1] = s->vec[i][4];
    t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 5] = s->vec[i][4];

    return s->sad[i][3] + s->sad[i][4];
}
uint32_t
T264_mode_decision_inter_4x4p(_RW T264_t * t, int32_t i, subpart_search_data_t* s)
{
    /* Mode decision for the four 4x4 sub-partitions of 8x8 block i (0..3).
     * Sub-blocks are processed in raster order (TL, TR, BL, BR) using
     * slots 5..8 of s->{src,sad,vec,offset,ref,vec_median}. Each gets an
     * integer-pel search plus quarter-pel refinement into pred_p8x8, and
     * its vector is stored in the MB-level vector maps.
     * Returns the combined SAD of all four 4x4 sub-blocks. */
    T264_vector_t vec[5 + 10]; // NOTE: max 10 refs
    T264_search_context_t context;
    int32_t num;

    /* ---- top-left 4x4 ---- */
    get_pmv(t, vec, MB_4x4, luma_index[4 * i + 0], 1, &num);
    context.height = 4;
    context.width  = 4;
    context.limit_x= t->param.search_x;
    context.limit_y= t->param.search_y;
    context.vec    = vec;
    context.vec_num= num;
    /* 8x8 block i sits at (i%2*8, i/2*8) inside the 16x16 MB */
    context.offset = ((t->mb.mb_y << 4) + i / 2 * 8) * t->edged_stride + (t->mb.mb_x << 4) + i % 2 * 8;
    s->src[i][5] = t->mb.src_y + (i / 2 * 8) * t->stride + i % 2 * 8;
    s->sad[i][5] = t->search(t, &context);
    s->vec[i][5] = context.vec_best;
    s->offset[i][5] = context.offset;
    s->ref[i][5] = t->refl0[s->vec[i][5].refno];
    s->vec_median[i][5] = vec[0];
    /* BUGFIX: pass the median predictor (&s->vec_median[i][5]) rather than
     * the search result twice — consistent with the 8x4 path; previously
     * every s->vec_median[i][k] assignment below was dead. */
    s->sad[i][5] = T264_quarter_pixel_search(t, s->src[i][5], s->ref[i][5], s->offset[i][5], &s->vec[i][5], &s->vec_median[i][5], s->sad[i][5], 4, 4, t->mb.pred_p8x8 + i / 2 * 16 * 8 + i % 2 * 8);
    t->mb.vec_ref[VEC_LUMA + i / 2 * 16 + i % 2 * 2 + 0].vec =
    t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 0] = s->vec[i][5];

    /* ---- top-right 4x4 ---- */
    get_pmv(t, vec, MB_4x4, luma_index[4 * i + 1], 1, &num);
    context.vec_num= num;   /* BUGFIX: refresh count for the refilled candidate list */
    s->src[i][6] = s->src[i][5] + 4;
    context.offset += 4;
    s->sad[i][6] = t->search(t, &context);
    s->vec[i][6] = context.vec_best;
    s->offset[i][6] = context.offset;
    s->ref[i][6] = t->refl0[s->vec[i][6].refno];
    s->vec_median[i][6] = vec[0];
    s->sad[i][6] = T264_quarter_pixel_search(t, s->src[i][6], s->ref[i][6], s->offset[i][6], &s->vec[i][6], &s->vec_median[i][6], s->sad[i][6], 4, 4, t->mb.pred_p8x8 + i / 2 * 16 * 8 + i % 2 * 8 + 4);
    t->mb.vec_ref[VEC_LUMA + i / 2 * 16 + i % 2 * 2 + 1].vec =
    t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 1] = s->vec[i][6];

    /* ---- bottom-left 4x4 ---- */
    get_pmv(t, vec, MB_4x4, luma_index[4 * i + 2], 1, &num);
    context.vec_num= num;   /* BUGFIX: refresh candidate count */
    s->src[i][7] = s->src[i][5] + 4 * t->stride;
    context.offset += 4 * t->edged_stride - 4;  /* down one 4-row, back to the left column */
    s->sad[i][7] = t->search(t, &context);
    s->vec[i][7] = context.vec_best;
    s->offset[i][7] = context.offset;
    s->ref[i][7] = t->refl0[s->vec[i][7].refno];
    s->vec_median[i][7] = vec[0];
    s->sad[i][7] = T264_quarter_pixel_search(t, s->src[i][7], s->ref[i][7], s->offset[i][7], &s->vec[i][7], &s->vec_median[i][7], s->sad[i][7], 4, 4, t->mb.pred_p8x8 + i / 2 * 16 * 8 + i % 2 * 8 + 16 * 4);
    t->mb.vec_ref[VEC_LUMA + i / 2 * 16 + i % 2 * 2 + 8].vec =
    t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 4] = s->vec[i][7];

    /* ---- bottom-right 4x4 ---- */
    get_pmv(t, vec, MB_4x4, luma_index[4 * i + 3], 1, &num);
    context.vec_num= num;   /* BUGFIX: refresh candidate count */
    s->src[i][8] = s->src[i][7] + 4;
    context.offset += 4;
    s->sad[i][8] = t->search(t, &context);
    s->vec[i][8] = context.vec_best;
    s->offset[i][8] = context.offset;
    s->ref[i][8] = t->refl0[s->vec[i][8].refno];
    s->vec_median[i][8] = vec[0];
    s->sad[i][8] = T264_quarter_pixel_search(t, s->src[i][8], s->ref[i][8], s->offset[i][8], &s->vec[i][8], &s->vec_median[i][8], s->sad[i][8], 4, 4, t->mb.pred_p8x8 + i / 2 * 16 * 8 + i % 2 * 8 + 16 * 4 + 4);
    t->mb.vec_ref[VEC_LUMA + i / 2 * 16 + i % 2 * 2 + 9].vec =
    t->mb.vec[0][i / 2 * 8 + i % 2 * 2 + 5] = s->vec[i][8];

    return s->sad[i][5] + s->sad[i][6] + s->sad[i][7] + s->sad[i][8];
}
void
T264_encode_inter_16x16p(_RW T264_t* t, uint8_t* pred)
{
    /* Encode one inter 16x16 luma MB against the given prediction:
     * residual -> 4x4 DCT/quant/zig-zag (coefficients for entropy coding),
     * then dequant/IDCT and add back onto the prediction to reconstruct. */
    DECLARE_ALIGNED_MATRIX(dct, 16, 16, int16_t, 16);
    int32_t qp = t->qp_y;
    int32_t blk;
    int16_t* coef = dct;

    /* residual = source - prediction, widened to 16 bit, whole 16x16 MB */
    t->expand8to16sub(pred, 16 / 4, 16 / 4, dct, t->mb.src_y, t->stride);

    for (blk = 0; blk < 16; blk++, coef += 16)
    {
        t->fdct4x4(coef);
        t->quant4x4(coef, qp, FALSE);
        /* store coefficients in coding order for this 4x4 block */
        scan_zig_4x4(t->mb.dct_y_z[luma_index[blk]], coef);
        /* local decode for the reconstruction path */
        t->iquant4x4(coef, qp);
        t->idct4x4(coef);
    }

    /* reconstruction = prediction + decoded residual, clipped to 8 bit */
    t->contract16to8add(dct, 16 / 4, 16 / 4, pred, t->mb.dst_y, t->edged_stride);
}
void
T264_encode_inter_y(_RW T264_t* t)
{
    /* Encode the luma plane of the current inter MB. The prediction for
     * the chosen partitioning was already built during mode decision and
     * is cached in pred_p16x16[], indexed by the selected partition mode. */
    T264_encode_inter_16x16p(t, t->mb.pred_p16x16[t->mb.mb_part]);
}
// NOTE: this routine will merge with T264_encode_intra_uv
void
T264_transform_inter_uv(_RW T264_t* t, uint8_t* pred_u, uint8_t* pred_v)
{
    /* Transform/quantize both 8x8 chroma planes of an inter MB and
     * reconstruct them. The 8x8 residual holds four 4x4 blocks; the DC of
     * each block is lifted into a scratch row (dct[64..67]) and coded
     * separately with a 2x2 DC Hadamard, per the H.264 chroma design. */
    DECLARE_ALIGNED_MATRIX(dct, 10, 8, int16_t, CACHE_SIZE);
    int32_t qp = t->qp_uv;
    int32_t blk, plane;
    int16_t* coef;
    uint8_t* pred = pred_u;
    uint8_t* dst  = t->mb.dst_u;
    uint8_t* src  = t->mb.src_u;

    for (plane = 0; plane < 2; plane++)  /* pass 0 = U, pass 1 = V */
    {
        /* residual = source - prediction for this 8x8 chroma plane */
        t->expand8to16sub(pred, 8 / 4, 8 / 4, dct, src, t->stride_uv);

        coef = dct;
        for (blk = 0; blk < 4; blk++, coef += 16)
        {
            t->fdct4x4(coef);
            dct[64 + blk] = coef[0];  /* park the transformed DC in the scratch row */
            t->quant4x4(coef, qp, FALSE);
            scan_zig_4x4(t->mb.dct_uv_z[plane][blk], coef);
            t->iquant4x4(coef, qp);
        }

        /* coef now points at dct + 64: the gathered 2x2 DC block.
         * Code it (Hadamard / quant / scan) and locally decode it. */
        t->fdct2x2dc(coef);
        t->quant2x2dc(coef, qp, FALSE);
        scan_zig_2x2(t->mb.dc2x2_z[plane], coef);
        t->iquant2x2dc(coef, qp);
        t->idct2x2dc(coef);

        /* put the reconstructed DCs back and finish each inverse transform */
        coef = dct;
        for (blk = 0; blk < 4; blk++, coef += 16)
        {
            coef[0] = dct[64 + blk];
            t->idct4x4(coef);
        }

        /* reconstruction = prediction + decoded residual */
        t->contract16to8add(dct, 8 / 4, 8 / 4, pred, dst, t->edged_stride_uv);

        //
        // change to v
        //
        pred = pred_v;
        dst  = t->mb.dst_v;
        src  = t->mb.src_v;
    }
}
void
T264_encode_inter_uv(_RW T264_t* t)
{
    /* Build the 8x8 U and V motion-compensated predictions for the current
     * inter MB — mirroring the luma partitioning at half resolution — then
     * transform/quantize/reconstruct them via T264_transform_inter_uv.
     *
     * Chroma reuses the luma motion vectors: (vec.x >> 3, vec.y >> 3) gives
     * the integer chroma displacement and eighth_pixel_mc_u interpolates
     * the remaining fractional part (chroma MVs have 1/8-pel precision
     * because the plane is half the luma resolution).
     * Each 16x16 luma region maps to 8x8 chroma, 8x8 luma to 4x4 chroma,
     * and so on; pred_u/pred_v use a fixed stride of 8. */
    DECLARE_ALIGNED_MATRIX(pred_u, 8, 8, uint8_t, CACHE_SIZE);
    DECLARE_ALIGNED_MATRIX(pred_v, 8, 8, uint8_t, CACHE_SIZE);
    T264_vector_t vec;
    uint8_t* src, *dst;
    uint8_t* src_u, *dst_u;
    int32_t i;
    switch (t->mb.mb_part)
    {
    case MB_16x16:
        /* one vector covers the whole MB: a single 8x8 chroma MC per plane */
        vec = t->mb.vec[0][0];
        src = t->refl0[vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3);
        dst = pred_u;
        t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 8, 8);
        src = t->refl0[vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3);
        dst = pred_v;
        t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 8, 8);
        break;
    case MB_16x8:
        /* two 8x4 chroma halves, stacked vertically */
        vec = t->mb.vec[0][0];
        src_u = t->refl0[vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3);
        dst_u = pred_u;
        t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 8, 4);
        src = t->refl0[vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3);
        dst = pred_v;
        t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 8, 4);
        /* lower half: its vector lives at the luma 4x4 index of row 2 */
        vec = t->mb.vec[0][luma_index[8]];
        src_u = t->refl0[vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) +
            4 * t->edged_stride_uv;
        dst_u += 4 * 8;  /* down 4 rows in the 8-wide prediction buffer */
        t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 8, 4);
        src = t->refl0[vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) +
            4 * t->edged_stride_uv;
        dst += 4 * 8;
        t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 8, 4);
        break;
    case MB_8x16:
        /* two 4x8 chroma halves, side by side */
        vec = t->mb.vec[0][0];
        src_u = t->refl0[vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3);
        dst_u = pred_u;
        t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 4, 8);
        src = t->refl0[vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3);
        dst = pred_v;
        t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 4, 8);
        /* right half: vector at the luma 4x4 index of column 2 */
        vec = t->mb.vec[0][luma_index[4]];
        src_u = t->refl0[vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + 4;
        dst_u += 4;
        t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 4, 8);
        src = t->refl0[vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + 4;
        dst += 4;
        t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 4, 8);
        break;
    case MB_8x8:
    case MB_8x8ref0:
        /* four 8x8 luma blocks -> four 4x4 chroma quadrants; each quadrant
         * is further split by its own sub-partition mode. Quadrant i lands
         * at pred offset i/2*32 + i%2*4 (i/2*4 rows, i%2*4 cols, stride 8). */
        for(i = 0 ; i < 4 ; i ++)
        {
            switch(t->mb.submb_part[luma_index[4 * i]])
            {
            case MB_8x8:
                /* one vector for the whole quadrant: 4x4 chroma MC */
                vec = t->mb.vec[0][luma_index[4 * i]];
                src = t->refl0[vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4);
                dst = pred_u + i / 2 * 32 + i % 2 * 4;
                t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 4, 4);
                src = t->refl0[vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4);
                dst = pred_v + i / 2 * 32 + i % 2 * 4;
                t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 4, 4);
                break;
            case MB_8x4:
                /* two 4x2 chroma strips, stacked vertically */
                vec = t->mb.vec[0][luma_index[4 * i]];
                src_u = t->refl0[vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4);
                dst_u = pred_u + i / 2 * 32 + i % 2 * 4;
                t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 4, 2);
                src = t->refl0[vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4);
                dst = pred_v + i / 2 * 32 + i % 2 * 4;
                t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 4, 2);
                /* lower 8x4 luma strip = raster index +2 within the quadrant */
                vec = t->mb.vec[0][luma_index[4 * i + 2]];
                src_u = t->refl0[vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) +
                    2 * t->edged_stride_uv;
                dst_u += 2 * 8;  /* down 2 rows */
                t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 4, 2);
                src = t->refl0[vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) +
                    2 * t->edged_stride_uv;
                dst += 2 * 8;
                t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 4, 2);
                break;
            case MB_4x8:
                /* two 2x4 chroma strips, side by side */
                vec = t->mb.vec[0][luma_index[4 * i]];
                src_u = t->refl0[vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4);
                dst_u = pred_u + i / 2 * 32 + i % 2 * 4;
                t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 2, 4);
                src = t->refl0[vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4);
                dst = pred_v + i / 2 * 32 + i % 2 * 4;
                t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 2, 4);
                /* right 4x8 luma strip = raster index +1 within the quadrant */
                vec = t->mb.vec[0][luma_index[4 * i + 1]];
                src_u = t->refl0[vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) + 2;
                dst_u += 2;
                t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 2, 4);
                src = t->refl0[vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) + 2;
                dst += 2;
                t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 2, 4);
                break;
            case MB_4x4:
                /* four 2x2 chroma blocks in raster order: TL, TR, BL, BR */
                vec = t->mb.vec[0][luma_index[4 * i]];
                src_u = t->refl0[vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4);
                dst_u = pred_u + i / 2 * 32 + i % 2 * 4;
                t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 2, 2);
                src = t->refl0[vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4);
                dst = pred_v + i / 2 * 32 + i % 2 * 4;
                t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 2, 2);
                vec = t->mb.vec[0][luma_index[4 * i + 1]];
                src_u = t->refl0[vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) + 2;
                dst_u += 2;
                t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 2, 2);
                src = t->refl0[vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) + 2;
                dst += 2;
                t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 2, 2);
                vec = t->mb.vec[0][luma_index[4 * i + 2]];
                src_u = t->refl0[vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) +
                    2 * t->edged_stride_uv;
                dst_u += 2 * 8 - 2;  /* down 2 rows, back 2 cols (dst_u was at TR) */
                t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 2, 2);
                src = t->refl0[vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) +
                    2 * t->edged_stride_uv;
                dst += 2 * 8 - 2;
                t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 2, 2);
                vec = t->mb.vec[0][luma_index[4 * i + 3]];
                src_u = t->refl0[vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) +
                    2 * t->edged_stride_uv + 2;
                dst_u += 2;
                t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 2, 2);
                src = t->refl0[vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) +
                    2 * t->edged_stride_uv + 2;
                dst += 2;
                t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 2, 2);
                break;
            default:
                break;
            }
        }
        break;
    default:
        break;
    }
    /* transform/quantize the chroma residuals and reconstruct */
    T264_transform_inter_uv(t, pred_u, pred_v);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -