📄 macroblock.c
字号:
* Predict one 4x4 Luma block
************************************************************************
*/
void
LumaPrediction4x4 (int block_x, // <-- relative horizontal block coordinate of 4x4 block
int block_y, // <-- relative vertical block coordinate of 4x4 block
int fw_mode, // <-- forward prediction mode (1-7, 0=DIRECT if bw_mode=0)
int bw_mode, // <-- backward prediction mode (1-7, 0=DIRECT if fw_mode=0)
int fw_ref, // <-- reference frame for forward prediction (-1: Intra4x4 pred. with fw_mode)
int bw_ref )
{
static byte fw_pred[16];
static byte bw_pred[16];
int i, j;
int block_x4 = block_x+4;
int block_y4 = block_y+4;
int pic_pix_x = img->pix_x + block_x;
int pic_pix_y = img->pix_y + block_y;
int bx = block_x >> 2;
int by = block_y >> 2;
byte* fpred = fw_pred;
byte* bpred = bw_pred;
int direct = (fw_mode == 0 && bw_mode == 0 && (img->type == B_IMG ));
int skipped = (fw_mode == 0 && bw_mode == 0 && (img->type != B_IMG ));
int *****fmv_array = img->all_mv; // For MB level frame/field coding
int *****bmv_array = img->all_bmv; // For MB level frame/field coding
int fw_ref_idx, bw_ref_idx;
//zdd
_int16 *pt1=&(img->mpr[block_x][block_y]);
static _int64 f1=0x0001000100010001;
static _int64 zero=0;
//end
if (direct)
{
fw_ref= fwdir_refFrArr[pic_pix_y>>2][pic_pix_x>>2];
bw_ref= bwdir_refFrArr[pic_pix_y>>2][pic_pix_x>>2];
}
fw_ref_idx = fw_ref;
bw_ref_idx = bw_ref;
direct_mode = 0;
if (fw_mode ||(direct && ( fw_ref !=-1) )|| skipped)
{
OneComponentLumaPrediction4x4 (fw_pred, pic_pix_x, pic_pix_y, fmv_array [bx][by][fw_ref][fw_mode], fw_ref);
}
if (bw_mode || (direct && ( bw_ref !=-1) ))
{
OneComponentLumaPrediction4x4 (bw_pred, pic_pix_x, pic_pix_y, bmv_array[bx][by][ 0][bw_mode], -1);
}
if (direct || (fw_mode && bw_mode))
{
if ( direct)
{
if (fw_ref ==-1)
{
_asm
{
mov edi,pt1
mov esi,bpred
pxor mm7,mm7
movd mm0,[esi]
movd mm1,[esi+4]
movd mm2,[esi+8]
movd mm3,[esi+12]
PUNPCKLBW mm0,mm7
PUNPCKLBW mm1,mm7
PUNPCKLBW mm2,mm7
PUNPCKLBW mm3,mm7
//转秩
movq mm4,mm0
movq mm5,mm1
movq mm6,mm2
movq mm7,mm3
PUNPCKLWD mm0,mm1
PUNPCKLWD mm2,mm3
movq mm1,mm0
PUNPCKLDQ mm0,mm2
PUNPCKHDQ mm1,mm2
PUNPCKHWD mm4,mm5
PUNPCKHWD mm6,mm3
movq mm3,mm4
PUNPCKLDQ mm4,mm6
PUNPCKHDQ mm3,mm6
movq mm2,mm4
movq [edi],mm0
movq [edi+32],mm1
movq [edi+64],mm2
movq [edi+96],mm3
emms
}
}
else if (bw_ref ==-1 )
{
_asm
{
mov edi,pt1
mov esi,fpred
pxor mm7,mm7
movd mm0,[esi]
movd mm1,[esi+4]
movd mm2,[esi+8]
movd mm3,[esi+12]
PUNPCKLBW mm0,mm7
PUNPCKLBW mm1,mm7
PUNPCKLBW mm2,mm7
PUNPCKLBW mm3,mm7
//转秩
movq mm4,mm0
movq mm5,mm1
movq mm6,mm2
movq mm7,mm3
PUNPCKLWD mm0,mm1
PUNPCKLWD mm2,mm3
movq mm1,mm0
PUNPCKLDQ mm0,mm2
PUNPCKHDQ mm1,mm2
PUNPCKHWD mm4,mm5
PUNPCKHWD mm6,mm3
movq mm3,mm4
PUNPCKLDQ mm4,mm6
PUNPCKHDQ mm3,mm6
movq mm2,mm4
movq [edi],mm0
movq [edi+32],mm1
movq [edi+64],mm2
movq [edi+96],mm3
emms
}
}
else
{
_asm
{
mov edi,pt1
mov eax,bpred
mov ebx,fpred
movd mm0,[eax]
movd mm1,[eax+4]
movd mm2,[eax+8]
movd mm3,[eax+12]
movd mm4,[ebx]
movd mm5,[ebx+4]
movd mm6,[ebx+8]
movd mm7,[ebx+12]
PUNPCKLBW mm0,zero
PUNPCKLBW mm1,zero
PUNPCKLBW mm2,zero
PUNPCKLBW mm3,zero
PUNPCKLBW mm4,zero
PUNPCKLBW mm5,zero
PUNPCKLBW mm6,zero
PUNPCKLBW mm7,zero
paddw mm0,mm4
paddw mm0,f1
psraw mm0,1
paddw mm1,mm5
paddw mm1,f1
psraw mm1,1
paddw mm2,mm6
paddw mm2,f1
psraw mm2,1
paddw mm3,mm7
paddw mm3,f1
psraw mm3,1
//转秩
movq mm4,mm0
movq mm5,mm1
movq mm6,mm2
movq mm7,mm3
PUNPCKLWD mm0,mm1
PUNPCKLWD mm2,mm3
movq mm1,mm0
PUNPCKLDQ mm0,mm2
PUNPCKHDQ mm1,mm2
PUNPCKHWD mm4,mm5
PUNPCKHWD mm6,mm3
movq mm3,mm4
PUNPCKLDQ mm4,mm6
PUNPCKHDQ mm3,mm6
movq mm2,mm4
movq [edi],mm0
movq [edi+32],mm1
movq [edi+64],mm2
movq [edi+96],mm3
emms
}
}
}
else
{
_asm
{
mov edi,pt1
mov eax,bpred
mov ebx,fpred
movd mm0,[eax]
movd mm1,[eax+4]
movd mm2,[eax+8]
movd mm3,[eax+12]
movd mm4,[ebx]
movd mm5,[ebx+4]
movd mm6,[ebx+8]
movd mm7,[ebx+12]
PUNPCKLBW mm0,zero
PUNPCKLBW mm1,zero
PUNPCKLBW mm2,zero
PUNPCKLBW mm3,zero
PUNPCKLBW mm4,zero
PUNPCKLBW mm5,zero
PUNPCKLBW mm6,zero
PUNPCKLBW mm7,zero
paddw mm0,mm4
paddw mm0,f1
psraw mm0,1
paddw mm1,mm5
paddw mm1,f1
psraw mm1,1
paddw mm2,mm6
paddw mm2,f1
psraw mm2,1
paddw mm3,mm7
paddw mm3,f1
psraw mm3,1
//转秩
movq mm4,mm0
movq mm5,mm1
movq mm6,mm2
movq mm7,mm3
PUNPCKLWD mm0,mm1
PUNPCKLWD mm2,mm3
movq mm1,mm0
PUNPCKLDQ mm0,mm2
PUNPCKHDQ mm1,mm2
PUNPCKHWD mm4,mm5
PUNPCKHWD mm6,mm3
movq mm3,mm4
PUNPCKLDQ mm4,mm6
PUNPCKHDQ mm3,mm6
movq mm2,mm4
movq [edi],mm0
movq [edi+32],mm1
movq [edi+64],mm2
movq [edi+96],mm3
emms
}
}
}
else if (fw_mode || skipped)
{
_asm
{
mov edi,pt1
mov esi,fpred
pxor mm7,mm7
movd mm0,[esi]
movd mm1,[esi+4]
movd mm2,[esi+8]
movd mm3,[esi+12]
PUNPCKLBW mm0,mm7
PUNPCKLBW mm1,mm7
PUNPCKLBW mm2,mm7
PUNPCKLBW mm3,mm7
//转秩
movq mm4,mm0
movq mm5,mm1
movq mm6,mm2
movq mm7,mm3
PUNPCKLWD mm0,mm1
PUNPCKLWD mm2,mm3
movq mm1,mm0
PUNPCKLDQ mm0,mm2
PUNPCKHDQ mm1,mm2
PUNPCKHWD mm4,mm5
PUNPCKHWD mm6,mm3
movq mm3,mm4
PUNPCKLDQ mm4,mm6
PUNPCKHDQ mm3,mm6
movq mm2,mm4
movq [edi],mm0
movq [edi+32],mm1
movq [edi+64],mm2
movq [edi+96],mm3
emms
}
}
else
{
_asm
{
mov edi,pt1
mov esi,bpred
pxor mm7,mm7
movd mm0,[esi]
movd mm1,[esi+4]
movd mm2,[esi+8]
movd mm3,[esi+12]
PUNPCKLBW mm0,mm7
PUNPCKLBW mm1,mm7
PUNPCKLBW mm2,mm7
PUNPCKLBW mm3,mm7
//转秩
movq mm4,mm0
movq mm5,mm1
movq mm6,mm2
movq mm7,mm3
PUNPCKLWD mm0,mm1
PUNPCKLWD mm2,mm3
movq mm1,mm0
PUNPCKLDQ mm0,mm2
PUNPCKHDQ mm1,mm2
PUNPCKHWD mm4,mm5
PUNPCKHWD mm6,mm3
movq mm3,mm4
PUNPCKLDQ mm4,mm6
PUNPCKHDQ mm3,mm6
movq mm2,mm4
movq [edi],mm0
movq [edi+32],mm1
movq [edi+64],mm2
movq [edi+96],mm3
emms
}
}
}
/*!
************************************************************************
* \brief
* Residual Coding of an 8x8 Luma block (not for intra)
************************************************************************
*/
int // ==> coefficient cost
LumaResidualCoding8x8 (int *cbp, // --> cbp (updated according to processed 8x8 luminance block)
int *cbp_blk, // --> block cbp (updated according to processed 8x8 luminance block)
int block8x8, // <-- block number of 8x8 block
int fw_mode, // <-- forward prediction mode (1-7, 0=DIRECT)
int bw_mode, // <-- backward prediction mode (1-7, 0=DIRECT)
int fw_refframe, // <-- reference frame for forward prediction
int bw_refframe // <-- reference frame for backward prediction
)
{
int block_y, block_x, pic_pix_y, pic_pix_x, i, j, nonzero, cbp_blk_mask;
int coeff_cost = 0;
int mb_y = (block8x8 / 2) << 3;
int mb_x = (block8x8 % 2) << 3;
int cbp_mask = 1 << block8x8;
int bxx, byy; // indexing curr_blk
int scrFlag = 0; // 0=noSCR, 1=strongSCR, 2=jmSCR
byte** imgY_original = imgY_org;
int pix_y = img->pix_y;
int skipped = (fw_mode == 0 && bw_mode == 0 && (img->type != B_IMG && img->type != BS_IMG));
if (img->type==B_IMG || img->type==BS_IMG)
scrFlag = 1;
if(input->InterlaceCodingOption >= MB_CODING && mb_adaptive && img->field_mode)
{
pix_y = img->field_pix_y;
imgY_original = img->top_field ? imgY_org_top:imgY_org_bot;
}
//===== loop over 4x4 blocks =====
for (byy=0, block_y=mb_y; block_y<mb_y+8; byy+=4, block_y+=4)
{
pic_pix_y = pix_y + block_y;
for (bxx=0, block_x=mb_x; block_x<mb_x+8; bxx+=4, block_x+=4)
{
pic_pix_x = img->pix_x + block_x;
cbp_blk_mask = (block_x>>2) + block_y;
//===== prediction of 4x4 block =====
LumaPrediction4x4 (block_x, block_y, fw_mode, bw_mode, fw_refframe, bw_refframe);
//===== get displaced frame difference ======
for (j=0; j<4; j++)
for (i=0; i<4; i++)
{
img->m7[i][j] = imgY_original[pic_pix_y+j][pic_pix_x+i] - img->mpr[i+block_x][j+block_y];
}
//===== DCT, Quantization, inverse Quantization, IDCT, Reconstruction =====
if (img->NoResidueDirect != 1 && !skipped )
{
//===== DCT, Quantization, inverse Quantization, IDCT, Reconstruction =====
if (img->types!=SP_IMG) nonzero = dct_luma (block_x, block_y, &coeff_cost, 0);
else nonzero = dct_luma_sp(block_x, block_y, &coeff_cost);
if (nonzero)
{
(*cbp_blk) |= 1 << cbp_blk_mask; // one bit for every 4x4 block
(*cbp) |= cbp_mask; // one bit for the 4x4 blocks of an 8x8 block
}
}
}
}
/*
The purpose of the action below is to prevent that single or 'expensive' coefficients are coded.
With 4x4 transform there is larger chance that a single coefficient in a 8x8 or 16x16 block may be nonzero.
A single small (level=1) coefficient in a 8x8 block will cost: 3 or more bits for the coefficient,
4 bits for EOBs for the 4x4 blocks,possibly also more bits for CBP. Hence the total 'cost' of that single
coefficient will typically be 10-12 bits which in a RD consideration is too much to justify the distortion improvement.
The action below is to watch such 'single' coefficients and set the reconstructed block equal to the prediction according
to a given criterium. The action is taken only for inter luma blocks.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -