mv-search.c
来自「包含了从MPEG4的视频解码到H.264的视频编码部分的源代码」· C语言 代码 · 共 1,913 行 · 第 1/5 页
C
1,913 行
{
range_partly_outside = 1;
}
//===== determine position of (0,0)-vector =====
if (!input->rdopt)
{
ref_x = img->pix_x - offset_x;
ref_y = pix_y - offset_y;
for (pos = 0; pos < max_pos; pos++)
{
if (ref_x == spiral_search_x[pos] &&
ref_y == spiral_search_y[pos])
{
pos_00[refindex] = pos;
break;
}
}
}
//===== loop over search range (spiral search): get blockwise SAD =====
for (pos = 0; pos < max_pos; pos++)
{
abs_y = offset_y + spiral_search_y[pos];
abs_x = offset_x + spiral_search_x[pos];
if (range_partly_outside)
{
if (abs_y >= 0 && abs_y <= max_height &&
abs_x >= 0 && abs_x <= max_width )
{
PelYline_11 = FastLine16Y_11;
}
else
{
PelYline_11 = UMVLine16Y_11;
}
}
orgptr = orig_blocks;
bindex = 0;
for (blky = 0; blky < 4; blky++)
{
LineSadBlk0 = LineSadBlk1 = LineSadBlk2 = LineSadBlk3 = 0;
for (y = 0; y < 4; y++)
{
refptr = PelYline_11 (ref_pic, abs_y++, abs_x);
LineSadBlk0 += byte_abs [*refptr++ - *orgptr++];
LineSadBlk0 += byte_abs [*refptr++ - *orgptr++];
LineSadBlk0 += byte_abs [*refptr++ - *orgptr++];
LineSadBlk0 += byte_abs [*refptr++ - *orgptr++];
LineSadBlk1 += byte_abs [*refptr++ - *orgptr++];
LineSadBlk1 += byte_abs [*refptr++ - *orgptr++];
LineSadBlk1 += byte_abs [*refptr++ - *orgptr++];
LineSadBlk1 += byte_abs [*refptr++ - *orgptr++];
LineSadBlk2 += byte_abs [*refptr++ - *orgptr++];
LineSadBlk2 += byte_abs [*refptr++ - *orgptr++];
LineSadBlk2 += byte_abs [*refptr++ - *orgptr++];
LineSadBlk2 += byte_abs [*refptr++ - *orgptr++];
LineSadBlk3 += byte_abs [*refptr++ - *orgptr++];
LineSadBlk3 += byte_abs [*refptr++ - *orgptr++];
LineSadBlk3 += byte_abs [*refptr++ - *orgptr++];
LineSadBlk3 += byte_abs [*refptr++ - *orgptr++];
}
block_sad[bindex++][pos] = LineSadBlk0;
block_sad[bindex++][pos] = LineSadBlk1;
block_sad[bindex++][pos] = LineSadBlk2;
block_sad[bindex++][pos] = LineSadBlk3;
}
}
//===== combine SAD's for larger block types =====
SetupLargerBlocks (refindex, max_pos);
//===== set flag marking that search setup has been done =====
search_setup_done[refindex] = 1;
}*/
/*!
 ***********************************************************************
 * \brief
 *    Set up the fast full-pel search for one reference picture: compute
 *    the SAD of every 4x4 luma block of the current macroblock at every
 *    position of the spiral search table, then combine them for the
 *    larger block types.
 *
 * \param ref
 *    reference frame parameter (0... or -1 (backward)). A negative value
 *    selects reference frame 0 for the predictor and index
 *    img->buf_cycle for the per-reference tables.
 *
 * \par Side effects
 *    Writes search_center_x/y[refindex], pos_00[refindex] (only when the
 *    zero vector is found in the spiral table), BlockSAD[refindex][7],
 *    and search_setup_done[refindex]; calls SetMotionVectorPredictor()
 *    and SetupLargerBlocks().
 *
 * \note
 *    Uses MSVC-style inline assembly (_asm) with MMX and SSE2
 *    instructions.
 *    NOTE(review): the assembly addresses orig_blocks with byte offsets
 *    of 16 per row and loads 4 bytes per 4 pixels, so it relies on
 *    sizeof(pel_t) == 1 -- confirm against the pel_t typedef.
 *    NOTE(review): the locals refpix, range_partly_outside, max_width,
 *    max_height, imgw, pt1, pt2, bframe, currMB, prevmv, mvacc and mvdiff
 *    are not referenced by any active statement in this function.
 ***********************************************************************
 */
void
SetupFastFullPelSearch (int ref) // <-- reference frame parameter (0... or -1 (backward))
{
int pmv[2];
pel_t orig_blocks[256], *orgptr=orig_blocks, *refptr1,*refptr2,*refptr3,*refptr4,refpix[16][16],*refptr[16];
int offset_x, offset_y, x, y, range_partly_outside, ref_x, ref_y, pos, abs_x, abs_y, bindex, blky;
// one 64-bit SAD result per 4x4 block of the current block row; written by the assembly below
_int64 LineSadBlk[4],*lsadptr=LineSadBlk;
int refframe = (ref>=0 ? ref : 0);
int refindex = (ref>=0 ? ref : img->buf_cycle);
pel_t** ref_pic; //zdd // = img->type==B_IMG? Refbuf11 [ref+((mref==mref_fld)) +1] : Refbuf11[ref];
int** ref_array = ((img->type!=B_IMG )? refFrArr : ref>=0 ? fw_refFrArr : bw_refFrArr);
int*** mv_array = ((img->type!=B_IMG )? tmp_mv : ref>=0 ? tmp_fwMV : tmp_bwMV);
// SAD table for this reference, indexed [4x4 block 0..15][search position]
int** block_sad = BlockSAD[refindex][7];
int max_width = img->width - 17;
int max_height = img->height - 17;
int search_range = max_search_range[refindex];
// number of positions in a (2*range+1) x (2*range+1) search window
int max_pos = (2*search_range+1) * (2*search_range+1);
byte** imgY_orig = imgY_org;
int pix_y = img->pix_y;
//zdd
int i,j;
int imgw=img->width;
pel_t *pt1,*pt2;
int bframe = (img->type==B_IMG);
Macroblock *currMB = &img->mb_data[img->current_mb_nr];
int **prevmv,*mvacc,mvdiff[2];
//end
// (disabled) copy of the reference frame(s) into border-extended buffers
/*if(img->current_mb_nr==0)
{
if(img->type==B_IMG)
{
copyframewithextend16(&(tmpRef1[0][0]),Refbuf11[ref+1],img->height,img->width);
copyframewithextend16(&(tmpRef2[0][0]),Refbuf11[0],img->height,img->width);
}
else
{
copyframewithextend16(&(tmpRef1[0][0]),Refbuf11[ref],img->height,img->width);
}
}*/
// B pictures index the reference buffer with ref+1 (so ref == -1 selects entry 0)
ref_pic = img->type==B_IMG? Refbuf11 [ref+1] : Refbuf11[ref];
//end
//===== get search center: predictor of 16x16 block =====
// (disabled) alternative predictor extrapolated from currMB->bestmode, with a 9-position search
/*if(currMB->bestmode.state<3||bframe)
SetMotionVectorPredictor (pmv, ref_array, mv_array, refframe, 0, 0, 16, 16);
else
{
pmv[0]=2*currMB->bestmode.mv[2][0]-currMB->bestmode.mv[1][0]+currMB->bestmode.acc[0];
pmv[1]=2*currMB->bestmode.mv[2][1]-currMB->bestmode.mv[1][1]+currMB->bestmode.acc[1];
max_pos=9;
}*/
SetMotionVectorPredictor (pmv, ref_array, mv_array, refframe, 0, 0, 16, 16);
// NOTE(review): the division by 4 presumably converts quarter-pel units to full-pel -- confirm
search_center_x[refindex] = pmv[0] / 4;
search_center_y[refindex] = pmv[1] / 4;
if (!input->rdopt)
{
//--- correct center so that (0,0) vector is inside ---
search_center_x[refindex] = max(-search_range, min(search_range, search_center_x[refindex]));
search_center_y[refindex] = max(-search_range, min(search_range, search_center_y[refindex]));
}
// turn the relative center into absolute picture coordinates
search_center_x[refindex] += img->pix_x;
search_center_y[refindex] += pix_y;
offset_x = search_center_x[refindex];
offset_y = search_center_y[refindex];
//===== copy original block for fast access =====
// orig_blocks holds the 16x16 block row by row, 16 pixels per row
for (y = pix_y; y < pix_y+16; y++)
for (x = img->pix_x; x < img->pix_x+16; x++)
*orgptr++ = imgY_orig [y][x];
//===== check if whole search range is inside image =====
// (no range check is performed here; positions are clamped inside the search loop instead)
//===== determine position of (0,0)-vector =====
// (ref_x, ref_y) is the spiral offset that maps the search center back onto the
// current block position; pos_00 is left unchanged when no table entry matches
ref_x = img->pix_x - offset_x;
ref_y = pix_y - offset_y;
for (pos = 0; pos < max_pos; pos++)
{
if (ref_x == spiral_search_x[pos] &&
ref_y == spiral_search_y[pos])
{
pos_00[refindex] = pos;
break;
}
}
//===== loop over search range (spiral search): get blockwise SAD =====
for (pos = 0; pos < max_pos; pos++)
{
abs_y = offset_y + spiral_search_y[pos];
abs_x = offset_x + spiral_search_x[pos];
// clamp the top-left corner to [-16, width] x [-16, height]; the 16x16 read below
// then touches columns abs_x..abs_x+15 and rows abs_y..abs_y+15
// NOTE(review): presumably ref_pic is padded by 16 pixels on every side so that
// these accesses stay inside the buffer -- confirm against the allocation
abs_x=abs_x>=-16?abs_x:-16;
abs_y=abs_y>=-16?abs_y:-16;
abs_x=abs_x<=img->width?abs_x:img->width;
abs_y=abs_y<=img->height?abs_y:img->height;
i=0;
//zdd
// collect the start address of each of the 16 reference rows
for(j=0;j<16;j++,abs_y++)
{
refptr[j]=&(ref_pic[abs_y][abs_x]);
}
orgptr = orig_blocks;
bindex = 0;
//zdd
// each iteration handles one row of four 4x4 blocks (four pixel rows)
for (blky = 0; blky < 4; blky++)
{
refptr1 = refptr[i++];
refptr2 = refptr[i++];
refptr3 = refptr[i++];
refptr4 = refptr[i++];
// The assembly consists of four identical stanzas, one per 4x4 block (byte
// offsets 0, 4, 8, 12 within the rows). Each stanza gathers 4 bytes from each
// of the four reference rows and from each of the four original rows into one
// 16-byte register apiece, takes PSADBW (which yields one partial sum per
// 8-byte half), adds the two halves, and stores the 64-bit result into
// LineSadBlk[0..3].
_asm
{
mov eax,refptr1
mov ebx,refptr2
mov ecx,refptr3
mov edx,refptr4
mov esi,orgptr
mov edi,lsadptr
// xmm7 = 0; used as the zero operand of the unpack instructions in all stanzas
PXOR xmm7,xmm7
// --- 4x4 block 0: byte offset 0 ---
movd mm0,[eax]
movd mm1,[ebx]
movd mm2,[ecx]
movd mm3,[edx]
movd mm4,[esi]
movd mm5,[esi+16]
movd mm6,[esi+32]
movd mm7,[esi+48]
PUNPCKLDQ mm0,mm1
PUNPCKLDQ mm2,mm3
PUNPCKLDQ mm4,mm5
PUNPCKLDQ mm6,mm7
MOVQ2DQ xmm0,mm0
MOVQ2DQ xmm1,mm2
UNPCKLPD xmm0,xmm1
MOVQ2DQ xmm2,mm4
MOVQ2DQ xmm3,mm6
UNPCKLPD xmm2,xmm3
PSADBW xmm0,xmm2
MOVAPD xmm1,xmm0
UNPCKLPD xmm0,xmm7
UNPCKHPD xmm1,xmm7
PADDD xmm0,xmm1
MOVLPD [edi],xmm0
// --- 4x4 block 1: byte offset 4 ---
movd mm0,[eax+4]
movd mm1,[ebx+4]
movd mm2,[ecx+4]
movd mm3,[edx+4]
movd mm4,[esi+4]
movd mm5,[esi+20]
movd mm6,[esi+36]
movd mm7,[esi+52]
PUNPCKLDQ mm0,mm1
PUNPCKLDQ mm2,mm3
PUNPCKLDQ mm4,mm5
PUNPCKLDQ mm6,mm7
MOVQ2DQ xmm0,mm0
MOVQ2DQ xmm1,mm2
UNPCKLPD xmm0,xmm1
MOVQ2DQ xmm2,mm4
MOVQ2DQ xmm3,mm6
UNPCKLPD xmm2,xmm3
PSADBW xmm0,xmm2
MOVAPD xmm1,xmm0
UNPCKLPD xmm0,xmm7
UNPCKHPD xmm1,xmm7
PADDD xmm0,xmm1
MOVLPD [edi+8],xmm0
// --- 4x4 block 2: byte offset 8 ---
movd mm0,[eax+8]
movd mm1,[ebx+8]
movd mm2,[ecx+8]
movd mm3,[edx+8]
movd mm4,[esi+8]
movd mm5,[esi+24]
movd mm6,[esi+40]
movd mm7,[esi+56]
PUNPCKLDQ mm0,mm1
PUNPCKLDQ mm2,mm3
PUNPCKLDQ mm4,mm5
PUNPCKLDQ mm6,mm7
MOVQ2DQ xmm0,mm0
MOVQ2DQ xmm1,mm2
UNPCKLPD xmm0,xmm1
MOVQ2DQ xmm2,mm4
MOVQ2DQ xmm3,mm6
UNPCKLPD xmm2,xmm3
PSADBW xmm0,xmm2
MOVAPD xmm1,xmm0
UNPCKLPD xmm0,xmm7
UNPCKHPD xmm1,xmm7
PADDD xmm0,xmm1
MOVLPD [edi+16],xmm0
// --- 4x4 block 3: byte offset 12 ---
movd mm0,[eax+12]
movd mm1,[ebx+12]
movd mm2,[ecx+12]
movd mm3,[edx+12]
movd mm4,[esi+12]
movd mm5,[esi+28]
movd mm6,[esi+44]
movd mm7,[esi+60]
PUNPCKLDQ mm0,mm1
PUNPCKLDQ mm2,mm3
PUNPCKLDQ mm4,mm5
PUNPCKLDQ mm6,mm7
MOVQ2DQ xmm0,mm0
MOVQ2DQ xmm1,mm2
UNPCKLPD xmm0,xmm1
MOVQ2DQ xmm2,mm4
MOVQ2DQ xmm3,mm6
UNPCKLPD xmm2,xmm3
PSADBW xmm0,xmm2
MOVAPD xmm1,xmm0
UNPCKLPD xmm0,xmm7
UNPCKHPD xmm1,xmm7
PADDD xmm0,xmm1
MOVLPD [edi+24],xmm0
// leave MMX state after using the mm registers
emms
}
// advance the original-block pointer by four rows of 16 pixels
orgptr+=64;
// store the four block SADs; bindex runs 0..15 across the four blky iterations
// (the 64-bit sums are implicitly converted to int on assignment)
block_sad[bindex++][pos] = LineSadBlk[0];
block_sad[bindex++][pos] = LineSadBlk[1];
block_sad[bindex++][pos] = LineSadBlk[2];
block_sad[bindex++][pos] = LineSadBlk[3];
}
}
//===== combine SADs for larger block types =====
SetupLargerBlocks (refindex, max_pos);
//===== set flag marking that search setup has been done =====
search_setup_done[refindex] = 1;
}
#endif // _FAST_FULL_ME_
/*!
***********************************************************************
* \brief
* setting the motion vector predictor
***********************************************************************
*/
void
SetMotionVectorPredictor (int pmv[2],
int **refFrArr,
int ***tmp_mv,
int ref_frame,
int mb_x,
int mb_y,
int blockshape_x,
int blockshape_y)
{
int pic_block_x = img->block_x + (mb_x>>2);
int pic_block_y = img->block_y + (mb_y>>2);
int mb_nr = img->current_mb_nr;
int mb_width = img->width/16;
int mb_available_up = (img->mb_y == 0 ) ? 0 : (img->mb_data[mb_nr].slice_nr == img->mb_data[mb_nr-mb_width ].slice_nr);
int mb_available_left = (img->mb_x == 0 ) ? 0 : (img->mb_data[mb_nr].slice_nr == img->mb_data[mb_nr-1 ].slice_nr);
int mb_available_upleft = (img->mb_x == 0 ||
img->mb_y == 0 ) ? 0 : (img->mb_data[mb_nr].slice_nr == img->mb_data[mb_nr-mb_width-1].slice_nr);
int mb_available_upright = (img->mb_x >= mb_width-1 ||
img->mb_y == 0 ) ? 0 : (img->mb_data[mb_nr].slice_nr == img->mb_data[mb_nr-mb_width+1].slice_nr);
int block_available_up, block_available_left, block_available_upright, block_available_upleft;
int mv_a, mv_b, mv_c, mv_d, pred_vec=0;
int mvPredType, rFrameL, rFrameU, rFrameUR;
int hv;
Macroblock *currMB = &img->mb_data[img->current_mb_nr];
if(input->InterlaceCodingOption >= MB_CODING && mb_adaptive && img->field_mode)
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?