📄 motionsearch.c
字号:
}STATIC int sad_10(uint8_t *blk1,uint8_t *blk2, int rowstride, int h){ uint8_t *p1,*p1a,*p2; int i,j; int s; register int v; s = 0; p1 = blk1; p2 = blk2; p1a = p1 + rowstride; for (j=0; j<h; j++) { for (i=0; i<16; i++) { v = ((unsigned int)(p1[i]+p1a[i]+1)>>1) - p2[i]; s += abs(v); } p1 = p1a; p1a+= rowstride; p2+= rowstride; } return s;}STATIC int sad_11(uint8_t *blk1,uint8_t *blk2, int rowstride, int h){ uint8_t *p1,*p1a,*p2; int i,j; int s; register int v; s = 0; p1 = blk1; p2 = blk2; p1a = p1 + rowstride; for (j=0; j<h; j++) { for (i=0; i<16; i++) { v = ((unsigned int)((p1[i]+p1[i+1])+(p1a[i]+p1a[i+1])+2)>>2) - p2[i]; s += abs(v); } p1 = p1a; p1a+= rowstride; p2+= rowstride; } return s;}/* * Compute subsampled images for fast motion compensation search * N.b. rowstride should be *two* line widths for interlace images */void subsample_image( uint8_t *image, int rowstride, uint8_t *sub22_image, uint8_t *sub44_image){ uint8_t *blk = image; uint8_t *b, *nb; uint8_t *pb; uint8_t *qb; uint8_t *start_s22blk, *start_s44blk; int i; int nextfieldline = rowstride; start_s22blk = sub22_image; start_s44blk = sub44_image; b = blk; nb = (blk+nextfieldline); pb = (uint8_t *) start_s22blk; while( nb < start_s22blk ) { for( i = 0; i < nextfieldline/4; ++i ) /* We're doing 4 pels horizontally at once */ { pb[0] = ((b[0]+b[1])+(nb[0]+nb[1])+2)>>2; pb[1] = ((b[2]+b[3])+(nb[2]+nb[3])+2)>>2; pb += 2; b += 4; nb += 4; } b += nextfieldline; nb = b + nextfieldline; } /* Now create the 4*4 sub-sampled data from the 2*2 N.b. the 2*2 sub-sampled motion data preserves the interlace structure of the original. Albeit half as many lines and pixels... */ nextfieldline = nextfieldline >> 1; qb = start_s44blk; b = start_s22blk; nb = (start_s22blk+nextfieldline); while( nb < start_s44blk ) { for( i = 0; i < nextfieldline/4; ++i ) { qb[0] = ((b[0]+b[1])+(nb[0]+nb[1])+2)>>2; qb[1] = ((b[2]+b[3])+(nb[2]+nb[3])+2)>>2; qb += 2; b += 4; nb += 4; } b += nextfieldline; nb = b + nextfieldline; }}/* * Same as sad_00 except for 2*2 subsampled data so only 8 wide! * */ STATIC int sad_sub22( uint8_t *s22blk1, uint8_t *s22blk2,int frowstride,int fh){ uint8_t *p1 = s22blk1; uint8_t *p2 = s22blk2; int s = 0; int j; for( j = 0; j < fh; ++j ) { register int diff;#define pipestep(o) diff = p1[o]-p2[o]; s += abs(diff) pipestep(0); pipestep(1); pipestep(2); pipestep(3); pipestep(4); pipestep(5); pipestep(6); pipestep(7); p1 += frowstride; p2 += frowstride;#undef pipestep } return s;}/* * Same as sad_00 except for 4*4 sub-sampled data. * * N.b.: currently assumes only 16*16 or 16*8 motion estimation will * be used... I.e. 4*4 or 4*2 sub-sampled blocks will be compared. * * */STATIC int sad_sub44( uint8_t *s44blk1, uint8_t *s44blk2,int qrowstride,int qh){ register uint8_t *p1 = s44blk1; register uint8_t *p2 = s44blk2; int s = 0; register int diff; /* #define pipestep(o) diff = p1[o]-p2[o]; s += abs(diff) */#define pipestep(o) diff = p1[o]-p2[o]; s += diff < 0 ? -diff : diff; pipestep(0); pipestep(1); pipestep(2); pipestep(3); if( qh > 1 ) { p1 += qrowstride; p2 += qrowstride; pipestep(0); pipestep(1); pipestep(2); pipestep(3); if( qh > 2 ) { p1 += qrowstride; p2 += qrowstride; pipestep(0); pipestep(1); pipestep(2); pipestep(3); p1 += qrowstride; p2 += qrowstride; pipestep(0); pipestep(1); pipestep(2); pipestep(3); } } return s;}/* * total squared difference between two (8*h) blocks of 2*2 sub-sampled pels * blk1,blk2: addresses of top left pels of both blocks * rowstride: distance (in bytes) of vertically adjacent pels * h: height of block (usually 8 or 16) */ STATIC int sumsq_sub22(uint8_t *blk1, uint8_t *blk2, int rowstride, int h){ uint8_t *p1 = blk1, *p2 = blk2; int i,j,v; int s = 0; for (j=0; j<h; j++) { for (i=0; i<8; i++) { v = p1[i] - p2[i]; s+= v*v; } p1+= rowstride; p2+= rowstride; } return s;}/* total squared difference between bidirection prediction of (8*h) * blocks of 2*2 sub-sampled pels and reference * blk1f, blk1b,blk2: addresses of top left * pels of blocks * rowstride: distance (in bytes) of vertically adjacent * pels * h: height of block (usually 4 or 8) */ STATIC int bsumsq_sub22(uint8_t *blk1f, uint8_t *blk1b, uint8_t *blk2, int rowstride, int h){ uint8_t *p1f = blk1f,*p1b = blk1b,*p2 = blk2; int i,j,v; int s = 0; for (j=0; j<h; j++) { for (i=0; i<8; i++) { v = ((p1f[i]+p1b[i]+1)>>1) - p2[i]; s+= v*v; } p1f+= rowstride; p1b+= rowstride; p2+= rowstride; } return s;}/* * total squared difference between two (16*h) blocks * including optional half pel interpolation of blk1 (hx,hy) * blk1,blk2: addresses of top left pels of both blocks * rowstride: distance (in bytes) of vertically adjacent pels * hx,hy: flags for horizontal and/or vertical interpolation * h: height of block (usually 8 or 16) */ STATIC int sumsq(blk1,blk2,rowstride,hx,hy,h) uint8_t *blk1,*blk2; int rowstride,hx,hy,h;{ uint8_t *p1,*p1a,*p2; int i,j; int s,v; s = 0; p1 = blk1; p2 = blk2; if (!hx && !hy) for (j=0; j<h; j++) { for (i=0; i<16; i++) { v = p1[i] - p2[i]; s+= v*v; } p1+= rowstride; p2+= rowstride; } else if (hx && !hy) for (j=0; j<h; j++) { for (i=0; i<16; i++) { v = ((unsigned int)(p1[i]+p1[i+1]+1)>>1) - p2[i]; s+= v*v; } p1+= rowstride; p2+= rowstride; } else if (!hx && hy) { p1a = p1 + rowstride; for (j=0; j<h; j++) { for (i=0; i<16; i++) { v = ((unsigned int)(p1[i]+p1a[i]+1)>>1) - p2[i]; s+= v*v; } p1 = p1a; p1a+= rowstride; p2+= rowstride; } } else /* if (hx && hy) */ { p1a = p1 + rowstride; for (j=0; j<h; j++) { for (i=0; i<16; i++) { v = ((unsigned int)(p1[i]+p1[i+1]+p1a[i]+p1a[i+1]+2)>>2) - p2[i]; s+= v*v; } p1 = p1a; p1a+= rowstride; p2+= rowstride; } } return s;}/* * absolute difference error between a (16*h) block and a bidirectional * prediction * * p2: address of top left pel of block * pf,hxf,hyf: address and half pel flags of forward ref. block * pb,hxb,hyb: address and half pel flags of backward ref. block * h: height of block * rowstride: distance (in bytes) of vertically adjacent pels in p2,pf,pb */ STATIC int bsad(pf,pb,p2,rowstride,hxf,hyf,hxb,hyb,h) uint8_t *pf,*pb,*p2; int rowstride,hxf,hyf,hxb,hyb,h;{ uint8_t *pfa,*pfb,*pfc,*pba,*pbb,*pbc; int i,j; int s,v; pfa = pf + hxf; pfb = pf + rowstride*hyf; pfc = pfb + hxf; pba = pb + hxb; pbb = pb + rowstride*hyb; pbc = pbb + hxb; s = 0; for (j=0; j<h; j++) { for (i=0; i<16; i++) { v = ((((unsigned int)(*pf++ + *pfa++ + *pfb++ + *pfc++ + 2)>>2) + ((unsigned int)(*pb++ + *pba++ + *pbb++ + *pbc++ + 2)>>2) + 1)>>1) - *p2++; s += abs(v); } p2+= rowstride-16; pf+= rowstride-16; pfa+= rowstride-16; pfb+= rowstride-16; pfc+= rowstride-16; pb+= rowstride-16; pba+= rowstride-16; pbb+= rowstride-16; pbc+= rowstride-16; } return s;}/* * squared error between a (16*h) block and a bidirectional * prediction * * p2: address of top left pel of block * pf,hxf,hyf: address and half pel flags of forward ref. block * pb,hxb,hyb: address and half pel flags of backward ref. block * h: height of block * rowstride: distance (in bytes) of vertically adjacent pels in p2,pf,pb */ STATIC int bsumsq(pf,pb,p2,rowstride,hxf,hyf,hxb,hyb,h) uint8_t *pf,*pb,*p2; int rowstride,hxf,hyf,hxb,hyb,h;{ uint8_t *pfa,*pfb,*pfc,*pba,*pbb,*pbc; int i,j; int s,v; pfa = pf + hxf; pfb = pf + rowstride*hyf; pfc = pfb + hxf; pba = pb + hxb; pbb = pb + rowstride*hyb; pbc = pbb + hxb; s = 0; for (j=0; j<h; j++) { for (i=0; i<16; i++) {#define ui(x) ((unsigned int)x) v = ((((ui(*pf++) + ui(*pfa++) + ui(*pfb++) + ui(*pfc++) + 2)>>2) + ((ui(*pb++) + ui(*pba++) + ui(*pbb++) + ui(*pbc++) + 2)>>2) + 1 )>>1) - ui(*p2++);#undef ui s+=v*v; } p2+= rowstride-16; pf+= rowstride-16; pfa+= rowstride-16; pfb+= rowstride-16; pfc+= rowstride-16; pb+= rowstride-16; pba+= rowstride-16; pbb+= rowstride-16; pbc+= rowstride-16; } return s;}/* * variance of a (size*size) block, multiplied by 256 * p: address of top left pel of block * rowstride: distance (in bytes) of vertically adjacent pels * SIZE is a multiple of 8. */void variance(uint8_t *p, int size, int rowstride, unsigned int *p_var, unsigned int *p_mean){ int i,j; unsigned int v,s,s2; s = s2 = 0; for (j=0; j<size; j++) { for (i=0; i<size; i++) { v = *p++; s+= v; s2+= v*v; } p+= rowstride-size; } *p_mean = s/(size*size); *p_var = s2 - (s*s)/(size*size);}/* * Initialise motion estimation - currently only selection of which * versions of the various low level computation routines to use * */void init_motion_search(void){ int cpucap = cpu_accel(); /* Initialize function pointers. This allows partial acceleration * implementations to update only the function pointers they support. */ psad_sub22 = sad_sub22; psad_sub44 = sad_sub44; psad_00 = sad_00; psad_01 = sad_01; psad_10 = sad_10; psad_11 = sad_11; pbsad = bsad; pvariance = variance; psumsq = sumsq; pbsumsq = bsumsq; psumsq_sub22 = sumsq_sub22; pbsumsq_sub22 = bsumsq_sub22; pfind_best_one_pel = find_best_one_pel; pbuild_sub22_mests = build_sub22_mests; pbuild_sub44_mests = build_sub44_mests; psubsample_image = subsample_image;#if defined(HAVE_ASM_MMX) enable_mmxsse_motion(cpucap);#endif#ifdef HAVE_ALTIVEC if (cpucap > 0) { enable_altivec_motion(); }#endif}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -