📄 motionsearch.c
字号:
/* motionsearch.c, block motion estimation for mpeg2enc *//* (C) 2000/2001 Andrew Stevens *//* These modifications are free software; you can redistribute it * and/or modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA * 02111-1307, USA. * */#include "config.h"#include <limits.h>#include <stdlib.h>#include <math.h>#include "cpu_accel.h"#include "fastintfns.h"#include "motionsearch.h"#include "mjpeg_logging.h"/* * Define prototypes so as to avoid warning errors at compile time*/void sub_mean_reduction(me_result_set *, int, int *);void subsample_image(uint8_t *,int ,uint8_t *,uint8_t *);void variance(uint8_t *,int ,int, unsigned int *,unsigned int *);/* The AltiVec code needs access to symbols during benchmarking * and verification. */#define STATIC static#ifdef HAVE_ALTIVEC#include "altivec/altivec_motion.h"# if ALTIVEC_TEST_MOTION# undef STATIC# define STATIC /* static */# endif#endif#if defined(HAVE_ASM_MMX)#include "mmxsse/mmxsse_motion.h"#endif/* * Function pointers for selecting CPU specific implementations * */int (*pmblocks_sub44_mests)( uint8_t *blk, uint8_t *ref, int ilow, int jlow, int ihigh, int jhigh, int h, int rowstride, int threshold, me_result_s *resvec);void (*pfind_best_one_pel)( me_result_set *sub22set, uint8_t *org, uint8_t *blk, int i0, int j0, int ihigh, int jhigh, int rowstride, int h, me_result_s *res );int (*pbuild_sub22_mests)( me_result_set *sub44set, me_result_set *sub22set, int i0, int j0, int ihigh, int jhigh, int null_mc_sad, uint8_t *s22org, uint8_t *s22blk, int frowstride, int fh, int reduction );int (*pbuild_sub44_mests)( me_result_set *sub44set, int ilow, int jlow, int ihigh, int jhigh, int i0, int j0, int null_mc_sad, uint8_t *s44org, uint8_t *s44blk, int qrowstride, int qh, int reduction );int (*psumsq_sub22)( uint8_t *blk1, uint8_t *blk2, int rowstride, int h);int (*pbsumsq_sub22)( uint8_t *blk1f, uint8_t *blk1b, uint8_t *blk2, int rowstride, int h);void (*pvariance)(uint8_t *mb, int size, int rowstride, uint32_t *p_var, uint32_t *p_mean);int (*psad_sub22) ( uint8_t *blk1, uint8_t *blk2, int frowstride, int fh);int (*psad_sub44) ( uint8_t *blk1, uint8_t *blk2, int qrowstride, int qh);int (*psad_00) ( uint8_t *blk1, uint8_t *blk2, int rowstride, int h, int distlim);int (*psad_01) (uint8_t *blk1, uint8_t *blk2, int rowstride, int h);int (*psad_10) (uint8_t *blk1, uint8_t *blk2, int rowstride, int h);int (*psad_11) (uint8_t *blk1, uint8_t *blk2, int rowstride, int h);int (*psumsq) (uint8_t *blk1, uint8_t *blk2, int rowstride, int hx, int hy, int h); int (*pbsumsq) (uint8_t *pf, uint8_t *pb, uint8_t *p2, int rowstride, int hxf, int hyf, int hxb, int hyb, int h);int (*pbsad) (uint8_t *pf, uint8_t *pb, uint8_t *p2, int rowstride, int hxf, int hyf, int hxb, int hyb, int h);void (*psubsample_image) (uint8_t *image, int rowstride, uint8_t *sub22_image, uint8_t *sub44_image);/* * Round search radius to suit the search algorithm. * Currently radii must be multiples of 8. * */int round_search_radius( int radius ){ return intmax(8,((radius+4) /8)*8);}/* Take a vector of motion estimations and repeatedly make passes discarding all elements whose sad "weight" is above the current mean weight.*/void sub_mean_reduction( me_result_set *matchset, int times, int *minweight_res){ me_result_s *matches = matchset->mests; int len = matchset->len; int i,j; int weight_sum; int mean_weight; if( len <= 1 ) { *minweight_res = (len==0) ? 100000 : matches[0].weight; return; } for(;;) { weight_sum = 0; for( i = 0; i < len ; ++i ) weight_sum += matches[i].weight; mean_weight = weight_sum / len; if( times <= 0) break; j = 0; for( i =0; i < len; ++i ) { if( matches[i].weight <= mean_weight ) { matches[j] = matches[i]; ++j; } } len = j; --times; } matchset->len = len; *minweight_res = mean_weight;}/* * Build a vector of the top 4*4 sub-sampled motion estimations in * the box (ilow,jlow) to (ihigh,jhigh). * * The algorithm is as follows: * * 1. Matches on an 4*4 pel grid are collected. All those matches * whose that is over a (conservative) threshold (basically 50% more * than moving average of the mean sad of such matches) are discarded. * * 2. Multiple passes are made discarding worse than-average matches. * The number of passes is specified by the user. The default it 2 * (leaving roughly 1/4 of the matches). * * The intial threshold and discard passes are controlled by reduction * [1..4]. The intial SAD threshold is calculated as 6 / reduction of * a reference SAD passed as a parameter. For reduction == 1 one * discard pass is made otherwise two are made. * * The net result is very fast and find good matches if they're to be * found. I.e. the penalty over exhaustive search is pretty low. * * NOTE: The "discard below average" trick depends critically on * having some variation in the matches. The slight penalty imposed * for distant matches (reasonable since the motion vectors have to * be encoded) is *vital* as otherwise pathologically bad performance * results on highly uniform images. * * TODO: We should probably allow the user to eliminate the initial * thinning of 4*4 grid matches if ultimate quality is demanded * (e.g. for low bit-rate applications). * */STATIC int build_sub44_mests( me_result_set *sub44set, int ilow, int jlow, int ihigh, int jhigh, int i0, int j0, int null_ctl_sad, uint8_t *s44org, uint8_t *s44blk, int qrowstride, int qh, int reduction ){ uint8_t *s44orgblk; me_result_s *sub44_mests = sub44set->mests; int istrt = ilow-i0; int jstrt = jlow-j0; int iend = ihigh-i0; int jend = jhigh-j0; int mean_weight; int threshold; int i,j; int s1; uint8_t *old_s44orgblk; int sub44_num_mests; /* N.b. we may ignore the right hand block of the pair going over the right edge as we have carefully allocated the buffer oversize to ensure no memory faults. The later motion estimation calculations performed on the results of this pass will filter out out-of-range blocks... */ threshold = 6*null_ctl_sad / (4*4*reduction); s44orgblk = s44org+(ilow>>2)+qrowstride*(jlow>>2); /* Exhaustive search on 4*4 sub-sampled data. This is affordable because (a) it is only 16th of the size of the real 1-pel data (b) we ignore those matches with an sad above our threshold. */ sub44_num_mests = 0; /* Invariant: s44orgblk = s44org+(i>>2)+qrowstride*(j>>2) */ s44orgblk = s44org+(ilow>>2)+qrowstride*(jlow>>2); for( j = jstrt; j <= jend; j += 4 ) { old_s44orgblk = s44orgblk; for( i = istrt; i <= iend; i += 4 ) { s1 = ((*psad_sub44)( s44orgblk,s44blk,qrowstride,qh) & 0xffff); if( s1 < threshold ) { threshold = intmin(s1<<2,threshold); sub44_mests[sub44_num_mests].x = i; sub44_mests[sub44_num_mests].y = j; sub44_mests[sub44_num_mests].weight = s1 + (intmax(abs(i-i0), abs(j-j0))<<1); ++sub44_num_mests; } s44orgblk += 1; } s44orgblk = old_s44orgblk + qrowstride; } sub44set->len = sub44_num_mests; sub_mean_reduction( sub44set, 1+(reduction>1), &mean_weight); return sub44set->len;}/* Build a vector of the best 2*2 sub-sampled motion estimations for * the 16*16 macroblock at i0,j0 using a set of 4*4 sub-sampled matches as * starting points. As with with the 4*4 matches We don't collect * them densely as they're just search starting points for 1-pel * search and ones that are 1 out * should still give better than * average matches... * * The resulting candidate motion vectors are thinned by thresholding * and discarding worse than-average matches. * * The intial threshold and number of discard passes are controlled by reduction * [1..4]: the intial SAD threshold is calculated as 6 / reduction of * a reference SAD passed as a parameter and then reduction discard passes * are made. * */STATIC int build_sub22_mests( me_result_set *sub44set, me_result_set *sub22set, int i0, int j0, int ihigh, int jhigh, int null_ctl_sad, uint8_t *s22org, uint8_t *s22blk, int frowstride, int fh, int reduction){ int i,k,s; int threshold = 6*null_ctl_sad / (2 * 2*reduction); int min_weight; int ilim = ihigh-i0; int jlim = jhigh-j0; int x,y; uint8_t *s22orgblk; sub22set->len = 0; for( k = 0; k < sub44set->len; ++k ) { x = sub44set->mests[k].x; y = sub44set->mests[k].y; s22orgblk = s22org +((y+j0)>>1)*frowstride +((x+i0)>>1); for( i = 0; i < 4; ++i ) { if( x <= ilim && y <= jlim ) { s = (*psad_sub22)( s22orgblk,s22blk,frowstride,fh)+ (intmax(abs(x), abs(y))<<3); if( s < threshold ) { me_result_s *mc = &sub22set->mests[sub22set->len]; mc->x = (int8_t)x; mc->y = (int8_t)y; mc->weight = s; ++(sub22set->len); } } if( i == 1 ) { s22orgblk += frowstride-1; x -= 2; y += 2; } else { s22orgblk += 1; x += 2; } } } sub_mean_reduction( sub22set, reduction, &min_weight ); return sub22set->len;}/* * Search for the best 1-pel match within 1-pel of a good 2*2-pel * match. * * N.b. best_so_far must be initialised by the caller! */STATIC void find_best_one_pel( me_result_set *sub22set, uint8_t *org, uint8_t *blk, int i0, int j0, int ihigh, int jhigh, int rowstride, int h, me_result_s *best_so_far ){ int i,k; int d; me_result_s minpos = *best_so_far; int dmin = INT_MAX; int ilim = ihigh-i0; int jlim = jhigh-j0; uint8_t *orgblk; int penalty; me_result_s matchrec; for( k = 0; k < sub22set->len; ++k ) { matchrec = sub22set->mests[k]; orgblk = org + (i0+matchrec.x)+rowstride*(j0+matchrec.y); penalty = (abs(matchrec.x) + abs(matchrec.y))<<3; for( i = 0; i < 4; ++i ) { if( matchrec.x <= ilim && matchrec.y <= jlim ) { d = penalty+(*psad_00)(orgblk,blk,rowstride,h, dmin); if (d<dmin) { dmin = d; minpos = matchrec; } } if( i == 1 ) { orgblk += rowstride-1; matchrec.x -= 1; matchrec.y += 1; } else { orgblk += 1; matchrec.x += 1; } } } minpos.weight = (uint16_t)intmin(255*255, dmin); *best_so_far = minpos;} /* * sum absolute difference between two (16*h) blocks Four variations * depending on the required half pel interpolation of blk1 (hx,hy) * * blk1,blk2: addresses of top left pels of both blocks * rowstride: distance (in bytes) of vertically adjacent pels * hx,hy: flags for horizontal and/or vertical interpolation * h: height of block (usually 8 or 16) * distlim: bail out if sum exceeds this value * **/STATIC int sad_00(uint8_t *blk1,uint8_t *blk2, int rowstride, int h,int distlim){ uint8_t *p1,*p2; int j; int s; register int v; s = 0; p1 = blk1; p2 = blk2; for (j=0; j<h; j++) {#define pipestep(o) v = p1[o]-p2[o]; s+= abs(v); pipestep(0); pipestep(1); pipestep(2); pipestep(3); pipestep(4); pipestep(5); pipestep(6); pipestep(7); pipestep(8); pipestep(9); pipestep(10); pipestep(11); pipestep(12); pipestep(13); pipestep(14); pipestep(15);#undef pipestep if (s >= distlim) break; p1+= rowstride; p2+= rowstride; } return s;}STATIC int sad_01(uint8_t *blk1,uint8_t *blk2,int rowstride, int h){ uint8_t *p1,*p2; int i,j; int s; register int v; s = 0; p1 = blk1; p2 = blk2; for (j=0; j<h; j++) { for (i=0; i<16; i++) { v = ((unsigned int)(p1[i]+p1[i+1]+1)>>1) - p2[i]; s += abs(v); } p1+= rowstride; p2+= rowstride; } return s;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -