📄 motion_est.c

📁 杜比AC-3编码解码器（参考程序）
💻 C
字号:
/*
 * Motion estimation 
 * Copyright (c) 2000 Gerard Lantau.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
#include <stdlib.h>
#include <stdio.h>
#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"

static int pix_sum(UINT8 *pix, int line_size)
{
    int s, i, j;

    s = 0;
    for(i=0;i<16;i++) {
        for(j=0;j<16;j+=8) {
            s += pix[0];
            s += pix[1];
            s += pix[2];
            s += pix[3];
            s += pix[4];
            s += pix[5];
            s += pix[6];
            s += pix[7];
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}

static int pix_norm1(UINT8 *pix, int line_size)
{
    int s, i, j;
    UINT32 *sq = squareTbl + 256;

    s = 0;
    for(i=0;i<16;i++) {
        for(j=0;j<16;j+=8) {
            s += sq[pix[0]];
            s += sq[pix[1]];
            s += sq[pix[2]];
            s += sq[pix[3]];
            s += sq[pix[4]];
            s += sq[pix[5]];
            s += sq[pix[6]];
            s += sq[pix[7]];
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}

static int pix_norm(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int s, i, j;
    UINT32 *sq = squareTbl + 256;

    s = 0;
    for(i=0;i<16;i++) {
        for(j=0;j<16;j+=8) {
            s += sq[pix1[0] - pix2[0]];
            s += sq[pix1[1] - pix2[1]];
            s += sq[pix1[2] - pix2[2]];
            s += sq[pix1[3] - pix2[3]];
            s += sq[pix1[4] - pix2[4]];
            s += sq[pix1[5] - pix2[5]];
            s += sq[pix1[6] - pix2[6]];
            s += sq[pix1[7] - pix2[7]];
            pix1 += 8;
            pix2 += 8;
        }
        pix1 += line_size - 16;
        pix2 += line_size - 16;
    }
    return s;
}

#define USE_MMX

#ifdef USE_MMX

/* original mmx code from mpeg2movie by Adam Williams */

static void inline mmx_start_block(void)
{
	asm(" 
		.align 8
		pxor %%mm7, %%mm7; 
		pxor %%mm6, %%mm6;
		" : : );
}

static void inline mmx_absdiff(unsigned char *a, unsigned char *b)
{
	asm("
		.align 8
		movq		(%%ebx),	%%mm0;     // Get first half of row1
		movq		(%%ecx),	%%mm1;     // Get first half of row2
		movq		%%mm0,		%%mm2;     // Make a copy of row1 for absdiff operation
		movq		8(%%ebx),	%%mm3;     // Get second half of row1
		psubusb		%%mm1,		%%mm0;     // Subtract the first halves one way
		psubusb		%%mm2,		%%mm1;     // Subtract the other way
		movq        8(%%ecx),   %%mm4;     // Get second half of row2
		por			%%mm1,      %%mm0;     // Merge first half results
		movq		%%mm3,		%%mm5;     // Copy for absdiff operation
		movq		%%mm0,		%%mm1;     // Keep a copy
		psubusb		%%mm4,		%%mm3;     // Subtract second halves one way
		punpcklbw	%%mm6,		%%mm0;     // Unpack to higher precision for accumulation
		psubusb		%%mm5,		%%mm4;     // Subtract the other way
		psrlq		$32,		%%mm1;     // Shift registeres for accumulation
		por			%%mm4,		%%mm3;     // merge results of 2nd half
		punpcklbw	%%mm6,		%%mm1;     // unpack to higher precision for accumulation
		movq		%%mm3,		%%mm4;     // keep a copy
		punpcklbw	%%mm6,		%%mm3;     // unpack to higher precision for accumulation
		paddw		%%mm0,		%%mm7;     // accumulate difference
		psrlq		$32,		%%mm4;     // shift results for accumulation
		paddw		%%mm1,		%%mm7;     // accumulate difference
		punpcklbw	%%mm6,		%%mm4;     // unpack to higher precision for accumulation
		paddw		%%mm3,		%%mm7;     // accumulate difference
		paddw		%%mm4,		%%mm7;     // accumulate difference
		"
		: 
		: "b" (a), "c" (b) 
		: "st" );
}

static inline unsigned int mmx_accum_absdiff()
{
	unsigned long long r = 0;
	asm("
		.align 8
		movq		%%mm7,	%%mm5;
		movq		%%mm7,	%%mm4;
		punpcklwd	%%mm6,	%%mm4;
		punpckhwd	%%mm6,	%%mm5;
		paddd		%%mm5,	%%mm4;
		movq		%%mm4,	%%mm5;
		punpckldq	%%mm6,	%%mm5;
		punpckhdq	%%mm6,	%%mm4;
		paddd		%%mm5,	%%mm4;
		movq		%%mm4,	(%%ebx);
		emms;
		"
		: :  "b" (&r));

	return r;
}

static unsigned int pix_abs16x16(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int i;

    mmx_start_block();
    for(i=0;i<16;i++) {
        mmx_absdiff(pix1, pix2);
        pix1 += line_size;
        pix2 += line_size;
    }
    return mmx_accum_absdiff();
}

#else

static int pix_abs16x16(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int s, i;

    s = 0;
    for(i=0;i<16;i++) {
        s += abs(pix1[0] - pix2[0]);
        s += abs(pix1[1] - pix2[1]);
        s += abs(pix1[2] - pix2[2]);
        s += abs(pix1[3] - pix2[3]);
        s += abs(pix1[4] - pix2[4]);
        s += abs(pix1[5] - pix2[5]);
        s += abs(pix1[6] - pix2[6]);
        s += abs(pix1[7] - pix2[7]);
        s += abs(pix1[8] - pix2[8]);
        s += abs(pix1[9] - pix2[9]);
        s += abs(pix1[10] - pix2[10]);
        s += abs(pix1[11] - pix2[11]);
        s += abs(pix1[12] - pix2[12]);
        s += abs(pix1[13] - pix2[13]);
        s += abs(pix1[14] - pix2[14]);
        s += abs(pix1[15] - pix2[15]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

#endif

#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)

static int pix_abs16x16_x2(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int s, i;

    s = 0;
    for(i=0;i<16;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
        s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
        s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
        s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
        s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
        s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
        s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
        s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
        s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int pix_abs16x16_y2(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int s, i;
    UINT8 *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<16;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
        s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
        s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
        s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
        s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
        s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
        s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
        s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
        s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int pix_abs16x16_xy2(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int s, i;
    UINT8 *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<16;i++) {
        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
        s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
        s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
        s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
        s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
        s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
        s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
        s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
        s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static void no_motion_search(MpegEncContext *s, 
                             int *mx_ptr, int *my_ptr)
{
    *mx_ptr = 0;
    *my_ptr = 0;
}

static void full_motion_search(MpegEncContext *s, 
                               int *mx_ptr, int *my_ptr)
{
    int range, x1, y1, x2, y2, xx, yy, x, y;
    int mx, my, mx1, my1, dmin, d;
    UINT8 *pix;

    range = 8 * (1 << (s->f_code - 1));
    if (s->out_format == FMT_H263)
        range = range * 2;
    /* do not allow vectors outside the image (will change one day for
       h263) */
    xx = 16 * s->mb_x;
    yy = 16 * s->mb_y;
    x1 = xx - range + 1; /* we loose one pixel to avoid boundary pb with half pixel pred */
    if (x1 < 0)
        x1 = 0;
    x2 = xx + range - 1;
    if (x2 > (s->width - 16))
        x2 = s->width - 16;
    y1 = yy - range + 1;
    if (y1 < 0)
        y1 = 0;
    y2 = yy + range - 1;
    if (y2 > (s->height - 16))
        y2 = s->height - 16;
    pix = s->new_picture[0] + (yy * s->width) + xx;
    dmin = 0x7fffffff;
    mx = 0;
    my = 0;
    for(y=y1;y<=y2;y++) {
        for(x=x1;x<=x2;x++) {
            d = pix_abs16x16(pix, s->last_picture[0] + (y * s->width) + x, 
                             s->width);
            if (d < dmin ||
                (d == dmin && 
                 (abs(x - xx) + abs(y - yy)) < 
                 (abs(mx - xx) + abs(my - yy)))) {
                dmin = d;
                mx = x;
                my = y;
            }
        }
    }

    /* half pixel search */
    mx1 = mx = 2 * mx;
    my1 = my = 2 * my;

    if (mx > 0 && mx < 2 * (s->width - 16) &&
        my > 0 && my < 2 * (s->height - 16)) {
        int dx, dy, px, py;
        UINT8 *ptr;
        for(dy=-1;dy<=1;dy++) {
            for(dx=-1;dx<=1;dx++) {
                if (dx != 0 || dy != 0) {
                    px = mx1 + dx;
                    py = my1 + dy;
                    ptr = s->last_picture[0] + ((py >> 1) * s->width) + (px >> 1);
                    switch(((py & 1) << 1) | (px & 1)) {
                    default:
                    case 0:
                        d = pix_abs16x16(pix, ptr, s->width);
                        break;
                    case 1:
                        d = pix_abs16x16_x2(pix, ptr, s->width);
                        break;
                    case 2:
                        d = pix_abs16x16_y2(pix, ptr, s->width);
                        break;
                    case 3:
                        d = pix_abs16x16_xy2(pix, ptr, s->width);
                        break;
                    }
                    if (d < dmin) {
                        dmin = d;
                        mx = px;
                        my = py;
                    }
                }
            }
        }
    }

    *mx_ptr = mx - 2 * xx;
    *my_ptr = my - 2 * yy;
#if 0
    if (*mx_ptr < -(2 * range) || *mx_ptr >= (2 * range) ||
        *my_ptr < -(2 * range) || *my_ptr >= (2 * range)) {
        fprintf(stderr, "error %d %d\n", *mx_ptr, *my_ptr);
    }
#endif
}


#if 1

int estimate_motion(MpegEncContext *s, 
                    int mb_x, int mb_y,
                    int *mx_ptr, int *my_ptr)
{
    UINT8 *pix, *ppix;
    int sum, varc, vard, mx, my;

    
    if (s->full_search) {
        full_motion_search(s, &mx, &my);
    } else {
        no_motion_search(s, &mx, &my);
    }

    /* intra / predictive decision */

    pix = s->new_picture[0] + (mb_y * 16 * s->width) + mb_x * 16;
    ppix = s->last_picture[0] + 
        ((mb_y * 16 + (my >> 1)) * s->width) + (mb_x * 16 + (mx >> 1));

    sum = pix_sum(pix, s->width);
    varc = pix_norm1(pix, s->width);
    vard = pix_norm(pix, ppix, s->width);
    
    vard = vard >> 8;
    sum = sum >> 8;
    varc = (varc >> 8)  - (sum * sum);
#if 0
    printf("varc=%d (sum=%d) vard=%d mx=%d my=%d\n",
           varc, sum, vard, mx, my);
#endif
    if (vard <= 64) {
        *mx_ptr = mx;
        *my_ptr = my;
	return 0;
    } else if (vard < varc) {
        *mx_ptr = mx;
        *my_ptr = my;
	return 0;
    } else {
        *mx_ptr = 0;
        *my_ptr = 0;
        return 1;
    }
}

#else

/* test version which generates valid random vectors */
int estimate_motion(MpegEncContext *s, 
                    int mb_x, int mb_y,
                    int *mx_ptr, int *my_ptr)
{
     int xx, yy, x1, y1, x2, y2, range;

     if ((random() % 10) >= 5) {
         range = 8 * (1 << (s->f_code - 1));
         if (s->out_format == FMT_H263)
             range = range * 2;

         xx = 16 * s->mb_x;
         yy = 16 * s->mb_y;
         x1 = xx - range;
         if (x1 < 0)
             x1 = 0;
         x2 = xx + range - 1;
         if (x2 > (s->width - 16))
             x2 = s->width - 16;
         y1 = yy - range;
         if (y1 < 0)
             y1 = 0;
         y2 = yy + range - 1;
         if (y2 > (s->height - 16))
             y2 = s->height - 16;

         *mx_ptr = (random()%(2 * (x2 - x1 + 1))) + 2 * (x1 - xx);
         *my_ptr = (random()%(2 * (y2 - y1 + 1))) + 2 * (y1 - yy);
         return 0;
    } else {
        *mx_ptr = 0;
        *my_ptr = 0;
        return 1;
    }
}

#endif
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -