📄 me.c.svn-base

📁 一个快速的H.264解码器
💻 SVN-BASE
📖 第 1 页 / 共 2 页
字号:
12 下一页
/***************************************************************************** * me.c: h264 encoder library (Motion Estimation) ***************************************************************************** * Copyright (C) 2003 Laurent Aimar * $Id: me.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $ * * Authors: Laurent Aimar <fenrir@via.ecp.fr> *          Loren Merritt <lorenm@u.washington.edu> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA. *****************************************************************************/#include <stdio.h>#include <string.h>#include "common/common.h"#include "me.h"/* presets selected from good points on the speed-vs-quality curve of several test videos * subpel_iters[i_subpel_refine] = { refine_hpel, refine_qpel, me_hpel, me_qpel } * where me_* are the number of EPZS iterations run on all candidate block types, * and refine_* are run only on the winner. 
*/

static const int subpel_iterations[][4] =
   {{1,0,0,0},
    {1,1,0,0},
    {0,1,1,0},
    {0,2,1,0},
    {0,2,1,1},
    {0,2,1,2},
    {0,0,2,2}};

static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters, int *p_halfpel_thresh, int b_refine_qpel );

/* All search macros below expand in place and rely on locals of the calling
 * function: h, m, i_pixel, p_fref, p_cost_mvx, p_cost_mvy, bcost, bmx, bmy,
 * and (depending on the macro) dir, omx, omy, i, mv_x_min/max, mv_y_min/max.
 * They expand to a bare { } block, so call sites follow them with ';'. */

/* COST_MV_INT: cost of the candidate (mx,my) =
 *     h->pixf.sad[i_pixel]( source block, reference at offset (mx,my) )
 *   + p_cost_mvx[mx<<2] + p_cost_mvy[my<<2].
 * If that beats bcost, bcost/bmx/bmy are updated; when bd is nonzero, dir is
 * additionally set to d.  mx and my are evaluated several times, so they
 * must be free of side effects. */
#define COST_MV_INT( mx, my, bd, d ) \
{ \
    int cost = h->pixf.sad[i_pixel]( m->p_fenc[0], m->i_stride[0], \
                   &p_fref[(my)*m->i_stride[0]+(mx)], m->i_stride[0] ) \
             + p_cost_mvx[ (mx)<<2 ] \
             + p_cost_mvy[ (my)<<2 ]; \
    if( cost < bcost ) \
    { \
        bcost = cost; \
        bmx = (mx); \
        bmy = (my); \
        if( (bd) ) \
            dir = (d); \
    } \
}

/* Candidate test that leaves dir alone. */
#define COST_MV( mx, my )         COST_MV_INT( mx, my, 0, 0 )
/* Candidate test that records d in dir when the candidate wins. */
#define COST_MV_DIR( mx, my, d )  COST_MV_INT( mx, my, 1, d )

/* COST_MV_PDE: same acceptance rule as COST_MV, but goes through
 * h->pixf.sad_pde, which receives the remaining budget
 * (bcost minus the two mv costs) as an extra argument.
 * NOTE(review): presumably sad_pde uses that bound to stop early -- confirm
 * against its implementation. */
#define COST_MV_PDE( mx, my ) \
{ \
    int cost = h->pixf.sad_pde[i_pixel]( m->p_fenc[0], m->i_stride[0], \
                   &p_fref[(my)*m->i_stride[0]+(mx)], m->i_stride[0], \
                   bcost - p_cost_mvx[ (mx)<<2 ] - p_cost_mvy[ (my)<<2 ] ); \
    if( cost < bcost - p_cost_mvx[ (mx)<<2 ] - p_cost_mvy[ (my)<<2 ] ) \
    { \
        bcost = cost + p_cost_mvx[ (mx)<<2 ] + p_cost_mvy[ (my)<<2 ]; \
        bmx = (mx); \
        bmy = (my); \
    } \
}

/* Re-centre (omx,omy) on (mx,my) and test its 4 direct neighbours. */
#define DIA1_ITER( mx, my )\
    {\
        omx = (mx); omy = (my);\
        COST_MV( omx  , omy-1 );/*  1  */\
        COST_MV( omx  , omy+1 );/* 101 */\
        COST_MV( omx-1, omy   );/*  1  */\
        COST_MV( omx+1, omy   );\
    }

/* Radius-2 diamond around the current (omx,omy); the centre is not moved. */
#define DIA2 \
    {\
        COST_MV( omx  , omy-2 );\
        COST_MV( omx-1, omy-1 );/*   1   */\
        COST_MV( omx+1, omy-1 );/*  1 1  */\
        COST_MV( omx-2, omy   );/* 1 0 1 */\
        COST_MV( omx+2, omy   );/*  1 1  */\
        COST_MV( omx-1, omy+1 );/*   1   */\
        COST_MV( omx+1, omy+1 );\
        COST_MV( omx  , omy+2 );\
    }

/* Octagon of 8 points around the current (omx,omy); the centre is not moved. */
#define OCT2 \
    {\
        COST_MV( omx-1, omy-2 );\
        COST_MV( omx+1, omy-2 );/*  1 1  */\
        COST_MV( omx-2, omy-1 );/* 1   1 */\
        COST_MV( omx+2, omy-1 );/*   0   */\
        COST_MV( omx-2, omy+1 );/* 1   1 */\
        COST_MV( omx+2, omy+1 );/*  1 1  */\
        COST_MV( omx-1, omy+2 );\
        COST_MV( omx+1, omy+2 );\
    }

/* Cross search around (omx,omy): offsets start, start+2, ... below x_max are
 * tried left and right, then the same below y_max up and down.  Each
 * candidate is tested only if it lies inside [mv_x_min,mv_x_max] /
 * [mv_y_min,mv_y_max].  Uses the caller's loop variable i. */
#define CROSS( start, x_max, y_max ) \
    { \
        for( i = (start); i < (x_max); i+=2 ) \
        { \
            if( omx + i <= mv_x_max ) \
                COST_MV( omx + i, omy ); \
            if( omx - i >= mv_x_min ) \
                COST_MV( omx - i, omy ); \
        } \
        for( i = (start); i < (y_max); i+=2 ) \
        { \
            if( omy + i <= mv_y_max ) \
                COST_MV( omx, omy + i ); \
            if( omy - i >= mv_y_min ) \
                COST_MV( omx, omy - i ); \
        } \
    }

/* x264_me_search_ref:
 *   Motion search for the block described by m against m->p_fref[0], using
 *   the method selected by h->mb.i_me_method (diamond, hexagon or UMH).
 *
 *   h    encoder context: search method, h->param.analyse.i_me_range,
 *        the mv limits (h->mb.mv_{min,max}_{fpel,spel}) and the SAD functions.
 *   m    reads i_pixel, p_fenc, p_fref, i_stride, p_cost_mv and mvp.
 *   mvc  i_mvc additional candidate vectors in the same units as m->mvp;
 *        each is rounded with (v+2)>>2 and clipped before being tested.
 *   p_halfpel_thresh
 *        not referenced by the statements visible below.
 *
 *   The best candidate so far is tracked in bmx/bmy with cost bcost.
 *   NOTE(review): how the result is written back to m is not visible in the
 *   statements below -- confirm against the rest of the function. */
void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_halfpel_thresh )
{
    const int i_pixel = m->i_pixel;
    int i_me_range = h->param.analyse.i_me_range;
    int bmx, bmy, bcost;
    int omx, omy, pmx, pmy;
    uint8_t *p_fref = m->p_fref[0];
    int i, j;
    int dir;

    int mv_x_min = h->mb.mv_min_fpel[0];
    int mv_y_min = h->mb.mv_min_fpel[1];
    int mv_x_max = h->mb.mv_max_fpel[0];
    int mv_y_max = h->mb.mv_max_fpel[1];

    /* cost tables are biased by the predictor, so that indexing them with a
     * candidate (mx<<2) looks up the entry for candidate minus mvp */
    const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
    const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];

    if( h->mb.i_me_method == X264_ME_UMH )
    {
        /* clamp mvp to inside frame+padding, so that we don't have to check it each iteration */
        p_cost_mvx = m->p_cost_mv - x264_clip3( m->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
        p_cost_mvy = m->p_cost_mv - x264_clip3( m->mvp[1], h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] );
    }

    /* start from the predictor, rounded and clipped to the allowed range */
    bmx = pmx = x264_clip3( ( m->mvp[0] + 2 ) >> 2, mv_x_min, mv_x_max );
    bmy = pmy = x264_clip3( ( m->mvp[1] + 2 ) >> 2, mv_y_min, mv_y_max );
    bcost = COST_MAX;
    COST_MV( pmx, pmy );
    /* I don't know why this helps */
    bcost -= p_cost_mvx[ bmx<<2 ] + p_cost_mvy[ bmy<<2 ];

    /* try extra predictors if provided */
    for( i = 0; i < i_mvc; i++ )
    {
        const int mx = x264_clip3( ( mvc[i][0] + 2 ) >> 2, mv_x_min, mv_x_max );
        const int my = x264_clip3( ( mvc[i][1] + 2 ) >> 2, mv_y_min, mv_y_max );
        if( mx != bmx || my != bmy )
            COST_MV( mx, my );
    }

    COST_MV( 0, 0 );

    /* the pattern searches below run with limits widened by 8 on every side.
     * NOTE(review): presumably covered by reference-frame padding -- confirm. */
    mv_x_max += 8;
    mv_y_max += 8;
    mv_x_min -= 8;
    mv_y_min -= 8;

    switch( h->mb.i_me_method )
    {
    case X264_ME_DIA:
        /* diamond search, radius 1 */
        for( i = 0; i < i_me_range; i++ )
        {
            DIA1_ITER( bmx, bmy );
            if( bmx == omx && bmy == omy )
                break;
        }
        break;

    case X264_ME_HEX:
me_hex2:
        /* hexagon search, radius 2 */
#if 0
        for( i = 0; i < i_me_range/2; i++ )
        {
            omx = bmx; omy = bmy;
            COST_MV( omx-2, omy   );
            COST_MV( omx-1, omy+2 );
            COST_MV( omx+1, omy+2 );
            COST_MV( omx+2, omy   );
            COST_MV( omx+1, omy-2 );
            COST_MV( omx-1, omy-2 );
            if( bmx == omx && bmy == omy )
                break;
        }
#else
        /* equivalent to the above, but eliminates duplicate candidates */
        dir = -1;
        omx = bmx; omy = bmy;
        COST_MV_DIR( omx-2, omy,   0 );
        COST_MV_DIR( omx-1, omy+2, 1 );
        COST_MV_DIR( omx+1, omy+2, 2 );
        COST_MV_DIR( omx+2, omy,   3 );
        COST_MV_DIR( omx+1, omy-2, 4 );
        COST_MV_DIR( omx-1, omy-2, 5 );
        if( dir != -1 )
        {
            for( i = 1; i < i_me_range/2; i++ )
            {
                /* dir can be -1..6 after an update (odir-1 .. odir+1);
                 * mod6 folds it back into 0..5, and hex2 repeats its first two
                 * entries so that odir+2 stays in bounds. */
                static const int hex2[8][2] = {{-1,-2}, {-2,0}, {-1,2}, {1,2}, {2,0}, {1,-2}, {-1,-2}, {-2,0}};
                static const int mod6[8] = {5,0,1,2,3,4,5,0};
                const int odir = mod6[dir+1];
                omx = bmx; omy = bmy;
                COST_MV_DIR( omx + hex2[odir+0][0], omy + hex2[odir+0][1], odir-1 );
                COST_MV_DIR( omx + hex2[odir+1][0], omy + hex2[odir+1][1], odir   );
                COST_MV_DIR( omx + hex2[odir+2][0], omy + hex2[odir+2][1], odir+1 );
                if( bmx == omx && bmy == omy )
                    break;
            }
        }
#endif
        /* square refine */
        DIA1_ITER( bmx, bmy );
        COST_MV( omx-1, omy-1 );
        COST_MV( omx-1, omy+1 );
        COST_MV( omx+1, omy-1 );
        COST_MV( omx+1, omy+1 );
        break;

    case X264_ME_UMH:
        {
            /* Uneven-cross Multi-Hexagon-grid Search
             * as in JM, except with different early termination */

            /* per-i_pixel right shift applied to the SAD thresholds below */
            static const int x264_pixel_size_shift[7] = { 0, 1, 1, 2, 3, 3, 4 };

            int ucost1, ucost2;
            int cross_start = 1;

            /* refine predictors */
            ucost1 = bcost;
            DIA1_ITER( pmx, pmy );
            if( pmx || pmy )
                DIA1_ITER( 0, 0 );

            /* 4x4 blocks skip the rest of UMH and use the hexagon search */
            if(i_pixel == PIXEL_4x4)
                goto me_hex2;

            ucost2 = bcost;
            if( (bmx || bmy) && (bmx!=pmx || bmy!=pmy) )
                DIA1_ITER( bmx, bmy );
            if( bcost == ucost2 )
                cross_start = 3;
            omx = bmx; omy = bmy;

            /* early termination */
#define SAD_THRESH(v) ( bcost < ( (v) >> x264_pixel_size_shift[i_pixel] ) )
            if( bcost == ucost2 && SAD_THRESH(2000) )
            {
                DIA2;
                if( bcost == ucost1 && SAD_THRESH(500) )
                    break;
                if( bcost == ucost2 )
                {
                    int range = (i_me_range>>1) | 1;
                    CROSS( 3, range, range );
                    OCT2;
                    if( bcost == ucost2 )
                        break;
                    /* the cross up to 'range' is done; resume beyond it */
                    cross_start = range + 2;
                }
            }

            /* adaptive search range */
            if( i_mvc )
            {
                /* range multipliers based on casual inspection of some statistics of
                 * average distance between current predictor and final mv found by ESA.
                 * these have not been tuned much by actual encoding. */
                static const int range_mul[4][4] =
                {
                    { 3, 3, 4, 4 },
                    { 3, 4, 4, 4 },
                    { 4, 4, 4, 5 },
                    { 4, 4, 5, 6 },
                };
                int mvd;
                int sad_ctx, mvd_ctx;

                if( i_mvc == 1 )
                {
                    if( i_pixel == PIXEL_16x16 )
                        /* mvc is probably the same as mvp, so the difference isn't meaningful.
                         * but prediction usually isn't too bad, so just use medium range */
                        mvd = 25;
                    else
                        mvd = abs( m->mvp[0] - mvc[0][0] )
                            + abs( m->mvp[1] - mvc[0][1] );
                }
                else
                {
                    /* calculate the degree of agreement between predictors. */
                    /* in 16x16, mvc includes all the neighbors used to make mvp,
                     * so don't count mvp separately. */
                    int i_denom = i_mvc - 1;
                    mvd = 0;
                    if( i_pixel != PIXEL_16x16 )
                    {
                        mvd = abs( m->mvp[0] - mvc[0][0] )
                            + abs( m->mvp[1] - mvc[0][1] );
                        i_denom++;
                    }
                    for( i = 0; i < i_mvc-1; i++ )
                        mvd += abs( mvc[i][0] - mvc[i+1][0] )
                             + abs( mvc[i][1] - mvc[i+1][1] );
                    mvd /= i_denom; //FIXME idiv
                }

                sad_ctx = SAD_THRESH(1000) ? 0
                        : SAD_THRESH(2000) ? 1
                        : SAD_THRESH(4000) ? 2 : 3;
                mvd_ctx = mvd < 10 ? 0
                        : mvd < 20 ? 1
                        : mvd < 40 ? 2 : 3;

                /* scale the range by 3/4 .. 6/4 depending on both contexts */
                i_me_range = i_me_range * range_mul[mvd_ctx][sad_ctx] / 4;
            }

            /* FIXME if the above DIA2/OCT2/CROSS found a new mv, it has not updated omx/omy.
             * we are still centered on the same place as the DIA2. is this desirable? */
            CROSS( cross_start, i_me_range, i_me_range/2 );

            /* 5x5 ESA */
            omx = bmx; omy = bmy;
            /* starting at i = 4 skips the first four table entries (the direct neighbours) */
            for( i = (bcost == ucost2) ? 4 : 0; i < 24; i++ )
            {
                static const int square2[24][2] = {
                    { 1, 0}, { 0, 1}, {-1, 0}, { 0,-1},
                    { 1, 1}, {-1, 1}, {-1,-1}, { 1,-1},
                    { 2,-1}, { 2, 0}, { 2, 1}, { 2, 2},
                    { 1, 2}, { 0, 2}, {-1, 2}, {-2, 2},
                    {-2, 1}, {-2, 0}, {-2,-1}, {-2,-2},
                    {-1,-2}, { 0,-2}, { 1,-2}, { 2,-2}
                };
                COST_MV( omx + square2[i][0], omy + square2[i][1] );
            }
            /* hexagon grid */
            omx = bmx; omy = bmy;
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -