postprocessing_mmx.c

来自「VLC媒体播放程序」· C语言 代码 · 共 890 行 · 第 1/3 页

C
890
字号
/*****************************************************************************
 * postprocessing_mmx.c: Post Processing library in MMX
 *****************************************************************************
 * Copyright (C) 2001 VideoLAN
 * $Id: postprocessing_mmx.c,v 1.3 2002/12/18 14:17:10 sam Exp $
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
 *****************************************************************************/

#include <vlc/vlc.h> /* only use uint8_t, uint32_t .... */

#include "postprocessing.h"
#include "postprocessing_common.h"

/*****************************************************************************
 *
 * Internal functions common to pp_Deblock_V and pp_Deblock_H
 *
 *****************************************************************************/

/*****************************************************************************
 * MMX stuff
 *****************************************************************************/

/* XXX PP_THR1 needs to be defined as ULL */

/* Same technique as in the idct code: each constant is given a fixed
 * assembler name so that the inline asm below can reference it by symbol.
 *
 * The constants are referenced ONLY from inside asm strings, which the
 * compiler does not parse.  They are therefore marked `used` (not merely
 * `unused`, which only silences a warning) so that the optimizer is not
 * allowed to discard them and leave the asm with an undefined symbol. */
#define UNUSED_LONGLONG( foo ) \
    static const unsigned long long foo __asm__ (#foo)  __attribute__((used))

/* to calculate isDC_mode for mmx: ( 127 - PP_THR1 ) replicated in 8 bytes */
UNUSED_LONGLONG( mmx_127_thr1 ) = ( ( 127ULL - PP_THR1 ) << 56 )|
                                  ( ( 127ULL - PP_THR1 ) << 48 )|
                                  ( ( 127ULL - PP_THR1 ) << 40 )|
                                  ( ( 127ULL - PP_THR1 ) << 32 )|
                                  ( ( 127ULL - PP_THR1 ) << 24 )|
                                  ( ( 127ULL - PP_THR1 ) << 16 )|
                                  ( ( 127ULL - PP_THR1 ) <<  8 )|
                                  ( ( 127ULL - PP_THR1 ) );

/* ( 127 - PP_2xTHR1 - 1 ) replicated in 8 bytes */
UNUSED_LONGLONG( mmx_127_2xthr1_1 ) = ( ( 127ULL - PP_2xTHR1 -1) << 56 )|
                                    ( ( 127ULL - PP_2xTHR1 -1 ) << 48 )|
                                    ( ( 127ULL - PP_2xTHR1 -1 ) << 40 )|
                                    ( ( 127ULL - PP_2xTHR1 -1 ) << 32 )|
                                    ( ( 127ULL - PP_2xTHR1 -1 ) << 24 )|
                                    ( ( 127ULL - PP_2xTHR1 -1 ) << 16 )|
                                    ( ( 127ULL - PP_2xTHR1 -1 ) <<  8 )|
                                    ( ( 127ULL - PP_2xTHR1 -1 ) );

/* Four signed 16-bit words, lowest first: ( 2, -5, 5, -2 ) */
UNUSED_LONGLONG( mmx_m2_5_m5_2 ) = 0xfffe0005fffb0002ULL;


#if 0
/* find min bytes from r and set it in r, t is destroyed */
#define MMXEXT_GET_PMIN( r, t ) \
   "movq      " #r ",     " #t "                                \n" \
   "psrlq       $8,       " #t "                                \n" \
   "pminub    " #t ",     " #r "                                \n" \
   "pshufw $0xf5, " #r ", " #t " #instead of shift with tmp reg \n" \
   "pminub    " #t ",     " #r "                                \n" \
   "pshufw $0xfe, " #r ", " #t "                                \n" \
   "pminub    " #t ",     " #r "                                \n"

/* find max bytes from r and set it in r, t is destroyed */
#define MMXEXT_GET_PMAX( r, t ) \
   "movq      " #r ",     " #t "                                \n" \
   "psrlq       $8,       " #t "                                \n" \
   "pmaxub    " #t ",     " #r "                                \n" \
   "pshufw $0xf5, " #r ", " #t "                                \n" \
   "pmaxub    " #t ",     " #r "                                \n" \
   "pshufw $0xfe, " #r ", " #t "                                \n" \
   "pmaxub    " #t ",     " #r "                                \n"

#define MMXEXT_GET_LMINMAX( s, m, M, t ) \
    "movq   " #s ",        " #t "   \n" \
    "pminub " #t ",        " #m "   \n" \
    "pmaxub " #t ",        " #M "   \n"

/* Some tips for MMX

  * |a-b| :
        d1 = a - b with unsigned saturate
        d2 = b - a  with ...
        |a-b| = d1 | d2
*/
#endif

/****************************************************************************
 * pp_deblock_isDC_mode : Check if we will use DC mode or Default mode
 ****************************************************************************
 * Use constant PP_THR1 and PP_THR2 ( PP_2xTHR1 )
 *
 * Called for each pixel on a boundary block when doing deblocking
 *  so it needs to be fast ...
 *
 * p_v: pixel line; bytes p_v[0] .. p_v[9] are read.
 *
 * Counts the neighbouring pairs ( p_v[i], p_v[i+1] ), i = 0 .. 8, that pass
 * the "nearly equal" threshold test, and returns 1 when that count is at
 * least PP_THR2, 0 otherwise.  The first 8 pairs are tested in MMX, the
 * ninth in plain C.
 *
 * NOTE(review): MMX registers mm0-mm2 are modified and no `emms` is issued
 * here; presumably the caller takes care of it - confirm.
 ****************************************************************************/
static inline int pp_deblock_isDC_mode( uint8_t *p_v )
{
    unsigned int i_eq_cnt;

    /* algo :
       x = v[i] - v[i+1] without signed saturation
        ( XXX check that this is not a problem, but we cannot use signed
        saturation because the pixel would be saturated :(
       so x within [-128, 127] and we have to test if it fits in [-M, M]
       we add 127-M with wrap around -> good values fit in [ 127-2*M, 127]
       and if x >= 127 - 2 * M ie x > 127 -2*M - 1 value is good

       pcmpgtb yields 0xff (-1) per matching byte; the three shift/add
       steps fold the 8 bytes into byte 0, which then holds -count.
       negl + andl $255 turn that into the positive count.
    */
    /* "memory" clobber: the asm reads the bytes behind p_v, which the
     * operand list alone ("r" = just the address) does not tell the
     * compiler. */
    __asm__ __volatile__ (
   "                                #* Do (v0-v1) to (v7-v8)            \n"
   "movq      (%1),         %%mm1   #  load v0->v7                      \n"
   "movq      1(%1),        %%mm2   #  load v1->v8                      \n"
   "psubb    %%mm2,         %%mm1   #  v[i]-v[i+1]                      \n"
   "paddb     mmx_127_thr1, %%mm1   #  + 127-THR1 with wrap             \n"
   "pcmpgtb   mmx_127_2xthr1_1, %%mm1 #  >  127 -2*thr1 - 1             \n"
   "movq      %%mm1,        %%mm0   #                                   \n"
   "psrlw     $8,           %%mm1   #                                   \n"
   "paddb     %%mm1,        %%mm0   #                                   \n"
   "                                #                                   \n"
   "movq      %%mm0,        %%mm1   #  Now sum to create eq_cnt         \n"
   "psrld     $16,          %%mm0   #                                   \n"
   "paddb     %%mm0,        %%mm1   #                                   \n"
   "                                #                                   \n"
   "movq      %%mm1,        %%mm0   #                                   \n"
   "psrlq     $32,          %%mm1   #                                   \n"
   "paddb     %%mm1,        %%mm0                                       \n"
   "movd      %%mm0,        %0      #                                   \n"
   "negl      %0                                                        \n"
   "andl      $255,         %0"
       : "=r"(i_eq_cnt) : "r" (p_v) : "memory" );

    /* last test, hey, 9 don't fit in MMX */
    if((  ( p_v[8] - p_v[9] + PP_THR1 )&0xffff )<= PP_2xTHR1 )
    {
         i_eq_cnt++;
    }

#if 0
    /* Reference C version of the whole test.
       algo :  if ( | v[i] -v[i+1] | <= PP_THR1 ) { i_eq_cnt++; } */
    i_eq_cnt = 0;

    for( i =0; i < 9; i++ )
    {
        if((  ( p_v[i] - p_v[i+1] + PP_THR1 )&0xffff )<= PP_2xTHR1 )
        {
            i_eq_cnt++;
        }
    }
#endif

    return( (i_eq_cnt >= PP_THR2 ) ? 1 : 0 );
}

/****************************************************************************
 * pp_deblock_isMinMaxOk : range test on the 8 inner pixels
 ****************************************************************************
 * p_v:  pixel line; bytes p_v[1] .. p_v[8] are read.
 * i_QP: quantizer value used as threshold.
 *
 * Returns 1 when ( max - min ) of p_v[1..8] is strictly below 2 * i_QP,
 * 0 otherwise.  Plain C; the MMXEXT variant is kept disabled below.
 ****************************************************************************/
static inline int pp_deblock_isMinMaxOk( uint8_t *p_v, int i_QP )
{
    int i_range;

#if 0
    __asm__ __volatile__ (
   "movq        1(%1),      %%mm0   # 8 bytes                   \n"
   "movq        %%mm0,      %%mm1                               \n"
    MMXEXT_GET_PMIN( %%mm0, %%mm7 )
    MMXEXT_GET_PMAX( %%mm1, %%mm7 )
   "psubd       %%mm0,      %%mm1   # max - min                 \n"
   "movd        %%mm1,      %0                                  \n"
   "andl        $255,       %0" : "=r"(i_range) : "r"(p_v) );
#endif
    int i_max, i_min;
    int i;

    i_min = i_max = p_v[1];
    for( i = 2; i < 9; i++ )
    {
        if( i_max < p_v[i] ) i_max = p_v[i];
        if( i_min > p_v[i] ) i_min = p_v[i];
    }
    i_range = i_max - i_min;

    return( i_range< 2*i_QP ? 1 : 0 );
}

/****************************************************************************
 * pp_deblock_DefaultMode : default-mode filter across the block edge
 ****************************************************************************
 * i_v:      pixel line; bytes i_v[1] .. i_v[8] are read, i_v[4] and i_v[5]
 *           (the two pixels on either side of the edge) may be modified.
 * i_stride: not used by this function.
 * i_QP:     quantizer value; the filter only acts when 0 < |a3x0| < 8*i_QP.
 *
 * NOTE(review): MMX registers mm0-mm2, mm6 and mm7 are modified and no
 * `emms` is issued here; presumably the caller takes care of it - confirm.
 ****************************************************************************/
static inline void pp_deblock_DefaultMode( uint8_t i_v[10], int i_stride,
                                      int i_QP )
{
    int d, i_delta;
    int a3x0, a3x0_, a3x1, a3x2;
    int b_neg;

    /* d = CLIP( 5(a3x0' - a3x0)//8, 0, (v4-v5)/2 ).d( abs(a3x0) < QP ) */

    /* First calculate a3x0: load v3..v6, widen to 16 bit, multiply-add by
     * ( 2, -5, 5, -2 ) and sum the two 32-bit partial results. */
    __asm__ __volatile__ (
   "pxor    %%mm7,  %%mm7           # mm7 = 0          \n"
   "movq    mmx_m2_5_m5_2, %%mm6    # mm6 =(2,-5,5,-2) \n"
   "movd    3(%1),  %%mm0           \n"
   "punpcklbw %%mm7,%%mm0           \n"
   "pmaddwd %%mm6,  %%mm0           \n"
   "movq    %%mm0,  %%mm1           \n"
   "psrlq   $32,    %%mm1           \n"
   "paddd   %%mm1,  %%mm0           \n"
   "movd    %%mm0,  %0" : "=r"(a3x0) :"r"(i_v) : "memory" );
#if 0
    a3x0 = 2 * ( i_v[3] - i_v[6] ) + 5 *( i_v[5] - i_v[4] );
#endif

    if( a3x0 < 0 )
    {
        b_neg = 1;
        a3x0  = -a3x0;
    }
    else
    {
        b_neg = 0;
    }
    /* XXX Now a3x0 is abs( a3x0 ) */
    if( ( a3x0 < 8 * i_QP )&&( a3x0 != 0 ) ) /* |a3x0| < 8*i_QP */
    {
        /* calculate a3x1 and a3x2.
         * mm6/mm7 are loaded again here: the compiler gives no guarantee
         * that register contents survive from one asm statement to the
         * next, so this statement must not rely on the previous one. */
        __asm__ __volatile__ (
       "pxor    %%mm7,  %%mm7           # mm7 = 0                   \n"
       "movq    mmx_m2_5_m5_2, %%mm6    # mm6 = ( 2, -5, 5, -2 )    \n"
       "movd    1(%2),  %%mm0           \n"
       "movd    5(%2),  %%mm2           \n"
       "punpcklbw %%mm7,%%mm0           \n"
       "punpcklbw %%mm7,%%mm2           \n"
       "pmaddwd %%mm6,  %%mm0           \n"
       "pmaddwd %%mm6,  %%mm2           \n"
       "movq    %%mm0,  %%mm1           \n"
       "psrlq   $32,    %%mm1           \n"
       "paddd   %%mm1,  %%mm0           # mm0 = a3x1    \n"
       "movd    %%mm0,  %0              \n"
       "movq    %%mm2,  %%mm1           \n"
       "psrlq   $32,    %%mm1           \n"
       "paddd   %%mm1,  %%mm2           # mm2 = a3x2    \n"
       "movd    %%mm2,  %1              \n"
        : "=r"(a3x1), "=r"(a3x2) : "r"(i_v) : "memory" );
#if 0
        a3x1 = 2 * ( i_v[1] - i_v[4] ) + 5 * ( i_v[3] - i_v[2] );
        a3x2 = 2 * ( i_v[5] - i_v[8] ) + 5 * ( i_v[7] - i_v[6] );
#endif

        if( a3x1 < 0) a3x1 = -a3x1; /* abs( a3x1 ) */
        if( a3x2 < 0) a3x2 = -a3x2; /* abs( a3x2 ) */

        a3x0_ = PP_MIN3( a3x0, a3x1, a3x2 );

        d = 5 *( a3x0 - a3x0_ ) / 8; /* always >= 0 */

        i_delta = ( i_v[4] - i_v[5] ) / 2;
        /* clip into [0, i_delta] or [i_delta, 0] */
        if( i_delta < 0 )
        {
            if( !b_neg ) /* since true d has sgn(d) = - sgn( a3x0 ) */
            {
                d = -d;
                if( d < i_delta ) d = i_delta;
                i_v[4] -= d;
                i_v[5] += d;
            }
        }
        else
        {
            if( b_neg )
            {
                if( d > i_delta ) d = i_delta;
                i_v[4] -= d;
                i_v[5] += d;
            }
        }
    }
}

static inline void pp_deblock_DCMode( uint8_t *p_v, /*  = int i_v[10] */
                                 int i_QP )

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?