postprocessing_mmx.c

来自「VLC媒体播放程序」· C语言 代码 · 共 890 行 · 第 1/3 页

C
890
字号
{
    /*
     * Smooth the eight samples p_v[1]..p_v[8] in place with a 9-tap low-pass
     * filter whose weights sum to 16 (result is ">> 4", no rounding term).
     * Samples that would fall outside p_v[1..8] are replaced by i_p0 on the
     * low side and by i_p9 on the high side.  The disabled scalar code after
     * the asm statement spells out the same weighted sums.
     */
    int i_p0, i_p9;

    /* Border values: use the outer sample p_v[0] (resp. p_v[9]) only when it
       differs from its inner neighbour by less than i_QP, otherwise repeat
       the inner neighbour p_v[1] (resp. p_v[8]). */
    i_p0 = PP_ABS( p_v[1] - p_v[0] ) < i_QP ? p_v[0] : p_v[1];
    i_p9 = PP_ABS( p_v[8] - p_v[9] ) < i_QP ? p_v[9] : p_v[8];

    /* Register usage
       mm0 = the 8 unmodified pixels p_v[1..8] (bytes)
       mm1 = scratch, four 16-bit words holding a sliding window:
             - first part, for the 4 first pixels
                 (v1) -> (p0) -> ... ( word )
                 (v2)    (v1)
                 (v3)    (v2)
                 (v4)    (v3)
             - common part shared by the first and the last pixels
                 (v2) -> (v3) -> ... ( word )
                 (v3)    (v4)
                 (v4)    (v5)
                 (v5)    (v6)
             - last part, for the 4 last pixels
                 (v5) -> (v6) -> ... ( word )
                 (v6)    (v7)
                 (v7)    (v8)
                 (v8)    (p9)
       mm2 = accumulator for the 4 first new pixels ( p_v[1..4] )
       mm3 = accumulator for the 4 last new pixels  ( p_v[5..8] )
       mm4 = unused
       mm5 = p0 (low word)
       mm6 = p9 << 48 (high word)
       mm7 = 0 (used to widen bytes to words)

       NOTE(review): the clobber list below names only "memory" although
       mm0-mm3 and mm5-mm7 are overwritten, and no emms is issued here;
       presumably the MMX state is cleared elsewhere - confirm. */
    __asm__ __volatile__ (
   "pxor        %%mm7,      %%mm7   \n"
   "movq        1(%0),      %%mm0   # get 8 pix             \n"
   "                                # unpack into mm1       \n"
   "movq        %%mm0,      %%mm1   \n"
   "punpcklbw   %%mm7,      %%mm1   \n"
   "                                # get p_0 and i_p9      \n"
   "movd        %1,         %%mm5   \n"
   "movd        %2,         %%mm6   \n"
   "psllq       $48,        %%mm6   \n"
   "                                \n"
   /* mm3 starts with 1*v[1..4], mm2 with 4*v[1..4] */
   "movq        %%mm1,      %%mm3   # p_v[5-8] = v[1-4] !!  \n"
   "movq        %%mm1,      %%mm2   \n"
   "psllw       $2,         %%mm2   # p_v[1-4] = 4*v[1-4]   \n"
   "                                \n"
   /* shift the window towards p0 one word at a time, feeding p0 in at the
      low word; the first two windows are added twice (weight 2) */
   "psllq       $16,        %%mm1   \n"
   "por         %%mm5,      %%mm1   # mm1 =( p0, v1, v2 ,v3)\n"
   "                                \n"
   "paddw       %%mm1,      %%mm2   \n"
   "paddw       %%mm1,      %%mm2   \n"
   "                                \n"
   "psllq       $16,        %%mm1   \n"
   "por         %%mm5,      %%mm1   # mm1 =( p0, p0, v1, v2)\n"
   "                                \n"
   "paddw       %%mm1,      %%mm2   \n"
   "paddw       %%mm1,      %%mm2   \n"
   "                                \n"
   "psllq       $16,        %%mm1   \n"
   "por         %%mm5,      %%mm1   # mm1 =( p0, p0, p0, v1)\n"
   "                                \n"
   "paddw       %%mm1,      %%mm2   \n"
   "                                \n"
   "psllq       $16,        %%mm1   \n"
   "por         %%mm5,      %%mm1   # mm1 =( p0, p0, p0, p0)\n"
   "                                \n"
   "paddw       %%mm1,      %%mm2   \n"
   "                                # Now last part a little borring\n"
   "                                # last part for mm2, beginig for mm3\n"
   /* windows taken from the middle of mm0 contribute to both accumulators */
   "movq        %%mm0,      %%mm1   \n"
   "psrlq       $8,         %%mm1   \n"
   "punpcklbw   %%mm7,      %%mm1   # mm1 =( v2, v3, v4, v5 )\n"
   "paddw       %%mm1,      %%mm2   \n"
   "paddw       %%mm1,      %%mm2   \n"
   "paddw       %%mm1,      %%mm3   \n"
   "                                \n"
   "movq        %%mm0,      %%mm1   \n"
   "psrlq       $16,        %%mm1   \n"
   "punpcklbw   %%mm7,      %%mm1   # mm1 =( v3, v4, v5, v6 )\n"
   "psllw       $1,         %%mm1   \n"
   "paddw       %%mm1,      %%mm2   \n"
   "paddw       %%mm1,      %%mm3   \n"
   "                                \n"
   "movq        %%mm0,      %%mm1   \n"
   "psrlq       $24,        %%mm1   \n"
   "punpcklbw   %%mm7,      %%mm1   # mm1 =( v4, v5, v6, v7)    \n"
   "paddw       %%mm1,      %%mm2   \n"
   "paddw       %%mm1,      %%mm3   \n"
   "paddw       %%mm1,      %%mm3   \n"
   "                                \n"
   "movq        %%mm0,      %%mm1   \n"
   "psrlq       $32,        %%mm1   \n"
   "punpcklbw   %%mm7,      %%mm1   # mm1 =( v5, v6, v7, v8)    \n"
   "paddw       %%mm1,      %%mm2   \n"
   "psllw       $2,         %%mm1   \n"
   "paddw       %%mm1,      %%mm3   \n"
   "                                # Now last part for last 4 pix \n"
   "                                # \n"
   /* shift the window towards p9 one word at a time, feeding p9 in at the
      high word; the first two windows are added twice (weight 2) */
   "movq        %%mm0,      %%mm1   \n"
   "punpckhbw   %%mm7,      %%mm1   # mm1 = ( v5, v6, v7, v8)      \n"
   "                                \n"
   "psrlq       $16,        %%mm1   \n"
   "por         %%mm6,      %%mm1   # mm1 =( v6, v7, v8, p9 )\n"
   "                                \n"
   "paddw       %%mm1,      %%mm3   \n"
   "paddw       %%mm1,      %%mm3   \n"
   "                                \n"
   "psrlq       $16,        %%mm1   \n"
   "por         %%mm6,      %%mm1   # mm1 =( v7, v8, p9, p9)\n"
   "                                \n"
   "paddw       %%mm1,      %%mm3   \n"
   "paddw       %%mm1,      %%mm3   \n"
   "                                \n"
   "psrlq       $16,        %%mm1   \n"
   "por         %%mm6,      %%mm1   # mm1 =( v8, p9, p9, p9 )\n"
   "                                \n"
   "paddw       %%mm1,      %%mm3   \n"
   "                                \n"
   "psrlq       $16,        %%mm1   \n"
   "por         %%mm6,      %%mm1   # mm1 =( p9, p9, p9, p9 )\n"
   "                                \n"
   "paddw       %%mm1,      %%mm3   \n"
   /* divide both accumulators by 16, pack the 8 words back to bytes and
      store them over p_v[1..8] */
   "psrlw       $4,         %%mm2   \n"
   "psrlw       $4,         %%mm3   \n"
   "packuswb    %%mm3,      %%mm2   \n"
   "movq        %%mm2,      1(%0)   \n"

    : : "r"(p_v), "r"(i_p0), "r"(i_p9) : "memory" );

    /* Disabled scalar reference: the same sums the MMX code above builds. */
#if 0
    for( i = 1; i < 9; i++ )
    {
        v[i] = p_v[i]; /* save 8 pix that will be modified */
    }

    p_v[1] = ( 6 * i_p0                        + 4 * v[1]
                + 2 *( v[2] + v[3]) + v[4] + v[5]) >> 4;

    p_v[2] = ( 4 * i_p0    + 2 * v[1]          + 4 * v[2]
                + 2 *( v[3] + v[4]) + v[5] + v[6]) >> 4;

    p_v[3] = ( 2 * i_p0    + 2 * (v[1] + v[2]) + 4 * v[3]
                + 2 *( v[4] + v[5]) + v[6] + v[7]) >> 4;

    p_v[4] = ( i_p0 + v[1] + 2 * (v[2] + v[3]) + 4 * v[4]
                + 2 *( v[5] + v[6]) + v[7] + v[8]) >> 4;

    p_v[5] = ( v[1] + v[2] + 2 * (v[3] + v[4]) + 4 * v[5]
                + 2 *( v[6] + v[7]) + v[8] + i_p9) >> 4;

    p_v[6] = ( v[2] + v[3] + 2 * (v[4] + v[5]) + 4 * v[6]
            + 2 *( v[7] + v[8]) + 2 * i_p9) >> 4;

    p_v[7] = ( v[3] + v[4] + 2 * (v[5] + v[6]) + 4 * v[7]
                + 2 * v[8] + 4 * i_p9) >> 4;

    p_v[8] = ( v[4] + v[5] + 2 * (v[6] + v[7]) + 4 * v[8]
                                    + 6 * i_p9) >> 4;
#endif
}

/*****************************************************************************/
/*---------------------------------------------------------------------------*/
/*                                                                           */
/*    ---------- filter Vertical lines so follow horizontal edges --------   */
/*                                                                           */
/*---------------------------------------------------------------------------*/
/*****************************************************************************/
/*
 * Deblock across every horizontal block boundary of a plane.
 *
 * For each boundary row y = 8, 16, ... (while y < i_height - 4) and for every
 * column x, the 10 vertically adjacent samples of rows y-5 .. y+4 are copied
 * into a contiguous buffer, filtered there, and samples 1..8 are written back.
 *
 *   p_plane      first sample of the plane, modified in place
 *   i_width      number of columns processed
 *   i_height     number of rows; bounds the boundary rows visited
 *   i_stride     distance in samples between two consecutive rows
 *   p_QP_store   table the per-position quantizer is read from
 *   i_QP_stride  row pitch of p_QP_store, in entries
 *   b_chroma     non-zero selects a coordinate shift of 5 instead of 4 when
 *                indexing p_QP_store
 *
 * NOTE(review): chroma uses a larger shift (5) than luma (4); confirm this
 * matches the granularity of p_QP_store for the chroma planes callers pass.
 */
void E_( pp_deblock_V )( uint8_t *p_plane,
                         int i_width, int i_height, int i_stride,
                         QT_STORE_T *p_QP_store, int i_QP_stride,
                         int b_chroma )
{
    int x, y, i;
    uint8_t *p_v;
    int i_QP_scale; /* use to do ( ? >> i_QP_scale ) */
    int i_QP;

    uint8_t i_v[10]; /* one column of 10 samples straddling the boundary */

    i_QP_scale = b_chroma ? 5 : 4 ;

    for( y = 8; y < i_height - 4; y += 8 )
    {
        /* p_v is the start of row y-5, so index 5 of the window is row y */
        p_v = p_plane + ( y - 5 )* i_stride;
        for( x = 0; x < i_width; x++ )
        {
            /* First gather the 10 vertical pixels so the filters can address
               them without i_stride */
            for( i = 0; i < 10; i++ )
            {
                i_v[i] = p_v[i*i_stride + x];
            }

            i_QP = p_QP_store[(y>>i_QP_scale)*i_QP_stride+
                                (x>>i_QP_scale)];
            /* XXX QP is for v5 */
            if( pp_deblock_isDC_mode( i_v ) )
            {
                /* DC mode filters only when the min/max test also passes;
                   otherwise the column is left untouched */
                if( pp_deblock_isMinMaxOk( i_v, i_QP ) )
                {
                    pp_deblock_DCMode( i_v, i_QP );
                }
            }
            else
            {
                /* NOTE(review): i_v is a contiguous copy, yet the plane's
                   i_stride is passed along; confirm pp_deblock_DefaultMode
                   does not step through its buffer with that stride. */
                pp_deblock_DefaultMode( i_v, i_stride, i_QP );
            }

            /* Copy back, XXX only 1-8 were modified */
            for( i = 1; i < 9; i++ )
            {
                p_v[i*i_stride + x] = i_v[i];
            }
        }
    }

    return;
}

/*****************************************************************************/
/*---------------------------------------------------------------------------*/
/*                                                                           */
/*     --------- filter Horizontal lines so follow vertical edges --------   */
/*                                                                           */
/*---------------------------------------------------------------------------*/
/*****************************************************************************/
/*
 * Deblock across every vertical block boundary of a plane.
 *
 * For each row y and each boundary column x = 8, 16, ... (while
 * x < i_width - 4) the filters are applied directly on the plane, starting
 * at the sample 5 positions before the boundary.
 *
 *   p_plane      first sample of the plane, modified in place
 *   i_width      number of columns; bounds the boundary columns visited
 *   i_height     number of rows processed
 *   i_stride     distance in samples between two consecutive rows
 *   p_QP_store   table the per-position quantizer is read from
 *   i_QP_stride  row pitch of p_QP_store, in entries
 *   b_chroma     non-zero selects a coordinate shift of 5 instead of 4 when
 *                indexing p_QP_store
 *
 * NOTE(review): chroma uses a larger shift (5) than luma (4); confirm this
 * matches the granularity of p_QP_store for the chroma planes callers pass.
 */
void E_( pp_deblock_H )( uint8_t *p_plane,
                         int i_width, int i_height, int i_stride,
                         QT_STORE_T *p_QP_store, int i_QP_stride,
                         int b_chroma )
{
    int x, y;
    uint8_t *p_v;
    int i_QP_scale; /* shift applied to x and y when indexing p_QP_store */
    int i_QP;

    i_QP_scale = b_chroma ? 5 : 4 ;

    for( y = 0; y < i_height; y++ )
    {
        /* Row start moved back by 5; only p_v + x with x >= 8 is handed to
           the filters */
        p_v = p_plane + y * i_stride - 5;
        for( x = 8; x < i_width - 4; x += 8 )
        {
            /* p_v + x points 5 pixels before a block boundary */
            /* XXX QP is for v5 */
            i_QP = p_QP_store[(y>>i_QP_scale)*i_QP_stride+
                                 (x>>i_QP_scale)];
            if( pp_deblock_isDC_mode( p_v + x ) )
            {
                /* DC mode filters only when the min/max test also passes */
                if( pp_deblock_isMinMaxOk( p_v+ x, i_QP ) )
                {
                    pp_deblock_DCMode( p_v+x, i_QP );
                }
            }
            else
            {
                pp_deblock_DefaultMode( p_v+x, i_stride, i_QP );
            }
        }
    }

    return;
}

/*****************************************************************************
 *
 * Internal functions common to pp_Dering_Y pp_Dering_C
 *
 *****************************************************************************/
static inline void pp_dering_MinMax( uint8_t *p_block, int i_stride,
                                     int *pi_min, int *pi_max )
{
    int x, y;
    int i_min, i_max;

#if 0
    /* First we will extract min/max for each pix on vertical line
       and next extract global min/max */
    __asm__ __volatile__(
    "leal   (%2,%3),        %%eax       \n"
    "movq   (%2),           %%mm0 #load line \n"
    "movq   %%mm0,          %%mm1       \n"
    MMXEXT_GET_LMINMAX( (%%eax),        %%mm0, %%mm1, %%mm7 )

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?