⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mc.c

📁 图象压缩程序
💻 C
📖 第 1 页 / 共 3 页
字号:
/***************************************************************************** * mc.c: h264 encoder library (Motion Compensation) ***************************************************************************** * Copyright (C) 2003 Laurent Aimar * $Id: mc.c,v 1.1 2003/11/09 23:25:04 fenrir Exp $ * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA. *****************************************************************************/#include <stdlib.h>#include <stdio.h>#include <string.h>#include <stdint.h>#include "../mc.h"#include "../clip1.h"#include "mc.h"#define UNUSED_UINT64( foo ) \    static const uint64_t foo __asm__ (#foo)  __attribute__((unused))UNUSED_UINT64( x264_w0x10 ) = 0x0010001000100010ULL;static inline int x264_tapfilter( uint8_t *pix, int i_pix_next ){    return pix[-2*i_pix_next] - 5*pix[-1*i_pix_next] + 20*(pix[0] + pix[1*i_pix_next]) - 5*pix[ 2*i_pix_next] + pix[ 3*i_pix_next];}static inline int x264_tapfilter1( uint8_t *pix ){    return pix[-2] - 5*pix[-1] + 20*(pix[0] + pix[1]) - 5*pix[ 2] + pix[ 3];}static inline void pixel_avg_w4( uint8_t *dst,  int i_dst_stride,                                 uint8_t *src1, int i_src1_stride,                                 uint8_t *src2, int i_src2_stride,                                 int i_height ){    int x, y;    for( y = 0; y < i_height; y++ )    {        for( x = 0; x < 4; x++ )        {            dst[x] = ( src1[x] + src2[x] + 1 ) >> 1;        }        dst  += i_dst_stride;        src1 += i_src1_stride;        src2 += i_src2_stride;    }}static inline void pixel_avg_w8( uint8_t *dst,  int i_dst_stride,                                 uint8_t *src1, int i_src1_stride,                                 uint8_t *src2, int i_src2_stride,                                 int i_height ){    int y;    for( y = 0; y < i_height; y++ )    {        asm volatile(            "movq (%1), %%mm0\n"            "movq (%2), %%mm1\n"            "pavgb %%mm1, %%mm0\n"            "movq %%mm0, (%0)\n"            : : "r"(dst), "r"(src1), "r"(src2)            );        dst  += i_dst_stride;        src1 += i_src1_stride;        src2 += i_src2_stride;    }}static inline void pixel_avg_w16( uint8_t *dst,  int i_dst_stride,                                  uint8_t *src1, int i_src1_stride,                                  uint8_t *src2, int i_src2_stride,                                  int i_height ){    int y;    for( y = 0; y < i_height; y++ )    {        asm volatile(            "movq (%1), %%mm0\n"            "movq 8(%1), %%mm2\n"            "movq (%2), %%mm1\n"            "movq 8(%2), %%mm3\n"            "pavgb %%mm1, %%mm0\n"            "movq %%mm0, (%0)\n"            "pavgb %%mm3, %%mm2\n"            "movq %%mm2, 8(%0)\n"            : : "r"(dst), "r"(src1), "r"(src2)            );        dst  += i_dst_stride;        src1 += i_src1_stride;        src2 += i_src2_stride;    }}typedef void (*pf_mc_t)(uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height );/***************************************************************************** * MC with width == 4 (height <= 8) *****************************************************************************/static void mc_copy_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){    int y;    for( y = 0; y < i_height; y++ )    {        memcpy( dst, src, 4 );        src += i_src_stride;        dst += i_dst_stride;    }}static inline void mc_hh_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){    int x, y;    for( y = 0; y < i_height; y++ )    {        for( x = 0; x < 4; x++ )        {            dst[x] = x264_mc_clip1( ( x264_tapfilter1( &src[x] ) + 16 ) >> 5 );        }        src += i_src_stride;        dst += i_dst_stride;    }}static inline void mc_hv_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){    int y;    src -= 2 * i_src_stride;    asm volatile(        "pxor %%mm7,        %%mm7\n"        "movq x264_w0x10,   %%mm4\n" : : );    for( y = 0; y < i_height; y++ )    {        asm volatile(            "leal   (%0, %1),   %%eax\n"            "movd       (%0),   %%mm0\n"    /* load pix-2 */            "punpcklbw  %%mm7,  %%mm0\n"            "movd       (%%eax),%%mm1\n"    /* load pix-1 */            "punpcklbw  %%mm7,  %%mm1\n"            "psubw      %%mm1,  %%mm0\n"            "psllw      $2,     %%mm1\n"            "psubw      %%mm1,  %%mm0\n"            "movd       (%%eax,%1),%%mm1\n"  /* load pix */            "punpcklbw  %%mm7,  %%mm1\n"            "psllw      $2,     %%mm1\n"            "paddw      %%mm1,  %%mm0\n"            "psllw      $2,     %%mm1\n"            "paddw      %%mm1,  %%mm0\n"            "movd       (%%eax,%1,2),%%mm1\n"  /* load pix+1 */            "punpcklbw  %%mm7,  %%mm1\n"            "psllw      $2,     %%mm1\n"            "paddw      %%mm1,  %%mm0\n"            "psllw      $2,     %%mm1\n"            "paddw      %%mm1,  %%mm0\n"            "movd       (%0,%1,4),%%mm1\n"  /* load pix+2 */            "punpcklbw  %%mm7,  %%mm1\n"            "psubw      %%mm1,  %%mm0\n"            "psllw      $2,     %%mm1\n"            "psubw      %%mm1,  %%mm0\n"            "movd       (%%eax,%1,4),%%mm1\n"  /* load pix+3 */            "punpcklbw  %%mm7,  %%mm1\n"            "paddw      %%mm1,  %%mm0\n"            "paddw      %%mm4,  %%mm0\n"            "psraw      $5,     %%mm0\n"            "packuswb   %%mm7,  %%mm0\n"            "movd       %%mm0,  (%2)\n"            : : "r"(src), "r"(i_src_stride), "r"(dst) : "%eax" );        src += i_src_stride;        dst += i_dst_stride;    }}static inline void mc_hc_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){#if 0    uint8_t *out;    uint8_t *pix;    int x, y;    for( x = 0; x < 4; x++ )    {        int tap[6];        pix = &src[x];        out = &dst[x];        tap[0] = x264_tapfilter1( &pix[-2*i_src_stride] );        tap[1] = x264_tapfilter1( &pix[-1*i_src_stride] );        tap[2] = x264_tapfilter1( &pix[ 0*i_src_stride] );        tap[3] = x264_tapfilter1( &pix[ 1*i_src_stride] );        tap[4] = x264_tapfilter1( &pix[ 2*i_src_stride] );        for( y = 0; y < i_height; y++ )        {            tap[5] = x264_tapfilter1( &pix[ 3*i_src_stride] );            *out = x264_mc_clip1( ( tap[0] - 5*tap[1] + 20 * tap[2] + 20 * tap[3] -5*tap[4] + tap[5] + 512 ) >> 10 );            /* Next line */            pix += i_src_stride;            out += i_dst_stride;            tap[0] = tap[1];            tap[1] = tap[2];            tap[2] = tap[3];            tap[3] = tap[4];            tap[4] = tap[5];        }    }#else    int i, x, y;    for( y = 0; y < i_height; y++ )    {        int16_t tap[5+4];        for( i = 0; i < 5+4; i++ )        {            tap[i] = x264_tapfilter( &src[-2+i], i_src_stride );        }        for( x = 0; x < 4; x++ )        {            dst[x] = x264_mc_clip1( ( tap[0+x] - 5*tap[1+x] + 20 * tap[2+x] + 20 * tap[3+x] -5*tap[4+x] + tap[5+x] + 512 ) >> 10 );        }        src += i_src_stride;        dst += i_dst_stride;    }#endif}/* mc I+H */static void mc_xy10_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){    uint8_t tmp[4*8];    mc_hh_w4( src, i_src_stride, tmp, 4, i_height );    pixel_avg_w4( dst, i_dst_stride, src, i_src_stride, tmp, 4, i_height );}static void mc_xy30_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){    uint8_t tmp[4*8];    mc_hh_w4( src, i_src_stride, tmp, 4, i_height );    pixel_avg_w4( dst, i_dst_stride, src+1, i_src_stride, tmp, 4, i_height );}/* mc I+V */static void mc_xy01_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){    uint8_t tmp[4*8];    mc_hv_w4( src, i_src_stride, tmp, 4, i_height );    pixel_avg_w4( dst, i_dst_stride, src, i_src_stride, tmp, 4, i_height );}static void mc_xy03_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){    uint8_t tmp[4*8];    mc_hv_w4( src, i_src_stride, tmp, 4, i_height );    pixel_avg_w4( dst, i_dst_stride, src+i_src_stride, i_src_stride, tmp, 4, i_height );}/* H+V */static void mc_xy11_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){    uint8_t tmp1[4*8];    uint8_t tmp2[4*8];    mc_hv_w4( src, i_src_stride, tmp1, 4, i_height );    mc_hh_w4( src, i_src_stride, tmp2, 4, i_height );    pixel_avg_w4( dst, i_dst_stride, tmp1, 4, tmp2, 4, i_height );}static void mc_xy31_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){    uint8_t tmp1[4*8];    uint8_t tmp2[4*8];    mc_hv_w4( src+1, i_src_stride, tmp1, 4, i_height );    mc_hh_w4( src,   i_src_stride, tmp2, 4, i_height );    pixel_avg_w4( dst, i_dst_stride, tmp1, 4, tmp2, 4, i_height );}static void mc_xy13_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){    uint8_t tmp1[4*8];    uint8_t tmp2[4*8];    mc_hv_w4( src,              i_src_stride, tmp1, 4, i_height );    mc_hh_w4( src+i_src_stride, i_src_stride, tmp2, 4, i_height );    pixel_avg_w4( dst, i_dst_stride, tmp1, 4, tmp2, 4, i_height );}static void mc_xy33_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){    uint8_t tmp1[4*8];    uint8_t tmp2[4*8];    mc_hv_w4( src+1,            i_src_stride, tmp1, 4, i_height );    mc_hh_w4( src+i_src_stride, i_src_stride, tmp2, 4, i_height );    pixel_avg_w4( dst, i_dst_stride, tmp1, 4, tmp2, 4, i_height );}static void mc_xy21_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){    uint8_t tmp1[4*8];    uint8_t tmp2[4*8];    mc_hc_w4( src, i_src_stride, tmp1, 4, i_height );    mc_hh_w4( src, i_src_stride, tmp2, 4, i_height );    pixel_avg_w4( dst, i_dst_stride, tmp1, 4, tmp2, 4, i_height );}static void mc_xy12_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){    uint8_t tmp1[4*8];    uint8_t tmp2[4*8];    mc_hc_w4( src, i_src_stride, tmp1, 4, i_height );    mc_hv_w4( src, i_src_stride, tmp2, 4, i_height );    pixel_avg_w4( dst, i_dst_stride, tmp1, 4, tmp2, 4, i_height );}static void mc_xy32_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){    uint8_t tmp1[4*8];    uint8_t tmp2[4*8];    mc_hc_w4( src,   i_src_stride, tmp1, 4, i_height );    mc_hv_w4( src+1, i_src_stride, tmp2, 4, i_height );    pixel_avg_w4( dst, i_dst_stride, tmp1, 4, tmp2, 4, i_height );}static void mc_xy23_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){    uint8_t tmp1[4*8];    uint8_t tmp2[4*8];    mc_hc_w4( src,              i_src_stride, tmp1, 4, i_height );    mc_hh_w4( src+i_src_stride, i_src_stride, tmp2, 4, i_height );    pixel_avg_w4( dst, i_dst_stride, tmp1, 4, tmp2, 4, i_height );}/***************************************************************************** * MC with width == 8 (height <= 16) *****************************************************************************/static void mc_copy_w8( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){    int y;    for( y = 0; y < i_height; y++ )    {        memcpy( dst, src, 8 );        src += i_src_stride;        dst += i_dst_stride;    }}static inline void mc_hh_w8( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){    int x, y;    for( y = 0; y < i_height; y++ )    {        for( x = 0; x < 8; x++ )        {            dst[x] = x264_mc_clip1( ( x264_tapfilter1( &src[x] ) + 16 ) >> 5 );        }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -