speedy.c

来自「linux下的MPEG1」· C语言 代码 · 共 2,101 行 · 第 1/5 页

C
2,101
字号
/** * Copyright (c) 2002, 2003 Billy Biggs <vektor@dumbterm.net>. * Copyright (C) 2001 Matthew J. Marjanovic <maddog@mir.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *//** * Includes 420to422, 422to444 scaling filters from the MPEG2 reference * implementation.  The v12 source code indicates that they were written * by Cheung Auyeung <auyeung@mot.com>.  The file they were in was: * * store.c, picture output routines * Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. * * Disclaimer of Warranty * * These software programs are available to the user without any license fee or * royalty on an "as is" basis.  The MPEG Software Simulation Group disclaims * any and all warranties, whether express, implied, or statuary, including any * implied warranties or merchantability or of fitness for a particular * purpose.  In no event shall the copyright-holder be liable for any * incidental, punitive, or consequential damages of any kind whatsoever * arising from the use of these programs. * * This disclaimer of warranty extends to the user of these programs and user's * customers, employees, agents, transferees, successors, and assigns. * * The MPEG Software Simulation Group does not represent or warrant that the * programs furnished hereunder are free of infringement of any third-party * patents. * * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware, * are subject to royalty fees to patent holders.  Many of these patents are * general enough such that they are unavoidable regardless of implementation * design. * */#ifdef HAVE_CONFIG_H# include "config.h"#endif#include <stdio.h>#include <string.h>#if HAVE_INTTYPES_H#include <inttypes.h>#else#include <stdint.h>#endif#include "attributes.h"#include "xineutils.h"#include "speedtools.h"#include "speedy.h"/* Function pointer definitions. */void (*interpolate_packed422_scanline)( uint8_t *output, uint8_t *top,                                        uint8_t *bot, int width );void (*blit_colour_packed422_scanline)( uint8_t *output,                                        int width, int y, int cb, int cr );void (*blit_colour_packed4444_scanline)( uint8_t *output,                                         int width, int alpha, int luma,                                         int cb, int cr );void (*blit_packed422_scanline)( uint8_t *dest, const uint8_t *src, int width );void (*composite_packed4444_to_packed422_scanline)( uint8_t *output, uint8_t *input,                                                    uint8_t *foreground, int width );void (*composite_packed4444_alpha_to_packed422_scanline)( uint8_t *output,                                                          uint8_t *input,                                                          uint8_t *foreground,                                                          int width, int alpha );void (*composite_alphamask_to_packed4444_scanline)( uint8_t *output,                                                uint8_t *input,                                                uint8_t *mask, int width,                                                int textluma, int textcb,                                                int textcr );void (*composite_alphamask_alpha_to_packed4444_scanline)( uint8_t *output,                                                       uint8_t *input,                                                       uint8_t *mask, int width,                                                       int textluma, int textcb,                                                       int textcr, int alpha );void (*premultiply_packed4444_scanline)( uint8_t *output, uint8_t *input, int width );void (*blend_packed422_scanline)( uint8_t *output, uint8_t *src1,                                  uint8_t *src2, int width, int pos );void (*filter_luma_121_packed422_inplace_scanline)( uint8_t *data, int width );void (*filter_luma_14641_packed422_inplace_scanline)( uint8_t *data, int width );unsigned int (*diff_factor_packed422_scanline)( uint8_t *cur, uint8_t *old, int width );unsigned int (*comb_factor_packed422_scanline)( uint8_t *top, uint8_t *mid,                                                uint8_t *bot, int width );void (*kill_chroma_packed422_inplace_scanline)( uint8_t *data, int width );void (*mirror_packed422_inplace_scanline)( uint8_t *data, int width );void (*halfmirror_packed422_inplace_scanline)( uint8_t *data, int width );void *(*speedy_memcpy)( void *output, const void *input, size_t size );void (*diff_packed422_block8x8)( pulldown_metrics_t *m, uint8_t *old,                                 uint8_t *new, int os, int ns );void (*a8_subpix_blit_scanline)( uint8_t *output, uint8_t *input,                                 int lasta, int startpos, int width );void (*quarter_blit_vertical_packed422_scanline)( uint8_t *output, uint8_t *one,                                                  uint8_t *three, int width );void (*subpix_blit_vertical_packed422_scanline)( uint8_t *output, uint8_t *top,                                                 uint8_t *bot, int subpixpos, int width );void (*composite_bars_packed4444_scanline)( uint8_t *output,                                            uint8_t *background, int width,                                            int a, int luma, int cb, int cr,                                            int percentage );void (*packed444_to_nonpremultiplied_packed4444_scanline)( uint8_t *output,                                                            uint8_t *input,                                                           int width, int alpha );void (*aspect_adjust_packed4444_scanline)( uint8_t *output,                                           uint8_t *input,                                            int width,                                           double pixel_aspect );void (*packed444_to_packed422_scanline)( uint8_t *output,                                         uint8_t *input,                                         int width );void (*packed422_to_packed444_scanline)( uint8_t *output,                                         uint8_t *input,                                         int width );void (*packed422_to_packed444_rec601_scanline)( uint8_t *dest,                                                uint8_t *src,                                                int width );void (*packed444_to_rgb24_rec601_scanline)( uint8_t *output,                                            uint8_t *input,                                            int width );void (*rgb24_to_packed444_rec601_scanline)( uint8_t *output,                                            uint8_t *input,                                            int width );void (*rgba32_to_packed4444_rec601_scanline)( uint8_t *output,                                              uint8_t *input,                                              int width );void (*chroma_422_to_444_mpeg2_plane)( uint8_t *dst, uint8_t *src,                                       int width, int height );void (*chroma_420_to_422_mpeg2_plane)( uint8_t *dst, uint8_t *src,                                       int width, int height, int progressive );void (*invert_colour_packed422_inplace_scanline)( uint8_t *data, int width );void (*vfilter_chroma_121_packed422_scanline)( uint8_t *output, int width,                                               uint8_t *m, uint8_t *t, uint8_t *b );void (*vfilter_chroma_332_packed422_scanline)( uint8_t *output, int width,                                               uint8_t *m, uint8_t *t, uint8_t *b );/** * result = (1 - alpha)B + alpha*F *        =  B - alpha*B + alpha*F *        =  B + alpha*(F - B) */static inline __attribute__ ((always_inline,const)) int multiply_alpha( int a, int r ){    int temp;    temp = (r * a) + 0x80;    return ((temp + (temp >> 8)) >> 8);}static inline __attribute__ ((always_inline,const)) uint8_t clip255( int x ){    if( x > 255 ) {        return 255;    } else if( x < 0 ) {        return 0;    } else {        return x;    }}static unsigned long CombJaggieThreshold = 73;#if defined(ARCH_X86) || defined(ARCH_X86_64)static unsigned int comb_factor_packed422_scanline_mmx( uint8_t *top, uint8_t *mid,                                                        uint8_t *bot, int width ){    const mmx_t qwYMask = { 0x00ff00ff00ff00ffULL };    const mmx_t qwOnes = { 0x0001000100010001ULL };    mmx_t qwThreshold;    unsigned int temp1, temp2;    width /= 4;    qwThreshold.uw[ 0 ] = CombJaggieThreshold;    qwThreshold.uw[ 1 ] = CombJaggieThreshold;    qwThreshold.uw[ 2 ] = CombJaggieThreshold;    qwThreshold.uw[ 3 ] = CombJaggieThreshold;    movq_m2r( qwThreshold, mm0 );    movq_m2r( qwYMask, mm1 );    movq_m2r( qwOnes, mm2 );    pxor_r2r( mm7, mm7 );         /* mm7 = 0. */    while( width-- ) {        /* Load and keep just the luma. */        movq_m2r( *top, mm3 );        movq_m2r( *mid, mm4 );        movq_m2r( *bot, mm5 );        pand_r2r( mm1, mm3 );        pand_r2r( mm1, mm4 );        pand_r2r( mm1, mm5 );        /* Work out mm6 = (top - mid) * (bot - mid) - ( (top - mid)^2 >> 7 ) */        psrlw_i2r( 1, mm3 );        psrlw_i2r( 1, mm4 );        psrlw_i2r( 1, mm5 );        /* mm6 = (top - mid) */        movq_r2r( mm3, mm6 );        psubw_r2r( mm4, mm6 );        /* mm3 = (top - bot) */        psubw_r2r( mm5, mm3 );        /* mm5 = (bot - mid) */        psubw_r2r( mm4, mm5 );        /* mm6 = (top - mid) * (bot - mid) */        pmullw_r2r( mm5, mm6 );        /* mm3 = (top - bot)^2 >> 7 */        pmullw_r2r( mm3, mm3 );   /* mm3 = (top - bot)^2 */        psrlw_i2r( 7, mm3 );      /* mm3 = ((top - bot)^2 >> 7) */        /* mm6 is what we want. */        psubw_r2r( mm3, mm6 );        /* FF's if greater than qwTheshold */        pcmpgtw_r2r( mm0, mm6 );        /* Add to count if we are greater than threshold */        pand_r2r( mm2, mm6 );        paddw_r2r( mm6, mm7 );        top += 8;        mid += 8;        bot += 8;    }    movd_r2m( mm7, temp1 );    psrlq_i2r( 32, mm7 );    movd_r2m( mm7, temp2 );    temp1 += temp2;    temp2 = temp1;    temp1 >>= 16;    temp1 += temp2 & 0xffff;    emms();    return temp1;}#endifstatic unsigned long BitShift = 6;static unsigned int diff_factor_packed422_scanline_c( uint8_t *cur, uint8_t *old, int width ){    unsigned int ret = 0;    width /= 4;    while( width-- ) {        unsigned int tmp1 = (cur[ 0 ] + cur[ 2 ] + cur[ 4 ] + cur[ 6 ] + 2)>>2;        unsigned int tmp2 = (old[ 0 ] + old[ 2 ] + old[ 4 ] + old[ 6 ] + 2)>>2;        tmp1  = (tmp1 - tmp2);        tmp1 *= tmp1;        tmp1 >>= BitShift;        ret += tmp1;        cur += 8;        old += 8;    }    return ret;}/*static unsigned int diff_factor_packed422_scanline_test_c( uint8_t *cur, uint8_t *old, int width ){    unsigned int ret = 0;    width /= 16;    while( width-- ) {        unsigned int tmp1 = (cur[ 0 ] + cur[ 2 ] + cur[ 4 ] + cur[ 6 ])>>2;        unsigned int tmp2 = (old[ 0 ] + old[ 2 ] + old[ 4 ] + old[ 6 ])>>2;        tmp1  = (tmp1 - tmp2);        tmp1 *= tmp1;        tmp1 >>= BitShift;        ret += tmp1;        cur += (8*4);        old += (8*4);    }    return ret;}*/#if defined(ARCH_X86) || defined(ARCH_X86_64)static unsigned int diff_factor_packed422_scanline_mmx( uint8_t *cur, uint8_t *old, int width ){    const mmx_t qwYMask = { 0x00ff00ff00ff00ffULL };    unsigned int temp1, temp2;    width /= 4;    movq_m2r( qwYMask, mm1 );    movd_m2r( BitShift, mm7 );    pxor_r2r( mm0, mm0 );    while( width-- ) {        movq_m2r( *cur, mm4 );        movq_m2r( *old, mm5 );        pand_r2r( mm1, mm4 );        pand_r2r( mm1, mm5 );        psubw_r2r( mm5, mm4 );   /* mm4 = Y1 - Y2            */        pmaddwd_r2r( mm4, mm4 ); /* mm4 = (Y1 - Y2)^2        */        psrld_r2r( mm7, mm4 );   /* divide mm4 by 2^BitShift */        paddd_r2r( mm4, mm0 );   /* keep total in mm0        */        cur += 8;        old += 8;    }    movd_r2m( mm0, temp1 );    psrlq_i2r( 32, mm0 );    movd_r2m( mm0, temp2 );    temp1 += temp2;    emms();    return temp1;}#endif#define ABS(a) (((a) < 0)?-(a):(a))#if defined(ARCH_X86) || defined(ARCH_X86_64)static void diff_packed422_block8x8_mmx( pulldown_metrics_t *m, uint8_t *old,                                         uint8_t *new, int os, int ns ){    const mmx_t ymask = { 0x00ff00ff00ff00ffULL };    short out[ 24 ]; /* Output buffer for the partial metrics from the mmx code. */    uint8_t *outdata = (uint8_t *) out;    uint8_t *oldp, *newp;    int i;    pxor_r2r( mm4, mm4 );  // 4 even difference sums.    pxor_r2r( mm5, mm5 );  // 4 odd difference sums.    pxor_r2r( mm7, mm7 );  // zeros    oldp = old; newp = new;    for( i = 4; i; --i ) {        // Even difference.        movq_m2r( oldp[0], mm0 );        movq_m2r( oldp[8], mm2 );        pand_m2r( ymask, mm0 );        pand_m2r( ymask, mm2 );        oldp += os;        movq_m2r( newp[0], mm1 );        movq_m2r( newp[8], mm3 );        pand_m2r( ymask, mm1 );        pand_m2r( ymask, mm3 );        newp += ns;        movq_r2r( mm0, mm6 );        psubusb_r2r( mm1, mm0 );        psubusb_r2r( mm6, mm1 );        movq_r2r( mm2, mm6 );        psubusb_r2r( mm3, mm2 );        psubusb_r2r( mm6, mm3 );        paddw_r2r( mm0, mm4 );        paddw_r2r( mm1, mm4 );        paddw_r2r( mm2, mm4 );        paddw_r2r( mm3, mm4 );        // Odd difference.        movq_m2r( oldp[0], mm0 );        movq_m2r( oldp[8], mm2 );        pand_m2r( ymask, mm0 );        pand_m2r( ymask, mm2 );        oldp += os;        movq_m2r( newp[0], mm1 );        movq_m2r( newp[8], mm3 );        pand_m2r( ymask, mm1 );        pand_m2r( ymask, mm3 );        newp += ns;        movq_r2r( mm0, mm6 );        psubusb_r2r( mm1, mm0 );        psubusb_r2r( mm6, mm1 );        movq_r2r( mm2, mm6 );        psubusb_r2r( mm3, mm2 );        psubusb_r2r( mm6, mm3 );        paddw_r2r( mm0, mm5 );        paddw_r2r( mm1, mm5 );        paddw_r2r( mm2, mm5 );        paddw_r2r( mm3, mm5 );    }    movq_r2m( mm4, outdata[0] );    movq_r2m( mm5, outdata[8] );    m->e = out[0] + out[1] + out[2] + out[3];    m->o = out[4] + out[5] + out[6] + out[7];    m->d = m->e + m->o;    pxor_r2r( mm4, mm4 );  // Past spacial noise.

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?