speedy.c
来自「linux下的MPEG1」· C语言 代码 · 共 2,101 行 · 第 1/5 页
C
2,101 行
/** * Copyright (c) 2002, 2003 Billy Biggs <vektor@dumbterm.net>. * Copyright (C) 2001 Matthew J. Marjanovic <maddog@mir.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *//** * Includes 420to422, 422to444 scaling filters from the MPEG2 reference * implementation. The v12 source code indicates that they were written * by Cheung Auyeung <auyeung@mot.com>. The file they were in was: * * store.c, picture output routines * Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. * * Disclaimer of Warranty * * These software programs are available to the user without any license fee or * royalty on an "as is" basis. The MPEG Software Simulation Group disclaims * any and all warranties, whether express, implied, or statuary, including any * implied warranties or merchantability or of fitness for a particular * purpose. In no event shall the copyright-holder be liable for any * incidental, punitive, or consequential damages of any kind whatsoever * arising from the use of these programs. * * This disclaimer of warranty extends to the user of these programs and user's * customers, employees, agents, transferees, successors, and assigns. * * The MPEG Software Simulation Group does not represent or warrant that the * programs furnished hereunder are free of infringement of any third-party * patents. * * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware, * are subject to royalty fees to patent holders. Many of these patents are * general enough such that they are unavoidable regardless of implementation * design. * */#ifdef HAVE_CONFIG_H# include "config.h"#endif#include <stdio.h>#include <string.h>#if HAVE_INTTYPES_H#include <inttypes.h>#else#include <stdint.h>#endif#include "attributes.h"#include "xineutils.h"#include "speedtools.h"#include "speedy.h"/* Function pointer definitions. */void (*interpolate_packed422_scanline)( uint8_t *output, uint8_t *top, uint8_t *bot, int width );void (*blit_colour_packed422_scanline)( uint8_t *output, int width, int y, int cb, int cr );void (*blit_colour_packed4444_scanline)( uint8_t *output, int width, int alpha, int luma, int cb, int cr );void (*blit_packed422_scanline)( uint8_t *dest, const uint8_t *src, int width );void (*composite_packed4444_to_packed422_scanline)( uint8_t *output, uint8_t *input, uint8_t *foreground, int width );void (*composite_packed4444_alpha_to_packed422_scanline)( uint8_t *output, uint8_t *input, uint8_t *foreground, int width, int alpha );void (*composite_alphamask_to_packed4444_scanline)( uint8_t *output, uint8_t *input, uint8_t *mask, int width, int textluma, int textcb, int textcr );void (*composite_alphamask_alpha_to_packed4444_scanline)( uint8_t *output, uint8_t *input, uint8_t *mask, int width, int textluma, int textcb, int textcr, int alpha );void (*premultiply_packed4444_scanline)( uint8_t *output, uint8_t *input, int width );void (*blend_packed422_scanline)( uint8_t *output, uint8_t *src1, uint8_t *src2, int width, int pos );void (*filter_luma_121_packed422_inplace_scanline)( uint8_t *data, int width );void (*filter_luma_14641_packed422_inplace_scanline)( uint8_t *data, int width );unsigned int (*diff_factor_packed422_scanline)( uint8_t *cur, uint8_t *old, int width );unsigned int (*comb_factor_packed422_scanline)( uint8_t *top, uint8_t *mid, uint8_t *bot, int width );void (*kill_chroma_packed422_inplace_scanline)( uint8_t *data, int width );void (*mirror_packed422_inplace_scanline)( uint8_t *data, int width );void (*halfmirror_packed422_inplace_scanline)( uint8_t *data, int width );void *(*speedy_memcpy)( void *output, const void *input, size_t size );void (*diff_packed422_block8x8)( pulldown_metrics_t *m, uint8_t *old, uint8_t *new, int os, int ns );void (*a8_subpix_blit_scanline)( uint8_t *output, uint8_t *input, int lasta, int startpos, int width );void (*quarter_blit_vertical_packed422_scanline)( uint8_t *output, uint8_t *one, uint8_t *three, int width );void (*subpix_blit_vertical_packed422_scanline)( uint8_t *output, uint8_t *top, uint8_t *bot, int subpixpos, int width );void (*composite_bars_packed4444_scanline)( uint8_t *output, uint8_t *background, int width, int a, int luma, int cb, int cr, int percentage );void (*packed444_to_nonpremultiplied_packed4444_scanline)( uint8_t *output, uint8_t *input, int width, int alpha );void (*aspect_adjust_packed4444_scanline)( uint8_t *output, uint8_t *input, int width, double pixel_aspect );void (*packed444_to_packed422_scanline)( uint8_t *output, uint8_t *input, int width );void (*packed422_to_packed444_scanline)( uint8_t *output, uint8_t *input, int width );void (*packed422_to_packed444_rec601_scanline)( uint8_t *dest, uint8_t *src, int width );void (*packed444_to_rgb24_rec601_scanline)( uint8_t *output, uint8_t *input, int width );void (*rgb24_to_packed444_rec601_scanline)( uint8_t *output, uint8_t *input, int width );void (*rgba32_to_packed4444_rec601_scanline)( uint8_t *output, uint8_t *input, int width );void (*chroma_422_to_444_mpeg2_plane)( uint8_t *dst, uint8_t *src, int width, int height );void (*chroma_420_to_422_mpeg2_plane)( uint8_t *dst, uint8_t *src, int width, int height, int progressive );void (*invert_colour_packed422_inplace_scanline)( uint8_t *data, int width );void (*vfilter_chroma_121_packed422_scanline)( uint8_t *output, int width, uint8_t *m, uint8_t *t, uint8_t *b );void (*vfilter_chroma_332_packed422_scanline)( uint8_t *output, int width, uint8_t *m, uint8_t *t, uint8_t *b );/** * result = (1 - alpha)B + alpha*F * = B - alpha*B + alpha*F * = B + alpha*(F - B) */static inline __attribute__ ((always_inline,const)) int multiply_alpha( int a, int r ){ int temp; temp = (r * a) + 0x80; return ((temp + (temp >> 8)) >> 8);}static inline __attribute__ ((always_inline,const)) uint8_t clip255( int x ){ if( x > 255 ) { return 255; } else if( x < 0 ) { return 0; } else { return x; }}static unsigned long CombJaggieThreshold = 73;#if defined(ARCH_X86) || defined(ARCH_X86_64)static unsigned int comb_factor_packed422_scanline_mmx( uint8_t *top, uint8_t *mid, uint8_t *bot, int width ){ const mmx_t qwYMask = { 0x00ff00ff00ff00ffULL }; const mmx_t qwOnes = { 0x0001000100010001ULL }; mmx_t qwThreshold; unsigned int temp1, temp2; width /= 4; qwThreshold.uw[ 0 ] = CombJaggieThreshold; qwThreshold.uw[ 1 ] = CombJaggieThreshold; qwThreshold.uw[ 2 ] = CombJaggieThreshold; qwThreshold.uw[ 3 ] = CombJaggieThreshold; movq_m2r( qwThreshold, mm0 ); movq_m2r( qwYMask, mm1 ); movq_m2r( qwOnes, mm2 ); pxor_r2r( mm7, mm7 ); /* mm7 = 0. */ while( width-- ) { /* Load and keep just the luma. */ movq_m2r( *top, mm3 ); movq_m2r( *mid, mm4 ); movq_m2r( *bot, mm5 ); pand_r2r( mm1, mm3 ); pand_r2r( mm1, mm4 ); pand_r2r( mm1, mm5 ); /* Work out mm6 = (top - mid) * (bot - mid) - ( (top - mid)^2 >> 7 ) */ psrlw_i2r( 1, mm3 ); psrlw_i2r( 1, mm4 ); psrlw_i2r( 1, mm5 ); /* mm6 = (top - mid) */ movq_r2r( mm3, mm6 ); psubw_r2r( mm4, mm6 ); /* mm3 = (top - bot) */ psubw_r2r( mm5, mm3 ); /* mm5 = (bot - mid) */ psubw_r2r( mm4, mm5 ); /* mm6 = (top - mid) * (bot - mid) */ pmullw_r2r( mm5, mm6 ); /* mm3 = (top - bot)^2 >> 7 */ pmullw_r2r( mm3, mm3 ); /* mm3 = (top - bot)^2 */ psrlw_i2r( 7, mm3 ); /* mm3 = ((top - bot)^2 >> 7) */ /* mm6 is what we want. */ psubw_r2r( mm3, mm6 ); /* FF's if greater than qwTheshold */ pcmpgtw_r2r( mm0, mm6 ); /* Add to count if we are greater than threshold */ pand_r2r( mm2, mm6 ); paddw_r2r( mm6, mm7 ); top += 8; mid += 8; bot += 8; } movd_r2m( mm7, temp1 ); psrlq_i2r( 32, mm7 ); movd_r2m( mm7, temp2 ); temp1 += temp2; temp2 = temp1; temp1 >>= 16; temp1 += temp2 & 0xffff; emms(); return temp1;}#endifstatic unsigned long BitShift = 6;static unsigned int diff_factor_packed422_scanline_c( uint8_t *cur, uint8_t *old, int width ){ unsigned int ret = 0; width /= 4; while( width-- ) { unsigned int tmp1 = (cur[ 0 ] + cur[ 2 ] + cur[ 4 ] + cur[ 6 ] + 2)>>2; unsigned int tmp2 = (old[ 0 ] + old[ 2 ] + old[ 4 ] + old[ 6 ] + 2)>>2; tmp1 = (tmp1 - tmp2); tmp1 *= tmp1; tmp1 >>= BitShift; ret += tmp1; cur += 8; old += 8; } return ret;}/*static unsigned int diff_factor_packed422_scanline_test_c( uint8_t *cur, uint8_t *old, int width ){ unsigned int ret = 0; width /= 16; while( width-- ) { unsigned int tmp1 = (cur[ 0 ] + cur[ 2 ] + cur[ 4 ] + cur[ 6 ])>>2; unsigned int tmp2 = (old[ 0 ] + old[ 2 ] + old[ 4 ] + old[ 6 ])>>2; tmp1 = (tmp1 - tmp2); tmp1 *= tmp1; tmp1 >>= BitShift; ret += tmp1; cur += (8*4); old += (8*4); } return ret;}*/#if defined(ARCH_X86) || defined(ARCH_X86_64)static unsigned int diff_factor_packed422_scanline_mmx( uint8_t *cur, uint8_t *old, int width ){ const mmx_t qwYMask = { 0x00ff00ff00ff00ffULL }; unsigned int temp1, temp2; width /= 4; movq_m2r( qwYMask, mm1 ); movd_m2r( BitShift, mm7 ); pxor_r2r( mm0, mm0 ); while( width-- ) { movq_m2r( *cur, mm4 ); movq_m2r( *old, mm5 ); pand_r2r( mm1, mm4 ); pand_r2r( mm1, mm5 ); psubw_r2r( mm5, mm4 ); /* mm4 = Y1 - Y2 */ pmaddwd_r2r( mm4, mm4 ); /* mm4 = (Y1 - Y2)^2 */ psrld_r2r( mm7, mm4 ); /* divide mm4 by 2^BitShift */ paddd_r2r( mm4, mm0 ); /* keep total in mm0 */ cur += 8; old += 8; } movd_r2m( mm0, temp1 ); psrlq_i2r( 32, mm0 ); movd_r2m( mm0, temp2 ); temp1 += temp2; emms(); return temp1;}#endif#define ABS(a) (((a) < 0)?-(a):(a))#if defined(ARCH_X86) || defined(ARCH_X86_64)static void diff_packed422_block8x8_mmx( pulldown_metrics_t *m, uint8_t *old, uint8_t *new, int os, int ns ){ const mmx_t ymask = { 0x00ff00ff00ff00ffULL }; short out[ 24 ]; /* Output buffer for the partial metrics from the mmx code. */ uint8_t *outdata = (uint8_t *) out; uint8_t *oldp, *newp; int i; pxor_r2r( mm4, mm4 ); // 4 even difference sums. pxor_r2r( mm5, mm5 ); // 4 odd difference sums. pxor_r2r( mm7, mm7 ); // zeros oldp = old; newp = new; for( i = 4; i; --i ) { // Even difference. movq_m2r( oldp[0], mm0 ); movq_m2r( oldp[8], mm2 ); pand_m2r( ymask, mm0 ); pand_m2r( ymask, mm2 ); oldp += os; movq_m2r( newp[0], mm1 ); movq_m2r( newp[8], mm3 ); pand_m2r( ymask, mm1 ); pand_m2r( ymask, mm3 ); newp += ns; movq_r2r( mm0, mm6 ); psubusb_r2r( mm1, mm0 ); psubusb_r2r( mm6, mm1 ); movq_r2r( mm2, mm6 ); psubusb_r2r( mm3, mm2 ); psubusb_r2r( mm6, mm3 ); paddw_r2r( mm0, mm4 ); paddw_r2r( mm1, mm4 ); paddw_r2r( mm2, mm4 ); paddw_r2r( mm3, mm4 ); // Odd difference. movq_m2r( oldp[0], mm0 ); movq_m2r( oldp[8], mm2 ); pand_m2r( ymask, mm0 ); pand_m2r( ymask, mm2 ); oldp += os; movq_m2r( newp[0], mm1 ); movq_m2r( newp[8], mm3 ); pand_m2r( ymask, mm1 ); pand_m2r( ymask, mm3 ); newp += ns; movq_r2r( mm0, mm6 ); psubusb_r2r( mm1, mm0 ); psubusb_r2r( mm6, mm1 ); movq_r2r( mm2, mm6 ); psubusb_r2r( mm3, mm2 ); psubusb_r2r( mm6, mm3 ); paddw_r2r( mm0, mm5 ); paddw_r2r( mm1, mm5 ); paddw_r2r( mm2, mm5 ); paddw_r2r( mm3, mm5 ); } movq_r2m( mm4, outdata[0] ); movq_r2m( mm5, outdata[8] ); m->e = out[0] + out[1] + out[2] + out[3]; m->o = out[4] + out[5] + out[6] + out[7]; m->d = m->e + m->o; pxor_r2r( mm4, mm4 ); // Past spacial noise.
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?