postprocessing_mmxext.c
来自「VLC媒体播放程序」· C语言 代码 · 共 918 行 · 第 1/3 页
C
918 行
/*****************************************************************************
 * postprocessing_mmxext.c: Post Processing plugin MMXEXT
 *****************************************************************************
 * Copyright (C) 2001 VideoLAN
 * $Id: postprocessing_mmxext.c,v 1.5 2002/12/18 14:17:10 sam Exp $
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
 *****************************************************************************/

#include <vlc/vlc.h> /* only use uint8_t, uint32_t .... */

#include "postprocessing.h"
#include "postprocessing_common.h"

/*****************************************************************************
 *
 * Internal functions common to pp_Deblock_V and pp_Deblock_H
 *
 *****************************************************************************/

/*****************************************************************************
 * MMX stuff
 *****************************************************************************/

/*
 * Declare a 64-bit constant that is referenced only by its symbol name from
 * inside inline assembly strings.
 *
 * The C compiler sees no reference to these objects, so they are marked
 * `used` (which forces them to be emitted) rather than `unused` (which only
 * silences the warning and lets an optimising compiler drop the object).
 * The macro keeps its historical name so other users of it are unaffected.
 *
 * NOTE(review): the asm below addresses these constants by absolute symbol
 * name, which is not position independent -- confirm against the build flags.
 */
#define UNUSED_LONGLONG( foo ) \
    static const unsigned long long foo __asm__ (#foo) __attribute__((used))

/*
 * Replicate one byte value (0..255) into the 8 byte lanes of a 64-bit word.
 * The operand is widened first, so the result does not depend on PP_THR1 /
 * PP_2xTHR1 being defined with an ULL suffix.
 */
#define PP_MMX_REP8( b ) \
    ( (unsigned long long)( b ) * 0x0101010101010101ULL )

/* constants used to compute isDC_mode with MMX */
UNUSED_LONGLONG( mmx_thr1 )         = PP_MMX_REP8( PP_THR1 );
UNUSED_LONGLONG( mmx_127_thr1 )     = PP_MMX_REP8( 127ULL - PP_THR1 );
UNUSED_LONGLONG( mmx_127_2xthr1_1 ) = PP_MMX_REP8( 127ULL - PP_2xTHR1 - 1 );

/* four 16-bit words, low to high: ( 2, -5, 5, -2 ) */
UNUSED_LONGLONG( mmx_m2_5_m5_2 )    = 0xfffe0005fffb0002ULL;

/*
 * Reduce the 8 bytes of r to their minimum, left in the low byte of r.
 * t is destroyed.  pshufw is used instead of further shifts through a
 * temporary register.
 */
#define MMXEXT_GET_PMIN( r, t ) \
    "movq      " #r ", " #t "          \n" \
    "psrlq     $8, " #t "              \n" \
    "pminub    " #t ", " #r "          \n" \
    "pshufw    $0xf5, " #r ", " #t "   \n" \
    "pminub    " #t ", " #r "          \n" \
    "pshufw    $0xfe, " #r ", " #t "   \n" \
    "pminub    " #t ", " #r "          \n"

/*
 * Reduce the 8 bytes of r to their maximum, left in the low byte of r.
 * t is destroyed.
 */
#define MMXEXT_GET_PMAX( r, t ) \
    "movq      " #r ", " #t "          \n" \
    "psrlq     $8, " #t "              \n" \
    "pmaxub    " #t ", " #r "          \n" \
    "pshufw    $0xf5, " #r ", " #t "   \n" \
    "pmaxub    " #t ", " #r "          \n" \
    "pshufw    $0xfe, " #r ", " #t "   \n" \
    "pmaxub    " #t ", " #r "          \n"

/*
 * Load 8 bytes from s into t and fold them lane by lane into the running
 * minimum m and the running maximum M.
 */
#define MMXEXT_GET_LMINMAX( s, m, M, t ) \
    "movq      " #s ", " #t "          \n" \
    "pminub    " #t ", " #m "          \n" \
    "pmaxub    " #t ", " #M "          \n"

/* Some tips for MMX
 *
 *  |a-b| : d1 = a - b with unsigned saturation
 *          d2 = b - a with unsigned saturation
 *          |a-b| = d1 | d2
 */

/****************************************************************************
 * pp_deblock_isDC_mode : Check if we will use DC mode or Default mode
 ****************************************************************************
 * Uses the constants PP_THR1 and PP_THR2 ( and PP_2xTHR1 ).
 *
 * Called for each pixel on a block boundary when deblocking, so it needs
 * to be fast.
 *
 * p_v points to 10 consecutive pixels v0..v9; all ten are read.
 * Returns 1 when at least PP_THR2 of the 9 neighbouring pairs pass the
 * PP_THR1 closeness test below, 0 otherwise.
 ****************************************************************************/
static inline int pp_deblock_isDC_mode( uint8_t *p_v )
{
    unsigned int i_eq_cnt;

    /* algo for the 8 pairs (v0,v1) .. (v7,v8):
     *   x = v[i] - v[i+1], computed with wrap around (no saturation), so x
     *   is taken as a signed byte in [-128, 127] and must be tested against
     *   [-M, M] with M = PP_THR1.
     *   Adding 127-M with wrap around maps the good values into
     *   [127-2*M, 127], so a value is good when x > 127 - 2*M - 1
     *   (signed byte compare).
     *
     * pcmpgtb leaves 0xff in every good lane and psadbw against zero sums
     * the lanes, giving 255 * count.  After negation only the low 8 bits
     * hold the count, so they must be masked out before the result is used
     * as an unsigned counter.
     */
    __asm__ __volatile__ (
        "movq      (%1),  %%mm1                 \n" /* load v0->v7       */
        "pxor      %%mm0, %%mm0                 \n" /* mm0 = 0           */
        "movq      1(%1), %%mm2                 \n" /* load v1->v8       */
        "psubb     %%mm2, %%mm1                 \n" /* v[i]-v[i+1]       */
        "paddb     mmx_127_thr1, %%mm1          \n" /* + 127-THR1, wraps */
        "pcmpgtb   mmx_127_2xthr1_1, %%mm1      \n" /* > 127-2*THR1-1    */
        "psadbw    %%mm1, %%mm0                 \n" /* 255 * count       */
        "movd      %%mm0, %0                    \n"
        "negl      %0                           \n"
        "andl      $255, %0                     \n" /* keep the count    */
        : "=r"(i_eq_cnt)
        : "r"(p_v)
        : "mm0", "mm1", "mm2", "memory" );

    /* last pair (v8,v9): 9 differences do not fit in one MMX register */
    if( ( ( p_v[8] - p_v[9] + PP_THR1 ) & 0xffff ) <= PP_2xTHR1 )
    {
        i_eq_cnt++;
    }

    /* Scalar equivalent of the whole count:
     *
     *   i_eq_cnt = 0;
     *   for( i = 0; i < 9; i++ )
     *   {
     *       if( ( ( p_v[i] - p_v[i+1] + PP_THR1 ) & 0xffff ) <= PP_2xTHR1 )
     *       {
     *           i_eq_cnt++;
     *       }
     *   }
     */

    return( ( i_eq_cnt >= PP_THR2 ) ? 1 : 0 );
}

/****************************************************************************
 * pp_deblock_isMinMaxOk : test the dynamic range of the inner pixels
 ****************************************************************************
 * Computes max - min over the 8 pixels p_v[1] .. p_v[8].
 * Returns 1 when that range is strictly below 2 * i_QP, 0 otherwise.
 ****************************************************************************/
static inline int pp_deblock_isMinMaxOk( uint8_t *p_v, int i_QP )
{
    int i_range;

    /* Scalar equivalent:
     *
     *   i_min = i_max = p_v[1];
     *   for( i = 2; i < 9; i++ )
     *   {
     *       if( i_max < p_v[i] ) i_max = p_v[i];
     *       if( i_min > p_v[i] ) i_min = p_v[i];
     *   }
     *   i_range = i_max - i_min;
     */
    __asm__ __volatile__ (
        "movq      1(%1), %%mm0                 \n" /* 8 bytes           */
        "movq      %%mm0, %%mm1                 \n"
        MMXEXT_GET_PMIN( %%mm0, %%mm7 )
        MMXEXT_GET_PMAX( %%mm1, %%mm7 )
        "psubd     %%mm0, %%mm1                 \n" /* max - min         */
        "movd      %%mm1, %0                    \n"
        "andl      $255, %0                     \n" /* low byte only     */
        : "=r"(i_range)
        : "r"(p_v)
        : "mm0", "mm1", "mm7", "memory" );

    return( i_range < 2 * i_QP ? 1 : 0 );
}

/****************************************************************************
 * pp_deblock_DefaultMode : default (non DC) deblocking of one boundary
 ****************************************************************************
 * i_v holds the 10 pixels v0..v9 across the boundary; only i_v[4] and
 * i_v[5] may be modified.  i_stride is not used by this implementation.
 *
 *  d = CLIP( 5(a3x0' - a3x0)//8, 0, (v4-v5)/2 ).d( abs(a3x0) < QP )
 ****************************************************************************/
static inline void pp_deblock_DefaultMode( uint8_t i_v[10], int i_stride,
                                           int i_QP )
{
    int d, i_delta;
    int a3x0, a3x0_, a3x1, a3x2;
    int b_neg;

    (void)i_stride;

    /* First calculate a3x0:
     *   a3x0 = 2 * ( i_v[3] - i_v[6] ) + 5 * ( i_v[5] - i_v[4] );
     */
    __asm__ __volatile__ (
        "pxor      %%mm7, %%mm7                 \n" /* mm7 = 0           */
        "movq      mmx_m2_5_m5_2, %%mm6         \n" /* mm6 = (2,-5,5,-2) */
        "movd      3(%1), %%mm0                 \n"
        "punpcklbw %%mm7, %%mm0                 \n"
        "pmaddwd   %%mm6, %%mm0                 \n"
        "pshufw    $0xfe, %%mm0, %%mm1          \n"
        "paddd     %%mm1, %%mm0                 \n"
        "movd      %%mm0, %0                    \n"
        : "=r"(a3x0)
        : "r"(i_v)
        : "mm0", "mm1", "mm6", "mm7", "memory" );

    if( a3x0 < 0 )
    {
        b_neg = 1;
        a3x0  = -a3x0;
    }
    else
    {
        b_neg = 0;
    }
    /* XXX Now a3x0 is abs( a3x0 ) */

    if( ( a3x0 < 8 * i_QP ) && ( a3x0 != 0 ) ) /* |a3x0| < 8*i_QP */
    {
        /* calculate a3x1 and a3x2:
         *   a3x1 = 2 * ( i_v[1] - i_v[4] ) + 5 * ( i_v[3] - i_v[2] );
         *   a3x2 = 2 * ( i_v[5] - i_v[8] ) + 5 * ( i_v[7] - i_v[6] );
         *
         * mm6 / mm7 are set up again here: register contents are not
         * guaranteed to survive the C code between two asm statements.
         */
        __asm__ __volatile__ (
            "pxor      %%mm7, %%mm7             \n" /* mm7 = 0           */
            "movq      mmx_m2_5_m5_2, %%mm6     \n" /* mm6 = (2,-5,5,-2) */
            "movd      1(%2), %%mm0             \n"
            "movd      5(%2), %%mm2             \n"
            "punpcklbw %%mm7, %%mm0             \n"
            "punpcklbw %%mm7, %%mm2             \n"
            "pmaddwd   %%mm6, %%mm0             \n"
            "pmaddwd   %%mm6, %%mm2             \n"
            "pshufw    $0xfe, %%mm0, %%mm1      \n"
            "paddd     %%mm1, %%mm0             \n" /* mm0 = a3x1        */
            "movd      %%mm0, %0                \n"
            "pshufw    $0xfe, %%mm2, %%mm1      \n"
            "paddd     %%mm1, %%mm2             \n" /* mm2 = a3x2        */
            "movd      %%mm2, %1                \n"
            : "=r"(a3x1), "=r"(a3x2)
            : "r"(i_v)
            : "mm0", "mm1", "mm2", "mm6", "mm7", "memory" );

        if( a3x1 < 0 ) a3x1 = -a3x1; /* abs( a3x1 ) */
        if( a3x2 < 0 ) a3x2 = -a3x2; /* abs( a3x2 ) */

        a3x0_ = PP_MIN3( a3x0, a3x1, a3x2 );

        d = 5 * ( a3x0 - a3x0_ ) / 8; /* always >= 0 */

        i_delta = ( i_v[4] - i_v[5] ) / 2;

        /* clip into [0, i_delta] or [i_delta, 0] */
        if( i_delta < 0 )
        {
            if( !b_neg ) /* since true d has sgn(d) = - sgn( a3x0 ) */
            {
                d = -d;
                if( d < i_delta ) d = i_delta;
                i_v[4] -= d;
                i_v[5] += d;
            }
        }
        else
        {
            if( b_neg )
            {
                if( d > i_delta ) d = i_delta;
                i_v[4] -= d;
                i_v[5] += d;
            }
        }
    }
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?