postprocessing_mmx.c
来自「VLC媒体播放程序」· C语言 代码 · 共 890 行 · 第 1/3 页
C
890 行
{ int i_p0, i_p9; i_p0 = PP_ABS( p_v[1] - p_v[0] ) < i_QP ? p_v[0] : p_v[1]; i_p9 = PP_ABS( p_v[8] - p_v[9] ) < i_QP ? p_v[9] : p_v[8]; /* mm0 = 8 pix unmodified -We will process first 4 pixel mm0 = 8 pix unmodified mm1 = for the first part of the 4 first pix (v1) -> (p0) -> ... ( word ) (v2) (v1) (v3) (v2) (v4) (v3) = for the commoin part between first and last pix (v2) -> (v3) -> ... ( word ) (v3) (v4) (v4) (v5) (v5) (v6) = for the last part of the 4 last pix (v5) -> (v6) -> ... ( word ) (v6) (v7) (v7) (v8) (v8) (p9) mm2 = acu for first new pix mm3 = acu for last pix mm4 = unused mm5 = p0 mm6 = p9 << 48 mm7 = 0 */ __asm__ __volatile__ ( "pxor %%mm7, %%mm7 \n" "movq 1(%0), %%mm0 # get 8 pix \n" " # unpack into mm1 \n" "movq %%mm0, %%mm1 \n" "punpcklbw %%mm7, %%mm1 \n" " # get p_0 and i_p9 \n" "movd %1, %%mm5 \n" "movd %2, %%mm6 \n" "psllq $48, %%mm6 \n" " \n" "movq %%mm1, %%mm3 # p_v[5-8] = v[1-4] !! \n" "movq %%mm1, %%mm2 \n" "psllw $2, %%mm2 # p_v[1-4] = 4*v[1-4] \n" " \n" "psllq $16, %%mm1 \n" "por %%mm5, %%mm1 # mm1 =( p0, v1, v2 ,v3)\n" " \n" "paddw %%mm1, %%mm2 \n" "paddw %%mm1, %%mm2 \n" " \n" "psllq $16, %%mm1 \n" "por %%mm5, %%mm1 # mm1 =( p0, p0, v1, v2)\n" " \n" "paddw %%mm1, %%mm2 \n" "paddw %%mm1, %%mm2 \n" " \n" "psllq $16, %%mm1 \n" "por %%mm5, %%mm1 # mm1 =( p0, p0, p0, v1)\n" " \n" "paddw %%mm1, %%mm2 \n" " \n" "psllq $16, %%mm1 \n" "por %%mm5, %%mm1 # mm1 =( p0, p0, p0, p0)\n" " \n" "paddw %%mm1, %%mm2 \n" " # Now last part a little borring\n" " # last part for mm2, beginig for mm3\n" "movq %%mm0, %%mm1 \n" "psrlq $8, %%mm1 \n" "punpcklbw %%mm7, %%mm1 # mm1 =( v2, v3, v4, v5 )\n" "paddw %%mm1, %%mm2 \n" "paddw %%mm1, %%mm2 \n" "paddw %%mm1, %%mm3 \n" " \n" "movq %%mm0, %%mm1 \n" "psrlq $16, %%mm1 \n" "punpcklbw %%mm7, %%mm1 # mm1 =( v3, v4, v5, v6 )\n" "psllw $1, %%mm1 \n" "paddw %%mm1, %%mm2 \n" "paddw %%mm1, %%mm3 \n" " \n" "movq %%mm0, %%mm1 \n" "psrlq $24, %%mm1 \n" "punpcklbw %%mm7, %%mm1 # mm1 =( v4, v5, v6, v7) \n" "paddw %%mm1, %%mm2 \n" "paddw %%mm1, %%mm3 \n" "paddw %%mm1, %%mm3 \n" " \n" "movq %%mm0, %%mm1 \n" "psrlq $32, %%mm1 \n" "punpcklbw %%mm7, %%mm1 # mm1 =( v5, v6, v7, v8) \n" "paddw %%mm1, %%mm2 \n" "psllw $2, %%mm1 \n" "paddw %%mm1, %%mm3 \n" " # Now last part for last 4 pix \n" " # \n" "movq %%mm0, %%mm1 \n" "punpckhbw %%mm7, %%mm1 # mm1 = ( v5, v6, v7, v8) \n" " \n" "psrlq $16, %%mm1 \n" "por %%mm6, %%mm1 # mm1 =( v6, v7, v8, p9 )\n" " \n" "paddw %%mm1, %%mm3 \n" "paddw %%mm1, %%mm3 \n" " \n" "psrlq $16, %%mm1 \n" "por %%mm6, %%mm1 # mm1 =( v7, v8, p9, p9)\n" " \n" "paddw %%mm1, %%mm3 \n" "paddw %%mm1, %%mm3 \n" " \n" "psrlq $16, %%mm1 \n" "por %%mm6, %%mm1 # mm1 =( v8, p9, p9, p9 )\n" " \n" "paddw %%mm1, %%mm3 \n" " \n" "psrlq $16, %%mm1 \n" "por %%mm6, %%mm1 # mm1 =( p9, p9, p9, p9 )\n" " \n" "paddw %%mm1, %%mm3 \n" "psrlw $4, %%mm2 \n" "psrlw $4, %%mm3 \n" "packuswb %%mm3, %%mm2 \n" "movq %%mm2, 1(%0) \n" : : "r"(p_v), "r"(i_p0), "r"(i_p9) : "memory" );#if 0 for( i = 1; i < 9; i++ ) { v[i] = p_v[i]; /* save 8 pix that will be modified */ } p_v[1] = ( 6 * i_p0 + 4 * v[1] + 2 *( v[2] + v[3]) + v[4] + v[5]) >> 4; p_v[2] = ( 4 * i_p0 + 2 * v[1] + 4 * v[2] + 2 *( v[3] + v[4]) + v[5] + v[6]) >> 4; p_v[3] = ( 2 * i_p0 + 2 * (v[1] + v[2]) + 4 * v[3] + 2 *( v[4] + v[5]) + v[6] + v[7]) >> 4; p_v[4] = ( i_p0 + v[1] + 2 * (v[2] + v[3]) + 4 * v[4] + 2 *( v[5] + v[6]) + v[7] + v[8]) >> 4; p_v[5] = ( v[1] + v[2] + 2 * (v[3] + v[4]) + 4 * v[5] + 2 *( v[6] + v[7]) + v[8] + i_p9) >> 4; p_v[6] = ( v[2] + v[3] + 2 * (v[4] + v[5]) + 4 * v[6] + 2 *( v[7] + v[8]) + 2 * i_p9) >> 4; p_v[7] = ( v[3] + v[4] + 2 * (v[5] + v[6]) + 4 * v[7] + 2 * v[8] + 4 * i_p9) >> 4; p_v[8] = ( v[4] + v[5] + 2 * (v[6] + v[7]) + 4 * v[8] + 6 * i_p9) >> 4;#endif}/*****************************************************************************//*---------------------------------------------------------------------------*//* *//* ---------- filter Vertical lines so follow horizontal edges -------- *//* *//*---------------------------------------------------------------------------*//*****************************************************************************/void E_( pp_deblock_V )( uint8_t *p_plane, int i_width, int i_height, int i_stride, QT_STORE_T *p_QP_store, int i_QP_stride, int b_chroma ){ int x, y, i; uint8_t *p_v; int i_QP_scale; /* use to do ( ? >> i_QP_scale ) */ int i_QP; uint8_t i_v[10]; i_QP_scale = b_chroma ? 5 : 4 ; for( y = 8; y < i_height - 4; y += 8 ) { p_v = p_plane + ( y - 5 )* i_stride; for( x = 0; x < i_width; x++ ) { /* First get 10 vert pix to use them without i_stride */ for( i = 0; i < 10; i++ ) { i_v[i] = p_v[i*i_stride + x]; } i_QP = p_QP_store[(y>>i_QP_scale)*i_QP_stride+ (x>>i_QP_scale)]; /* XXX QP is for v5 */ if( pp_deblock_isDC_mode( i_v ) ) { if( pp_deblock_isMinMaxOk( i_v, i_QP ) ) { pp_deblock_DCMode( i_v, i_QP ); } } else { pp_deblock_DefaultMode( i_v, i_stride, i_QP ); } /* Copy back, XXX only 1-8 were modified */ for( i = 1; i < 9; i++ ) { p_v[i*i_stride + x] = i_v[i]; } } } return;}/*****************************************************************************//*---------------------------------------------------------------------------*//* *//* --------- filter Horizontal lines so follow vertical edges -------- *//* *//*---------------------------------------------------------------------------*//*****************************************************************************/void E_( pp_deblock_H )( uint8_t *p_plane, int i_width, int i_height, int i_stride, QT_STORE_T *p_QP_store, int i_QP_stride, int b_chroma ){ int x, y; uint8_t *p_v; int i_QP_scale; int i_QP; i_QP_scale = b_chroma ? 5 : 4 ; for( y = 0; y < i_height; y++ ) { p_v = p_plane + y * i_stride - 5; for( x = 8; x < i_width - 4; x += 8 ) { /* p_v point 5 pix before a block boundary */ /* XXX QP is for v5 */ i_QP = p_QP_store[(y>>i_QP_scale)*i_QP_stride+ (x>>i_QP_scale)]; if( pp_deblock_isDC_mode( p_v + x ) ) { if( pp_deblock_isMinMaxOk( p_v+ x, i_QP ) ) { pp_deblock_DCMode( p_v+x, i_QP ); } } else { pp_deblock_DefaultMode( p_v+x, i_stride, i_QP ); } } } return;}/***************************************************************************** * * Internals functions common to pp_Dering_Y pp_Dering_C * *****************************************************************************/static inline void pp_dering_MinMax( uint8_t *p_block, int i_stride, int *pi_min, int *pi_max ){ int x, y; int i_min, i_max;#if 0 /* First we will extract min/max for each pix on vertical line and next extract global min/max */ __asm__ __volatile__( "leal (%2,%3), %%eax \n" "movq (%2), %%mm0 #load line \n" "movq %%mm0, %%mm1 \n" MMXEXT_GET_LMINMAX( (%%eax), %%mm0, %%mm1, %%mm7 )
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?