📄 predict-c.c
字号:
/*****************************************************************************
 * predict.c: h264 encoder
 *****************************************************************************
 * Copyright (C) 2003-2008 x264 project
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *          Loren Merritt <lorenm@u.washington.edu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *****************************************************************************/

#include "common/common.h"
#include "predict.h"
#include "pixel.h"

/* Prediction routines defined outside this file (declared here, not
 * implemented here).  The suffix names the instruction-set variant. */
extern void predict_16x16_v_mmx( uint8_t *src );
extern void predict_16x16_h_mmxext( uint8_t *src );
extern void predict_16x16_h_ssse3( uint8_t *src );
extern void predict_16x16_dc_core_mmxext( uint8_t *src, int i_dc_left );
extern void predict_16x16_dc_left_core_mmxext( uint8_t *src, int i_dc_left );
extern void predict_16x16_dc_top_mmxext( uint8_t *src );
extern void predict_16x16_p_core_mmxext( uint8_t *src, int i00, int b, int c );
extern void predict_8x8c_p_core_mmxext( uint8_t *src, int i00, int b, int c );
extern void predict_8x8c_p_core_sse2( uint8_t *src, int i00, int b, int c );
extern void predict_8x8c_dc_core_mmxext( uint8_t *src, int s2, int s3 );
extern void predict_8x8c_dc_top_mmxext( uint8_t *src );
extern void predict_8x8c_v_mmx( uint8_t *src );
extern void predict_8x8c_h_mmxext( uint8_t *src );
extern void predict_8x8c_h_ssse3( uint8_t *src );
extern void predict_8x8_v_mmxext( uint8_t *src, uint8_t edge[33] );
extern void predict_8x8_h_mmxext( uint8_t *src, uint8_t edge[33] );
extern void predict_8x8_hd_mmxext( uint8_t *src, uint8_t edge[33] );
extern void predict_8x8_hu_mmxext( uint8_t *src, uint8_t edge[33] );
extern void predict_8x8_dc_mmxext( uint8_t *src, uint8_t edge[33] );
extern void predict_8x8_dc_top_mmxext( uint8_t *src, uint8_t edge[33] );
extern void predict_8x8_dc_left_mmxext( uint8_t *src, uint8_t edge[33] );
extern void predict_8x8_ddl_mmxext( uint8_t *src, uint8_t edge[33] );
extern void predict_8x8_ddr_mmxext( uint8_t *src, uint8_t edge[33] );
extern void predict_8x8_ddl_sse2( uint8_t *src, uint8_t edge[33] );
extern void predict_8x8_ddr_sse2( uint8_t *src, uint8_t edge[33] );
extern void predict_8x8_vl_sse2( uint8_t *src, uint8_t edge[33] );
extern void predict_8x8_vr_sse2( uint8_t *src, uint8_t edge[33] );
extern void predict_8x8_hu_sse2( uint8_t *src, uint8_t edge[33] );
extern void predict_8x8_hd_sse2( uint8_t *src, uint8_t edge[33] );
extern void predict_8x8_vr_core_mmxext( uint8_t *src, uint8_t edge[33] );
extern void predict_8x8_hd_ssse3( uint8_t *src, uint8_t edge[33] );
extern void predict_8x8_hu_ssse3( uint8_t *src, uint8_t edge[33] );
extern void predict_8x8_filter_mmxext ( uint8_t *src, uint8_t edge[33], int i_neighbor, int i_filters );
extern void predict_8x8_filter_ssse3 ( uint8_t *src, uint8_t edge[33], int i_neighbor, int i_filters );
extern void predict_4x4_ddl_mmxext( uint8_t *src );
extern void predict_4x4_ddr_mmxext( uint8_t *src );
extern void predict_4x4_vl_mmxext( uint8_t *src );
extern void predict_4x4_vr_mmxext( uint8_t *src );
extern void predict_4x4_vr_ssse3( uint8_t *src );
extern void predict_4x4_hd_mmxext( uint8_t *src );
extern void predict_4x4_hd_ssse3( uint8_t *src );
extern void predict_4x4_dc_mmxext( uint8_t *src );
extern void predict_4x4_ddr_ssse3( uint8_t *src );
extern void predict_4x4_hu_mmxext( uint8_t *src );
extern void predict_16x16_dc_top_sse2( uint8_t *src );
extern void predict_16x16_dc_core_sse2( uint8_t *src, int i_dc_left );
extern void predict_16x16_dc_left_core_sse2( uint8_t *src, int i_dc_left );
extern void predict_16x16_v_sse2( uint8_t *src );
extern void predict_16x16_p_core_sse2( uint8_t *src, int i00, int b, int c );

/* Signed byte weight tables used as pmaddubsw operands by the inline asm
 * in the *_p_ssse3 functions below. */
DECLARE_ALIGNED_8( static const int8_t pb_12345678[8] ) = {1,2,3,4,5,6,7,8};
DECLARE_ALIGNED_8( static const int8_t pb_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
DECLARE_ALIGNED_8( static const int8_t pb_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};

/* Adds one weighted term to the gradient accumulators H and V, which must
 * be in scope at the expansion site together with `src`:
 *   H += i * ( sample (j+i) - sample (j-i) ) taken at offset -FDEC_STRIDE,
 *   V += i * ( sample (j+i) - sample (j-i) ) taken at index -1 of rows
 *        spaced FDEC_STRIDE apart.
 * The macro deliberately has no trailing line continuation, so the
 * directive that follows it cannot be absorbed into its body. */
#define PREDICT_P_SUM(j,i)\
    H += (i) * ( src[(j)+(i) - FDEC_STRIDE ]  - src[(j)-(i) - FDEC_STRIDE ] );\
    V += (i) * ( src[((j)+(i))*FDEC_STRIDE -1] - src[((j)-(i))*FDEC_STRIDE -1] );

/* Defines predict_16x16_p_<name>( src ): accumulates H and V over i = 1..8
 * around centre index 7, derives a, b, c and the start value i00 from them,
 * and passes i00, b, c to predict_16x16_p_core_<name>.
 * (Presumably this is the H.264 16x16 plane mode -- confirm against the
 * core routine.) */
#define PREDICT_16x16_P(name)\
static void predict_16x16_p_##name( uint8_t *src )\
{\
    int a, b, c;\
    int H = 0;\
    int V = 0;\
    int i00;\
    PREDICT_P_SUM(7,1) \
    PREDICT_P_SUM(7,2) \
    PREDICT_P_SUM(7,3) \
    PREDICT_P_SUM(7,4) \
    PREDICT_P_SUM(7,5) \
    PREDICT_P_SUM(7,6) \
    PREDICT_P_SUM(7,7) \
    PREDICT_P_SUM(7,8) \
    a = 16 * ( src[15*FDEC_STRIDE -1] + src[15 - FDEC_STRIDE] );\
    b = ( 5 * H + 32 ) >> 6;\
    c = ( 5 * V + 32 ) >> 6;\
    i00 = a - b * 7 - c * 7 + 16;\
    predict_16x16_p_core_##name( src, i00, b, c );\
}

#ifndef ARCH_X86_64
PREDICT_16x16_P( mmxext )
#endif
PREDICT_16x16_P( sse2 )

#ifdef __GNUC__
/* Same a/b/c/i00 derivation as PREDICT_16x16_P, but H is computed with
 * inline asm and the result is handed to predict_16x16_p_core_sse2.
 *
 * The asm loads the 8 bytes starting one byte before src[-FDEC_STRIDE]
 * (via palignr) and the 8 bytes starting at src[-FDEC_STRIDE + 8],
 * multiplies them by pb_m87654321 and pb_12345678 respectively, sums all
 * products horizontally and sign-extends the 16-bit total into H.  That is
 * the same sum PREDICT_P_SUM(7,1..8) accumulates into H.
 *
 * NOTE(review): the asm statement lists no clobbers although it overwrites
 * mm0/mm1, and declares only the single byte src[-FDEC_STRIDE] as its memory
 * input while reading from -8+%1 up to 8+%1 -- confirm this is acceptable
 * for the supported compilers. */
static void predict_16x16_p_ssse3( uint8_t *src )
{
    int a, b, c, i00;
    int H, V;
    asm (
        "movq %1, %%mm1 \n"
        "movq 8+%1, %%mm0 \n"
        "palignr $7, -8+%1, %%mm1 \n"
        "pmaddubsw %2, %%mm0 \n"
        "pmaddubsw %3, %%mm1 \n"
        "paddw %%mm1, %%mm0 \n"
        "pshufw $14, %%mm0, %%mm1 \n"
        "paddw %%mm1, %%mm0 \n"
        "pshufw $1, %%mm0, %%mm1 \n"
        "paddw %%mm1, %%mm0 \n"
        "movd %%mm0, %0 \n"
        "movsx %w0, %0 \n"
        :"=r"(H)
        :"m"(src[-FDEC_STRIDE]), "m"(*pb_12345678), "m"(*pb_m87654321)
    );
    /* V is the same weighted column sum PREDICT_P_SUM(7,i) produces,
     * written out term by term. */
    V = 8 * ( src[15*FDEC_STRIDE-1] - src[-1*FDEC_STRIDE-1] )
      + 7 * ( src[14*FDEC_STRIDE-1] - src[ 0*FDEC_STRIDE-1] )
      + 6 * ( src[13*FDEC_STRIDE-1] - src[ 1*FDEC_STRIDE-1] )
      + 5 * ( src[12*FDEC_STRIDE-1] - src[ 2*FDEC_STRIDE-1] )
      + 4 * ( src[11*FDEC_STRIDE-1] - src[ 3*FDEC_STRIDE-1] )
      + 3 * ( src[10*FDEC_STRIDE-1] - src[ 4*FDEC_STRIDE-1] )
      + 2 * ( src[ 9*FDEC_STRIDE-1] - src[ 5*FDEC_STRIDE-1] )
      + 1 * ( src[ 8*FDEC_STRIDE-1] - src[ 6*FDEC_STRIDE-1] );
    a = 16 * ( src[15*FDEC_STRIDE -1] + src[15 - FDEC_STRIDE] );
    b = ( 5 * H + 32 ) >> 6;
    c = ( 5 * V + 32 ) >> 6;
    i00 = a - b * 7 - c * 7 + 16;
    predict_16x16_p_core_sse2( src, i00, b, c );
}
#endif

/* Defines predict_8x8c_p_<name>( src ): accumulates H and V over i = 1..4
 * around centre index 3, derives a, b, c and i00, and passes i00, b, c to
 * predict_8x8c_p_core_<name>. */
#define PREDICT_8x8_P(name)\
static void predict_8x8c_p_##name( uint8_t *src )\
{\
    int a, b, c;\
    int H = 0;\
    int V = 0;\
    int i00;\
    PREDICT_P_SUM(3,1)\
    PREDICT_P_SUM(3,2)\
    PREDICT_P_SUM(3,3)\
    PREDICT_P_SUM(3,4)\
    a = 16 * ( src[7*FDEC_STRIDE -1] + src[7 - FDEC_STRIDE] );\
    b = ( 17 * H + 16 ) >> 5;\
    c = ( 17 * V + 16 ) >> 5;\
    i00 = a -3*b -3*c + 16;\
    predict_8x8c_p_core_##name( src, i00, b, c );\
}

#ifndef ARCH_X86_64
PREDICT_8x8_P( mmxext )
#endif
PREDICT_8x8_P( sse2 )

#ifdef __GNUC__
/* Same a/b/c/i00 derivation as PREDICT_8x8_P, with H computed by inline asm
 * and the result handed to predict_8x8c_p_core_sse2.
 *
 * The asm multiplies the 8 bytes at src[-FDEC_STRIDE] by pb_m32101234, sums
 * the products horizontally and sign-extends the 16-bit total into H.  That
 * covers every H term of PREDICT_P_SUM(3,1..4) except the -4 * sample at
 * index -1, which is added in C after V is computed.
 *
 * NOTE(review): the asm statement lists no clobbers although it overwrites
 * mm0/mm1, and declares only the single byte src[-FDEC_STRIDE] as its memory
 * input while movq reads 8 bytes -- confirm this is acceptable for the
 * supported compilers. */
static void predict_8x8c_p_ssse3( uint8_t *src )
{
    int a, b, c, i00;
    int H, V;
    asm (
        "movq %1, %%mm0 \n"
        "pmaddubsw %2, %%mm0 \n"
        "pshufw $14, %%mm0, %%mm1 \n"
        "paddw %%mm1, %%mm0 \n"
        "pshufw $1, %%mm0, %%mm1 \n"
        "paddw %%mm1, %%mm0 \n"
        "movd %%mm0, %0 \n"
        "movsx %w0, %0 \n"
        :"=r"(H)
        :"m"(src[-FDEC_STRIDE]), "m"(*pb_m32101234)
    );
    V = 1 * ( src[4*FDEC_STRIDE -1] - src[ 2*FDEC_STRIDE -1] )
      + 2 * ( src[5*FDEC_STRIDE -1] - src[ 1*FDEC_STRIDE -1] )
      + 3 * ( src[6*FDEC_STRIDE -1] - src[ 0*FDEC_STRIDE -1] )
      + 4 * ( src[7*FDEC_STRIDE -1] - src[-1*FDEC_STRIDE -1] );
    /* The 8-byte load starts at index 0, so the index -1 sample's
     * contribution to H is applied here. */
    H += -4 * src[-1*FDEC_STRIDE -1];
    a = 16 * ( src[7*FDEC_STRIDE -1] + src[7 - FDEC_STRIDE] );
    b = ( 17 * H + 16 ) >> 5;
    c = ( 17 * V + 16 ) >> 5;
    i00 = a -3*b -3*c + 16;
    predict_8x8c_p_core_sse2( src, i00, b, c );
}
#endif

/* Defines predict_16x16_dc_<name>( src ): sums the 16 samples at index -1
 * of rows 0..15 (rows spaced FDEC_STRIDE apart) on top of an initial 16 and
 * passes that total, unshifted, to predict_16x16_dc_core_<name>. */
#define PREDICT_16x16_DC(name)\
static void predict_16x16_dc_##name( uint8_t *src )\
{\
    uint32_t dc = 16;\
    int i;\
    for( i = 0; i < 16; i++ )\
    {\
        dc += src[-1 + i * FDEC_STRIDE];\
    }\
    predict_16x16_dc_core_##name( src, dc );\
}

PREDICT_16x16_DC( mmxext )
PREDICT_16x16_DC( sse2 )

/* Defines predict_16x16_dc_left_<name>( src ): sums the same 16 samples on
 * top of an initial 8 and passes the total shifted right by 4 (i.e. their
 * rounded average) to predict_16x16_dc_left_core_<name>. */
#define PREDICT_16x16_DC_LEFT(name)\
static void predict_16x16_dc_left_##name( uint8_t *src )\
{\
    uint32_t dc = 8;\
    int i;\
    for( i = 0; i < 16; i++ )\
    {\
        dc += src[-1 + i * FDEC_STRIDE];\
    }\
    predict_16x16_dc_left_core_##name( src, dc>>4 );\
}

PREDICT_16x16_DC_LEFT( mmxext )
PREDICT_16x16_DC_LEFT( sse2 )

static void predict_8x8c_dc_mmxext( uint8_t *src )
{
    int s2 = 4
       + src[-1 + 0*FDEC_STRIDE]
       + src[-1 + 1*FDEC_STRIDE]
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -