speedy.c
来自「linux下的MPEG1」· C语言 代码 · 共 2,101 行 · 第 1/5 页
C
2,101 行
| (multiply_alpha( cur_a, input[ 2 ] ) << 16) | (multiply_alpha( cur_a, input[ 1 ] ) << 8) | cur_a; output += 4; input += 4; }}#if defined(ARCH_X86) || defined(ARCH_X86_64)static void premultiply_packed4444_scanline_mmxext( uint8_t *output, uint8_t *input, int width ){ const mmx_t round = { 0x0080008000800080ULL }; const mmx_t alpha = { 0x00000000000000ffULL }; const mmx_t noalp = { 0xffffffffffff0000ULL }; pxor_r2r( mm7, mm7 ); while( width-- ) { movd_m2r( *input, mm0 ); punpcklbw_r2r( mm7, mm0 ); movq_r2r( mm0, mm2 ); pshufw_r2r( mm2, mm2, 0 ); movq_r2r( mm2, mm4 ); pand_m2r( alpha, mm4 ); pmullw_r2r( mm2, mm0 ); paddw_m2r( round, mm0 ); movq_r2r( mm0, mm3 ); psrlw_i2r( 8, mm3 ); paddw_r2r( mm3, mm0 ); psrlw_i2r( 8, mm0 ); pand_m2r( noalp, mm0 ); paddw_r2r( mm4, mm0 ); packuswb_r2r( mm0, mm0 ); movd_r2m( mm0, *output ); output += 4; input += 4; } sfence(); emms();}#endifstatic void blend_packed422_scanline_c( uint8_t *output, uint8_t *src1, uint8_t *src2, int width, int pos ){ if( pos == 0 ) { blit_packed422_scanline( output, src1, width ); } else if( pos == 256 ) { blit_packed422_scanline( output, src2, width ); } else if( pos == 128 ) { interpolate_packed422_scanline( output, src1, src2, width ); } else { width *= 2; while( width-- ) { *output++ = ( (*src1++ * ( 256 - pos )) + (*src2++ * pos) + 0x80 ) >> 8; } }}#if defined(ARCH_X86) || defined(ARCH_X86_64)static void blend_packed422_scanline_mmxext( uint8_t *output, uint8_t *src1, uint8_t *src2, int width, int pos ){ if( pos <= 0 ) { blit_packed422_scanline( output, src1, width ); } else if( pos >= 256 ) { blit_packed422_scanline( output, src2, width ); } else if( pos == 128 ) { interpolate_packed422_scanline( output, src1, src2, width ); } else { const mmx_t all256 = { 0x0100010001000100ULL }; const mmx_t round = { 0x0080008000800080ULL }; movd_m2r( pos, mm0 ); pshufw_r2r( mm0, mm0, 0 ); movq_m2r( all256, mm1 ); psubw_r2r( mm0, mm1 ); pxor_r2r( mm7, mm7 ); for( width /= 2; width; width-- ) { movd_m2r( *src1, mm3 ); movd_m2r( *src2, mm4 ); punpcklbw_r2r( mm7, mm3 ); punpcklbw_r2r( mm7, mm4 ); pmullw_r2r( mm1, mm3 ); pmullw_r2r( mm0, mm4 ); paddw_r2r( mm4, mm3 ); paddw_m2r( round, mm3 ); psrlw_i2r( 8, mm3 ); packuswb_r2r( mm3, mm3 ); movd_r2m( mm3, *output ); output += 4; src1 += 4; src2 += 4; } sfence(); emms(); }}#endif#if defined(ARCH_X86) || defined(ARCH_X86_64)static void quarter_blit_vertical_packed422_scanline_mmxext( uint8_t *output, uint8_t *one, uint8_t *three, int width ){ int i; for( i = width/16; i; --i ) { movq_m2r( *one, mm0 ); movq_m2r( *three, mm1 ); movq_m2r( *(one + 8), mm2 ); movq_m2r( *(three + 8), mm3 ); movq_m2r( *(one + 16), mm4 ); movq_m2r( *(three + 16), mm5 ); movq_m2r( *(one + 24), mm6 ); movq_m2r( *(three + 24), mm7 ); pavgb_r2r( mm1, mm0 ); pavgb_r2r( mm1, mm0 ); pavgb_r2r( mm3, mm2 ); pavgb_r2r( mm3, mm2 ); pavgb_r2r( mm5, mm4 ); pavgb_r2r( mm5, mm4 ); pavgb_r2r( mm7, mm6 ); pavgb_r2r( mm7, mm6 ); movntq_r2m( mm0, *output ); movntq_r2m( mm2, *(output + 8) ); movntq_r2m( mm4, *(output + 16) ); movntq_r2m( mm6, *(output + 24) ); output += 32; one += 32; three += 32; } width = (width & 0xf); for( i = width/4; i; --i ) { movq_m2r( *one, mm0 ); movq_m2r( *three, mm1 ); pavgb_r2r( mm1, mm0 ); pavgb_r2r( mm1, mm0 ); movntq_r2m( mm0, *output ); output += 8; one += 8; three += 8; } width = width & 0x7; /* Handle last few pixels. */ for( i = width * 2; i; --i ) { *output++ = (*one + *three + *three + *three + 2) / 4; one++; three++; } sfence(); emms();}#endifstatic void quarter_blit_vertical_packed422_scanline_c( uint8_t *output, uint8_t *one, uint8_t *three, int width ){ width *= 2; while( width-- ) { *output++ = (*one + *three + *three + *three + 2) / 4; one++; three++; }}static void subpix_blit_vertical_packed422_scanline_c( uint8_t *output, uint8_t *top, uint8_t *bot, int subpixpos, int width ){ if( subpixpos == 32768 ) { interpolate_packed422_scanline( output, top, bot, width ); } else if( subpixpos == 16384 ) { quarter_blit_vertical_packed422_scanline( output, top, bot, width ); } else if( subpixpos == 49152 ) { quarter_blit_vertical_packed422_scanline( output, bot, top, width ); } else { int x; width *= 2; for( x = 0; x < width; x++ ) { output[ x ] = ( ( top[ x ] * subpixpos ) + ( bot[ x ] * ( 0xffff - subpixpos ) ) ) >> 16; } }}static void a8_subpix_blit_scanline_c( uint8_t *output, uint8_t *input, int lasta, int startpos, int width ){ int pos = 0xffff - (startpos & 0xffff); int prev = lasta; int x; for( x = 0; x < width; x++ ) { output[ x ] = ( ( prev * pos ) + ( input[ x ] * ( 0xffff - pos ) ) ) >> 16; prev = input[ x ]; }}/** * These are from lavtools in mjpegtools: * * colorspace.c: Routines to perform colorspace conversions. * * Copyright (C) 2001 Matthew J. Marjanovic <maddog@mir.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */#define FP_BITS 18/* precomputed tables */static int Y_R[256];static int Y_G[256];static int Y_B[256];static int Cb_R[256];static int Cb_G[256];static int Cb_B[256];static int Cr_R[256];static int Cr_G[256];static int Cr_B[256];static int conv_RY_inited = 0;static int RGB_Y[256];static int R_Cr[256];static int G_Cb[256];static int G_Cr[256];static int B_Cb[256];static int conv_YR_inited = 0;static int myround(double n){ if (n >= 0) return (int)(n + 0.5); else return (int)(n - 0.5);}static void init_RGB_to_YCbCr_tables(void){ int i; /* * Q_Z[i] = (coefficient * i * * (Q-excursion) / (Z-excursion) * fixed-point-factor) * * to one of each, add the following: * + (fixed-point-factor / 2) --- for rounding later * + (Q-offset * fixed-point-factor) --- to add the offset * */ for (i = 0; i < 256; i++) { Y_R[i] = myround(0.299 * (double)i * 219.0 / 255.0 * (double)(1<<FP_BITS)); Y_G[i] = myround(0.587 * (double)i * 219.0 / 255.0 * (double)(1<<FP_BITS)); Y_B[i] = myround((0.114 * (double)i * 219.0 / 255.0 * (double)(1<<FP_BITS)) + (double)(1<<(FP_BITS-1)) + (16.0 * (double)(1<<FP_BITS))); Cb_R[i] = myround(-0.168736 * (double)i * 224.0 / 255.0 * (double)(1<<FP_BITS)); Cb_G[i] = myround(-0.331264 * (double)i * 224.0 / 255.0 * (double)(1<<FP_BITS)); Cb_B[i] = myround((0.500 * (double)i * 224.0 / 255.0 * (double)(1<<FP_BITS)) + (double)(1<<(FP_BITS-1)) + (128.0 * (double)(1<<FP_BITS))); Cr_R[i] = myround(0.500 * (double)i * 224.0 / 255.0 * (double)(1<<FP_BITS)); Cr_G[i] = myround(-0.418688 * (double)i * 224.0 / 255.0 * (double)(1<<FP_BITS)); Cr_B[i] = myround((-0.081312 * (double)i * 224.0 / 255.0 * (double)(1<<FP_BITS)) + (double)(1<<(FP_BITS-1)) + (128.0 * (double)(1<<FP_BITS))); } conv_RY_inited = 1;}static void init_YCbCr_to_RGB_tables(void){ int i; /* * Q_Z[i] = (coefficient * i * * (Q-excursion) / (Z-excursion) * fixed-point-factor) * * to one of each, add the following: * + (fixed-point-factor / 2) --- for rounding later * + (Q-offset * fixed-point-factor) --- to add the offset * */ /* clip Y values under 16 */ for (i = 0; i < 16; i++) { RGB_Y[i] = myround((1.0 * (double)(16) * 255.0 / 219.0 * (double)(1<<FP_BITS)) + (double)(1<<(FP_BITS-1))); } for (i = 16; i < 236; i++) { RGB_Y[i] = myround((1.0 * (double)(i - 16) * 255.0 / 219.0 * (double)(1<<FP_BITS)) + (double)(1<<(FP_BITS-1))); } /* clip Y values above 235 */ for (i = 236; i < 256; i++) { RGB_Y[i] = myround((1.0 * (double)(235) * 255.0 / 219.0 * (double)(1<<FP_BITS)) + (double)(1<<(FP_BITS-1))); } /* clip Cb/Cr values below 16 */ for (i = 0; i < 16; i++) { R_Cr[i] = myround(1.402 * (double)(-112) * 255.0 / 224.0 * (double)(1<<FP_BITS)); G_Cr[i] = myround(-0.714136 * (double)(-112) * 255.0 / 224.0 * (double)(1<<FP_BITS)); G_Cb[i] = myround(-0.344136 * (double)(-112) * 255.0 / 224.0 * (double)(1<<FP_BITS)); B_Cb[i] = myround(1.772 * (double)(-112) * 255.0 / 224.0 * (double)(1<<FP_BITS)); } for (i = 16; i < 241; i++) { R_Cr[i] = myround(1.402 * (double)(i - 128) * 255.0 / 224.0 * (double)(1<<FP_BITS)); G_Cr[i] = myround(-0.714136 * (double)(i - 128) * 255.0 / 224.0 * (double)(1<<FP_BITS)); G_Cb[i] = myround(-0.344136 * (double)(i - 128) * 255.0 / 224.0 * (double)(1<<FP_BITS)); B_Cb[i] = myround(1.772 * (double)(i - 128) * 255.0 / 224.0 * (double)(1<<FP_BITS)); } /* clip Cb/Cr values above 240 */ for (i = 241; i < 256; i++) { R_Cr[i] = myround(1.402 * (double)(112) * 255.0 / 224.0 * (double)(1<<FP_BITS)); G_Cr[i] = myround(-0.714136 * (double)(112) * 255.0 / 224.0 * (double)(1<<FP_BITS)); G_Cb[i] = myround(-0.344136 * (double)(i - 128) * 255.0 / 224.0 * (double)(1<<FP_BITS)); B_Cb[i] = myround(1.772 * (double)(112) * 255.0 / 224.0 * (double)(1<<FP_BITS)); } conv_YR_inited = 1;}static void rgb24_to_packed444_rec601_scanline_c( uint8_t *output, uint8_t *input, int width ){ if( !conv_RY_inited ) init_RGB_to_YCbCr_tables(); while( width-- ) { int r = input[ 0 ]; int g = input[ 1 ]; int b = input[ 2 ]; output[ 0 ] = (Y_R[ r ] + Y_G[ g ] + Y_B[ b ]) >> FP_BITS; output[ 1 ] = (Cb_R[ r ] + Cb_G[ g ] + Cb_B[ b ]) >> FP_BITS; output[ 2 ] = (Cr_R[ r ] + Cr_G[ g ] + Cr_B[ b ]) >> FP_BITS; output += 3; input += 3; }}static void rgba32_to_packed4444_rec601_scanline_c( uint8_t *output, uint8_t *input, int width ){ if( !conv_RY_inited ) init_RGB_to_YCbCr_tables(); while( width-- ) { int r = input[ 0 ]; int g = input[ 1 ]; int b = input[ 2 ]; int a = input[ 3 ]; output[ 0 ] = a; output[ 1 ] = (Y_R[ r ] + Y_G[ g ] + Y_B[ b ]) >> FP_BITS; output[ 2 ] = (Cb_R[ r ] + Cb_G[ g ] + Cb_B[ b ]) >> FP_BITS; output[ 3 ] = (Cr_R[ r ] + Cr_G[ g ] + Cr_B[ b ]) >> FP_BITS; output += 4; input += 4; }}static void packed444_to_rgb24_rec601_scanline_c( uint8_t *output, uint8_t *input, int width ){ if( !conv_YR_inited ) init_YCbCr_to_RGB_tables(); while( width-- ) { int luma = input[ 0 ]; int cb = input[ 1 ]; int cr = input[ 2 ]; output[ 0 ] = clip255( (RGB_Y[ luma ] + R_Cr[ cr ]) >> FP_BITS ); output[ 1 ] = clip255( (RGB_Y[ luma ] + G_Cb[ cb ] + G_Cr[cr]) >> FP_BITS ); output[ 2 ] = clip255( (RGB_Y[ luma ] + B_Cb[ cb ]) >> FP_BITS ); output += 3; input += 3; }}/** * 601 numbers: * * Y' = 0.299*R' + 0.587*G' + 0.114*B' (in 0.0 to 1.0) * Cb = -0.169*R' - 0.331*G' + 0.500*B' (in -0.5 to +0.5) * Cr = 0.500*R' - 0.419*G' - 0.081*B' (in -0.5 to +0.5) * * Inverse: * Y Cb Cr * R 1.0000 -0.0009 1.4017 * G 1.0000 -0.3437 -0.7142 * B 1.0000 1.7722 0.0010 * * S170M numbers: * Y' = 0.299*R' + 0.587*G' + 0.114*B' (in 0.0 to 1.0) * B-Y' = -0.299*R' - 0.587*G'
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?