yuvscaler_bicubic.c

来自「Motion JPEG编解码器源代码」· C语言代码 · 共 855 行 · 第 1/3 页
855 行
/*    This program is free software; you can redistribute it and/or modify    it under the terms of the GNU General Public License as published by    the Free Software Foundation; either version 2 of the License, or    (at your option) any later version.    This program is distributed in the hope that it will be useful,    but WITHOUT ANY WARRANTY; without even the implied warranty of    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the    GNU General Public License for more details.    You should have received a copy of the GNU General Public License    along with this program; if not, write to the Free Software    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.*/// Version of the 25/05/2003// Copyright X. Biquard xbiquard@free.fr// Great speed Warning : function mjpeg_debug implies an implicit test => may slow down a lot the execution// of the program.// SIMD accelerated multiplications with csplineh not possible since value to be multiply do not stand// in int16_t, but in int32_t// Maybe possible in SSE since xmm registers of 128 bits available// IL FAUT NETTOYER LE HEADER : PAS BESOIN DE TOUTES CES VARIABLES GLOBALES A LA CON#ifdef HAVE_CONFIG_H#include "config.h"#endif#include <stdio.h>#include <stdlib.h>#include <unistd.h>#include <string.h>#include <math.h>#include <signal.h>#include "yuv4mpeg.h"#include "mjpeg_types.h"#include "yuvscaler.h"#include "attributes.h"#include "../utils/mmx.h"extern unsigned int output_active_width;extern unsigned int output_active_height;extern unsigned int output_width;extern unsigned int input_width;extern unsigned int input_useful_width;extern unsigned int input_useful_height;extern unsigned int specific;// Defines#define FLOAT2INTEGERPOWER 11#define FLOAT2INTOFFSET 1024#define DBLEFLOAT2INT 22#define DBLEFLOAT2INTOFFSET 2097152// MMX test#ifdef HAVE_ASM_MMXextern int32_t *mmx_res;extern int mmx;			// =1 for mmx activated#endif// MMX testextern int32_t *intermediate;// *************************************************************************************intcubic_scale (uint8_t * padded_input, uint8_t * output, 	     unsigned int *in_col, unsigned int *in_line,	     int16_t *cspline_w, uint16_t  width_neighbors, uint8_t zero_width_neighbors,	     int16_t *cspline_h, uint16_t height_neighbors, uint8_t zero_height_neighbors,	     unsigned int half){  // Warning: because cubic-spline values may be <0 or >1.0, a range test on value is mandatory  unsigned int local_output_active_width = output_active_width >> half;  unsigned int local_output_active_height = output_active_height >> half;  unsigned int local_output_width = output_width >> half;  unsigned int local_input_useful_height = input_useful_height >> half;  unsigned int local_input_useful_width = input_useful_width >> half;  unsigned int local_padded_height = local_input_useful_height + height_neighbors -1;  unsigned int local_padded_width = local_input_useful_width + width_neighbors -1;  unsigned int out_line, out_col,w,h;  unsigned int *in_line_p,*in_col_p;  int16_t output_offset;  uint8_t *output_p,*line,*line_begin,*line_0;  int16_t *cspline_wp,*cspline_hp,*cspline_hp0;  int32_t value=0,value1=0,*intermediate_p,*inter_begin;//   mjpeg_debug ("Start of cubic_scale ");   output_p = output;   output_offset = local_output_width-local_output_active_width;      /* *INDENT-OFF* */      switch(specific) {    case 0: 	{	   // First scale along the Width, not the height	   width_neighbors-=zero_width_neighbors;	   height_neighbors-=zero_height_neighbors;	   intermediate_p=intermediate;	   line_0 = padded_input;	   for (out_line = 0; out_line < local_padded_height; out_line++)	     {		cspline_wp=cspline_w;		in_col_p=in_col;		for (out_col = 0; out_col < local_output_active_width; out_col++)		  {		     line = line_0 + *(in_col_p++);		     value1=*(line++)*(*(cspline_wp++));		     for (w=1;w<width_neighbors-1;w++) 		       value1+=*(line++)*(*(cspline_wp++));		     value1+=*(line)*(*(cspline_wp++));		     if (zero_width_neighbors)		       cspline_wp++;		     *(intermediate_p++)=value1;		  }		// a line of intermediate in now finished. Make line_0 points on the next line of padded_input		line_0+=local_padded_width;	     } 	   // Intermediate now contains an width-scaled frame	   // we now scale it along the height, not the width	   cspline_hp0=cspline_h;	   in_line_p=in_line;	   for (out_line = 0; out_line < local_output_active_height; out_line++)	     {		inter_begin=intermediate + *(in_line_p++) * local_output_active_width;		for (out_col = 0; out_col < local_output_active_width-1; out_col++)		  {		     cspline_hp=cspline_hp0;		     value1 = *(inter_begin)*(*(cspline_hp++));		     for (h=1;h<height_neighbors-1;h++)		       value1 += (*(inter_begin+h*local_output_active_width))*(*(cspline_hp++));		     value1 += (*((inter_begin++)+h*local_output_active_width))*(*(cspline_hp));		     		     if (value1 < 0) *(output_p++) = 0;		     else {			value =(value1 + DBLEFLOAT2INTOFFSET) >> DBLEFLOAT2INT;			if (value > 255) *(output_p++) = 255;			else *(output_p++) = (uint8_t) value;		     }		  }		// last out_col to be treated => cspline_hp0 incremented => we use the C "++" facility		value1 = *(inter_begin)*(*(cspline_hp0++));		for (h=1;h<height_neighbors;h++)		  value1 += (*(inter_begin+h*local_output_active_width))*(*(cspline_hp0++));		if (zero_height_neighbors)		  cspline_hp0++;				if (value1 < 0) *(output_p++) = 0;		else {		   value =(value1 + DBLEFLOAT2INTOFFSET) >> DBLEFLOAT2INT;		   if (value > 255) *(output_p++) = 255;		   else *(output_p++) = (uint8_t) value;		}		// a line on output is now finished. We jump to the beginning of the next line		output_p+=output_offset;	     }	}    break;      case 1:/*      #ifdef HAVE_ASM_MMX      if (mmx==1)	{	   // We only downscale on width, not height	   emms();	   pxor_r2r(mm7,mm7);	   // only zeros in mm7	   	   line_0=padded_input;	   for (out_line = 0; out_line < local_output_active_height; out_line++)	     {		cspline_wp=cspline_w;		in_col_p=in_col;		for (out_col = 0; out_col < local_output_active_width; out_col++)		  {		     line = line_0 + *(in_col_p++);		     switch(width_neighbors)		       {			  // NB : c'est l'op閞ation movq_r2m qui coute le plus en temps, seulement pour la premi鑢e !!!			case 4:			  movq_m2r(*cspline_wp,mm0);			  movq_m2r(*(line),mm6);     punpcklbw_r2r(mm7,mm6);			  pmaddwd_r2r(mm0,mm6);      movq_r2m(mm6,*(mmx_t *)mmx_res);			  value1=mmx_res[0]+mmx_res[1];			  break;			case 6:			  // cspline_w in mm0 and mm1			  movq_m2r(*cspline_wp,mm0);			  movq_m2r(*(cspline_wp+4),mm1);			  			  // 4 pixels in mm6 and next 4 in mm5			  movq_m2r(*(line),mm6);//			  __m64 mm6= _mm_unpacklo_pi16(mm6, mm7);			  punpcklbw_r2r(mm7,mm6);			  movq_m2r(*(line+4),mm5);    			  punpcklbw_r2r(mm7,mm5);			  // multiply and add => these take more than one cycle and may be done in parallel			  pmaddwd_r2r(mm0,mm6);			  movq_r2m(mm6,*(mmx_t *)mmx_res);//			  movntq_r2m(mm6,*(mmx_t *)mmx_res);			  value1=mmx_res[0]+mmx_res[1];			  pmaddwd_r2r(mm1,mm5);			  movq_r2m(mm5,*(mmx_t *)mmx_res);//			  movntq_r2m(mm5,*(mmx_t *)mmx_res);			  value1+=mmx_res[0];			  break;			case 8:			  movq_m2r(*cspline_wp,mm0);			  movq_m2r(*(cspline_wp+4),mm1);			  movq_m2r(*(line),mm6);     punpcklbw_r2r(mm7,mm6);			  pmaddwd_r2r(mm0,mm6);     movq_r2m(mm6,*(mmx_t *)mmx_res);			  value1=mmx_res[0]+mmx_res[1];			  movq_m2r(*(line+4),mm6);     punpcklbw_r2r(mm7,mm6);			  pmaddwd_r2r(mm1,mm6);     movq_r2m(mm6,*(mmx_t *)mmx_res);			  value1+=mmx_res[0]+mmx_res[1];			  break;			case 10:			  movq_m2r(*cspline_wp,mm0);			  movq_m2r(*(cspline_wp+4),mm1);			  movq_m2r(*(cspline_wp+8),mm2);			  movq_m2r(*(line),mm6);     punpcklbw_r2r(mm7,mm6);			  pmaddwd_r2r(mm0,mm6);     movq_r2m(mm6,*(mmx_t *)mmx_res);			  value1=mmx_res[0]+mmx_res[1];			  movq_m2r(*(line+4),mm6);     punpcklbw_r2r(mm7,mm6);			  pmaddwd_r2r(mm1,mm6);     movq_r2m(mm6,*(mmx_t *)mmx_res);			  value1+=mmx_res[0]+mmx_res[1];			  movq_m2r(*(line+8),mm6);     punpcklbw_r2r(mm7,mm6);			  pmaddwd_r2r(mm2,mm6);     movq_r2m(mm6,*(mmx_t *)mmx_res);			  value1+=mmx_res[0];			  break;			case 12:			  movq_m2r(*cspline_wp,mm0);			  movq_m2r(*(cspline_wp+4),mm1);			  movq_m2r(*(cspline_wp+8),mm2);			  movq_m2r(*(line),mm6);     punpcklbw_r2r(mm7,mm6);			  pmaddwd_r2r(mm0,mm6);     movq_r2m(mm6,*(mmx_t *)mmx_res);			  value1=mmx_res[0]+mmx_res[1];			  movq_m2r(*(line+4),mm6);     punpcklbw_r2r(mm7,mm6);			  pmaddwd_r2r(mm1,mm6);     movq_r2m(mm6,*(mmx_t *)mmx_res);			  value1+=mmx_res[0]+mmx_res[1];			  movq_m2r(*(line+8),mm6);     punpcklbw_r2r(mm7,mm6);			  pmaddwd_r2r(mm2,mm6);     movq_r2m(mm6,*(mmx_t *)mmx_res);			  value1+=mmx_res[0]+mmx_res[1];			  break;			case 14:			  movq_m2r(*cspline_wp,mm0);			  movq_m2r(*(cspline_wp+4),mm1);			  movq_m2r(*(cspline_wp+8),mm2);			  movq_m2r(*(cspline_wp+12),mm3);			  movq_m2r(*(line),mm6);     punpcklbw_r2r(mm7,mm6);			  pmaddwd_r2r(mm0,mm6);     movq_r2m(mm6,*(mmx_t *)mmx_res);			  value1=mmx_res[0]+mmx_res[1];			  movq_m2r(*(line+4),mm6);     punpcklbw_r2r(mm7,mm6);			  pmaddwd_r2r(mm1,mm6);     movq_r2m(mm6,*(mmx_t *)mmx_res);			  value1+=mmx_res[0]+mmx_res[1];			  movq_m2r(*(line+8),mm6);     punpcklbw_r2r(mm7,mm6);			  pmaddwd_r2r(mm2,mm6);     movq_r2m(mm6,*(mmx_t *)mmx_res);			  value1+=mmx_res[0]+mmx_res[1];			  movq_m2r(*(line+12),mm6);     punpcklbw_r2r(mm7,mm6);			  pmaddwd_r2r(mm3,mm6);     movq_r2m(mm6,*(mmx_t *)mmx_res);			  value1+=mmx_res[0];			  break;			case 16:			  movq_m2r(*cspline_wp,mm0);			  movq_m2r(*(cspline_wp+4),mm1);			  movq_m2r(*(cspline_wp+8),mm2);			  movq_m2r(*(cspline_wp+12),mm3);			  movq_m2r(*(line),mm6);     punpcklbw_r2r(mm7,mm6);			  pmaddwd_r2r(mm0,mm6);     movq_r2m(mm6,*(mmx_t *)mmx_res);			  value1=mmx_res[0]+mmx_res[1];			  movq_m2r(*(line+4),mm6);     punpcklbw_r2r(mm7,mm6);			  pmaddwd_r2r(mm1,mm6);     movq_r2m(mm6,*(mmx_t *)mmx_res);			  value1+=mmx_res[0]+mmx_res[1];			  movq_m2r(*(line+8),mm6);     punpcklbw_r2r(mm7,mm6);			  pmaddwd_r2r(mm2,mm6);     movq_r2m(mm6,*(mmx_t *)mmx_res);			  value1+=mmx_res[0]+mmx_res[1];			  movq_m2r(*(line+12),mm6);     punpcklbw_r2r(mm7,mm6);
yuvscaler_bicubic.c - 源码说明

本页面展示了「Motion JPEG编解码器源代码」中的 yuvscaler_bicubic.c 源码文件，采用 C语言编程语言编写，共 855 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Motion相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?