📄 jcdctmgr.c

📁 jpeg编解码器
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
{  /* This routine is heavily used, so it's worth coding it tightly. */  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;  forward_DCT_method_ptr do_dct = fdct->do_dct;  DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];  DCTELEM workspace[DCTSIZE2];	/* work area for FDCT subroutine */  JDIMENSION bi;  sample_data += start_row;	/* fold in the vertical offset once */  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {    /* Load data into workspace, applying unsigned->signed conversion */    { register DCTELEM *workspaceptr;      register JSAMPROW elemptr;      register int elemr;      workspaceptr = workspace;      for (elemr = 0; elemr < DCTSIZE; elemr++) 	  {		  elemptr = sample_data[elemr] + start_col;#if DCTSIZE == 8		/* unroll the inner loop */		  workspaceptr[0] = GETJSAMPLE(elemptr[0]) - CENTERJSAMPLE;		  workspaceptr[1] = GETJSAMPLE(elemptr[1]) - CENTERJSAMPLE;		  workspaceptr[2] = GETJSAMPLE(elemptr[2]) - CENTERJSAMPLE;		  workspaceptr[3] = GETJSAMPLE(elemptr[3]) - CENTERJSAMPLE;		  workspaceptr[4] = GETJSAMPLE(elemptr[4]) - CENTERJSAMPLE;		  workspaceptr[5] = GETJSAMPLE(elemptr[5]) - CENTERJSAMPLE;		  workspaceptr[6] = GETJSAMPLE(elemptr[6]) - CENTERJSAMPLE;		  workspaceptr[7] = GETJSAMPLE(elemptr[7]) - CENTERJSAMPLE;		  workspaceptr += 8; elemptr += 8;#else		  { 			  register int elemc;			  for (elemc = DCTSIZE; elemc > 0; elemc--) {				  *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;			  }		  }#endif      }    }    /* Perform the DCT */    (*do_dct) (workspace);	jcquant_int( workspace, coef_blocks[bi], 				 fdct->divisors[compptr->quant_tbl_no] );  }}#if DCTSIZE == 8 && defined(HAVE_MMX_ATT_MNEMONICS)	METHODDEF(void)	forward_DCT_x86float32 (j_compress_ptr cinfo, jpeg_component_info * compptr,							JSAMPARRAY sample_data, JBLOCKROW coef_blocks,							JDIMENSION start_row, JDIMENSION start_col,							JDIMENSION num_blocks)/* This version is used for integer DCT implementations. */{	/* This routine is heavily used, so it's worth coding it tightly. */	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;	forward_DCT_method_ptr do_dct = fdct->do_dct;	float32_quant_method_ptr do_quant = fdct->do_float32_quant;	DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];	DCTELEM workspace[DCTSIZE2] ATTR_ALIGN(SIMD_ALIGN);	/* work area for FDCT subroutine */	JDIMENSION bi;	sample_data += start_row;	/* fold in the vertical offset once */	for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) 	{		register DCTELEM *workspaceptr;		register JSAMPROW elemptr;		register int elemr;		workspaceptr = workspace;		elemr = 0;		elemptr = sample_data[elemr] + start_col;	  		pxor_r2r(mm7,mm7);		movq_m2r( *(mmx_t*)&int16_centrejsample, mm6 );		while(elemr < DCTSIZE) 		{			movq_m2r( *(mmx_t*)elemptr, mm0 );			elemptr = sample_data[elemr+1] + start_col;			movq_m2r( *(mmx_t*)elemptr, mm1 );			movq_r2r( mm0, mm2 );			punpcklbw_r2r( mm7, mm0 );			movq_r2r( mm1, mm3 );			punpcklbw_r2r( mm7, mm1 );			psubw_r2r( mm6, mm0 );			psubw_r2r( mm6, mm1 );			elemr += 2;			punpckhbw_r2r( mm7, mm2 );			punpckhbw_r2r( mm7, mm3 );			elemptr = sample_data[elemr] + start_col;			psubw_r2r( mm6, mm2 );			psubw_r2r( mm6, mm3 );			movq_r2m( mm0, *(mmx_t*)(&workspaceptr[0]) );			movq_r2m( mm2, *(mmx_t*)(&workspaceptr[4]) );			movq_r2m( mm1, *(mmx_t*)(&workspaceptr[8]) );			movq_r2m( mm3, *(mmx_t*)(&workspaceptr[12]) );			workspaceptr += 16;		}		emms();		/* Perform the DCT */		(*do_dct)(workspace);				(*do_quant)( workspace, coef_blocks[bi], 					 fdct->float32_divisors[compptr->quant_tbl_no] );	}}METHODDEF(void)	forward_DCT_mmx (j_compress_ptr cinfo, jpeg_component_info * compptr,						  JSAMPARRAY sample_data, JBLOCKROW coef_blocks,						  JDIMENSION start_row, JDIMENSION start_col,						  JDIMENSION num_blocks)/* This version is used for integer DCT implementations. */{	/* This routine is heavily used, so it's worth coding it tightly. */	my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;	forward_DCT_method_ptr do_dct = fdct->do_dct;	float32_quant_method_ptr do_quant = fdct->do_float32_quant;	DCTELEM workspace[DCTSIZE2] ATTR_ALIGN(SIMD_ALIGN);	/* work area for FDCT subroutine */	JDIMENSION bi;	int i;	sample_data += start_row;	/* fold in the vertical offset once */	for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) 	{		register DCTELEM *workspaceptr;		register JSAMPROW elemptr;		register int elemr;		workspaceptr = workspace;		elemr = 0;		elemptr = sample_data[elemr] + start_col;	  		pxor_r2r(mm7,mm7);		movq_m2r( *(mmx_t*)&int16_centrejsample, mm6 );		while(elemr < DCTSIZE) 		{			movq_m2r( *(mmx_t*)elemptr, mm0 );			elemptr = sample_data[elemr+1] + start_col;			movq_m2r( *(mmx_t*)elemptr, mm1 );			movq_r2r( mm0, mm2 );			punpcklbw_r2r( mm7, mm0 );			movq_r2r( mm1, mm3 );			punpcklbw_r2r( mm7, mm1 );			psubw_r2r( mm6, mm0 );			psubw_r2r( mm6, mm1 );			elemr += 2;			punpckhbw_r2r( mm7, mm2 );			punpckhbw_r2r( mm7, mm3 );			elemptr = sample_data[elemr] + start_col;			psubw_r2r( mm6, mm2 );			psubw_r2r( mm6, mm3 );			movq_r2m( mm0, *(mmx_t*)(&workspaceptr[0]) );			movq_r2m( mm2, *(mmx_t*)(&workspaceptr[4]) );			movq_r2m( mm1, *(mmx_t*)(&workspaceptr[8]) );			movq_r2m( mm3, *(mmx_t*)(&workspaceptr[12]) );			workspaceptr += 16;		}		emms();		/* Perform the DCT */		(*do_dct)(workspace);				jcquant_mmx( workspace,					 coef_blocks[bi],  					 fdct->divisors[compptr->quant_tbl_no],					 fdct->int16_divisors[compptr->quant_tbl_no],					 fdct->shift			);	}}#endif#ifdef DCT_FLOAT_SUPPORTEDMETHODDEF(void)forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,		   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,		   JDIMENSION start_row, JDIMENSION start_col,		   JDIMENSION num_blocks)/* This version is used for floating-point DCT implementations. */{  /* This routine is heavily used, so it's worth coding it tightly. */  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;  float_DCT_method_ptr do_dct = fdct->do_float_dct;  FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];  FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */  JDIMENSION bi;  sample_data += start_row;	/* fold in the vertical offset once */  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {    /* Load data into workspace, applying unsigned->signed conversion */    { register FAST_FLOAT *workspaceptr;      register JSAMPROW elemptr;      register int elemr;      workspaceptr = workspace;      for (elemr = 0; elemr < DCTSIZE; elemr++) {	elemptr = sample_data[elemr] + start_col;#if DCTSIZE == 8		/* unroll the inner loop */	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);#else	{ register int elemc;	  for (elemc = DCTSIZE; elemc > 0; elemc--) {	    *workspaceptr++ = (FAST_FLOAT)	      (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);	  }	}#endif      }    }    /* Perform the DCT */    (*do_dct) (workspace);    /* Quantize/descale the coefficients, and store into coef_blocks[] */    { register FAST_FLOAT temp;      register int i;      register JCOEFPTR output_ptr = coef_blocks[bi];      for (i = 0; i < DCTSIZE2; i++) {	/* Apply the quantization and scaling factor */	temp = workspace[i] * divisors[i];	/* Round to nearest integer.	 * Since C does not specify the direction of rounding for negative	 * quotients, we have to force the dividend positive for portability.	 * The maximum coefficient size is +-16K (for 12-bit data), so this	 * code should work for either 16-bit or 32-bit ints.	 */	output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);      }    }  }}#endif /* DCT_FLOAT_SUPPORTED *//* * Initialize FDCT manager. */GLOBAL(void)jinit_forward_dct (j_compress_ptr cinfo){  my_fdct_ptr fdct;  int i;  fdct = (my_fdct_ptr)    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,				SIZEOF(my_fdct_controller));  cinfo->fdct = (struct jpeg_forward_dct *) fdct;  fdct->pub.start_pass = start_pass_fdctmgr;  switch (cinfo->dct_method) {#ifdef DCT_ISLOW_SUPPORTED  case JDCT_ISLOW:    fdct->pub.forward_DCT = forward_DCT;    fdct->do_dct = jpeg_fdct_islow;    break;#endif#ifdef DCT_IFAST_SUPPORTED  case JDCT_IFAST:  {#if defined(HAVE_MMX_ATT_MNEMONICS)	  	  int cpu_flags =  cpu_accel();	  if( cpu_flags & ACCEL_X86_MMX )	  {		  fdct->do_dct = jpeg_fdct_ifast_mmx;		  if( cpu_flags & ACCEL_X86_SSE )		  {			  fdct->fast_quantiser = QUANT_FLOAT32;			  fdct->pub.forward_DCT = forward_DCT_x86float32;			  fdct->do_float32_quant = jcquant_sse;		  }		  else if( cpu_flags & ACCEL_X86_3DNOW )		  {			  fdct->fast_quantiser = QUANT_FLOAT32;			  fdct->pub.forward_DCT = forward_DCT_x86float32;			  fdct->do_float32_quant = jcquant_3dnow;		  }		  else		  {			  fdct->fast_quantiser = QUANT_INT16;			  fdct->pub.forward_DCT = forward_DCT_mmx;			  fdct->do_float32_quant = NULL;		  }	  }	  else#endif	  {		  fdct->fast_quantiser = QUANT_INT;		  fdct->pub.forward_DCT = forward_DCT;		  fdct->do_dct = jpeg_fdct_ifast;		  fdct->do_float32_quant = NULL;	  }	  break;  }#endif#ifdef DCT_FLOAT_SUPPORTED  case JDCT_FLOAT:    fdct->pub.forward_DCT = forward_DCT_float;    fdct->do_float_dct = jpeg_fdct_float;    break;#endif  default:    ERREXIT(cinfo, JERR_NOT_COMPILED);    break;  }  /* Mark divisor tables unallocated */  for (i = 0; i < NUM_QUANT_TBLS; i++) {    fdct->divisors[i] = NULL;    fdct->int16_divisors[i] = NULL;    fdct->float32_divisors[i] = NULL;#ifdef DCT_FLOAT_SUPPORTED    fdct->float_divisors[i] = NULL;#endif  }}
上一页 12
💿 文件大小 584 K
👤 上传用户 singwolf
📂 所属分类压缩解压
🏷️ 相关标签

#jpeg #编解码器
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -