📄 quant_mmx.c

📁 Motion JPEG编解码器源代码
💻 C
字号:
/*
 *   Copyright (C) 2000 Andrew Stevens <as@comlab.ox.ac.uk>
 *
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
 *   as published by the Free Software Foundation; either version 2
 *   of the License, or (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 *
 *
 *   quantize_ni_mmx.s:  MMX optimized coefficient quantization sub-routine
 *
 *   Contents of this file: two non-intra inverse quantisers and a
 *   weighted coefficient sum, each working on one 64-coefficient block.
 *
 *   NOTE(review): every xxx_r2r / xxx_m2r / xxx_i2r / xxx_r2m / xxx_r2g
 *   name below is a macro from "mmx.h".  The comments assume each one
 *   emits the MMX instruction of the same name with operands in
 *   (source, destination) order, which is what the original author's
 *   comments imply - confirm against mmx.h.
 */

#include <config.h>
#include "mjpeg_types.h"
#include "mmx.h"

/*
 * Non-intra inverse quantisation of one block, with per-coefficient
 * mismatch control (presumably the MPEG-1 flavour, going by the "m1"
 * in the name - TODO confirm).
 *
 * For each of the 64 coefficients, with a = abs(src[k]):
 *
 *     v      = ((2*a + 1) * quant_mat[k]) >> 5, saturated to 2047
 *     v      = 0               if src[k] == 0
 *     v      = (v - 1) | 1     if v != 0   (force non-zero results odd)
 *     dst[k] = src[k] < 0 ? -v : v
 *
 * src       - 64 input coefficients, read four at a time.
 * dst       - 64 output coefficients, written four at a time.
 * quant_mat - 64 multipliers, one per coefficient (presumably already
 *             scaled by the quantiser step, since no separate scale is
 *             passed in - verify against the caller).
 *
 * NOTE(review): overflow is detected from the signed high half of the
 * product (pmulhw), so this presumably relies on 2*a+1 and quant_mat[k]
 * both being below 32768 - confirm.
 *
 * The MMX state is released with emms() before returning.
 */
void iquantize_non_intra_m1_mmx(int16_t *src, int16_t *dst, uint16_t *quant_mat)
{
	int i;
		/* 
		 *   MMX Register usage
		 *   mm0 = magnitude being worked on (abs, then low half of product)
		 *   mm1 = high half of product, then the final result
		 *   mm2 = TRUE where *psrc==0, then the zero-masked magnitude
		 *   mm3 = TRUE where *psrc<0 (sign mask)
		 *   mm4 = four words of *quant_mat
		 *   mm6 = <0,0,0,0>
		 *   mm7 = <1,1,1,1>
		 */
	/* Load 1 into all 4 words of mm7: all-ones, then logical shift right by 15 */
	pxor_r2r(mm7, mm7);
	pcmpeqw_r2r(mm7, mm7);
	psrlw_i2r(15, mm7);
	/* mm6 = 0, used for every comparison against zero below */
	pxor_r2r(mm6, mm6);
	
	for(i=0; i<64; i+=4) /* 64 coeffs in a DCT block */
	{
		movq_m2r(src[0], mm0); 	/* mm0 = *psrc */
		src += 4; 
		
		movq_r2r(mm0, mm2);	/* mm2 = TRUE where *psrc==0 */
		pcmpeqw_r2r(mm6, mm2);
		
		movq_r2r(mm0, mm3);	/* mm3 = TRUE where *psrc<0 */
		psraw_i2r(15, mm3);
		
		/* Work with absolute value for convenience... 
		 * (x ^ mask) - mask negates x where mask is all ones and
		 * leaves it unchanged where mask is zero.
		 */
		pxor_r2r (mm3, mm0); 	/* mm0 = abs(*psrc) */
		psubw_r2r(mm3, mm0);
		
		paddw_r2r(mm0, mm0);	/* mm0 = 2*abs(*psrc)     */
		paddw_r2r(mm7, mm0);	/* mm0 = 2*abs(*psrc) + 1 */
		
		movq_m2r(quant_mat[0], mm4); /* multiply by *quant_mat */
		movq_r2r(mm0, mm1);
		pmullw_r2r(mm4, mm0);	/* mm0 = low  16 bits of each product */
		pmulhw_r2r(mm4, mm1);	/* mm1 = high 16 bits of each product */
		quant_mat += 4;
		
		pcmpgtw_r2r(mm6, mm1); 	/* if there was overflow, saturate low bits with all 1's */
		por_r2r(mm1, mm0);
		
		psrlw_i2r(5, mm0);	/* divide by 32 (largest possible value = 65535/32 == 2047) */
		
		/* zero case */
		pandn_r2r(mm0, mm2);	/* set to 0 where *psrc==0 */
		
		/* mismatch control: v -> (v-1)|1 for non-zero v, 0 stays 0 */
		movq_r2r(mm2, mm1);
		psubw_r2r(mm7, mm2);	/* mm2 = v-1 */
		pcmpeqw_r2r(mm6, mm1);	/* mm1 = v==0 */
		por_r2r(mm7, mm2);	/* mm2 = (v-1)|1 */
		pandn_r2r(mm2, mm1);	/* mm1 = (v==0) ? 0 : (v-1)|1 */
		
		/* Restore the sign (zero results are unaffected) */
		pxor_r2r(mm3, mm1); 	/* retain original sign of *psrc */
		psubw_r2r(mm3, mm1);
		
		movq_r2m(mm1, dst[0]);
		dst += 4;
	}
	
	emms();
}

/*
 * extmmx Inverse mpeg-2 quantisation routine.
 *
 * Non-intra inverse quantisation of one block, with a single block-wide
 * mismatch correction applied to the last coefficient.
 *
 * For each of the 64 coefficients, with a = abs(src[k]):
 *
 *     v      = ((2*a + 1) * quant_mat[k]) >> 5, saturated to 2047
 *     v      = 0               if src[k] == 0
 *     dst[k] = src[k] < 0 ? -v : v
 *
 * Afterwards the magnitudes v are summed; if the low bit of that sum is
 * 0 (sum even), the least significant bit of dst[63] is toggled.
 *
 * src       - 64 input coefficients, read four at a time.
 * dst       - 64 output coefficients, written four at a time; dst[63]
 *             may be modified once more after the loop.
 * quant_mat - 64 multipliers, one per coefficient (presumably already
 *             scaled by the quantiser step - verify against the caller).
 *
 * NOTE(review): as in the m1 routine, overflow detection uses the signed
 * high half of the product, so both factors are presumably expected to
 * stay below 32768 - confirm.
 *
 * The MMX state is released with emms() before returning.
 */
void iquantize_non_intra_m2_mmx(int16_t *src, int16_t *dst, uint16_t *quant_mat)
{
	int sum;
	int i;
	int16_t *dst2;	/* remembers the start of dst for the final dst2[63] fix-up */
	
		/* 
		 *  mm0 *psrc, scratch
		 *  mm1 four words of *quant_mat
		 *  mm2 TRUE if *psrc is 0, then scratch
		 *  mm3 TRUE if *psrc is negative
		 *  mm4 Partial sums 
		 *  mm5 scratch for the horizontal add after the loop
		 *  mm6 <0,0,0,0>
		 *  mm7 <1,1,1,1>
		 */
	/* Load 1 into all 4 words of mm7: all-ones, then logical shift right by 15 */
	pxor_r2r(mm7, mm7);
	pcmpeqw_r2r(mm7, mm7);
	psrlw_i2r(15, mm7);
	/* mm6 = 0 for comparisons, mm4 = 0 to start the running sums */
	pxor_r2r(mm6, mm6);
	pxor_r2r(mm4, mm4);
	dst2 = dst;
	for(i=0; i<64; i+=4) /* 64 coeffs in a DCT block */
	{
		movq_m2r(src[0], mm0); 	/* mm0 = *psrc */
		src += 4; 
		
		movq_r2r(mm0, mm2);	/* mm2 = TRUE where *psrc==0 */
		pcmpeqw_r2r(mm6, mm2);
		
		movq_r2r(mm0, mm3);	/* mm3 = TRUE where *psrc<0 */
		psraw_i2r(15, mm3);
		
		/* Work with absolute value for convenience... 
		 * (x ^ mask) - mask negates x where mask is all ones and
		 * leaves it unchanged where mask is zero.
		 */
		pxor_r2r (mm3, mm0); 	/* mm0 = abs(*psrc) */
		psubw_r2r(mm3, mm0);
		
		paddw_r2r(mm0, mm0);	/* mm0 = 2*abs(*psrc)      */
		paddw_r2r(mm7, mm0);	/* mm0 = 2*abs(*psrc) + 1  */
		pandn_r2r(mm0, mm2);	/* set to 0 where *psrc==0 */
		
		movq_m2r(quant_mat[0], mm1); /* multiply by *quant_mat */
		movq_r2r(mm2, mm0);
		pmulhw_r2r(mm1, mm2);	/* mm2 = high 16 bits of each product */
		pmullw_r2r(mm1, mm0);	/* mm0 = low  16 bits of each product */
		quant_mat += 4;
		
		pcmpgtw_r2r(mm6, mm2); 	/* if there was overflow, saturate low bits with all 1's */
		por_r2r(mm2, mm0);
		
		psrlw_i2r(5, mm0); 	/* divide by 32 (largest possible value = 65535/32 == 2047) */
		
		/* Accumulate sum... (of magnitudes, before the sign is restored;
		 * only its low bit is used after the loop) */
		paddw_r2r(mm0, mm4);
		
		/* Restore the sign (zero results are unaffected) */
		pxor_r2r(mm3, mm0);	/* retain original sign of *psrc */
		psubw_r2r(mm3, mm0);
		
		movq_r2m(mm0, dst[0]);
		dst += 4;
	}
	
	/* Mismatch control compute lower bits of sum...
	 * Fold the four 16-bit partial sums into word 0 of mm4:
	 * first add the upper pair of words onto the lower pair, then add
	 * word 1 onto word 0.
	 */
	movq_r2r(mm4, mm5);
	psrlq_i2r(32, mm5);
	paddw_r2r(mm5, mm4);
	movq_r2r(mm4, mm5);
	psrlq_i2r(16, mm5);
	paddw_r2r(mm5, mm4);
	movd_r2g(mm4, sum);	/* low 32 bits of mm4; only bit 0 is kept below */
	
	sum &= 1;		/* sum = parity of the total */
	sum ^= 1;		/* sum = 1 when the total is even, else 0 */
	dst2[63] ^= sum;	/* toggle the LSB of the last coefficient when the total is even */
	emms();
}

/* 
 *  Simply add up the sum of coefficients weighted  
 *  by their quantisation coefficients
 *
 *  Computes the sum over all 64 entries of
 *  abs(src[k]) * i_quant_mat[k] and returns it as a 32-bit value.
 *
 *  src         - 64 coefficients, read eight per loop iteration.
 *  i_quant_mat - 64 signed 16-bit weights, one per coefficient.
 *
 *  The MMX state is released with emms() before returning.
 */
int32_t quant_weight_coeff_sum_mmx(int16_t *src, int16_t *i_quant_mat)
{
	int32_t sum, sum1, sum2;
	int i;
		/*
		 *   MMX Register usage
		 *   mm0, mm2 = two quads of coefficients, then their products
		 *   mm1, mm3 = sign masks for mm0, mm2
		 *   mm6      = accumulator: two 32-bit partial sums
		 */
	pxor_r2r(mm6, mm6); /*  Accumulator */
	
	/* i counts quads in steps of two, i.e. 8 iterations of 8 coefficients */
	for(i=0; i<16; i+=2) /* 16 coefficient / quantiser quads to process... */
	{
		movq_m2r(src[0], mm0);
		movq_m2r(src[4], mm2);
		pxor_r2r(mm1, mm1);
		pxor_r2r(mm3, mm3);
	
		/* 
		 *  Compute absolute value of coefficients...
		 *  (0 > x) gives the sign mask, then (x ^ mask) - mask.
		 */
		pcmpgtw_r2r(mm0, mm1); 	/* (mm0 < 0 ) */
		pcmpgtw_r2r(mm2, mm3); 	/* (mm2 < 0 ) */
		pxor_r2r(mm1, mm0);
		pxor_r2r(mm3, mm2);
		psubw_r2r(mm1, mm0);
		psubw_r2r(mm3, mm2);
		
		/* 
		 *  Multiply each magnitude by its weight and add adjacent
		 *  products pairwise, leaving two 32-bit sums per register.
		 */
		pmaddwd_m2r(i_quant_mat[0], mm0);
		pmaddwd_m2r(i_quant_mat[4], mm2);
		src += 8;
		i_quant_mat += 8;
		paddd_r2r(mm0, mm6);
		paddd_r2r(mm2, mm6);
	}
	/* Add the two 32-bit halves of the accumulator together */
	movd_r2g(mm6, sum1);
	psrlq_i2r(32, mm6);
	movd_r2g(mm6, sum2);
	sum = sum1 + sum2;
	
	emms();
	
	return sum;
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -