⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mblock_bsumsq_mmx.c

📁 Motion JPEG编解码器源代码
💻 C
字号:
/*
 *   bdist2_mmx.s:  MMX optimized bidirectional squared distance sum
 *
 *   Original believed to be Copyright (C) 2000 Brent Byeler
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
 *   as published by the Free Software Foundation; either version 2
 *   of the License, or (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */

#include <config.h>
#include "mjpeg_types.h"
#include "mmx.h"

/*
 * Squared error between a (16*h) block and a bidirectional prediction
 * (MMX version).
 *
 * p2:          address of the top left pel of the block being compared
 * pf,hxf,hyf:  address and half pel flags of the forward reference block
 * pb,hxb,hyb:  address and half pel flags of the backward reference block
 * lx:          distance (in bytes) of vertically adjacent pels in p2, pf, pb
 * h:           height of the block in rows
 *
 * For every pel the code forms
 *
 *     fwd  = (f0 + f1 + f2 + f3 + 2) >> 2
 *     bwd  = (b0 + b1 + b2 + b3 + 2) >> 2
 *     pred = (fwd + bwd + 1) >> 1
 *
 * where the four taps of each reference are the pels at offsets
 * 0, hx, lx*hy and lx*hy + hx from the reference address, and returns the
 * sum of (pred - p2)^2 over the whole block.  When a half pel flag is zero
 * the corresponding taps coincide, so the four-tap average degenerates to
 * the plain pel (or to a two-tap average).
 *
 * Each row is handled as two groups of 8 pels (byte offsets 0 and 8).
 * Note that the taps displaced by hxf/hxb are fetched with the same 8-byte
 * loads, so they read that many bytes beyond the 16-pel row.
 *
 * Returns 0 when h <= 0.  emms() is issued on every path before returning.
 *
 * The mmx.h macros are used with (source, destination) argument order.
 * Register roles inside the loop:
 *     mm7       all zeroes, used to widen bytes to 16-bit words
 *     mm6       rounding constant, 2 in every word (temporarily 1)
 *     mm0/mm1   low/high four words of the forward sum, later of the
 *               combined prediction and of the difference
 *     mm2/mm3   scratch for forward taps, then backward sum, then p2
 *     mm4/mm5   scratch for backward taps
 */
int bsumsq_mmx(uint8_t *pf, uint8_t *pb, uint8_t *p2, int lx, int hxf, int hyf, int hxb, int hyb, int h)
{
    /* Neighbouring taps of the forward reference: right, below, diagonal. */
    uint8_t *fwd_h  = pf + hxf;
    uint8_t *fwd_v  = pf + lx * hyf;
    uint8_t *fwd_hv = fwd_v + hxf;
    /* Neighbouring taps of the backward reference: right, below, diagonal. */
    uint8_t *bwd_h  = pb + hxb;
    uint8_t *bwd_v  = pb + lx * hyb;
    uint8_t *bwd_hv = bwd_v + hxb;

    int total = 0;      /* accumulated sum of squared differences */
    int sq_lo, sq_hi;   /* the two 32-bit partial sums pulled out of mm0 */
    int col;            /* byte offset of the current 8-pel group: 0 or 8 */

    if (h <= 0) {
        emms();
        return total;
    }

    /* mm7 = 0 */
    pxor_r2r(mm7, mm7);
    /* mm6 = 0 - (-1) = 1 in every word, then doubled to 2 */
    pxor_r2r(mm6, mm6);
    pcmpeqw_r2r(mm5, mm5);
    psubw_r2r(mm5, mm6);
    psllw_i2r(1, mm6);

    for (; h > 0; h--) {
        for (col = 0; col < 16; col += 8) {
            /* ---- forward prediction: sum of the four taps ---- */
            movq_m2r(pf[col], mm0);
            movq_r2r(mm0, mm1);
            punpcklbw_r2r(mm7, mm0);
            punpckhbw_r2r(mm7, mm1);

            movq_m2r(fwd_h[col], mm2);
            movq_r2r(mm2, mm3);
            punpcklbw_r2r(mm7, mm2);
            punpckhbw_r2r(mm7, mm3);
            paddw_r2r(mm2, mm0);
            paddw_r2r(mm3, mm1);

            movq_m2r(fwd_v[col], mm2);
            movq_r2r(mm2, mm3);
            punpcklbw_r2r(mm7, mm2);
            punpckhbw_r2r(mm7, mm3);
            paddw_r2r(mm2, mm0);
            paddw_r2r(mm3, mm1);

            movq_m2r(fwd_hv[col], mm2);
            movq_r2r(mm2, mm3);
            punpcklbw_r2r(mm7, mm2);
            punpckhbw_r2r(mm7, mm3);
            paddw_r2r(mm2, mm0);
            paddw_r2r(mm3, mm1);

            /* (sum + 2) >> 2 */
            paddw_r2r(mm6, mm0);
            paddw_r2r(mm6, mm1);
            psrlw_i2r(2, mm0);
            psrlw_i2r(2, mm1);

            /* ---- backward prediction: sum of the four taps ---- */
            movq_m2r(pb[col], mm2);
            movq_r2r(mm2, mm3);
            punpcklbw_r2r(mm7, mm2);
            punpckhbw_r2r(mm7, mm3);

            movq_m2r(bwd_h[col], mm4);
            movq_r2r(mm4, mm5);
            punpcklbw_r2r(mm7, mm4);
            punpckhbw_r2r(mm7, mm5);
            paddw_r2r(mm4, mm2);
            paddw_r2r(mm5, mm3);

            movq_m2r(bwd_v[col], mm4);
            movq_r2r(mm4, mm5);
            punpcklbw_r2r(mm7, mm4);
            punpckhbw_r2r(mm7, mm5);
            paddw_r2r(mm4, mm2);
            paddw_r2r(mm5, mm3);

            movq_m2r(bwd_hv[col], mm4);
            movq_r2r(mm4, mm5);
            punpcklbw_r2r(mm7, mm4);
            punpckhbw_r2r(mm7, mm5);
            paddw_r2r(mm4, mm2);
            paddw_r2r(mm5, mm3);

            /* (sum + 2) >> 2 */
            paddw_r2r(mm6, mm2);
            paddw_r2r(mm6, mm3);
            psrlw_i2r(2, mm2);
            psrlw_i2r(2, mm3);

            /* ---- combine: (fwd + bwd + 1) >> 1 ---- */
            paddw_r2r(mm2, mm0);
            paddw_r2r(mm3, mm1);
            /* halve the rounding constant to 1 for this step, then restore 2 */
            psrlw_i2r(1, mm6);
            paddw_r2r(mm6, mm0);
            paddw_r2r(mm6, mm1);
            psllw_i2r(1, mm6);
            psrlw_i2r(1, mm0);
            psrlw_i2r(1, mm1);

            /* ---- difference against the block, squared and summed ---- */
            movq_m2r(p2[col], mm2);
            movq_r2r(mm2, mm3);
            punpcklbw_r2r(mm7, mm2);
            punpckhbw_r2r(mm7, mm3);

            psubw_r2r(mm2, mm0);
            psubw_r2r(mm3, mm1);
            /* multiplying a register by itself squares each word and adds
             * adjacent pairs, leaving two 32-bit sums per register */
            pmaddwd_r2r(mm0, mm0);
            pmaddwd_r2r(mm1, mm1);
            paddd_r2r(mm1, mm0);

            /* pull out the low dword, then shift the high dword down */
            movd_r2g(mm0, sq_lo);
            psrlq_i2r(32, mm0);
            movd_r2g(mm0, sq_hi);
            total += sq_lo + sq_hi;
        }

        /* step every pointer down to the next row */
        p2     += lx;
        pf     += lx;
        fwd_h  += lx;
        fwd_v  += lx;
        fwd_hv += lx;
        pb     += lx;
        bwd_h  += lx;
        bwd_v  += lx;
        bwd_hv += lx;
    }

    emms();

    return total;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -