⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sad_altivec.c

📁 xvidcore,XVID解码编码器
💻 C
字号:
/*    Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org>    This program is free software; you can redistribute it and/or modify    it under the terms of the GNU General Public License as published by    the Free Software Foundation; either version 2 of the License, or    (at your option) any later version.    This program is distributed in the hope that it will be useful,    but WITHOUT ANY WARRANTY; without even the implied warranty of    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the    GNU General Public License for more details.    You should have received a copy of the GNU General Public License    along with this program; if not, write to the Free Software    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA    $Id$    $Source$    $Date$    $Author$*/#define G_REG#ifdef G_REGregister vector unsigned char perm0 asm("%v29");register vector unsigned char perm1 asm("%v30");register vector unsigned int zerovec asm("%v31");#endif#include <stdio.h>#undef DEBUGstatic const vector unsigned char perms[2] = {	(vector unsigned char) (	/* Used when cur is aligned */							   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,							   0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17),	(vector unsigned char) (	/* Used when cur is unaligned */							   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,							   0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f),};#ifdef G_REGvoidsadInit_altivec(void){	perm0 = perms[0];	perm1 = perms[1];	zerovec = (vector unsigned int) (0);}static inline const vector unsigned charget_perm(unsigned long i){	return i ? perm1 : perm0;}#define ZERODEF#define ZEROVEC zerovec#elsevoidsadInit_altivec(void){}static inline const vector unsigned charget_perm(unsigned long i){	return perms[i];}#define ZERODEF vector unsigned int zerovec = (vector unsigned int)(0)#define ZEROVEC zerovec#endif#define SAD16() \t1  = vec_perm(ref[0], ref[1], perm);  /* align current vector  */ \t2  = vec_max(t1, *cur);      	 /* find largest of two           */ \t3  = vec_min(t1, *cur); 	         /* find smaller of two           */ \t4  = vec_sub(t2, t3);                   /* find absolute difference      */ \sad = vec_sum4s(t4, sad);                /* accumulate sum of differences */ \cur += stride; ref += stride;/* * This function assumes cur and stride are 16 bytes aligned and ref is unaligned */unsigned longsad16_altivec(const vector unsigned char *cur,			  const vector unsigned char *ref,			  unsigned long stride,			  const unsigned long best_sad){	vector unsigned char perm;	vector unsigned char t1, t2, t3, t4;	vector unsigned int sad;	vector signed int sumdiffs, best_vec;	unsigned long result;	ZERODEF;#ifdef DEBUG	if (((unsigned long) cur) & 0xf)		fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur);//  if (((unsigned long)ref) & 0xf)//      fprintf(stderr, "sad16_altivec:incorrect align, ref: %x\n", ref);	if (stride & 0xf)		fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride);#endif	/* initialization */	sad = (vector unsigned int) (ZEROVEC);	stride >>= 4;	perm = vec_lvsl(0, (unsigned char *) ref);	*((unsigned long *) &best_vec) = best_sad;	best_vec = vec_splat(best_vec, 0);	/* perform sum of differences between current and previous */	SAD16();	SAD16();	SAD16();	SAD16();	/* Temp sum for exit */	sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);	if (vec_all_ge(sumdiffs, best_vec))		goto bail;	SAD16();	SAD16();	SAD16();	SAD16();	sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);	if (vec_all_ge(sumdiffs, best_vec))		goto bail;	SAD16();	SAD16();	SAD16();	SAD16();	SAD16();	SAD16();	SAD16();	SAD16();	/* sum all parts of difference into one 32 bit quantity */	sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);  bail:	/* copy vector sum into unaligned result */	sumdiffs = vec_splat(sumdiffs, 3);	vec_ste(sumdiffs, 0, (int *) &result);	return (result);}#define SAD8() \t1  = vec_perm(cur[0], cur[stride], perm_cur);  /* align current vector  */ \t2  = vec_perm(ref[0], ref[1], perm_ref1);  /* align current vector  */ \tp  = vec_perm(ref[stride], ref[stride+1], perm_ref1);  /* align current vector  */ \t2  = vec_perm(t2,tp,perm_ref2); \t3  = vec_max(t1, t2);  	        /* find largest of two           */ \t4  = vec_min(t1, t2);	        	 /* find smaller of two           */ \t5  = vec_sub(t3, t4);                   /* find absolute difference      */ \sad = vec_sum4s(t5, sad);                /* accumulate sum of differences */ \cur += stride<<1; ref += stride<<1;/* * This function assumes cur is 8 bytes aligned, stride is 16 bytes * aligned and ref is unaligned */unsigned longsad8_altivec(const vector unsigned char *cur,			 const vector unsigned char *ref,			 unsigned long stride){	vector unsigned char t1, t2, t3, t4, t5, tp;	vector unsigned int sad;	vector signed int sumdiffs;	vector unsigned char perm_cur;	vector unsigned char perm_ref1, perm_ref2;	unsigned long result;	ZERODEF;#ifdef DEBUG	if (((unsigned long) cur) & 0x7)		fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur);//  if (((unsigned long)ref) & 0x7)//      fprintf(stderr, "sad8_altivec:incorrect align, ref: %x\n", ref);	if (stride & 0xf)		fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride);#endif	perm_cur = get_perm((((unsigned long) cur) >> 3) & 0x01);	perm_ref1 = vec_lvsl(0, (unsigned char *) ref);	perm_ref2 = get_perm(0);	/* initialization */	sad = (vector unsigned int) (ZEROVEC);	stride >>= 4;	/* perform sum of differences between current and previous */	SAD8();	SAD8();	SAD8();	SAD8();	/* sum all parts of difference into one 32 bit quantity */	sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);	/* copy vector sum into unaligned result */	sumdiffs = vec_splat(sumdiffs, 3);	vec_ste(sumdiffs, 0, (int *) &result);	return (result);}#define MEAN16(i)\c##i=*cur;\mean = vec_sum4s(c##i,mean);\cur += stride;#define DEV16(i) \t2  = vec_max(c##i, mn);  	        /* find largest of two           */ \t3  = vec_min(c##i, mn);	        	 /* find smaller of two           */ \t4  = vec_sub(t2, t3);                   /* find absolute difference      */ \dev = vec_sum4s(t4, dev);unsigned longdev16_altivec(const vector unsigned char *cur,			  unsigned long stride){	vector unsigned char t2, t3, t4, mn;	vector unsigned int mean, dev;	vector signed int sumdiffs;	vector unsigned char c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12,		c13, c14, c15;	unsigned long result;	ZERODEF;	mean = (vector unsigned int) (ZEROVEC);	dev = (vector unsigned int) (ZEROVEC);	stride >>= 4;	MEAN16(0);	MEAN16(1);	MEAN16(2);	MEAN16(3);	MEAN16(4);	MEAN16(5);	MEAN16(6);	MEAN16(7);	MEAN16(8);	MEAN16(9);	MEAN16(10);	MEAN16(11);	MEAN16(12);	MEAN16(13);	MEAN16(14);	MEAN16(15);	sumdiffs = vec_sums((vector signed int) mean, (vector signed int) ZEROVEC);	mn = vec_perm((vector unsigned char) sumdiffs,				  (vector unsigned char) sumdiffs, (vector unsigned char) (14,																		   14,																		   14,																		   14,																		   14,																		   14,																		   14,																		   14,																		   14,																		   14,																		   14,																		   14,																		   14,																		   14,																		   14,																		   14));	DEV16(0);	DEV16(1);	DEV16(2);	DEV16(3);	DEV16(4);	DEV16(5);	DEV16(6);	DEV16(7);	DEV16(8);	DEV16(9);	DEV16(10);	DEV16(11);	DEV16(12);	DEV16(13);	DEV16(14);	DEV16(15);	/* sum all parts of difference into one 32 bit quantity */	sumdiffs = vec_sums((vector signed int) dev, (vector signed int) ZEROVEC);	/* copy vector sum into unaligned result */	sumdiffs = vec_splat(sumdiffs, 3);	vec_ste(sumdiffs, 0, (int *) &result);	return (result);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -