⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sad.c

📁 用MPEG-4对YUV视频文件编码压缩成divx视频文件
💻 C
字号:
/**************************************************************************
 *
 *	XVID MPEG-4 VIDEO CODEC
 *	sum of absolute difference
 *
 *	This program is an implementation of a part of one or more MPEG-4
 *	Video tools as specified in ISO/IEC 14496-2 standard.  Those intending
 *	to use this software module in hardware or software products are
 *	advised that its use may infringe existing patents or copyrights, and
 *	any such use would be at such party's own risk.  The original
 *	developer of this software module and his/her company, and subsequent
 *	editors and their companies, will have no liability for use of this
 *	software or modifications or derivatives thereof.
 *
 *	This program is free software; you can redistribute it and/or modify
 *	it under the terms of the GNU General Public License as published by
 *	the Free Software Foundation; either version 2 of the License, or
 *	(at your option) any later version.
 *
 *	This program is distributed in the hope that it will be useful,
 *	but WITHOUT ANY WARRANTY; without even the implied warranty of
 *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *	GNU General Public License for more details.
 *
 *	You should have received a copy of the GNU General Public License
 *	along with this program; if not, write to the Free Software
 *	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *************************************************************************/

/**************************************************************************
 *
 *	History:
 *
 *	14.02.2002	added sad16bi_c()
 *	10.11.2001	initial version; (c)2001 peter ross <pross@cs.rmit.edu.au>
 *
 *************************************************************************/


#include "../portab.h"
#include "sad.h"

/*
 * Global function pointers for the 16x16 SAD, 8x8 SAD and 16x16 deviation
 * routines.  Their types are declared in sad.h, and they are not assigned
 * anywhere in the visible part of this file.
 * NOTE(review): presumably set during codec initialisation to sad16_c /
 * sad8_c / dev16_c or to a platform-optimised variant -- confirm.
 */
sad16FuncPtr sad16;
sad8FuncPtr sad8;
dev16FuncPtr dev16;

#ifndef _TRIMEDIA 
/*
 * Sum of absolute differences between two 16x16 pixel blocks.
 *
 *   cur      - top-left pixel of the current block
 *   ref      - top-left pixel of the reference block
 *   stride   - distance in bytes between the starts of consecutive rows
 *   best_sad - threshold; once the running sum reaches it the function
 *              stops and returns the partial sum
 *
 * The threshold is tested once per completed row, not per pixel, so the
 * returned partial sum always covers a whole number of rows.
 */
uint32_t
sad16_c(const uint8_t * const cur,
		const uint8_t * const ref,
		const uint32_t stride,
		const uint32_t best_sad)
{
	const uint8_t *cur_row = cur;
	const uint8_t *ref_row = ref;
	uint32_t total = 0;
	uint32_t row;

	for (row = 0; row < 16; row++) {
		uint32_t col;

		for (col = 0; col < 16; col++) {
			/* uint8_t operands promote to int, so the difference may be negative */
			const int diff = cur_row[col] - ref_row[col];

			total += abs(diff);
		}

		/* early out: this candidate can no longer beat the threshold */
		if (total >= best_sad) {
			return total;
		}

		cur_row += stride;
		ref_row += stride;
	}

	return total;
}

#else
/*
 * TriMedia build of sad16_c: sum of absolute differences between two
 * 16x16 blocks, handled one 32-bit word (four pixels) at a time.
 *
 *   cur      - current block; read through a uint32_t pointer
 *   ref      - reference block; the low two bits of its address select
 *              one of four code paths below
 *   stride   - distance between rows in bytes (converted to words by >>2)
 *   best_sad - never read in this version, so there is no early exit
 *
 * Returns the sum accumulated over all 16 rows.
 *
 * NOTE(review): UME8UU / ume8uu and FUNSHIFT1/2/3 are not defined in this
 * file; presumably they are TriMedia custom operations (sum of absolute
 * differences of four unsigned bytes, and funnel shifts by 1/2/3 bytes)
 * -- confirm against the TriMedia documentation.
 * NOTE(review): the aligned path spells the operation UME8UU while the
 * unaligned paths spell it ume8uu -- confirm that both names resolve.
 * NOTE(review): only ref's alignment is examined; cur is presumably
 * expected to be word aligned -- confirm with callers.
 */
uint32_t
sad16_c(const uint8_t * const cur,
		const uint8_t * const ref,
		const uint32_t stride,
		const uint32_t best_sad)
{

	uint32_t sad = 0;
	uint32_t sad0,sad1,sad2,sad3;
	uint32_t j;
	uint32_t src0,src1,src2,src3,src4,dest0,dest1,dest2,dest3;
/*
	disabled variant of the two declarations below using restrict:
	uint32_t const *restrict ptr_cur = (uint32_t const *)cur;
	uint32_t const *restrict ptr_ref = (uint32_t const *)ref;
*/
	/* view both byte buffers as sequences of 32-bit words */
	uint32_t const * ptr_cur = (uint32_t const *)cur;
	uint32_t const * ptr_ref = (uint32_t const *)ref;

	uint32_t temp;
	
	/*
	 * Row stride in words, minus the four words each loop iteration has
	 * already stepped over, so adding it lands on the start of the next row.
	 */
	uint32_t stride_int=(stride>>2)-4;
	
	/* low two address bits of ref = its byte offset within a 32-bit word */
	temp = ((uint32_t)ref)&3;
	
	if(!temp)    /* ref is word aligned */
	{
		for (j=0; j < 16; j++) {
			/* four words per row, each compared against the matching ref word */
			sad0 = UME8UU(*ptr_cur++,*ptr_ref++);
			sad1 = UME8UU(*ptr_cur++,*ptr_ref++);
			sad2 = UME8UU(*ptr_cur++,*ptr_ref++);
			sad3 = UME8UU(*ptr_cur++,*ptr_ref++);
						
			sad0 += sad1;
			sad2 += sad3;
			sad += sad0 + sad2;
                        
			ptr_cur += stride_int;
			ptr_ref += stride_int;

		}
	}
	else if(temp==1)
	{
		/*
		 * ref is one byte past a word boundary.
		 * NOTE(review): ptr_ref is dereferenced although it is not word
		 * aligned here; presumably the target ignores the low address bits
		 * on word loads -- confirm.
		 */
		for(j =0; j < 16; j++)
		{
		
			/* fetch five consecutive words; ptr_ref ends up advanced by four */
			src0 = *ptr_ref++ ;
			src1 = *ptr_ref++;
			src2 = *ptr_ref++;
			src3 = *ptr_ref++;
			src4 = *ptr_ref;
			
			/* combine each adjacent pair of words into one of four words */
			dest0 = FUNSHIFT3(src1,src0);
			dest1 = FUNSHIFT3(src2,src1);
			dest2 = FUNSHIFT3(src3,src2);
			dest3 = FUNSHIFT3(src4,src3);

			/* compare the four cur words against the combined ref words */
			sad0 = ume8uu(*ptr_cur++,dest0);
			sad1 = ume8uu(*ptr_cur++,dest1);
			sad2 = ume8uu(*ptr_cur++,dest2);
			sad3 = ume8uu(*ptr_cur++,dest3);
			
			sad0 += sad1;
			sad2 += sad3;
			sad += sad0 + sad2;

			/* step both pointers to the next row */
			ptr_cur += stride_int;
			ptr_ref += stride_int;
		}
	}
	else if(temp==2)
	{
		/* ref is two bytes past a word boundary; same scheme with FUNSHIFT2 */
		for(j =0; j < 16; j++)
		{
		
			/* fetch five consecutive words; ptr_ref ends up advanced by four */
			src0 = *ptr_ref++ ;
			src1 = *ptr_ref++;
			src2 = *ptr_ref++;
			src3 = *ptr_ref++;
			src4 = *ptr_ref;
			
			/* combine each adjacent pair of words into one of four words */
			dest0 = FUNSHIFT2(src1,src0);
			dest1 = FUNSHIFT2(src2,src1);
			dest2 = FUNSHIFT2(src3,src2);
			dest3 = FUNSHIFT2(src4,src3);

			/* compare the four cur words against the combined ref words */
			sad0 = ume8uu(*ptr_cur++,dest0);
			sad1 = ume8uu(*ptr_cur++,dest1);
			sad2 = ume8uu(*ptr_cur++,dest2);
			sad3 = ume8uu(*ptr_cur++,dest3);
			
			sad0 += sad1;
			sad2 += sad3;
			sad += sad0 + sad2;

			/* step both pointers to the next row */
			ptr_cur += stride_int;
			ptr_ref += stride_int;
		}
	}
	else if(temp==3)
	{
		/* ref is three bytes past a word boundary; same scheme with FUNSHIFT1 */
		for(j =0; j < 16; j++)
		{
		
			/* fetch five consecutive words; ptr_ref ends up advanced by four */
			src0 = *ptr_ref++ ;
			src1 = *ptr_ref++;
			src2 = *ptr_ref++;
			src3 = *ptr_ref++;
			src4 = *ptr_ref;
			
			/* combine each adjacent pair of words into one of four words */
			dest0 = FUNSHIFT1(src1,src0);
			dest1 = FUNSHIFT1(src2,src1);
			dest2 = FUNSHIFT1(src3,src2);
			dest3 = FUNSHIFT1(src4,src3);

			/* compare the four cur words against the combined ref words */
			sad0 = ume8uu(*ptr_cur++,dest0);
			sad1 = ume8uu(*ptr_cur++,dest1);
			sad2 = ume8uu(*ptr_cur++,dest2);
			sad3 = ume8uu(*ptr_cur++,dest3);
			
			sad0 += sad1;
			sad2 += sad3;
			sad += sad0 + sad2;

			/* step both pointers to the next row */
			ptr_cur += stride_int;
			ptr_ref += stride_int;
		}
	}
  
    return sad; 
}
#endif

/*
   Sum of absolute differences between two 8x8 blocks.
*/
#ifndef _TRIMEDIA 
/*
 * Sum of absolute differences between two 8x8 pixel blocks.
 *
 *   cur    - top-left pixel of the current block
 *   ref    - top-left pixel of the reference block
 *   stride - distance in bytes between the starts of consecutive rows
 *
 * Always scans the full block; there is no early-exit threshold.
 */
uint32_t
sad8_c(const uint8_t * const cur,
	   const uint8_t * const ref,
	   const uint32_t stride)
{
	const uint8_t *cur_row = cur;
	const uint8_t *ref_row = ref;
	uint32_t total = 0;
	uint32_t row;

	for (row = 0; row < 8; row++) {
		uint32_t col;

		for (col = 0; col < 8; col++) {
			/* uint8_t operands promote to int, so the difference may be negative */
			const int diff = cur_row[col] - ref_row[col];

			total += abs(diff);
		}

		cur_row += stride;
		ref_row += stride;
	}

	return total;
}
#else
/*
 * TriMedia build of sad8_c: sum of absolute differences between two 8x8
 * blocks, handled one 32-bit word (four pixels) at a time.
 *
 *   cur    - current block; read through a uint32_t pointer
 *   ref    - reference block; the low two bits of its address select one
 *            of four code paths below
 *   stride - distance between rows in bytes (converted to words by >>2)
 *
 * Returns the sum accumulated over all 8 rows.
 *
 * NOTE(review): UME8UU / ume8uu and FUNSHIFT1/2/3 are not defined in this
 * file; presumably they are TriMedia custom operations (sum of absolute
 * differences of four unsigned bytes, and funnel shifts by 1/2/3 bytes)
 * -- confirm against the TriMedia documentation.
 * NOTE(review): the aligned path spells the operation UME8UU while the
 * unaligned paths spell it ume8uu -- confirm that both names resolve.
 * NOTE(review): only ref's alignment is examined; cur is presumably
 * expected to be word aligned -- confirm with callers.
 */
uint32_t
sad8_c(const uint8_t * const cur,
	   const uint8_t * const ref,
	   const uint32_t stride)
{

	uint32_t sad = 0;
	uint32_t sad0,sad1;
	uint32_t j;
	uint32_t src0,src1,src2,dest0,dest1;
	/*
	disabled variant of the two declarations below using restrict:
	uint32_t const *restrict ptr_cur = (uint32_t const *)cur;
	uint32_t const *restrict ptr_ref = (uint32_t const *)ref;
	*/
	/* view both byte buffers as sequences of 32-bit words */
	uint32_t const * ptr_cur = (uint32_t const *)cur;
	uint32_t const * ptr_ref = (uint32_t const *)ref;

	uint32_t temp;
	
	/*
	 * Row stride in words, minus the two words each loop iteration has
	 * already stepped over, so adding it lands on the start of the next row.
	 */
	uint32_t stride_int=(stride>>2)-2;
	
	/* low two address bits of ref = its byte offset within a 32-bit word */
	temp = ((uint32_t)ref)&3;
	
	if(!temp)    /* ref is word aligned */
	{
		for (j=0; j < 8; j++) {
			/* two words per row, each compared against the matching ref word */
			sad0 = UME8UU(*ptr_cur++,*ptr_ref++);
			sad1 = UME8UU(*ptr_cur++,*ptr_ref++);
						
			sad0 += sad1;
			sad  += sad0;
                        
			ptr_cur += stride_int;
			ptr_ref += stride_int;

		}
	}
	else if(temp==1)
	{
		/*
		 * ref is one byte past a word boundary.
		 * NOTE(review): ptr_ref is dereferenced although it is not word
		 * aligned here; presumably the target ignores the low address bits
		 * on word loads -- confirm.
		 */
		for(j =0; j < 8; j++)
		{
		
			/* fetch three consecutive words; ptr_ref ends up advanced by two */
			src0 = *ptr_ref++ ;
			src1 = *ptr_ref++;
			src2 = *ptr_ref;
			
			/* combine each adjacent pair of words into one of two words */
			dest0 = FUNSHIFT3(src1,src0);
			dest1 = FUNSHIFT3(src2,src1);

			/* compare the two cur words against the combined ref words */
			sad0 = ume8uu(*ptr_cur++,dest0);
			sad1 = ume8uu(*ptr_cur++,dest1);
			
			sad0 += sad1;
			sad  += sad0;

			/* step both pointers to the next row */
			ptr_cur += stride_int;
			ptr_ref += stride_int;
		}
	}
	else if(temp==2)
	{
		/* ref is two bytes past a word boundary; same scheme with FUNSHIFT2 */
		for(j =0; j < 8; j++)
		{
		
			/* fetch three consecutive words; ptr_ref ends up advanced by two */
			src0 = *ptr_ref++ ;
			src1 = *ptr_ref++;
			src2 = *ptr_ref;
			
			/* combine each adjacent pair of words into one of two words */
			dest0 = FUNSHIFT2(src1,src0);
			dest1 = FUNSHIFT2(src2,src1);

			/* compare the two cur words against the combined ref words */
			sad0 = ume8uu(*ptr_cur++,dest0);
			sad1 = ume8uu(*ptr_cur++,dest1);
			
			sad0 += sad1;
			sad  += sad0;

			/* step both pointers to the next row */
			ptr_cur += stride_int;
			ptr_ref += stride_int;
		}
	}
	else if(temp==3)
	{
		/* ref is three bytes past a word boundary; same scheme with FUNSHIFT1 */
		for(j =0; j < 8; j++)
		{
		
			/* fetch three consecutive words; ptr_ref ends up advanced by two */
			src0 = *ptr_ref++ ;
			src1 = *ptr_ref++;
			src2 = *ptr_ref;
			
			/* combine each adjacent pair of words into one of two words */
			dest0 = FUNSHIFT1(src1,src0);
			dest1 = FUNSHIFT1(src2,src1);

			/* compare the two cur words against the combined ref words */
			sad0 = ume8uu(*ptr_cur++,dest0);
			sad1 = ume8uu(*ptr_cur++,dest1);
			
			sad0 += sad1;
			sad  += sad0;

			/* step both pointers to the next row */
			ptr_cur += stride_int;
			ptr_ref += stride_int;
		}
	}
  
    return sad; 
}
#endif



/* sum of absolute deviations from the block mean (not divided by the pixel count) */
#ifndef _TRIMEDIA 
/*
 * Total absolute deviation of a 16x16 block from its own mean.
 *
 *   cur    - top-left pixel of the block
 *   stride - distance in bytes between the starts of consecutive rows
 *
 * Pass 1 sums all 256 pixels and derives the integer (truncated) mean.
 * Pass 2 accumulates |pixel - mean| over the same pixels.  The result is
 * the raw sum; it is not divided by the number of pixels.
 */
uint32_t
dev16_c(const uint8_t * const cur,
		const uint32_t stride)
{
	const uint8_t *row;
	uint32_t pixel_sum = 0;
	uint32_t deviation = 0;
	uint32_t x, y;
	int32_t average;

	/* pass 1: sum of all pixels */
	row = cur;
	for (y = 0; y < 16; y++) {
		for (x = 0; x < 16; x++) {
			pixel_sum += row[x];
		}
		row += stride;
	}

	/* unsigned division, truncating toward zero */
	average = (int32_t) (pixel_sum / (16 * 16));

	/* pass 2: accumulate the distance of every pixel from the mean */
	row = cur;
	for (y = 0; y < 16; y++) {
		for (x = 0; x < 16; x++) {
			deviation += abs(row[x] - average);
		}
		row += stride;
	}

	return deviation;
}
#else
/*
 * TriMedia build of dev16_c: sum of absolute deviations of a 16x16 block
 * from its own mean, handled one 32-bit word (four pixels) at a time.
 *
 *   cur    - block to analyse; read through a uint32_t pointer
 *   stride - distance between rows in bytes (converted to words by >>2)
 *
 * Returns the accumulated deviation; it is not divided by the pixel count.
 *
 * NOTE(review): UFIR8UU, UME8UU, FUNSHIFT1 and FUNSHIFT2 are not defined
 * in this file; presumably they are TriMedia custom operations -- confirm
 * against the TriMedia documentation.
 * NOTE(review): cur's alignment is not checked; presumably it is expected
 * to be word aligned -- confirm with callers.
 */
uint32_t
dev16_c(const uint8_t * const cur,
		const uint32_t stride)
{
	uint32_t mean = 0;
	uint32_t dev = 0;
	uint32_t j;
	uint32_t stride1;
	uint32_t const *ptr_cur = (uint32_t const *)cur;
	uint32_t src0,src1,src2,src3;
	
	/*
	 * Row stride in words, minus the four words each loop iteration has
	 * already stepped over, so adding it lands on the start of the next row.
	 */
	stride1 = (stride>>2)-4;
	
	#pragma TCS_unroll=4
	for (j = 0; j < 16; j++) {
		
           /*
            * one word per call, paired with the constant 0x01010101
            * NOTE(review): presumably this yields the sum of the word's
            * four bytes (each byte weighted by 1) -- confirm.
            */
		    src0   = UFIR8UU(*ptr_cur++,0x01010101);
		    src1   = UFIR8UU(*ptr_cur++,0x01010101);
		    src2   = UFIR8UU(*ptr_cur++,0x01010101);
		    src3   = UFIR8UU(*ptr_cur++,0x01010101);
		    
			/* add the four per-word results into the running total */
			src0  += src1 ;
		    src2  += src3 ;
            mean  += src0+src2;
		    
			/* step to the start of the next row */
			ptr_cur += stride1;
                    
	}           
                    
	/* divide by 16 * 16 with a shift: mean /= (16 * 16); */
	mean>>=8; 

	/* 
	  construct a word from shifted copies of mean, to be compared
	  against whole pixel words below
	  NOTE(review): presumably the result holds mean in each of its four
	  bytes, relying on mean fitting in one byte -- confirm.
	*/
	src0 = FUNSHIFT1(mean,0);
	src0 += mean;
	src1 = FUNSHIFT2(src0,0);
	mean = src1 + src0; 
	
	/* second pass restarts at the top of the block */
	ptr_cur = (uint32_t const *)cur;

	#pragma TCS_unroll=4                   
	for (j = 0; j < 16; j++) {

        /* one word per call, compared against the replicated mean word */
		src0 = UME8UU(*ptr_cur++,mean);
		src1 = UME8UU(*ptr_cur++,mean);
		src2 = UME8UU(*ptr_cur++,mean);
		src3 = UME8UU(*ptr_cur++,mean); 

		/* add the four per-word results into the running total */
		src0  += src1 ;
        src2  += src3 ;
        dev  += src0+src2;

		/* step to the start of the next row */
		ptr_cur += stride1;
	}

	return dev;
}
#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -