📄 sad.c
字号:
/**************************************************************************
*
* XVID MPEG-4 VIDEO CODEC
* sum of absolute difference
*
* This program is an implementation of a part of one or more MPEG-4
* Video tools as specified in ISO/IEC 14496-2 standard. Those intending
* to use this software module in hardware or software products are
* advised that its use may infringe existing patents or copyrights, and
* any such use would be at such party's own risk. The original
* developer of this software module and his/her company, and subsequent
* editors and their companies, will have no liability for use of this
* software or modifications or derivatives thereof.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
*************************************************************************/
/**************************************************************************
*
* History:
*
* 14.02.2002 added sad16bi_c()
* 10.11.2001 initial version; (c)2001 peter ross <pross@cs.rmit.edu.au>
*
*************************************************************************/
#include "../portab.h"
#include "sad.h"
/*
 * Global dispatch pointers for the 16x16 SAD, 8x8 SAD and 16x16 deviation
 * routines.  Their types come from sad.h.  Nothing in this file assigns
 * them; presumably the codec's initialisation code points them at the *_c
 * functions below or at platform-specific variants -- confirm at the
 * call site.
 */
sad16FuncPtr sad16;
sad8FuncPtr sad8;
dev16FuncPtr dev16;
#ifndef _TRIMEDIA
/*
 * Sum of absolute differences (SAD) between two 16x16 pixel blocks.
 *
 * cur      - top-left pixel of the current block
 * ref      - top-left pixel of the reference block
 * stride   - distance in bytes between the start of consecutive rows,
 *            applied to both blocks
 * best_sad - early-exit threshold: once the running total reaches this
 *            value at the end of a row, the partial total is returned
 *
 * Returns the full SAD, or the partial SAD accumulated up to and
 * including the row at which the threshold was reached.
 */
uint32_t
sad16_c(const uint8_t * const cur,
		const uint8_t * const ref,
		const uint32_t stride,
		const uint32_t best_sad)
{
	const uint8_t *row_cur = cur;
	const uint8_t *row_ref = ref;
	uint32_t total = 0;
	uint32_t rows_left = 16;

	while (rows_left-- != 0) {
		uint32_t col;

		for (col = 0; col < 16; col++) {
			total += abs(row_cur[col] - row_ref[col]);
		}

		/* The threshold is tested once per row, not once per pixel. */
		if (total >= best_sad) {
			return total;
		}

		row_cur += stride;
		row_ref += stride;
	}

	return total;
}
#else
/*
 * TriMedia build of sad16_c: accumulates a 16x16 result one 32-bit word
 * (four pixels) at a time using the UME8UU / ume8uu custom operation.
 *
 * cur      - top-left pixel of the current block; read through a
 *            uint32_t pointer with no alignment check
 * ref      - top-left pixel of the reference block; may be unaligned,
 *            its low two address bits select one of four code paths
 * stride   - row pitch in bytes; converted to words with >>2, so any
 *            remainder modulo 4 is discarded
 * best_sad - not used in this version: there is no early exit, all 16
 *            rows are always processed
 *
 * Returns the accumulated result over all 16 rows.
 *
 * NOTE(review): UME8UU, ume8uu and FUNSHIFT1/2/3 are not defined in this
 * file.  UME8UU is presumably the "sum of absolute differences of four
 * unsigned bytes" operation and FUNSHIFTn a funnel shift -- confirm
 * against the TriMedia custom-op reference.  The aligned path spells the
 * operation UME8UU while the unaligned paths spell it ume8uu; confirm
 * that both spellings are available.
 */
uint32_t
sad16_c(const uint8_t * const cur,
const uint8_t * const ref,
const uint32_t stride,
const uint32_t best_sad)
{
uint32_t sad = 0;
uint32_t sad0,sad1,sad2,sad3;
uint32_t j;
uint32_t src0,src1,src2,src3,src4,dest0,dest1,dest2,dest3;
/* disabled restrict-qualified variant of the two pointers below: */
/*
uint32_t const *restrict ptr_cur = (uint32_t const *)cur;
uint32_t const *restrict ptr_ref = (uint32_t const *)ref;
*/
uint32_t const * ptr_cur = (uint32_t const *)cur;
uint32_t const * ptr_ref = (uint32_t const *)ref;
uint32_t temp;
/* row pitch in words, minus the 4 words each row already advances via ++ */
uint32_t stride_int=(stride>>2)-4;
/* byte offset (0..3) of ref from a 4-byte boundary; only the low two bits are kept */
temp = ((uint32_t)ref)&3;
if(!temp) /* ref is word aligned */
{
for (j=0; j < 16; j++) {
/* four word pairs cover one 16-pixel row */
sad0 = UME8UU(*ptr_cur++,*ptr_ref++);
sad1 = UME8UU(*ptr_cur++,*ptr_ref++);
sad2 = UME8UU(*ptr_cur++,*ptr_ref++);
sad3 = UME8UU(*ptr_cur++,*ptr_ref++);
sad0 += sad1;
sad2 += sad3;
sad += sad0 + sad2;
/* step to the start of the next row */
ptr_cur += stride_int;
ptr_ref += stride_int;
}
}
else if(temp==1)
{
/*
 * ref is 1 byte past a word boundary.  ptr_ref still holds the unaligned
 * address; NOTE(review): this appears to rely on how the target handles
 * word loads from an unaligned address -- confirm.
 */
for(j =0; j < 16; j++)
{
/* read five consecutive words; ptr_ref advances by four (src4 is read without ++) */
src0 = *ptr_ref++ ;
src1 = *ptr_ref++;
src2 = *ptr_ref++;
src3 = *ptr_ref++;
src4 = *ptr_ref;
/* merge each pair of neighbouring words into one word, giving four words */
dest0 = FUNSHIFT3(src1,src0);
dest1 = FUNSHIFT3(src2,src1);
dest2 = FUNSHIFT3(src3,src2);
dest3 = FUNSHIFT3(src4,src3);
/* combine each merged reference word with the matching word of cur */
sad0 = ume8uu(*ptr_cur++,dest0);
sad1 = ume8uu(*ptr_cur++,dest1);
sad2 = ume8uu(*ptr_cur++,dest2);
sad3 = ume8uu(*ptr_cur++,dest3);
sad0 += sad1;
sad2 += sad3;
sad += sad0 + sad2;
/* step to the start of the next row */
ptr_cur += stride_int;
ptr_ref += stride_int;
}
}
else if(temp==2)
{
/* ref is 2 bytes past a word boundary: same scheme, merged with FUNSHIFT2 */
for(j =0; j < 16; j++)
{
/* read five consecutive words; ptr_ref advances by four */
src0 = *ptr_ref++ ;
src1 = *ptr_ref++;
src2 = *ptr_ref++;
src3 = *ptr_ref++;
src4 = *ptr_ref;
/* merge each pair of neighbouring words into one word, giving four words */
dest0 = FUNSHIFT2(src1,src0);
dest1 = FUNSHIFT2(src2,src1);
dest2 = FUNSHIFT2(src3,src2);
dest3 = FUNSHIFT2(src4,src3);
/* combine each merged reference word with the matching word of cur */
sad0 = ume8uu(*ptr_cur++,dest0);
sad1 = ume8uu(*ptr_cur++,dest1);
sad2 = ume8uu(*ptr_cur++,dest2);
sad3 = ume8uu(*ptr_cur++,dest3);
sad0 += sad1;
sad2 += sad3;
sad += sad0 + sad2;
/* step to the start of the next row */
ptr_cur += stride_int;
ptr_ref += stride_int;
}
}
else if(temp==3)
{
/* ref is 3 bytes past a word boundary: same scheme, merged with FUNSHIFT1 */
for(j =0; j < 16; j++)
{
/* read five consecutive words; ptr_ref advances by four */
src0 = *ptr_ref++ ;
src1 = *ptr_ref++;
src2 = *ptr_ref++;
src3 = *ptr_ref++;
src4 = *ptr_ref;
/* merge each pair of neighbouring words into one word, giving four words */
dest0 = FUNSHIFT1(src1,src0);
dest1 = FUNSHIFT1(src2,src1);
dest2 = FUNSHIFT1(src3,src2);
dest3 = FUNSHIFT1(src4,src3);
/* combine each merged reference word with the matching word of cur */
sad0 = ume8uu(*ptr_cur++,dest0);
sad1 = ume8uu(*ptr_cur++,dest1);
sad2 = ume8uu(*ptr_cur++,dest2);
sad3 = ume8uu(*ptr_cur++,dest3);
sad0 += sad1;
sad2 += sad3;
sad += sad0 + sad2;
/* step to the start of the next row */
ptr_cur += stride_int;
ptr_ref += stride_int;
}
}
return sad;
}
#endif
/*
the sum of absolute differences between two 8x8 blocks
*/
#ifndef _TRIMEDIA
/*
 * Sum of absolute differences (SAD) between two 8x8 pixel blocks.
 *
 * cur    - top-left pixel of the current block
 * ref    - top-left pixel of the reference block
 * stride - distance in bytes between the start of consecutive rows,
 *          applied to both blocks
 *
 * Returns the SAD over all 64 pixel pairs; there is no early exit.
 */
uint32_t
sad8_c(const uint8_t * const cur,
	   const uint8_t * const ref,
	   const uint32_t stride)
{
	const uint8_t *row_cur = cur;
	const uint8_t *row_ref = ref;
	uint32_t total = 0;
	uint32_t row;
	uint32_t col;

	for (row = 0; row < 8; row++) {
		for (col = 0; col < 8; col++) {
			const int diff = row_cur[col] - row_ref[col];

			total += abs(diff);
		}
		row_cur += stride;
		row_ref += stride;
	}

	return total;
}
#else
/*
 * TriMedia build of sad8_c: accumulates an 8x8 result one 32-bit word
 * (four pixels) at a time using the UME8UU / ume8uu custom operation.
 *
 * cur    - top-left pixel of the current block; read through a uint32_t
 *          pointer with no alignment check
 * ref    - top-left pixel of the reference block; may be unaligned, its
 *          low two address bits select one of four code paths
 * stride - row pitch in bytes; converted to words with >>2, so any
 *          remainder modulo 4 is discarded
 *
 * Returns the accumulated result over all 8 rows.
 *
 * NOTE(review): UME8UU, ume8uu and FUNSHIFT1/2/3 are not defined in this
 * file.  UME8UU is presumably the "sum of absolute differences of four
 * unsigned bytes" operation and FUNSHIFTn a funnel shift -- confirm
 * against the TriMedia custom-op reference.  The aligned path spells the
 * operation UME8UU while the unaligned paths spell it ume8uu; confirm
 * that both spellings are available.
 */
uint32_t
sad8_c(const uint8_t * const cur,
const uint8_t * const ref,
const uint32_t stride)
{
uint32_t sad = 0;
uint32_t sad0,sad1;
uint32_t j;
uint32_t src0,src1,src2,dest0,dest1;
/* disabled restrict-qualified variant of the two pointers below: */
/*
uint32_t const *restrict ptr_cur = (uint32_t const *)cur;
uint32_t const *restrict ptr_ref = (uint32_t const *)ref;
*/
uint32_t const * ptr_cur = (uint32_t const *)cur;
uint32_t const * ptr_ref = (uint32_t const *)ref;
uint32_t temp;
/* row pitch in words, minus the 2 words each row already advances via ++ */
uint32_t stride_int=(stride>>2)-2;
/* byte offset (0..3) of ref from a 4-byte boundary; only the low two bits are kept */
temp = ((uint32_t)ref)&3;
if(!temp) /* ref is word aligned */
{
for (j=0; j < 8; j++) {
/* two word pairs cover one 8-pixel row */
sad0 = UME8UU(*ptr_cur++,*ptr_ref++);
sad1 = UME8UU(*ptr_cur++,*ptr_ref++);
sad0 += sad1;
sad += sad0;
/* step to the start of the next row */
ptr_cur += stride_int;
ptr_ref += stride_int;
}
}
else if(temp==1)
{
/*
 * ref is 1 byte past a word boundary.  ptr_ref still holds the unaligned
 * address; NOTE(review): this appears to rely on how the target handles
 * word loads from an unaligned address -- confirm.
 */
for(j =0; j < 8; j++)
{
/* read three consecutive words; ptr_ref advances by two (src2 is read without ++) */
src0 = *ptr_ref++ ;
src1 = *ptr_ref++;
src2 = *ptr_ref;
/* merge each pair of neighbouring words into one word, giving two words */
dest0 = FUNSHIFT3(src1,src0);
dest1 = FUNSHIFT3(src2,src1);
/* combine each merged reference word with the matching word of cur */
sad0 = ume8uu(*ptr_cur++,dest0);
sad1 = ume8uu(*ptr_cur++,dest1);
sad0 += sad1;
sad += sad0;
/* step to the start of the next row */
ptr_cur += stride_int;
ptr_ref += stride_int;
}
}
else if(temp==2)
{
/* ref is 2 bytes past a word boundary: same scheme, merged with FUNSHIFT2 */
for(j =0; j < 8; j++)
{
/* read three consecutive words; ptr_ref advances by two */
src0 = *ptr_ref++ ;
src1 = *ptr_ref++;
src2 = *ptr_ref;
/* merge each pair of neighbouring words into one word, giving two words */
dest0 = FUNSHIFT2(src1,src0);
dest1 = FUNSHIFT2(src2,src1);
/* combine each merged reference word with the matching word of cur */
sad0 = ume8uu(*ptr_cur++,dest0);
sad1 = ume8uu(*ptr_cur++,dest1);
sad0 += sad1;
sad += sad0;
/* step to the start of the next row */
ptr_cur += stride_int;
ptr_ref += stride_int;
}
}
else if(temp==3)
{
/* ref is 3 bytes past a word boundary: same scheme, merged with FUNSHIFT1 */
for(j =0; j < 8; j++)
{
/* read three consecutive words; ptr_ref advances by two */
src0 = *ptr_ref++ ;
src1 = *ptr_ref++;
src2 = *ptr_ref;
/* merge each pair of neighbouring words into one word, giving two words */
dest0 = FUNSHIFT1(src1,src0);
dest1 = FUNSHIFT1(src2,src1);
/* combine each merged reference word with the matching word of cur */
sad0 = ume8uu(*ptr_cur++,dest0);
sad1 = ume8uu(*ptr_cur++,dest1);
sad0 += sad1;
sad += sad0;
/* step to the start of the next row */
ptr_cur += stride_int;
ptr_ref += stride_int;
}
}
return sad;
}
#endif
/* sum of absolute deviations from the mean of a 16x16 block */
#ifndef _TRIMEDIA
uint32_t
dev16_c(const uint8_t * const cur,
const uint32_t stride)
{
uint32_t mean = 0;
uint32_t dev = 0;
uint32_t i, j;
uint8_t const *ptr_cur = cur;
for (j = 0; j < 16; j++) {
for (i = 0; i < 16; i++)
mean += *(ptr_cur + i);
ptr_cur += stride;
}
mean /= (16 * 16);
ptr_cur = cur;
for (j = 0; j < 16; j++) {
for (i = 0; i < 16; i++)
dev += abs(*(ptr_cur + i) - (int32_t) mean);
ptr_cur += stride;
}
return dev;
}
#else
/*
 * TriMedia build of dev16_c: processes a 16x16 block one 32-bit word
 * (four pixels) at a time using the UFIR8UU and UME8UU custom operations.
 *
 * cur    - top-left pixel of the block; read through a uint32_t pointer
 *          with no alignment check
 * stride - row pitch in bytes; converted to words with >>2, so any
 *          remainder modulo 4 is discarded
 *
 * Pass 1 accumulates a total over the block and shifts it right by 8 to
 * form the mean; pass 2 accumulates UME8UU of every word against a word
 * built from that mean.  Returns the pass-2 total.
 *
 * NOTE(review): UFIR8UU, UME8UU, FUNSHIFT1 and FUNSHIFT2 are not defined
 * in this file; the per-line descriptions below follow the original
 * comments and should be confirmed against the TriMedia custom-op
 * reference.
 */
uint32_t
dev16_c(const uint8_t * const cur,
const uint32_t stride)
{
uint32_t mean = 0;
uint32_t dev = 0;
uint32_t j;
uint32_t stride1;
uint32_t const *ptr_cur = (uint32_t const *)cur;
uint32_t src0,src1,src2,src3;
/* row pitch in words, minus the 4 words each row already advances via ++ */
stride1 = (stride>>2)-4;
/* compiler pragma; presumably asks the TriMedia compiler to unroll the next loop 4x -- confirm */
#pragma TCS_unroll=4
for (j = 0; j < 16; j++) {
/* per the original note: each UFIR8UU with 0x01010101 adds up the four bytes of one word */
src0 = UFIR8UU(*ptr_cur++,0x01010101);
src1 = UFIR8UU(*ptr_cur++,0x01010101);
src2 = UFIR8UU(*ptr_cur++,0x01010101);
src3 = UFIR8UU(*ptr_cur++,0x01010101);
/* sum */
src0 += src1 ;
src2 += src3 ;
mean += src0+src2;
/* step to the start of the next row */
ptr_cur += stride1;
}
/* shift by 8 replaces the division: mean /= (16 * 16); */
mean>>=8;
/*
construct word: mean+mean+mean+mean, i.e. the mean repeated in each of
the four bytes (presumably -- depends on FUNSHIFT semantics; confirm).
the mean value is assumed to fit in one byte
*/
src0 = FUNSHIFT1(mean,0);
src0 += mean;
src1 = FUNSHIFT2(src0,0);
mean = src1 + src0;
/* rewind to the top-left of the block for the second pass */
ptr_cur = (uint32_t const *)cur;
#pragma TCS_unroll=4
for (j = 0; j < 16; j++) {
/* per the original note: each UME8UU yields the four-byte SAD of one word against the mean word */
src0 = UME8UU(*ptr_cur++,mean);
src1 = UME8UU(*ptr_cur++,mean);
src2 = UME8UU(*ptr_cur++,mean);
src3 = UME8UU(*ptr_cur++,mean);
/* sum */
src0 += src1 ;
src2 += src3 ;
dev += src0+src2;
/* step to the start of the next row */
ptr_cur += stride1;
}
return dev;
}
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -