📄 interpolate8x8.c
字号:
/**************************************************************************
*
* XVID MPEG-4 VIDEO CODEC
* 8x8 block-based halfpel interpolation
*
* This program is an implementation of a part of one or more MPEG-4
* Video tools as specified in ISO/IEC 14496-2 standard. Those intending
* to use this software module in hardware or software products are
* advised that its use may infringe existing patents or copyrights, and
* any such use would be at such party's own risk. The original
* developer of this software module and his/her company, and subsequent
* editors and their companies, will have no liability for use of this
* software or modifications or derivatives thereof.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
*************************************************************************/
/**************************************************************************
*
* History:
*
* 02.05.2002 add "interpolate8x8_c" for B-frame; MinChen <chenm002@163.com>
* 27.12.2001 modified "compensate_halfpel"
* 05.11.2001 initial version; (c)2001 peter ross <pross@cs.rmit.edu.au>
*
*************************************************************************/
#include "../portab.h"
#include "interpolate8x8.h"
/*
 * Global function pointers for 8x8 half-pel interpolation.
 * Going by their names they select the horizontal (h), vertical (v) and,
 * presumably, combined horizontal+vertical (hv) routines.
 * They are only defined here; no assignment is visible in this part of the
 * file, so they must be set elsewhere before being called.
 */
INTERPOLATE8X8_PTR interpolate8x8_halfpel_h;
INTERPOLATE8X8_PTR interpolate8x8_halfpel_v;
INTERPOLATE8X8_PTR interpolate8x8_halfpel_hv;
/*
The QUADAVG-based optimizations below are only valid when rounding is zero.
quadavg rsrc1 rsrc2 -> rdest
rsrc1 = [abcd],
rsrc2 = [wxyz],
rdest = [pqrs]
a, b, c, d, w, x, y, z, p, q, r, and s are all unsigned eight-bit values.
p = (a + w + 1) >> 1
q = (b + x + 1) >> 1
r = (c + y + 1) >> 1
s = (d + z + 1) >> 1
*/
/*
Horizontal half-pixel interpolation of an 8x8 block.
Added by fyh 2002.12.26
*/
#ifndef _TRIMEDIA
/*
 * Horizontal half-pel interpolation of an 8x8 block (portable C version).
 *
 * Every output pixel is the average of the source pixel at the same offset
 * and its right-hand neighbour:
 *   dst[y][x] = (src[y][x] + src[y][x + 1] + 1 - rounding) >> 1
 * so 9 bytes are read from each of the 8 source rows.
 *
 * dst      - destination image; the block is written at the same offsets
 *            it is read from in src
 * src      - integer-pixel source image
 * stride   - distance in bytes between two rows (used for src and dst)
 * rounding - rounding bit (0 or 1)
 */
void
interpolate8x8_halfpel_h_c(uint8_t * const dst,
                           const uint8_t * const src,
                           const uint32_t stride,
                           const uint32_t rounding)
{
    /* "+1 - rounding" folded into a single per-call constant */
    const uint32_t bias = 1 - rounding;
    uint32_t row_off = 0;   /* offset of the first pixel of the current row */
    uint32_t y;

    for (y = 0; y < 8; y++, row_off += stride) {
        uint32_t x;

        for (x = 0; x < 8; x++) {
            const uint32_t pos = row_off + x;
            const uint32_t sum = (uint32_t) src[pos] + (uint32_t) src[pos + 1];

            dst[pos] = (uint8_t) ((sum + bias) >> 1);
        }
    }
}
#else
/*
 * Horizontal half-pel interpolation of an 8x8 block, TriMedia variant
 * (compiled when _TRIMEDIA is defined).
 *
 * Each row is processed as two 32-bit words: the row's pixels and the same
 * pixels shifted by one byte are assembled with FUNSHIFTn and averaged with
 * QUADAVG (per byte: p = (a + w + 1) >> 1).  One branch exists for each of
 * the four possible byte alignments of src within a 32-bit word.
 *
 * NOTE(review): `rounding` is never read in this function, so the result
 * always corresponds to rounding == 0.
 * NOTE(review): dst is accessed through a uint32_t pointer without any
 * alignment check, and stride is converted to a word stride with >> 2 -
 * presumably dst is word aligned and stride is a multiple of 4; confirm
 * against the callers.
 * NOTE(review): FUNSHIFT1/2/3 and QUADAVG are defined outside this file; the
 * byte diagrams below are the original author's notes, translated.
 */
void
interpolate8x8_halfpel_h_c(uint8_t * const dst,/* horizontal half-pixel interpolated image (output) */
const uint8_t * const src,/* integer pixel image */
const uint32_t stride,/* image stride */
const uint32_t rounding)/* rounding bit (0 or 1); not used in this variant */
{
uint32_t j;
/* access source and destination through 32-bit word pointers */
const uint32_t *ptr_src=(uint32_t *)src;
uint32_t *ptr_dst=(uint32_t *)dst;
const uint32_t stride4=stride>>2;
uint32_t align;
uint32_t src1,src2,src3,src4,src5,src6,src7;
/* determine the byte alignment of the source block's start address */
align= ((uint32_t)ptr_src&3);
/*
NOTE(review): in the branches for align 1..3 the loads through ptr_src are
done with a pointer that is not word aligned; this appears to rely on how
the target handles such loads - confirm.
*/
if(!align) /* word aligned: address ends in 00b */
{
for(j = 0; j < 8; j++) {
/*
fetch three consecutive words of the row
3 2 1 0 , 7 6 5 4, 11 10 9 8
*/
src1= *ptr_src;
src2= *(ptr_src+1);
src3= *(ptr_src+2);
/*
obtain two words by shifting
4 3 2 1, 8 7 6 5
*/
src4= FUNSHIFT3(src2,src1);
src5= FUNSHIFT3(src3,src2);
/*
averaging interpolation, per byte p = (a + w + 1) >> 1
*/
*ptr_dst=QUADAVG(src1,src4);
*(ptr_dst+1)=QUADAVG(src2,src5);
/*
advance both pointers to the next row
*/
ptr_src+=stride4;
ptr_dst+=stride4;
}
}
else if(align==1) /* address ends in 01b */
{
for(j = 0; j < 8; j++) {
/*
fetch three consecutive words of the row
3 2 1 0 , 7 6 5 4, 11 10 9 8
*/
src1= *ptr_src;
src2= *(ptr_src+1);
src3= *(ptr_src+2);
/*
obtain four words by shifting
4 3 2 1,5 4 3 2, 8 7 6 5, 9 8 7 6
*/
src4= FUNSHIFT3(src2,src1);
src5= FUNSHIFT2(src2,src1);
src6= FUNSHIFT3(src3,src2);
src7= FUNSHIFT2(src3,src2);
/*
averaging interpolation, per byte p = (a + w + 1) >> 1
*/
*ptr_dst=QUADAVG(src4,src5);
*(ptr_dst+1)=QUADAVG(src6,src7);
/*
advance both pointers to the next row
*/
ptr_src+=stride4;
ptr_dst+=stride4;
}
}
else if(align==2)/* address ends in 10b */
{
for(j = 0; j < 8; j++) {
/*
fetch three consecutive words of the row
3 2 1 0 , 7 6 5 4, 11 10 9 8
*/
src1= *ptr_src;
src2= *(ptr_src+1);
src3= *(ptr_src+2);
/*
obtain four words by shifting
5 4 3 2,6 5 4 3, 9 8 7 6, 10 9 8 7
*/
src4= FUNSHIFT2(src2,src1);
src5= FUNSHIFT1(src2,src1);
src6= FUNSHIFT2(src3,src2);
src7= FUNSHIFT1(src3,src2);
/*
averaging interpolation, per byte p = (a + w + 1) >> 1
*/
*ptr_dst=QUADAVG(src4,src5);
*(ptr_dst+1)=QUADAVG(src6,src7);
/*
advance both pointers to the next row
*/
ptr_src+=stride4;
ptr_dst+=stride4;
}
}
else if(align==3)/* address ends in 11b */
{
for(j = 0; j < 8; j++) {
/*
fetch three consecutive words of the row
3 2 1 0 , 7 6 5 4, 11 10 9 8
*/
src1= *ptr_src;
src2= *(ptr_src+1);
src3= *(ptr_src+2);
/*
obtain two words by shifting
6 5 4 3, 10 9 8 7
(the one-byte-shifted partners 7 6 5 4 and 11 10 9 8 are src2 and src3
themselves)
*/
src4= FUNSHIFT1(src2,src1);
src5= FUNSHIFT1(src3,src2);
/*
averaging interpolation, per byte p = (a + w + 1) >> 1
*/
*ptr_dst=QUADAVG(src2,src4);
*(ptr_dst+1)=QUADAVG(src3,src5);
/*
advance both pointers to the next row
*/
ptr_src+=stride4;
ptr_dst+=stride4;
}
}
}
#endif
/*
Vertical half-pixel interpolation of an 8x8 block.
Added by fyh 2002.12.26
*/
#ifndef _TRIMEDIA
/*
 * Vertical half-pel interpolation of an 8x8 block (portable C version).
 *
 * Every output pixel is the average of the source pixel at the same offset
 * and the pixel one row below it:
 *   dst[y][x] = (src[y][x] + src[y + 1][x] + 1 - rounding) >> 1
 * so 9 source rows of 8 bytes are read.
 *
 * dst      - destination image; the block is written at the same offsets
 *            it is read from in src
 * src      - integer-pixel source image
 * stride   - distance in bytes between two rows (used for src and dst)
 * rounding - rounding bit (0 or 1)
 */
void
interpolate8x8_halfpel_v_c(uint8_t * const dst,
                           const uint8_t * const src,
                           const uint32_t stride,
                           const uint32_t rounding)
{
    /* "+1 - rounding" folded into a single per-call constant */
    const uint32_t bias = 1 - rounding;
    uint32_t row_off = 0;   /* offset of the first pixel of the current row */
    uint32_t y;

    for (y = 0; y < 8; y++, row_off += stride) {
        uint32_t x;

        for (x = 0; x < 8; x++) {
            const uint32_t pos = row_off + x;
            const uint32_t sum = (uint32_t) src[pos] + (uint32_t) src[pos + stride];

            dst[pos] = (uint8_t) ((sum + bias) >> 1);
        }
    }
}
#else
#ifdef _opt1
void
interpolate8x8_halfpel_v_c(uint8_t * const dst,/* vertical half pixel interpolate image */
const uint8_t * const src,/* integer pixel image */
const uint32_t stride,/* image stride */
const uint32_t rounding)/* rounding bit(0 ro 1) */
{
uint32_t j;
uint32_t src0,src1,src2,src3,src4,src5,dest0,dest1,dest2,dest3;
uint32_t * ptr_dst = (uint32_t *)dst;
const uint32_t * ptr_src = (uint32_t *)src;
uint32_t stride1=stride>>2;
uint32_t temp;
temp = ((uint32_t)src)&3;
if(!temp)
{
for (j = 0; j < 8; j++) {
src0 = *(ptr_src);
src1 = *(ptr_src+1);
src2 = *(ptr_src+stride1);
src3 = *(ptr_src+stride1+1);
*(ptr_dst) = QUADAVG(src0,src2);
*(ptr_dst+1)= QUADAVG(src1,src3);
ptr_src += stride1;
ptr_dst += stride1;
}
}
else if(temp==1)
{
for (j = 0; j < 8; j++) {
src0 = *(ptr_src);
src1 = *(ptr_src+1);
src2 = *(ptr_src+2);
src3 = *(ptr_src+stride1);
src4 = *(ptr_src+stride1+1);
src5 = *(ptr_src+stride1+2);
dest0 = FUNSHIFT3(src1,src0);
dest1 = FUNSHIFT3(src2,src1);
dest2 = FUNSHIFT3(src4,src3);
dest3 = FUNSHIFT3(src5,src4);
*(ptr_dst) = QUADAVG(dest0,dest2);
*(ptr_dst+1)= QUADAVG(dest1,dest3);
ptr_src += stride1;
ptr_dst += stride1;
}
}
else if(temp==2)
{
for (j = 0; j < 8; j++) {
src0 = *(ptr_src);
src1 = *(ptr_src+1);
src2 = *(ptr_src+2);
src3 = *(ptr_src+stride1);
src4 = *(ptr_src+stride1+1);
src5 = *(ptr_src+stride1+2);
dest0 = FUNSHIFT2(src1,src0);
dest1 = FUNSHIFT2(src2,src1);
dest2 = FUNSHIFT2(src4,src3);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -