📄 motion_comp.c

📁 用MPEG-4对YUV视频文件编码压缩成divx视频文件
💻 C
字号:
/* 01.05.2002   updated MBMotionCompensationBVOP */
/* 14.04.2002   bframe compensation */

#include "../encoder.h"
#include "../utils/mbfunctions.h"
#include "../image/interpolate8x8.h"
#include "../utils/timer.h"
#include "motion.h"

static __inline void
interpolate8x8_switch(uint8_t * const cur,
					  const uint8_t * const refn,
					  const uint32_t x,
					  const uint32_t y,
					  const int32_t dx,
					  const int dy,
					  const uint32_t stride,
					  const uint32_t rounding)
{
	int32_t ddx, ddy;

	switch (((dx & 1) << 1) + (dy & 1))	
	{
	case 0:
		/*ddx = dx / 2;
		ddy = dy / 2;*/
		ddx = DIV(dx,1);
        ddy = DIV(dy,1);

		transfer8x8_copy(cur + y * stride + x,
						 refn + (int)((y + ddy) * stride + x + ddx), stride);
		break;
 
	case 1:
		/*ddx = dx / 2;
		ddy = (dy - 1) / 2;*/
		ddx = DIV(dx,1);
        ddy = DIV(dy-1,1);

		interpolate8x8_halfpel_v(cur + y * stride + x,
								 refn + (int)((y + ddy) * stride + x + ddx), stride,
								 rounding);
		break;

	case 2:
		/*ddx = (dx - 1) / 2;
		ddy = dy / 2;*/
		ddx = DIV(dx-1,1);
        ddy = DIV(dy,1);

		interpolate8x8_halfpel_h(cur + y * stride + x,
								 refn + (int)((y + ddy) * stride + x + ddx), stride,
								 rounding);
		break;

	default:
		/*ddx = (dx - 1) / 2;
		ddy = (dy - 1) / 2;*/
		ddx = DIV(dx-1,1);
        ddy = DIV(dy-1,1);

		interpolate8x8_halfpel_hv(cur + y * stride + x,
								 refn + (int)((y + ddy) * stride + x + ddx), stride,
								  rounding);
		break;
	}
}

static __inline void
compensate8x8_halfpel(int16_t * const dct_codes,
					  uint8_t * const cur,
					  const uint8_t * const ref,
					  const uint8_t * const refh,
					  const uint8_t * const refv,
					  const uint8_t * const refhv,
					  const uint32_t x,
					  const uint32_t y,
					  const int32_t dx,
					  const int dy,
					  const uint32_t stride)
{
	int32_t ddx, ddy;

	switch (((dx & 1) << 1) + (dy & 1))	/*((dx%2)?2:0)+((dy%2)?1:0)   */
	{
	case 0:
		/*ddx = dx / 2;
		ddy = dy / 2;*/
		ddx = DIV(dx,1);
        ddy = DIV(dy,1);

		transfer_8to16sub(dct_codes, cur + y * stride + x,
						  ref + (int) ((y + ddy) * stride + x + ddx), stride);
		break;

	case 1:
		/*ddx = dx / 2;
		ddy = (dy - 1) / 2;*/
		ddx = DIV(dx,1);
        ddy = DIV(dy-1,1);

		transfer_8to16sub(dct_codes, cur + y * stride + x,
						  refv + (int) ((y + ddy) * stride + x + ddx), stride);
		break;

	case 2:
		/*ddx = (dx - 1) / 2;
		ddy = dy / 2;*/
		ddx = DIV(dx-1,1);
        ddy = DIV(dy,1);

		transfer_8to16sub(dct_codes, cur + y * stride + x,
						  refh + (int) ((y + ddy) * stride + x + ddx), stride);
		break;

	default:					/* case 3:  */
		/*ddx = (dx - 1) / 2;
		ddy = (dy - 1) / 2;*/
		ddx = DIV(dx-1,1);
        ddy = DIV(dy-1,1);

		transfer_8to16sub(dct_codes, cur + y * stride + x,
						  refhv + (int) ((y + ddy) * stride + x + ddx), stride);
		break;
	}
}
/*
extern uint32_t g_begin;
extern uint32_t g_interval;
*/
/*
2003.1.4 test
  no use _opt 22.10 ms/frame function cycles= 36520166
  use _opt    24.96 ms/frame function cycles= 44243169
*/

/*#define _opt*/

#ifndef _opt
void
MBMotionCompensation(MACROBLOCK * const mb,
					 const uint32_t i,
					 const uint32_t j,
					 const IMAGE * const ref,
					 const IMAGE * const refh,
					 const IMAGE * const refv,
					 const IMAGE * const refhv,
					 IMAGE * const cur,
					 int16_t * dct_codes,
					 const uint32_t width,
					 const uint32_t height,
					 const uint32_t edged_width,
					 const uint32_t rounding)
{
	static const uint32_t roundtab[16] =
		{ 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2 };

     /*g_begin= CYCLES();*/

	if (mb->mode == MODE_INTER || mb->mode == MODE_INTER_Q) {
		int32_t dx = mb->mvs[0].x;
		int32_t dy = mb->mvs[0].y;

		compensate8x8_halfpel(&dct_codes[0 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y, 16 * i, 16 * j, dx, dy,
							  edged_width);
		compensate8x8_halfpel(&dct_codes[1 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y, 16 * i + 8, 16 * j, dx, dy,
							  edged_width);
		compensate8x8_halfpel(&dct_codes[2 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y, 16 * i, 16 * j + 8, dx, dy,
							  edged_width);
		compensate8x8_halfpel(&dct_codes[3 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y, 16 * i + 8, 16 * j + 8, dx,
							  dy, edged_width);

		dx = (dx & 3) ? (dx >> 1) | 1 : dx / 2;
		dy = (dy & 3) ? (dy >> 1) | 1 : dy / 2;

		/* uv-block-based compensation */
		interpolate8x8_switch(refv->u, ref->u, 8 * i, 8 * j, dx, dy,
							  edged_width / 2, rounding);
		transfer_8to16sub(&dct_codes[4 * 64],
						  cur->u + 8 * j * edged_width / 2 + 8 * i,
						  refv->u + 8 * j * edged_width / 2 + 8 * i,
						  edged_width / 2);

		interpolate8x8_switch(refv->v, ref->v, 8 * i, 8 * j, dx, dy,
							  edged_width / 2, rounding);
		transfer_8to16sub(&dct_codes[5 * 64],
						  cur->v + 8 * j * edged_width / 2 + 8 * i,
						  refv->v + 8 * j * edged_width / 2 + 8 * i,
						  edged_width / 2);

	} 
	else						/* mode == MODE_INTER4V  */
	{
		int32_t sum, dx, dy;

		compensate8x8_halfpel(&dct_codes[0 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y, 16 * i, 16 * j, mb->mvs[0].x,
							  mb->mvs[0].y, edged_width);
		compensate8x8_halfpel(&dct_codes[1 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y, 16 * i + 8, 16 * j,
							  mb->mvs[1].x, mb->mvs[1].y, edged_width);
		compensate8x8_halfpel(&dct_codes[2 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y, 16 * i, 16 * j + 8,
							  mb->mvs[2].x, mb->mvs[2].y, edged_width);
		compensate8x8_halfpel(&dct_codes[3 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y, 16 * i + 8, 16 * j + 8,
							  mb->mvs[3].x, mb->mvs[3].y, edged_width);

		sum = mb->mvs[0].x + mb->mvs[1].x + mb->mvs[2].x + mb->mvs[3].x;
		dx = (sum ? SIGN(sum) *
			  (roundtab[abs(sum) % 16] + (abs(sum) / 16) * 2) : 0);

		sum = mb->mvs[0].y + mb->mvs[1].y + mb->mvs[2].y + mb->mvs[3].y;
		dy = (sum ? SIGN(sum) *
			  (roundtab[abs(sum) % 16] + (abs(sum) / 16) * 2) : 0);

		/* uv-block-based compensation */
		interpolate8x8_switch(refv->u, ref->u, 8 * i, 8 * j, dx, dy,
							  edged_width / 2, rounding);
		transfer_8to16sub(&dct_codes[4 * 64],
						  cur->u + 8 * j * edged_width / 2 + 8 * i,
						  refv->u + 8 * j * edged_width / 2 + 8 * i,
						  edged_width / 2);

		interpolate8x8_switch(refv->v, ref->v, 8 * i, 8 * j, dx, dy,
							  edged_width / 2, rounding);
		transfer_8to16sub(&dct_codes[5 * 64],
						  cur->v + 8 * j * edged_width / 2 + 8 * i,
						  refv->v + 8 * j * edged_width / 2 + 8 * i,
						  edged_width / 2);

	}

    /*g_interval+= CYCLES()-g_begin;*/
}

#else

static const uint32_t roundtab[16] =
		{ 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2 };

void
MBMotionCompensation(MACROBLOCK * const mb,
					 const uint32_t i,
					 const uint32_t j,
					 const IMAGE * const ref,
					 const IMAGE * const refh,
					 const IMAGE * const refv,
					 const IMAGE * const refhv,
					 IMAGE * const cur,
					 int16_t * dct_codes,
					 const uint32_t width,
					 const uint32_t height,
					 const uint32_t edged_width,
					 const uint32_t rounding)
{
/*	
static const uint32_t roundtab[16] =
		{ 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2 };
*/
	const uint32_t edged_width2=edged_width>>1;
	const uint32_t offset_uv = (j<<3)*edged_width2 +(i<<3);
	uint32_t ii0,jj0,ii1,jj1;

    /*g_begin= CYCLES();*/

	if (mb->mode == MODE_INTER || mb->mode == MODE_INTER_Q) {
		int32_t dx = mb->mvs[0].x;
		int32_t dy = mb->mvs[0].y;
/*
		compensate8x8_halfpel(&dct_codes[0 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y, 16 * i, 16 * j, dx, dy,
							  edged_width);
		compensate8x8_halfpel(&dct_codes[1 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y, 16 * i + 8, 16 * j, dx, dy,
							  edged_width);
		compensate8x8_halfpel(&dct_codes[2 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y, 16 * i, 16 * j + 8, dx, dy,
							  edged_width);
		compensate8x8_halfpel(&dct_codes[3 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y, 16 * i + 8, 16 * j + 8, dx,
							  dy, edged_width);
*/
		ii0=i<<4;
		jj0=j<<4;
		ii1=ii0+8;
		jj1=jj0+8;

		compensate8x8_halfpel(&dct_codes[0 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y, ii0, jj0, dx, dy,
							  edged_width);
		compensate8x8_halfpel(&dct_codes[1 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y, ii1, jj0, dx, dy,
							  edged_width);
		compensate8x8_halfpel(&dct_codes[2 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y,  ii0, jj1, dx, dy,
							  edged_width);
		compensate8x8_halfpel(&dct_codes[3 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y,  ii1, jj1, dx,
							  dy, edged_width);
/*
		dx = (dx & 3) ? (dx >> 1) | 1 : dx / 2;
		dy = (dy & 3) ? (dy >> 1) | 1 : dy / 2;
*/
/*
		dx = (dx & 3) ? (dx >> 1) | 1 : (dx >>1);
		dy = (dy & 3) ? (dy >> 1) | 1 : (dy >>1);
*/
		dx =  (dx>>1)|((dx&3)>0);
		dy =  (dy>>1)|((dy&3)>0);

		/* uv-block-based compensation */
/*
		interpolate8x8_switch(refv->u, ref->u, 8 * i, 8 * j, dx, dy,
							  edged_width / 2, rounding);
		transfer_8to16sub(&dct_codes[4 * 64],
						  cur->u + 8 * j * edged_width / 2 + 8 * i,
						  refv->u + 8 * j * edged_width / 2 + 8 * i,
						  edged_width / 2);

		interpolate8x8_switch(refv->v, ref->v, 8 * i, 8 * j, dx, dy,
							  edged_width / 2, rounding);
		transfer_8to16sub(&dct_codes[5 * 64],
						  cur->v + 8 * j * edged_width / 2 + 8 * i,
						  refv->v + 8 * j * edged_width / 2 + 8 * i,
						  edged_width / 2);
*/
		ii0=i<<3;
		jj0=j<<3;
		interpolate8x8_switch(refv->u, ref->u, ii0, jj0, dx, dy,
							  edged_width2, rounding);
		transfer_8to16sub(&dct_codes[4 * 64],
						  cur->u  + offset_uv,
						  refv->u + offset_uv,
						  edged_width2);

		interpolate8x8_switch(refv->v, ref->v,ii0, jj0, dx, dy,
							  edged_width2, rounding);
		transfer_8to16sub(&dct_codes[5 * 64],
						  cur->v  +  offset_uv,
						  refv->v +  offset_uv,
						  edged_width2);
	} 
	else						/* mode == MODE_INTER4V  */
	{
		int32_t sum, dx, dy;
		int32_t sum_abs,sum_div16,sum_mod16;
/*
		compensate8x8_halfpel(&dct_codes[0 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y, 16 * i, 16 * j, mb->mvs[0].x,
							  mb->mvs[0].y, edged_width);
		compensate8x8_halfpel(&dct_codes[1 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y, 16 * i + 8, 16 * j,
							  mb->mvs[1].x, mb->mvs[1].y, edged_width);
		compensate8x8_halfpel(&dct_codes[2 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y, 16 * i, 16 * j + 8,
							  mb->mvs[2].x, mb->mvs[2].y, edged_width);
		compensate8x8_halfpel(&dct_codes[3 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y, 16 * i + 8, 16 * j + 8,
							  mb->mvs[3].x, mb->mvs[3].y, edged_width);
*/
		ii0 = i<<4;
		jj0 = j<<4;
		ii1=ii0+8;
		jj1=jj0+8;

		compensate8x8_halfpel(&dct_codes[0 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y, ii0, jj0, mb->mvs[0].x,
							  mb->mvs[0].y, edged_width);
		compensate8x8_halfpel(&dct_codes[1 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y, ii1, jj0,
							  mb->mvs[1].x, mb->mvs[1].y, edged_width);
		compensate8x8_halfpel(&dct_codes[2 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y, ii0, jj1,
							  mb->mvs[2].x, mb->mvs[2].y, edged_width);
		compensate8x8_halfpel(&dct_codes[3 * 64], cur->y, ref->y, refh->y,
							  refv->y, refhv->y, ii1, jj1,
							  mb->mvs[3].x, mb->mvs[3].y, edged_width);
/*
		sum = mb->mvs[0].x + mb->mvs[1].x + mb->mvs[2].x + mb->mvs[3].x;
		dx = (sum ? SIGN(sum) *
			  (roundtab[abs(sum) % 16] + (abs(sum) / 16) * 2) : 0);
*/
		sum = mb->mvs[0].x + mb->mvs[1].x + mb->mvs[2].x + mb->mvs[3].x;
		dx = sum;
        if(sum)
		{
			sum_abs  = abs(sum); 
			sum_div16  = sum_abs>>4;
            sum_mod16 = sum_abs - (sum_div16<<4);
            dx = SIGN(sum) * (roundtab[sum_mod16] + (sum_div16<<1));  
		}
/*
		sum = mb->mvs[0].y + mb->mvs[1].y + mb->mvs[2].y + mb->mvs[3].y;
		dy = (sum ? SIGN(sum) *
			  (roundtab[abs(sum) % 16] + (abs(sum) / 16) * 2) : 0);
*/
		sum = mb->mvs[0].y + mb->mvs[1].y + mb->mvs[2].y + mb->mvs[3].y;
		dy = sum;
        if(sum)
		{
			sum_abs  = abs(sum); 
			sum_div16  = sum_abs>>4;
            sum_mod16 = sum_abs - (sum_div16<<4);
            dy = SIGN(sum) * (roundtab[sum_mod16] + (sum_div16<<1));  
		}

		/* uv-block-based compensation */
/*
		interpolate8x8_switch(refv->u, ref->u, 8 * i, 8 * j, dx, dy,
							  edged_width / 2, rounding);
		transfer_8to16sub(&dct_codes[4 * 64],
						  cur->u + 8 * j * edged_width / 2 + 8 * i,
						  refv->u + 8 * j * edged_width / 2 + 8 * i,
						  edged_width / 2);

		interpolate8x8_switch(refv->v, ref->v, 8 * i, 8 * j, dx, dy,
							  edged_width / 2, rounding);
		transfer_8to16sub(&dct_codes[5 * 64],
						  cur->v + 8 * j * edged_width / 2 + 8 * i,
						  refv->v + 8 * j * edged_width / 2 + 8 * i,
						  edged_width / 2);
*/
		ii0=i<<3;
		jj0=j<<3;

		interpolate8x8_switch(refv->u, ref->u, ii0, jj0, dx, dy,
							  edged_width2, rounding);
		transfer_8to16sub(&dct_codes[4 * 64],
						  cur->u  + offset_uv,
						  refv->u + offset_uv,
						  edged_width2);

		interpolate8x8_switch(refv->v, ref->v, ii0, jj0, dx, dy,
							  edged_width2, rounding);
		transfer_8to16sub(&dct_codes[5 * 64],
						  cur->v  + offset_uv,
						  refv->v + offset_uv,
						  edged_width2);
	}

    /*g_interval+= CYCLES()-g_begin;*/
}
#endif
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -