📄 mvmc.c

📁 优化过的xvid1.1.2源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
		    CLIP_STORE(11,C);
		    p = l +BpS;
		    C = 16-RND - (Src[q]+Src[p]) + 3*(Src[s]+Src[l])-6*(Src[t]+Src[n]) + 20*(Src[w]+Src[m]);
		    CLIP_STORE(12,C);
		    C = 16-RND - Src[s] +3*Src[t] -6*(Src[w]+Src[l]) + 20*(Src[m]+Src[n]) +2*Src[p];
		    CLIP_STORE(13,C);
		    C = 16-RND - Src[t] +3*(Src[w]-Src[p]) -6*Src[m] + 20*Src[n] + 19*Src[l];
		    CLIP_STORE(14,C);
		    C = 16-RND - Src[w] +3*Src[m] -7*Src[n] + 23*Src[l] + 14*Src[p];
		    CLIP_STORE(15,C);
		    Src += 1;
		    Dst += 1;
    }
}

/*
 * Build one 16x16 prediction block for a motion vector given in
 * quarter-sample units.
 *
 *   cur      destination plane; the block is written at (x, y)
 *   refn     reference plane the samples are read from
 *   refh     used here only as scratch space for the intermediate
 *            horizontal pass
 *   refv     not used by this function
 *   refhv    not used by this function
 *   x, y     position of the block inside cur, in whole samples
 *   dx, dy   motion vector in quarter-sample units
 *   stride   line pitch shared by cur, refn and the scratch buffer
 *   rounding rounding control forwarded unchanged to the filters
 *
 * The two low bits of dx and dy select the filtering:
 *   dx&3 : 0 = none, 1 = _ha, 2 = _h, 3 = _ha_up
 *   dy&3 : 0 = none, 1 = _va, 2 = _v, 3 = _va_up
 * When both are needed, the horizontal filter runs first into the scratch
 * buffer for 17 rows and the vertical filter then reads from that buffer.
 *
 * NOTE(review): the original comment asked for "at least a 16 x stride
 * scratch block", yet the horizontal pass is run for 17 rows - confirm the
 * size of the refh buffer with the callers.
 */
void interpolate16x16_quarterpel(uint8_t * const cur,
								uint8_t * const refn,
								uint8_t * const refh,
								uint8_t * const refv,
								uint8_t * const refhv,
								const uint32_t x, const uint32_t y,
								const int32_t dx,  const int dy,
								const uint32_t stride,
								const uint32_t rounding)
{
	/* fractional parts of the vector, each in 0..3 */
	const int32_t frac_x = dx & 3;
	const int32_t frac_y = dy & 3;

	/* whole-sample position addressed inside the reference plane */
	const int32_t col = ((int)x*4 + dx) >> 2;
	const int32_t row = ((int)y*4 + dy) >> 2;

	uint8_t * const out = cur + y * stride + x;
	const uint8_t * const in = refn + row * (int)stride + col;
	uint8_t * const scratch = refh;

	/* input of the vertical pass: the reference itself unless a
	   horizontal pass has filled the scratch buffer first */
	const uint8_t *vert_in = in;

	(void)refv;
	(void)refhv;

	if (frac_y == 0) {
		/* no vertical filtering: write straight into the destination */
		switch (frac_x) {
		case 0:
			/* whole-sample vector: plain copy as four 8x8 tiles */
			transfer8x8_copy(out, in, stride);
			transfer8x8_copy(out + 8, in + 8, stride);
			transfer8x8_copy(out + 8*stride, in + 8*stride, stride);
			transfer8x8_copy(out + 8*stride + 8, in + 8*stride + 8, stride);
			break;
		case 1:
			interpolate16x16_quarterpel_ha(out, in, 16, stride, rounding);
			break;
		case 2:
			interpolate16x16_quarterpel_h(out, in, 16, stride, rounding);
			break;
		default: /* 3 */
			interpolate16x16_quarterpel_ha_up(out, in, 16, stride, rounding);
			break;
		}
		return;
	}

	/* optional horizontal pass; 17 rows are produced for the vertical pass */
	switch (frac_x) {
	case 1:
		interpolate16x16_quarterpel_ha(scratch, in, 17, stride, rounding);
		vert_in = scratch;
		break;
	case 2:
		interpolate16x16_quarterpel_h(scratch, in, 17, stride, rounding);
		vert_in = scratch;
		break;
	case 3:
		interpolate16x16_quarterpel_ha_up(scratch, in, 17, stride, rounding);
		vert_in = scratch;
		break;
	default: /* 0: the vertical pass reads the reference directly */
		break;
	}

	/* vertical pass into the destination */
	switch (frac_y) {
	case 1:
		interpolate16x16_quarterpel_va(out, vert_in, 16, stride, rounding);
		break;
	case 2:
		interpolate16x16_quarterpel_v(out, vert_in, 16, stride, rounding);
		break;
	default: /* 3 */
		interpolate16x16_quarterpel_va_up(out, vert_in, 16, stride, rounding);
		break;
	}
}

#undef CLIP_STORE

/*
 * Turn the filter accumulator C into a sample value and hand it to STORE
 * (defined elsewhere in this file) for destination D: negative sums become 0,
 * sums above 255<<5 become 255, everything else is shifted right by 5.
 * C is modified in place.
 */
#define CLIP_STORE(D,C) \
  do { \
    if ((C) < 0) (C) = 0; \
    else if ((C) > (255<<5)) (C) = 255; \
    else (C) = (C) >> 5; \
    STORE(D, C); \
  } while (0)

/*
 * Horizontal filter for an 8-sample-wide block.
 *
 * Each of the H rows reads the nine samples Src[0..8] and produces the eight
 * outputs Dst[0..7].  Interior outputs use the weights
 * (-1, 3, -6, 20, 20, -6, 3, -1); the outputs near either end of the row use
 * folded weights so that no sample outside Src[0..8] is read.  Every sum is
 * biased by 16-RND and then scaled down by 32 in CLIP_STORE.
 *
 *   Dst  destination, advanced by BpS after each row
 *   Src  source, advanced by BpS after each row
 *   H    number of rows; nothing is done when H <= 0
 *   BpS  line pitch of both buffers
 *   RND  rounding control subtracted from the bias of 16
 */
void interpolate8x8_quarterpel_h(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
	const int bias = 16 - RND;
	int32_t rows_left;

	for (rows_left = H; rows_left > 0; rows_left--) {
		int sum;

		sum = bias + 14*Src[0] + 23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
		CLIP_STORE(Dst[0], sum);

		sum = bias - 3*(Src[0] - Src[4]) + 19*Src[1] + 20*Src[2] - 6*Src[3] - Src[5];
		CLIP_STORE(Dst[1], sum);

		sum = bias + 2*Src[0] - 6*(Src[1] + Src[4]) + 20*(Src[2] + Src[3]) + 3*Src[5] - Src[6];
		CLIP_STORE(Dst[2], sum);

		sum = bias - (Src[0] + Src[7]) + 3*(Src[1] + Src[6]) - 6*(Src[2] + Src[5]) + 20*(Src[3] + Src[4]);
		CLIP_STORE(Dst[3], sum);

		sum = bias - (Src[1] + Src[8]) + 3*(Src[2] + Src[7]) - 6*(Src[3] + Src[6]) + 20*(Src[4] + Src[5]);
		CLIP_STORE(Dst[4], sum);

		sum = bias - Src[2] + 3*Src[3] - 6*(Src[4] + Src[7]) + 20*(Src[5] + Src[6]) + 2*Src[8];
		CLIP_STORE(Dst[5], sum);

		sum = bias - Src[3] + 3*(Src[4] - Src[8]) - 6*Src[5] + 20*Src[6] + 19*Src[7];
		CLIP_STORE(Dst[6], sum);

		sum = bias - Src[4] + 3*Src[5] - 7*Src[6] + 23*Src[7] + 14*Src[8];
		CLIP_STORE(Dst[7], sum);

		Src += BpS;
		Dst += BpS;
	}
}

#undef CLIP_STORE

/*
 * Clip and scale the accumulator C exactly as the plain filter does, then
 * average it with the source sample Src[n] (rounded with 1-RND) and hand the
 * result to STORE (defined elsewhere in this file) for Dst[n].
 * Relies on Src, Dst and RND being in scope at the point of use.
 */
#define CLIP_STORE(n,C) \
  do { \
    if ((C) < 0) (C) = 0; \
    else if ((C) > (255<<5)) (C) = 255; \
    else (C) = (C) >> 5; \
    (C) = ((C) + Src[(n)] + 1 - RND) >> 1; \
    STORE(Dst[(n)], C); \
  } while (0)

/*
 * Horizontal filter for an 8-sample-wide block, averaged with the source
 * sample at the same index.
 *
 * Each of the H rows reads Src[0..8], computes the same eight weighted sums
 * as interpolate8x8_quarterpel_h, and stores for output k the mean of the
 * clipped, scaled sum and Src[k].
 *
 *   Dst  destination, advanced by BpS after each row
 *   Src  source, advanced by BpS after each row
 *   H    number of rows; nothing is done when H <= 0
 *   BpS  line pitch of both buffers
 *   RND  rounding control used both in the filter bias and in the average
 */
void interpolate8x8_quarterpel_ha(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
	const int bias = 16 - RND;
	int32_t rows_left;

	for (rows_left = H; rows_left > 0; rows_left--) {
		int sum;

		sum = bias + 14*Src[0] + 23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
		CLIP_STORE(0, sum);

		sum = bias - 3*(Src[0] - Src[4]) + 19*Src[1] + 20*Src[2] - 6*Src[3] - Src[5];
		CLIP_STORE(1, sum);

		sum = bias + 2*Src[0] - 6*(Src[1] + Src[4]) + 20*(Src[2] + Src[3]) + 3*Src[5] - Src[6];
		CLIP_STORE(2, sum);

		sum = bias - (Src[0] + Src[7]) + 3*(Src[1] + Src[6]) - 6*(Src[2] + Src[5]) + 20*(Src[3] + Src[4]);
		CLIP_STORE(3, sum);

		sum = bias - (Src[1] + Src[8]) + 3*(Src[2] + Src[7]) - 6*(Src[3] + Src[6]) + 20*(Src[4] + Src[5]);
		CLIP_STORE(4, sum);

		sum = bias - Src[2] + 3*Src[3] - 6*(Src[4] + Src[7]) + 20*(Src[5] + Src[6]) + 2*Src[8];
		CLIP_STORE(5, sum);

		sum = bias - Src[3] + 3*(Src[4] - Src[8]) - 6*Src[5] + 20*Src[6] + 19*Src[7];
		CLIP_STORE(6, sum);

		sum = bias - Src[4] + 3*Src[5] - 7*Src[6] + 23*Src[7] + 14*Src[8];
		CLIP_STORE(7, sum);

		Src += BpS;
		Dst += BpS;
	}
}

#undef CLIP_STORE

/*
 * Clip and scale the accumulator C exactly as the plain filter does, then
 * average it with the NEXT source sample Src[n+1] (rounded with 1-RND) and
 * hand the result to STORE (defined elsewhere in this file) for Dst[n].
 * Relies on Src, Dst and RND being in scope at the point of use.
 */
#define CLIP_STORE(n,C) \
  do { \
    if ((C) < 0) (C) = 0; \
    else if ((C) > (255<<5)) (C) = 255; \
    else (C) = (C) >> 5; \
    (C) = ((C) + Src[(n)+1] + 1 - RND) >> 1; \
    STORE(Dst[(n)], C); \
  } while (0)

/*
 * Horizontal filter for an 8-sample-wide block, averaged with the source
 * sample one position to the right.
 *
 * Each of the H rows reads Src[0..8], computes the same eight weighted sums
 * as interpolate8x8_quarterpel_h, and stores for output k the mean of the
 * clipped, scaled sum and Src[k+1].
 *
 *   Dst  destination, advanced by BpS after each row
 *   Src  source, advanced by BpS after each row
 *   H    number of rows; nothing is done when H <= 0
 *   BpS  line pitch of both buffers
 *   RND  rounding control used both in the filter bias and in the average
 */
void interpolate8x8_quarterpel_ha_up(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
	const int bias = 16 - RND;
	int32_t rows_left;

	for (rows_left = H; rows_left > 0; rows_left--) {
		int sum;

		sum = bias + 14*Src[0] + 23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
		CLIP_STORE(0, sum);

		sum = bias - 3*(Src[0] - Src[4]) + 19*Src[1] + 20*Src[2] - 6*Src[3] - Src[5];
		CLIP_STORE(1, sum);

		sum = bias + 2*Src[0] - 6*(Src[1] + Src[4]) + 20*(Src[2] + Src[3]) + 3*Src[5] - Src[6];
		CLIP_STORE(2, sum);

		sum = bias - (Src[0] + Src[7]) + 3*(Src[1] + Src[6]) - 6*(Src[2] + Src[5]) + 20*(Src[3] + Src[4]);
		CLIP_STORE(3, sum);

		sum = bias - (Src[1] + Src[8]) + 3*(Src[2] + Src[7]) - 6*(Src[3] + Src[6]) + 20*(Src[4] + Src[5]);
		CLIP_STORE(4, sum);

		sum = bias - Src[2] + 3*Src[3] - 6*(Src[4] + Src[7]) + 20*(Src[5] + Src[6]) + 2*Src[8];
		CLIP_STORE(5, sum);

		sum = bias - Src[3] + 3*(Src[4] - Src[8]) - 6*Src[5] + 20*Src[6] + 19*Src[7];
		CLIP_STORE(6, sum);

		sum = bias - Src[4] + 3*Src[5] - 7*Src[6] + 23*Src[7] + 14*Src[8];
		CLIP_STORE(7, sum);

		Src += BpS;
		Dst += BpS;
	}
}

#undef CLIP_STORE

/*
 * Turn the filter accumulator C into a sample value and hand it to STORE
 * (defined elsewhere in this file) for destination D: negative sums become 0,
 * sums above 255<<5 become 255, everything else is shifted right by 5.
 * C is modified in place.
 */
#define CLIP_STORE(D,C) \
  do { \
    if ((C) < 0) (C) = 0; \
    else if ((C) > (255<<5)) (C) = 255; \
    else (C) = (C) >> 5; \
    STORE(D, C); \
  } while (0)

/*
 * Vertical filter for an 8-sample-high block.
 *
 * Works one column at a time: each of the H iterations reads the nine
 * samples of the current column at rows 0..8 (pitch BpS), writes the eight
 * outputs at rows 0..7 of the same column in Dst, then steps one sample to
 * the right.  Weights, bias and scaling are the same as in the horizontal
 * filter, applied along the column.
 *
 *   Dst  destination, advanced by one sample per iteration
 *   Src  source, advanced by one sample per iteration
 *   H    number of columns to process; nothing is done when H <= 0
 *   BpS  line pitch of both buffers
 *   RND  rounding control subtracted from the bias of 16
 */
void interpolate8x8_quarterpel_v(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
	/* offsets of rows 0..8 relative to the current column position */
	const int r0 = 0;
	const int r1 = r0 + BpS;
	const int r2 = r1 + BpS;
	const int r3 = r2 + BpS;
	const int r4 = r3 + BpS;
	const int r5 = r4 + BpS;
	const int r6 = r5 + BpS;
	const int r7 = r6 + BpS;
	const int r8 = r7 + BpS;

	const int bias = 16 - RND;
	int32_t cols_left;
	int sum;

	for (cols_left = H; cols_left > 0; cols_left--) {
		sum = bias + 14*Src[r0] + 23*Src[r1] - 7*Src[r2] + 3*Src[r3] - Src[r4];
		CLIP_STORE(Dst[r0], sum);

		sum = bias - 3*(Src[r0] - Src[r4]) + 19*Src[r1] + 20*Src[r2] - 6*Src[r3] - Src[r5];
		CLIP_STORE(Dst[r1], sum);

		sum = bias + 2*Src[r0] - 6*(Src[r1] + Src[r4]) + 20*(Src[r2] + Src[r3]) + 3*Src[r5] - Src[r6];
		CLIP_STORE(Dst[r2], sum);

		sum = bias - (Src[r0] + Src[r7]) + 3*(Src[r1] + Src[r6]) - 6*(Src[r2] + Src[r5]) + 20*(Src[r3] + Src[r4]);
		CLIP_STORE(Dst[r3], sum);

		sum = bias - (Src[r1] + Src[r8]) + 3*(Src[r2] + Src[r7]) - 6*(Src[r3] + Src[r6]) + 20*(Src[r4] + Src[r5]);
		CLIP_STORE(Dst[r4], sum);

		sum = bias - Src[r2] + 3*Src[r3] - 6*(Src[r4] + Src[r7]) + 20*(Src[r5] + Src[r6]) + 2*Src[r8];
		CLIP_STORE(Dst[r5], sum);

		sum = bias - Src[r3] + 3*(Src[r4] - Src[r8]) - 6*Src[r5] + 20*Src[r6] + 19*Src[r7];
		CLIP_STORE(Dst[r6], sum);

		sum = bias - Src[r4] + 3*Src[r5] - 7*Src[r6] + 23*Src[r7] + 14*Src[r8];
		CLIP_STORE(Dst[r7], sum);

		Src += 1;
		Dst += 1;
	}
}

#undef CLIP_STORE

/*
 * Clip and scale the accumulator C exactly as the plain filter does, then
 * average it with the source sample of row n in the current column
 * (Src[BpS*n], rounded with 1-RND) and hand the result to STORE (defined
 * elsewhere in this file) for Dst[BpS*n].
 * Relies on Src, Dst, BpS and RND being in scope at the point of use.
 */
#define CLIP_STORE(n,C) \
  do { \
    if ((C) < 0) (C) = 0; \
    else if ((C) > (255<<5)) (C) = 255; \
    else (C) = (C) >> 5; \
    (C) = ((C) + Src[BpS*(n)] + 1 - RND) >> 1; \
    STORE(Dst[BpS*(n)], C); \
  } while (0)

/*
 * Vertical filter for an 8-sample-high block, averaged with the source
 * sample of the same row.
 *
 * Works one column at a time: each of the H iterations reads the nine
 * samples of the current column at rows 0..8 (pitch BpS), computes the same
 * eight weighted sums as interpolate8x8_quarterpel_v, and stores for row k
 * the mean of the clipped, scaled sum and the source sample of row k.
 *
 *   Dst  destination, advanced by one sample per iteration
 *   Src  source, advanced by one sample per iteration
 *   H    number of columns to process; nothing is done when H <= 0
 *   BpS  line pitch of both buffers
 *   RND  rounding control used both in the filter bias and in the average
 */
void interpolate8x8_quarterpel_va(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
	/* offsets of rows 0..8 relative to the current column position */
	const int r0 = 0;
	const int r1 = r0 + BpS;
	const int r2 = r1 + BpS;
	const int r3 = r2 + BpS;
	const int r4 = r3 + BpS;
	const int r5 = r4 + BpS;
	const int r6 = r5 + BpS;
	const int r7 = r6 + BpS;
	const int r8 = r7 + BpS;

	const int bias = 16 - RND;
	int32_t cols_left;
	int sum;

	for (cols_left = H; cols_left > 0; cols_left--) {
		sum = bias + 14*Src[r0] + 23*Src[r1] - 7*Src[r2] + 3*Src[r3] - Src[r4];
		CLIP_STORE(0, sum);

		sum = bias - 3*(Src[r0] - Src[r4]) + 19*Src[r1] + 20*Src[r2] - 6*Src[r3] - Src[r5];
		CLIP_STORE(1, sum);

		sum = bias + 2*Src[r0] - 6*(Src[r1] + Src[r4]) + 20*(Src[r2] + Src[r3]) + 3*Src[r5] - Src[r6];
		CLIP_STORE(2, sum);

		sum = bias - (Src[r0] + Src[r7]) + 3*(Src[r1] + Src[r6]) - 6*(Src[r2] + Src[r5]) + 20*(Src[r3] + Src[r4]);
		CLIP_STORE(3, sum);

		sum = bias - (Src[r1] + Src[r8]) + 3*(Src[r2] + Src[r7]) - 6*(Src[r3] + Src[r6]) + 20*(Src[r4] + Src[r5]);
		CLIP_STORE(4, sum);

		sum = bias - Src[r2] + 3*Src[r3] - 6*(Src[r4] + Src[r7]) + 20*(Src[r5] + Src[r6]) + 2*Src[r8];
		CLIP_STORE(5, sum);

		sum = bias - Src[r3] + 3*(Src[r4] - Src[r8]) - 6*Src[r5] + 20*Src[r6] + 19*Src[r7];
		CLIP_STORE(6, sum);

		sum = bias - Src[r4] + 3*Src[r5] - 7*Src[r6] + 23*Src[r7] + 14*Src[r8];
		CLIP_STORE(7, sum);

		Src += 1;
		Dst += 1;
	}
}

#undef CLIP_STORE

/*
 * Clip and scale the accumulator C exactly as the plain filter does, then
 * average it with the source sample one row BELOW row n in the current
 * column (Src[BpS*n+BpS], rounded with 1-RND) and hand the result to STORE
 * (defined elsewhere in this file) for Dst[BpS*n].
 * Relies on Src, Dst, BpS and RND being in scope at the point of use.
 */
#define CLIP_STORE(n,C) \
  do { \
    if ((C) < 0) (C) = 0; \
    else if ((C) > (255<<5)) (C) = 255; \
    else (C) = (C) >> 5; \
    (C) = ((C) + Src[BpS*(n)+BpS] + 1 - RND) >> 1; \
    STORE(Dst[BpS*(n)], C); \
  } while (0)

/*
 * Vertical filter for an 8-sample-high block, averaged with the source
 * sample of the following row.
 *
 * Works one column at a time: each of the H iterations reads the nine
 * samples of the current column at rows 0..8 (pitch BpS), computes the same
 * eight weighted sums as interpolate8x8_quarterpel_v, and stores for row k
 * the mean of the clipped, scaled sum and the source sample of row k+1.
 *
 *   Dst  destination, advanced by one sample per iteration
 *   Src  source, advanced by one sample per iteration
 *   H    number of columns to process; nothing is done when H <= 0
 *   BpS  line pitch of both buffers
 *   RND  rounding control used both in the filter bias and in the average
 */
void interpolate8x8_quarterpel_va_up(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
	/* offsets of rows 0..8 relative to the current column position */
	const int r0 = 0;
	const int r1 = r0 + BpS;
	const int r2 = r1 + BpS;
	const int r3 = r2 + BpS;
	const int r4 = r3 + BpS;
	const int r5 = r4 + BpS;
	const int r6 = r5 + BpS;
	const int r7 = r6 + BpS;
	const int r8 = r7 + BpS;

	const int bias = 16 - RND;
	int32_t cols_left;
	int sum;

	for (cols_left = H; cols_left > 0; cols_left--) {
		sum = bias + 14*Src[r0] + 23*Src[r1] - 7*Src[r2] + 3*Src[r3] - Src[r4];
		CLIP_STORE(0, sum);

		sum = bias - 3*(Src[r0] - Src[r4]) + 19*Src[r1] + 20*Src[r2] - 6*Src[r3] - Src[r5];
		CLIP_STORE(1, sum);

		sum = bias + 2*Src[r0] - 6*(Src[r1] + Src[r4]) + 20*(Src[r2] + Src[r3]) + 3*Src[r5] - Src[r6];
		CLIP_STORE(2, sum);

		sum = bias - (Src[r0] + Src[r7]) + 3*(Src[r1] + Src[r6]) - 6*(Src[r2] + Src[r5]) + 20*(Src[r3] + Src[r4]);
		CLIP_STORE(3, sum);

		sum = bias - (Src[r1] + Src[r8]) + 3*(Src[r2] + Src[r7]) - 6*(Src[r3] + Src[r6]) + 20*(Src[r4] + Src[r5]);
		CLIP_STORE(4, sum);

		sum = bias - Src[r2] + 3*Src[r3] - 6*(Src[r4] + Src[r7]) + 20*(Src[r5] + Src[r6]) + 2*Src[r8];
		CLIP_STORE(5, sum);

		sum = bias - Src[r3] + 3*(Src[r4] - Src[r8]) - 6*Src[r5] + 20*Src[r6] + 19*Src[r7];
		CLIP_STORE(6, sum);

		sum = bias - Src[r4] + 3*Src[r5] - 7*Src[r6] + 23*Src[r7] + 14*Src[r8];
		CLIP_STORE(7, sum);

		Src += 1;
		Dst += 1;
	}
}

void interpolate8x8_quarterpel(uint8_t * const cur,
							  uint8_t * const refn,
							  uint8_t * const refh,
							  uint8_t * const refv,
							  uint8_t * const refhv,
							  const uint32_t x, const uint32_t y,
							  const int32_t dx,  const int dy,
							  const uint32_t stride,
							  const uint32_t rounding)
{
				const uint8_t *src;
				uint8_t *dst;
				uint8_t *tmp;
				int32_t quads;
							
				int32_t x_int, y_int;
			
				const int32_t xRef = (int)x*4 + dx;
				const int32_t yRef = (int)y*4 + dy;
			
				quads = (dx&3) | ((dy&3)<<2);
			
				x_int = xRef >> 2;
				y_int = yRef >> 2;
			
				dst = cur + y * stride + x;
				src = refn + y_int * (int)stride + x_int;
			
				tmp = refh; /* we need at least a 16 x stride scratch block */
			
				switch(quads) 
				{
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -