📄 mvmc.c

📁 优化过的xvid1.1.2源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 45
		    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
		    CLIP_STORE( 9,C);
		    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
		    CLIP_STORE(10,C);
		    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
		    CLIP_STORE(11,C);
		    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
		    CLIP_STORE(12,C);
		    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
		    CLIP_STORE(13,C);
		    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
		    CLIP_STORE(14,C);
		    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
		    CLIP_STORE(15,C);
		    Src += BpS;
		    Dst += BpS;
  }  
}

#undef CLIP_STORE

/*
 * CLIP_STORE(i, C) for the horizontal pass that averages with the pixel to
 * the right.
 *
 * C holds a filter sum scaled by 32.  It is clamped and scaled down to
 * 0..255, averaged with Src[i+1] (rounding bias 1-RND), and the result is
 * handed to STORE together with Dst[i].  C is overwritten; Src, Dst and RND
 * are picked up from the enclosing function.
 *
 * The body is wrapped in do { } while (0) and the parameters are
 * parenthesised so the macro always expands to exactly one statement, even
 * under an unbraced if/else.
 *
 * NOTE(review): STORE is defined outside this excerpt; the "_add" suffix
 * suggests it blends with the value already in Dst -- confirm.
 */
#define CLIP_STORE(i,C) \
  do { \
    if ((C) < 0) (C) = 0; else if ((C) > (255<<5)) (C) = 255; else (C) = (C) >> 5; \
    (C) = ((C) + Src[(i)+1] + 1 - RND) >> 1; \
    STORE(Dst[(i)], C); \
  } while (0)

/*
 * Horizontal quarter-pel filter over a 16-pixel-wide strip, averaged with
 * the right-hand source neighbour.
 *
 * Each row reads the 17 pixels Src[0..16] and produces 16 outputs.  Outputs
 * 3..12 use the symmetric 8-tap kernel (-1, 3, -6, 20, 20, -6, 3, -1);
 * outputs 0..2 and 13..15 use shortened tap sets.  Every tap set sums to 32,
 * hence the ">> 5" in CLIP_STORE and the rounding term 16-RND.
 *
 * Dst  destination; 16 pixels per row go through STORE
 * Src  source; 17 pixels per row are read
 * H    number of rows to process
 * BpS  distance between consecutive rows, applied to both Src and Dst
 * RND  rounding control, subtracted from both rounding biases
 *      (presumably 0 or 1 -- confirm against the caller)
 */
void interpolate16x16_quarterpel_ha_up_add(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
    while (H-- > 0)
    {
        int C;

        /* left edge: shortened tap sets */
        C = 16-RND + 14*Src[0] + 23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
        CLIP_STORE(0, C);
        C = 16-RND - 3*(Src[0]-Src[4]) + 19*Src[1] + 20*Src[2] - 6*Src[3] - Src[5];
        CLIP_STORE(1, C);
        C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) + 20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
        CLIP_STORE(2, C);

        /* interior: full symmetric 8-tap kernel centred between Src[k] and Src[k+1] */
        C = 16-RND - (Src[0]+Src[7])  + 3*(Src[1]+Src[6])   - 6*(Src[2]+Src[5])   + 20*(Src[3]+Src[4]);
        CLIP_STORE(3, C);
        C = 16-RND - (Src[1]+Src[8])  + 3*(Src[2]+Src[7])   - 6*(Src[3]+Src[6])   + 20*(Src[4]+Src[5]);
        CLIP_STORE(4, C);
        C = 16-RND - (Src[2]+Src[9])  + 3*(Src[3]+Src[8])   - 6*(Src[4]+Src[7])   + 20*(Src[5]+Src[6]);
        CLIP_STORE(5, C);
        C = 16-RND - (Src[3]+Src[10]) + 3*(Src[4]+Src[9])   - 6*(Src[5]+Src[8])   + 20*(Src[6]+Src[7]);
        CLIP_STORE(6, C);
        C = 16-RND - (Src[4]+Src[11]) + 3*(Src[5]+Src[10])  - 6*(Src[6]+Src[9])   + 20*(Src[7]+Src[8]);
        CLIP_STORE(7, C);
        C = 16-RND - (Src[5]+Src[12]) + 3*(Src[6]+Src[11])  - 6*(Src[7]+Src[10])  + 20*(Src[8]+Src[9]);
        CLIP_STORE(8, C);
        C = 16-RND - (Src[6]+Src[13]) + 3*(Src[7]+Src[12])  - 6*(Src[8]+Src[11])  + 20*(Src[9]+Src[10]);
        CLIP_STORE(9, C);
        C = 16-RND - (Src[7]+Src[14]) + 3*(Src[8]+Src[13])  - 6*(Src[9]+Src[12])  + 20*(Src[10]+Src[11]);
        CLIP_STORE(10, C);
        C = 16-RND - (Src[8]+Src[15]) + 3*(Src[9]+Src[14])  - 6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
        CLIP_STORE(11, C);
        C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15]) - 6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
        CLIP_STORE(12, C);

        /* right edge: mirror image of the left-edge tap sets */
        C = 16-RND - Src[10] + 3*Src[11] - 6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) + 2*Src[16];
        CLIP_STORE(13, C);
        C = 16-RND - Src[11] + 3*(Src[12]-Src[16]) - 6*Src[13] + 20*Src[14] + 19*Src[15];
        CLIP_STORE(14, C);
        C = 16-RND - Src[12] + 3*Src[13] - 7*Src[14] + 23*Src[15] + 14*Src[16];
        CLIP_STORE(15, C);

        Src += BpS;
        Dst += BpS;
    }
}

#undef CLIP_STORE
/*
 * CLIP_STORE(D, C) for the plain vertical pass.
 *
 * C holds a filter sum scaled by 32.  It is clamped and scaled down to
 * 0..255 and then handed to STORE together with the destination lvalue D.
 * C is overwritten.
 *
 * The body is wrapped in do { } while (0) so the macro always expands to
 * exactly one statement, even under an unbraced if/else.
 *
 * NOTE(review): STORE is defined outside this excerpt; the "_add" suffix
 * suggests it blends with the value already in D -- confirm.
 */
#define CLIP_STORE(D,C) \
  do { \
    if ((C) < 0) (C) = 0; else if ((C) > (255<<5)) (C) = 255; else (C) = (C) >> 5; \
    STORE(D, C); \
  } while (0)

/*
 * Vertical quarter-pel filter, one column per loop iteration.
 *
 * Each column reads the 17 samples Src[BpS*0] .. Src[BpS*16] and produces
 * the 16 outputs Dst[BpS*0] .. Dst[BpS*15].  Outputs 3..12 use the symmetric
 * 8-tap kernel (-1, 3, -6, 20, 20, -6, 3, -1); outputs 0..2 and 13..15 use
 * shortened tap sets.  Every tap set sums to 32, hence the ">> 5" in
 * CLIP_STORE and the rounding term 16-RND.
 *
 * Rows are addressed explicitly as BpS*k so each tap's source row can be
 * read straight off the expression.
 *
 * Dst  destination; 16 rows per column go through STORE
 * Src  source; 17 rows per column are read
 * H    number of COLUMNS to process (Src and Dst advance by one pixel per
 *      iteration, despite the parameter's name)
 * BpS  distance between consecutive rows
 * RND  rounding control, subtracted from the rounding bias
 *      (presumably 0 or 1 -- confirm against the caller)
 */
void interpolate16x16_quarterpel_v_add(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
    while (H-- > 0)
    {
        int C;

        /* top edge: shortened tap sets */
        C = 16-RND + 14*Src[BpS*0] + 23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
        CLIP_STORE(Dst[BpS*0], C);
        C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) + 19*Src[BpS*1] + 20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
        CLIP_STORE(Dst[BpS*1], C);
        C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) + 20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
        CLIP_STORE(Dst[BpS*2], C);

        /* interior: full symmetric 8-tap kernel centred between rows k and k+1 */
        C = 16-RND - (Src[BpS*0]+Src[BpS*7])  + 3*(Src[BpS*1]+Src[BpS*6])   - 6*(Src[BpS*2]+Src[BpS*5])   + 20*(Src[BpS*3]+Src[BpS*4]);
        CLIP_STORE(Dst[BpS*3], C);
        C = 16-RND - (Src[BpS*1]+Src[BpS*8])  + 3*(Src[BpS*2]+Src[BpS*7])   - 6*(Src[BpS*3]+Src[BpS*6])   + 20*(Src[BpS*4]+Src[BpS*5]);
        CLIP_STORE(Dst[BpS*4], C);
        C = 16-RND - (Src[BpS*2]+Src[BpS*9])  + 3*(Src[BpS*3]+Src[BpS*8])   - 6*(Src[BpS*4]+Src[BpS*7])   + 20*(Src[BpS*5]+Src[BpS*6]);
        CLIP_STORE(Dst[BpS*5], C);
        C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS*4]+Src[BpS*9])   - 6*(Src[BpS*5]+Src[BpS*8])   + 20*(Src[BpS*6]+Src[BpS*7]);
        CLIP_STORE(Dst[BpS*6], C);
        C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS*5]+Src[BpS*10])  - 6*(Src[BpS*6]+Src[BpS*9])   + 20*(Src[BpS*7]+Src[BpS*8]);
        CLIP_STORE(Dst[BpS*7], C);
        C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS*6]+Src[BpS*11])  - 6*(Src[BpS*7]+Src[BpS*10])  + 20*(Src[BpS*8]+Src[BpS*9]);
        CLIP_STORE(Dst[BpS*8], C);
        C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS*7]+Src[BpS*12])  - 6*(Src[BpS*8]+Src[BpS*11])  + 20*(Src[BpS*9]+Src[BpS*10]);
        CLIP_STORE(Dst[BpS*9], C);
        C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS*8]+Src[BpS*13])  - 6*(Src[BpS*9]+Src[BpS*12])  + 20*(Src[BpS*10]+Src[BpS*11]);
        CLIP_STORE(Dst[BpS*10], C);
        C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS*9]+Src[BpS*14])  - 6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
        CLIP_STORE(Dst[BpS*11], C);
        C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15]) - 6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
        CLIP_STORE(Dst[BpS*12], C);

        /* bottom edge: mirror image of the top-edge tap sets */
        C = 16-RND - Src[BpS*10] + 3*Src[BpS*11] - 6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) + 2*Src[BpS*16];
        CLIP_STORE(Dst[BpS*13], C);
        C = 16-RND - Src[BpS*11] + 3*(Src[BpS*12]-Src[BpS*16]) - 6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
        CLIP_STORE(Dst[BpS*14], C);
        C = 16-RND - Src[BpS*12] + 3*Src[BpS*13] - 7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
        CLIP_STORE(Dst[BpS*15], C);

        /* next column */
        Src += 1;
        Dst += 1;
    }
}

#undef CLIP_STORE

/*
 * CLIP_STORE(i, C) for the vertical pass that averages with the source
 * sample of the same row.
 *
 * C holds a filter sum scaled by 32.  It is clamped and scaled down to
 * 0..255, averaged with Src[BpS*i] (rounding bias 1-RND), and the result is
 * handed to STORE together with Dst[BpS*i].  C is overwritten; Src, Dst, BpS
 * and RND are picked up from the enclosing function.
 *
 * The body is wrapped in do { } while (0) and the parameters are
 * parenthesised so the macro always expands to exactly one statement and
 * BpS*(i) stays correct for any index expression.
 *
 * NOTE(review): STORE is defined outside this excerpt; the "_add" suffix
 * suggests it blends with the value already in Dst -- confirm.
 */
#define CLIP_STORE(i,C) \
  do { \
    if ((C) < 0) (C) = 0; else if ((C) > (255<<5)) (C) = 255; else (C) = (C) >> 5; \
    (C) = ((C) + Src[BpS*(i)] + 1 - RND) >> 1; \
    STORE(Dst[BpS*(i)], C); \
  } while (0)

/*
 * Vertical quarter-pel filter, averaged with the source sample of the same
 * row; one column per loop iteration.
 *
 * Each column reads the 17 samples Src[BpS*0] .. Src[BpS*16] and produces
 * the 16 outputs Dst[BpS*0] .. Dst[BpS*15].  Outputs 3..12 use the symmetric
 * 8-tap kernel (-1, 3, -6, 20, 20, -6, 3, -1); outputs 0..2 and 13..15 use
 * shortened tap sets.  Every tap set sums to 32, hence the ">> 5" in
 * CLIP_STORE and the rounding term 16-RND.
 *
 * Rows are addressed explicitly as BpS*k so each tap's source row can be
 * read straight off the expression.
 *
 * Dst  destination; 16 rows per column go through STORE
 * Src  source; 17 rows per column are read
 * H    number of COLUMNS to process (Src and Dst advance by one pixel per
 *      iteration, despite the parameter's name)
 * BpS  distance between consecutive rows
 * RND  rounding control, subtracted from both rounding biases
 *      (presumably 0 or 1 -- confirm against the caller)
 */
void interpolate16x16_quarterpel_va_add(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
    while (H-- > 0)
    {
        int C;

        /* top edge: shortened tap sets */
        C = 16-RND + 14*Src[BpS*0] + 23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
        CLIP_STORE(0, C);
        C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) + 19*Src[BpS*1] + 20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
        CLIP_STORE(1, C);
        C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) + 20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
        CLIP_STORE(2, C);

        /* interior: full symmetric 8-tap kernel centred between rows k and k+1 */
        C = 16-RND - (Src[BpS*0]+Src[BpS*7])  + 3*(Src[BpS*1]+Src[BpS*6])   - 6*(Src[BpS*2]+Src[BpS*5])   + 20*(Src[BpS*3]+Src[BpS*4]);
        CLIP_STORE(3, C);
        C = 16-RND - (Src[BpS*1]+Src[BpS*8])  + 3*(Src[BpS*2]+Src[BpS*7])   - 6*(Src[BpS*3]+Src[BpS*6])   + 20*(Src[BpS*4]+Src[BpS*5]);
        CLIP_STORE(4, C);
        C = 16-RND - (Src[BpS*2]+Src[BpS*9])  + 3*(Src[BpS*3]+Src[BpS*8])   - 6*(Src[BpS*4]+Src[BpS*7])   + 20*(Src[BpS*5]+Src[BpS*6]);
        CLIP_STORE(5, C);
        C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS*4]+Src[BpS*9])   - 6*(Src[BpS*5]+Src[BpS*8])   + 20*(Src[BpS*6]+Src[BpS*7]);
        CLIP_STORE(6, C);
        C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS*5]+Src[BpS*10])  - 6*(Src[BpS*6]+Src[BpS*9])   + 20*(Src[BpS*7]+Src[BpS*8]);
        CLIP_STORE(7, C);
        C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS*6]+Src[BpS*11])  - 6*(Src[BpS*7]+Src[BpS*10])  + 20*(Src[BpS*8]+Src[BpS*9]);
        CLIP_STORE(8, C);
        C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS*7]+Src[BpS*12])  - 6*(Src[BpS*8]+Src[BpS*11])  + 20*(Src[BpS*9]+Src[BpS*10]);
        CLIP_STORE(9, C);
        C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS*8]+Src[BpS*13])  - 6*(Src[BpS*9]+Src[BpS*12])  + 20*(Src[BpS*10]+Src[BpS*11]);
        CLIP_STORE(10, C);
        C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS*9]+Src[BpS*14])  - 6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
        CLIP_STORE(11, C);
        C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15]) - 6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
        CLIP_STORE(12, C);

        /* bottom edge: mirror image of the top-edge tap sets */
        C = 16-RND - Src[BpS*10] + 3*Src[BpS*11] - 6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) + 2*Src[BpS*16];
        CLIP_STORE(13, C);
        C = 16-RND - Src[BpS*11] + 3*(Src[BpS*12]-Src[BpS*16]) - 6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
        CLIP_STORE(14, C);
        C = 16-RND - Src[BpS*12] + 3*Src[BpS*13] - 7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
        CLIP_STORE(15, C);

        /* next column */
        Src += 1;
        Dst += 1;
    }
}

#undef CLIP_STORE
/*
 * CLIP_STORE(i, C) for the vertical pass that averages with the source
 * sample one row below.
 *
 * C holds a filter sum scaled by 32.  It is clamped and scaled down to
 * 0..255, averaged with Src[BpS*i + BpS] (rounding bias 1-RND), and the
 * result is handed to STORE together with Dst[BpS*i].  C is overwritten;
 * Src, Dst, BpS and RND are picked up from the enclosing function.
 *
 * The body is wrapped in do { } while (0) and the parameters are
 * parenthesised so the macro always expands to exactly one statement and
 * BpS*(i) stays correct for any index expression.
 *
 * NOTE(review): STORE is defined outside this excerpt; the "_add" suffix
 * suggests it blends with the value already in Dst -- confirm.
 */
#define CLIP_STORE(i,C) \
  do { \
    if ((C) < 0) (C) = 0; else if ((C) > (255<<5)) (C) = 255; else (C) = (C) >> 5; \
    (C) = ((C) + Src[BpS*(i)+BpS] + 1 - RND) >> 1; \
    STORE(Dst[BpS*(i)], C); \
  } while (0)

/*
 * Vertical quarter-pel filter, averaged with the source sample one row
 * below the output row; one column per loop iteration.
 *
 * Each column reads the 17 samples Src[BpS*0] .. Src[BpS*16] and produces
 * the 16 outputs Dst[BpS*0] .. Dst[BpS*15].  Outputs 3..12 use the symmetric
 * 8-tap kernel (-1, 3, -6, 20, 20, -6, 3, -1); outputs 0..2 and 13..15 use
 * shortened tap sets.  Every tap set sums to 32, hence the ">> 5" in
 * CLIP_STORE and the rounding term 16-RND.
 *
 * Rows are addressed explicitly as BpS*k so each tap's source row can be
 * read straight off the expression.
 *
 * Dst  destination; 16 rows per column go through STORE
 * Src  source; 17 rows per column are read
 * H    number of COLUMNS to process (Src and Dst advance by one pixel per
 *      iteration, despite the parameter's name)
 * BpS  distance between consecutive rows
 * RND  rounding control, subtracted from both rounding biases
 *      (presumably 0 or 1 -- confirm against the caller)
 */
void interpolate16x16_quarterpel_va_up_add(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
    while (H-- > 0)
    {
        int C;

        /* top edge: shortened tap sets */
        C = 16-RND + 14*Src[BpS*0] + 23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] - Src[BpS*4];
        CLIP_STORE(0, C);
        C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) + 19*Src[BpS*1] + 20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
        CLIP_STORE(1, C);
        C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) + 20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
        CLIP_STORE(2, C);

        /* interior: full symmetric 8-tap kernel centred between rows k and k+1 */
        C = 16-RND - (Src[BpS*0]+Src[BpS*7])  + 3*(Src[BpS*1]+Src[BpS*6])   - 6*(Src[BpS*2]+Src[BpS*5])   + 20*(Src[BpS*3]+Src[BpS*4]);
        CLIP_STORE(3, C);
        C = 16-RND - (Src[BpS*1]+Src[BpS*8])  + 3*(Src[BpS*2]+Src[BpS*7])   - 6*(Src[BpS*3]+Src[BpS*6])   + 20*(Src[BpS*4]+Src[BpS*5]);
        CLIP_STORE(4, C);
        C = 16-RND - (Src[BpS*2]+Src[BpS*9])  + 3*(Src[BpS*3]+Src[BpS*8])   - 6*(Src[BpS*4]+Src[BpS*7])   + 20*(Src[BpS*5]+Src[BpS*6]);
        CLIP_STORE(5, C);
        C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS*4]+Src[BpS*9])   - 6*(Src[BpS*5]+Src[BpS*8])   + 20*(Src[BpS*6]+Src[BpS*7]);
        CLIP_STORE(6, C);
        C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS*5]+Src[BpS*10])  - 6*(Src[BpS*6]+Src[BpS*9])   + 20*(Src[BpS*7]+Src[BpS*8]);
        CLIP_STORE(7, C);
        C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS*6]+Src[BpS*11])  - 6*(Src[BpS*7]+Src[BpS*10])  + 20*(Src[BpS*8]+Src[BpS*9]);
        CLIP_STORE(8, C);
        C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS*7]+Src[BpS*12])  - 6*(Src[BpS*8]+Src[BpS*11])  + 20*(Src[BpS*9]+Src[BpS*10]);
        CLIP_STORE(9, C);
        C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS*8]+Src[BpS*13])  - 6*(Src[BpS*9]+Src[BpS*12])  + 20*(Src[BpS*10]+Src[BpS*11]);
        CLIP_STORE(10, C);
        C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS*9]+Src[BpS*14])  - 6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
        CLIP_STORE(11, C);
        C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15]) - 6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
        CLIP_STORE(12, C);

        /* bottom edge: mirror image of the top-edge tap sets */
        C = 16-RND - Src[BpS*10] + 3*Src[BpS*11] - 6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) + 2*Src[BpS*16];
        CLIP_STORE(13, C);
        C = 16-RND - Src[BpS*11] + 3*(Src[BpS*12]-Src[BpS*16]) - 6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
        CLIP_STORE(14, C);
        C = 16-RND - Src[BpS*12] + 3*Src[BpS*13] - 7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
        CLIP_STORE(15, C);

        /* next column */
        Src += 1;
        Dst += 1;
    }
}

#undef CLIP_STORE

void interpolate16x16_quarterpel_add(uint8_t * const cur,
									uint8_t * const refn,
									uint8_t * const refh,
									uint8_t * const refv,
									uint8_t * const refhv,
									const uint32_t x, const uint32_t y,
									const int32_t dx,  const int dy,
									const uint32_t stride,
									const uint32_t rounding)
{
			const uint8_t *src;
			uint8_t *dst;
			uint8_t *tmp;
			int32_t quads;
			
			int32_t x_int, y_int;
		
			const int32_t xRef = (int)x*4 + dx;
			const int32_t yRef = (int)y*4 + dy;
		
			quads = (dx&3) | ((dy&3)<<2);
		
			x_int = xRef >> 2;
			y_int = yRef >> 2;
		
			dst = cur + y * stride + x;
			src = refn + y_int * (int)stride + x_int;
		
			tmp = refh; /* we need at least a 16 x stride scratch block */
		
			switch(quads) 
			{
					case 0:
						interpolate8x8_halfpel_add(dst, dst,  src, stride, 0, 8);
						interpolate8x8_halfpel_add(dst+8, dst+8, src+8, stride, 0, 8);
						interpolate8x8_halfpel_add(dst+8*stride, dst+8*stride, src+8*stride, stride, 0, 8);
						interpolate8x8_halfpel_add(dst+8*stride+8, dst+8*stride+8, src+8*stride+8, stride, 0, 8);
						break;
					case 1:
					interpolate16x16_quarterpel_ha_add(dst, src, 16, stride, rounding);
					break;
				case 2:
					interpolate16x16_quarterpel_h_add(dst, src, 16, stride, rounding);
					break;
				case 3:
					interpolate16x16_quarterpel_ha_up_add(dst, src, 16, stride, rounding);
					break;
				case 4:
					interpolate16x16_quarterpel_va_add(dst, src, 16, stride, rounding);
					break;
				case 5:
					interpolate16x16_quarterpel_ha(tmp, src, 17, stride, rounding);
					interpolate16x16_quarterpel_va_add(dst, tmp, 16, stride, rounding);
					break;
				case 6:
					interpolate16x16_quarterpel_h(tmp, src,	  17, stride, rounding);
					interpolate16x16_quarterpel_va_add(dst, tmp, 16, stride, rounding);
					break;
				case 7:
					interpolate16x16_quarterpel_ha_up(tmp, src, 17, stride, rounding);
					interpolate16x16_quarterpel_va_add(dst, tmp, 16, stride, rounding);
					break;
				case 8:
					interpolate16x16_quarterpel_v_add(dst, src, 16, stride, rounding);
					break;
				case 9:
					interpolate16x16_quarterpel_ha(tmp, src, 17, stride, rounding);
					interpolate16x16_quarterpel_v_add(dst, tmp, 16, stride, rounding);
					break;
				case 10:
					interpolate16x16_quarterpel_h(tmp, src, 17, stride, rounding);
					interpolate16x16_quarterpel_v_add(dst, tmp, 16, stride, rounding);
					break;
				case 11:
					interpolate16x16_quarterpel_ha_up(tmp, src, 17, stride, rounding);
					interpolate16x16_quarterpel_v_add(dst, tmp, 16, stride, rounding);
					break;
				case 12:
					interpolate16x16_quarterpel_va_up_add(dst, src, 16, stride, rounding);
					break;
				case 13:
					interpolate16x16_quarterpel_ha(tmp, src, 17, stride, rounding);
					interpolate16x16_quarterpel_va_up_add(dst, tmp, 16, stride, rounding);
					break;
				case 14:
					interpolate16x16_quarterpel_h(tmp, src, 17, stride, rounding);
					interpolate16x16_quarterpel_va_up_add( dst, tmp, 16, stride, rounding);
					break;
				case 15:
					interpolat
上一页 1 2 3 45
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -