📄 mvmc.c
字号:
CLIP_STORE(11,C);
p = l +BpS;
C = 16-RND - (Src[q]+Src[p]) + 3*(Src[s]+Src[l])-6*(Src[t]+Src[n]) + 20*(Src[w]+Src[m]);
CLIP_STORE(12,C);
C = 16-RND - Src[s] +3*Src[t] -6*(Src[w]+Src[l]) + 20*(Src[m]+Src[n]) +2*Src[p];
CLIP_STORE(13,C);
C = 16-RND - Src[t] +3*(Src[w]-Src[p]) -6*Src[m] + 20*Src[n] + 19*Src[l];
CLIP_STORE(14,C);
C = 16-RND - Src[w] +3*Src[m] -7*Src[n] + 23*Src[l] + 14*Src[p];
CLIP_STORE(15,C);
Src += 1;
Dst += 1;
}
}
/*
 * Build one 16x16 block of `cur` at pixel position (x, y) from the
 * reference plane `refn`, displaced by the vector (dx, dy).
 *
 * The low two bits of dx and dy select the filter pass(es); the remaining
 * bits (after adding 4*x / 4*y and shifting right by 2) select the source
 * position inside refn.
 *
 *   dx&3: 0 = no horizontal pass, 1 = _ha, 2 = _h, 3 = _ha_up
 *   dy&3: 0 = no vertical pass,   1 = _va, 2 = _v, 3 = _va_up
 *
 * When both passes are needed, the horizontal pass is run with a count of
 * 17 (instead of 16) into the buffer `refh`, which is used purely as
 * scratch space, and the vertical pass then reads from that buffer.
 * NOTE(review): the count is presumably a row count, so the scratch area
 * must hold 17 rows at `stride` - confirm against the 16x16 kernels.
 *
 * refv and refhv are not referenced by this function.
 */
void interpolate16x16_quarterpel(uint8_t * const cur,
uint8_t * const refn,
uint8_t * const refh,
uint8_t * const refv,
uint8_t * const refhv,
const uint32_t x, const uint32_t y,
const int32_t dx, const int dy,
const uint32_t stride,
const uint32_t rounding)
{
	const int32_t frac_x = dx & 3;
	const int32_t frac_y = dy & 3;
	const int32_t col = ((int)x*4 + dx) >> 2;
	const int32_t row = ((int)y*4 + dy) >> 2;
	uint8_t * const out = cur + y * stride + x;
	const uint8_t * const in = refn + row * (int)stride + col;
	uint8_t * const scratch = refh;
	const uint8_t *v_src = in;

	/* No fractional part in either direction: plain copy as four 8x8 tiles. */
	if (frac_x == 0 && frac_y == 0) {
		transfer8x8_copy(out, in, stride);
		transfer8x8_copy(out+8, in+8, stride);
		transfer8x8_copy(out+8*stride, in+8*stride, stride);
		transfer8x8_copy(out+8*stride+8, in+8*stride+8, stride);
		return;
	}

	/* Horizontal stage. */
	if (frac_x != 0) {
		uint8_t *h_out = out;
		int h_count = 16;

		if (frac_y != 0) {
			/* A vertical stage follows: filter into scratch with the
			 * larger count and make the vertical stage read from there. */
			h_out = scratch;
			h_count = 17;
			v_src = scratch;
		}

		switch (frac_x) {
		case 1:
			interpolate16x16_quarterpel_ha(h_out, in, h_count, stride, rounding);
			break;
		case 2:
			interpolate16x16_quarterpel_h(h_out, in, h_count, stride, rounding);
			break;
		default: /* frac_x == 3 */
			interpolate16x16_quarterpel_ha_up(h_out, in, h_count, stride, rounding);
			break;
		}
	}

	/* Vertical stage. */
	switch (frac_y) {
	case 1:
		interpolate16x16_quarterpel_va(out, v_src, 16, stride, rounding);
		break;
	case 2:
		interpolate16x16_quarterpel_v(out, v_src, 16, stride, rounding);
		break;
	case 3:
		interpolate16x16_quarterpel_va_up(out, v_src, 16, stride, rounding);
		break;
	default: /* frac_y == 0: horizontal stage already wrote to out */
		break;
	}
}
#undef CLIP_STORE
/*
 * CLIP_STORE(D, C): clamp the accumulator C and store it to D.
 *
 *   C < 0          -> 0
 *   C > (255<<5)   -> 255
 *   otherwise      -> C >> 5
 *
 * C must be a modifiable int lvalue; it is overwritten with the clamped
 * value, which is then handed to STORE together with the destination D.
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and stays correct under an unbraced if/else.
 */
#define CLIP_STORE(D,C) \
	do { \
		if ((C) < 0) (C) = 0; \
		else if ((C) > (255<<5)) (C) = 255; \
		else (C) = (C) >> 5; \
		STORE(D, C); \
	} while (0)
/*
 * Horizontal filter pass over an 8-pixel-wide strip.
 *
 * For each of H rows, reads the nine samples Src[0..8] and produces the
 * eight outputs Dst[0..7]. Every output is a weighted sum of source
 * samples (the integer weights of each sum add up to 32) plus the bias
 * 16-RND; CLIP_STORE, defined just above, clamps and stores the result.
 * Both pointers advance by BpS after each row.
 */
void interpolate8x8_quarterpel_h(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
	const int bias = 16 - RND;	/* shared leading term of every sum */
	int32_t rows_left;

	for (rows_left = H; rows_left > 0; rows_left--) {
		int acc;

		acc = bias + 14*Src[0] + 23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
		CLIP_STORE(Dst[0], acc);

		acc = bias - 3*(Src[0]-Src[4]) + 19*Src[1] + 20*Src[2] - 6*Src[3] - Src[5];
		CLIP_STORE(Dst[1], acc);

		acc = bias + 2*Src[0] - 6*(Src[1]+Src[4]) + 20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
		CLIP_STORE(Dst[2], acc);

		acc = bias - (Src[0]+Src[7]) + 3*(Src[1]+Src[6]) - 6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
		CLIP_STORE(Dst[3], acc);

		acc = bias - (Src[1]+Src[8]) + 3*(Src[2]+Src[7]) - 6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
		CLIP_STORE(Dst[4], acc);

		acc = bias - Src[2] + 3*Src[3] - 6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) + 2*Src[8];
		CLIP_STORE(Dst[5], acc);

		acc = bias - Src[3] + 3*(Src[4]-Src[8]) - 6*Src[5] + 20*Src[6] + 19*Src[7];
		CLIP_STORE(Dst[6], acc);

		acc = bias - Src[4] + 3*Src[5] - 7*Src[6] + 23*Src[7] + 14*Src[8];
		CLIP_STORE(Dst[7], acc);

		/* next row */
		Src += BpS;
		Dst += BpS;
	}
}
#undef CLIP_STORE
/*
 * CLIP_STORE(i, C): clamp the accumulator C, average it with the source
 * sample at index i, and store the result at Dst[i].
 *
 *   clamp:    C < 0 -> 0;  C > (255<<5) -> 255;  otherwise C >> 5
 *   average:  C = (C + Src[i] + 1 - RND) >> 1
 *
 * Uses Src, Dst and RND from the enclosing scope. C must be a modifiable
 * int lvalue and is overwritten. The body is wrapped in do { } while (0)
 * so the macro expands to exactly one statement, and the index argument
 * is parenthesized so an expression may be passed safely.
 */
#define CLIP_STORE(i,C) \
	do { \
		if ((C) < 0) (C) = 0; \
		else if ((C) > (255<<5)) (C) = 255; \
		else (C) = (C) >> 5; \
		(C) = ((C) + Src[(i)] + 1 - RND) >> 1; \
		STORE(Dst[(i)], C); \
	} while (0)
/*
 * Horizontal filter pass over an 8-pixel-wide strip, averaged with the
 * source sample at the same index.
 *
 * For each of H rows, the eight weighted sums over Src[0..8] (bias
 * 16-RND, weights adding up to 32) are formed exactly as in the plain
 * horizontal pass. CLIP_STORE, defined just above, then clamps each sum,
 * averages it with Src[k] and stores it at Dst[k]. Both pointers advance
 * by BpS after each row.
 */
void interpolate8x8_quarterpel_ha(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
	const int bias = 16 - RND;	/* shared leading term of every sum */
	int32_t rows_left;

	for (rows_left = H; rows_left > 0; rows_left--) {
		int acc;

		acc = bias + 14*Src[0] + 23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
		CLIP_STORE(0, acc);

		acc = bias - 3*(Src[0]-Src[4]) + 19*Src[1] + 20*Src[2] - 6*Src[3] - Src[5];
		CLIP_STORE(1, acc);

		acc = bias + 2*Src[0] - 6*(Src[1]+Src[4]) + 20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
		CLIP_STORE(2, acc);

		acc = bias - (Src[0]+Src[7]) + 3*(Src[1]+Src[6]) - 6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
		CLIP_STORE(3, acc);

		acc = bias - (Src[1]+Src[8]) + 3*(Src[2]+Src[7]) - 6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
		CLIP_STORE(4, acc);

		acc = bias - Src[2] + 3*Src[3] - 6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) + 2*Src[8];
		CLIP_STORE(5, acc);

		acc = bias - Src[3] + 3*(Src[4]-Src[8]) - 6*Src[5] + 20*Src[6] + 19*Src[7];
		CLIP_STORE(6, acc);

		acc = bias - Src[4] + 3*Src[5] - 7*Src[6] + 23*Src[7] + 14*Src[8];
		CLIP_STORE(7, acc);

		/* next row */
		Src += BpS;
		Dst += BpS;
	}
}
#undef CLIP_STORE
/*
 * CLIP_STORE(i, C): clamp the accumulator C, average it with the source
 * sample one position after index i, and store the result at Dst[i].
 *
 *   clamp:    C < 0 -> 0;  C > (255<<5) -> 255;  otherwise C >> 5
 *   average:  C = (C + Src[i+1] + 1 - RND) >> 1
 *
 * Uses Src, Dst and RND from the enclosing scope. C must be a modifiable
 * int lvalue and is overwritten. The body is wrapped in do { } while (0)
 * so the macro expands to exactly one statement, and the index argument
 * is parenthesized so an expression may be passed safely.
 */
#define CLIP_STORE(i,C) \
	do { \
		if ((C) < 0) (C) = 0; \
		else if ((C) > (255<<5)) (C) = 255; \
		else (C) = (C) >> 5; \
		(C) = ((C) + Src[(i)+1] + 1 - RND) >> 1; \
		STORE(Dst[(i)], C); \
	} while (0)
/*
 * Horizontal filter pass over an 8-pixel-wide strip, averaged with the
 * source sample one position to the right.
 *
 * For each of H rows, the eight weighted sums over Src[0..8] (bias
 * 16-RND, weights adding up to 32) are formed exactly as in the plain
 * horizontal pass. CLIP_STORE, defined just above, then clamps each sum,
 * averages it with Src[k+1] and stores it at Dst[k]. Both pointers
 * advance by BpS after each row.
 */
void interpolate8x8_quarterpel_ha_up(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
	const int bias = 16 - RND;	/* shared leading term of every sum */
	int32_t rows_left;

	for (rows_left = H; rows_left > 0; rows_left--) {
		int acc;

		acc = bias + 14*Src[0] + 23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
		CLIP_STORE(0, acc);

		acc = bias - 3*(Src[0]-Src[4]) + 19*Src[1] + 20*Src[2] - 6*Src[3] - Src[5];
		CLIP_STORE(1, acc);

		acc = bias + 2*Src[0] - 6*(Src[1]+Src[4]) + 20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
		CLIP_STORE(2, acc);

		acc = bias - (Src[0]+Src[7]) + 3*(Src[1]+Src[6]) - 6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
		CLIP_STORE(3, acc);

		acc = bias - (Src[1]+Src[8]) + 3*(Src[2]+Src[7]) - 6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
		CLIP_STORE(4, acc);

		acc = bias - Src[2] + 3*Src[3] - 6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) + 2*Src[8];
		CLIP_STORE(5, acc);

		acc = bias - Src[3] + 3*(Src[4]-Src[8]) - 6*Src[5] + 20*Src[6] + 19*Src[7];
		CLIP_STORE(6, acc);

		acc = bias - Src[4] + 3*Src[5] - 7*Src[6] + 23*Src[7] + 14*Src[8];
		CLIP_STORE(7, acc);

		/* next row */
		Src += BpS;
		Dst += BpS;
	}
}
#undef CLIP_STORE
/*
 * CLIP_STORE(D, C): clamp the accumulator C and store it to D.
 *
 *   C < 0          -> 0
 *   C > (255<<5)   -> 255
 *   otherwise      -> C >> 5
 *
 * C must be a modifiable int lvalue; it is overwritten with the clamped
 * value, which is then handed to STORE together with the destination D.
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and stays correct under an unbraced if/else.
 */
#define CLIP_STORE(D,C) \
	do { \
		if ((C) < 0) (C) = 0; \
		else if ((C) > (255<<5)) (C) = 255; \
		else (C) = (C) >> 5; \
		STORE(D, C); \
	} while (0)
/*
 * Vertical filter pass over an 8-pixel-tall strip.
 *
 * Here H counts columns: each iteration reads nine samples of one column
 * (Src[0], Src[BpS], ..., Src[8*BpS]) and produces eight outputs in the
 * same column of Dst (Dst[0] .. Dst[7*BpS]). Every output is a weighted
 * sum of source samples (weights adding up to 32) plus the bias 16-RND;
 * CLIP_STORE, defined just above, clamps and stores the result. Both
 * pointers advance by one byte after each column.
 */
void interpolate8x8_quarterpel_v(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
	/* Offsets of rows 1..8 relative to the current column start. */
	const int r1 = BpS;
	const int r2 = 2*BpS;
	const int r3 = 3*BpS;
	const int r4 = 4*BpS;
	const int r5 = 5*BpS;
	const int r6 = 6*BpS;
	const int r7 = 7*BpS;
	const int r8 = 8*BpS;
	const int bias = 16 - RND;	/* shared leading term of every sum */
	int32_t cols_left;

	for (cols_left = H; cols_left > 0; cols_left--) {
		int acc;

		acc = bias + 14*Src[0] + 23*Src[r1] - 7*Src[r2] + 3*Src[r3] - Src[r4];
		CLIP_STORE(Dst[0], acc);

		acc = bias - 3*(Src[0]-Src[r4]) + 19*Src[r1] + 20*Src[r2] - 6*Src[r3] - Src[r5];
		CLIP_STORE(Dst[r1], acc);

		acc = bias + 2*Src[0] - 6*(Src[r1]+Src[r4]) + 20*(Src[r2]+Src[r3]) + 3*Src[r5] - Src[r6];
		CLIP_STORE(Dst[r2], acc);

		acc = bias - (Src[0]+Src[r7]) + 3*(Src[r1]+Src[r6]) - 6*(Src[r2]+Src[r5]) + 20*(Src[r3]+Src[r4]);
		CLIP_STORE(Dst[r3], acc);

		acc = bias - (Src[r1]+Src[r8]) + 3*(Src[r2]+Src[r7]) - 6*(Src[r3]+Src[r6]) + 20*(Src[r4]+Src[r5]);
		CLIP_STORE(Dst[r4], acc);

		acc = bias - Src[r2] + 3*Src[r3] - 6*(Src[r4]+Src[r7]) + 20*(Src[r5]+Src[r6]) + 2*Src[r8];
		CLIP_STORE(Dst[r5], acc);

		acc = bias - Src[r3] + 3*(Src[r4]-Src[r8]) - 6*Src[r5] + 20*Src[r6] + 19*Src[r7];
		CLIP_STORE(Dst[r6], acc);

		acc = bias - Src[r4] + 3*Src[r5] - 7*Src[r6] + 23*Src[r7] + 14*Src[r8];
		CLIP_STORE(Dst[r7], acc);

		/* next column */
		Src += 1;
		Dst += 1;
	}
}
#undef CLIP_STORE
/*
 * CLIP_STORE(i, C): clamp the accumulator C, average it with the source
 * sample in row i of the current column, and store the result in row i
 * of Dst.
 *
 *   clamp:    C < 0 -> 0;  C > (255<<5) -> 255;  otherwise C >> 5
 *   average:  C = (C + Src[BpS*i] + 1 - RND) >> 1
 *   store:    STORE(Dst[BpS*i], C)
 *
 * Uses Src, Dst, BpS and RND from the enclosing scope. C must be a
 * modifiable int lvalue and is overwritten. The body is wrapped in
 * do { } while (0) so the macro expands to exactly one statement, and the
 * row argument is parenthesized so that BpS multiplies the whole argument.
 */
#define CLIP_STORE(i,C) \
	do { \
		if ((C) < 0) (C) = 0; \
		else if ((C) > (255<<5)) (C) = 255; \
		else (C) = (C) >> 5; \
		(C) = ((C) + Src[BpS*(i)] + 1 - RND) >> 1; \
		STORE(Dst[BpS*(i)], C); \
	} while (0)
/*
 * Vertical filter pass over an 8-pixel-tall strip, averaged with the
 * source sample in the same row.
 *
 * Here H counts columns: each iteration reads nine samples of one column
 * (Src[0], Src[BpS], ..., Src[8*BpS]) and forms the same eight weighted
 * sums as the plain vertical pass (bias 16-RND, weights adding up to 32).
 * CLIP_STORE, defined just above, then clamps sum k, averages it with
 * Src[BpS*k] and stores it at Dst[BpS*k]. Both pointers advance by one
 * byte after each column.
 */
void interpolate8x8_quarterpel_va(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
	/* Offsets of rows 1..8 relative to the current column start. */
	const int r1 = BpS;
	const int r2 = 2*BpS;
	const int r3 = 3*BpS;
	const int r4 = 4*BpS;
	const int r5 = 5*BpS;
	const int r6 = 6*BpS;
	const int r7 = 7*BpS;
	const int r8 = 8*BpS;
	const int bias = 16 - RND;	/* shared leading term of every sum */
	int32_t cols_left;

	for (cols_left = H; cols_left > 0; cols_left--) {
		int acc;

		acc = bias + 14*Src[0] + 23*Src[r1] - 7*Src[r2] + 3*Src[r3] - Src[r4];
		CLIP_STORE(0, acc);

		acc = bias - 3*(Src[0]-Src[r4]) + 19*Src[r1] + 20*Src[r2] - 6*Src[r3] - Src[r5];
		CLIP_STORE(1, acc);

		acc = bias + 2*Src[0] - 6*(Src[r1]+Src[r4]) + 20*(Src[r2]+Src[r3]) + 3*Src[r5] - Src[r6];
		CLIP_STORE(2, acc);

		acc = bias - (Src[0]+Src[r7]) + 3*(Src[r1]+Src[r6]) - 6*(Src[r2]+Src[r5]) + 20*(Src[r3]+Src[r4]);
		CLIP_STORE(3, acc);

		acc = bias - (Src[r1]+Src[r8]) + 3*(Src[r2]+Src[r7]) - 6*(Src[r3]+Src[r6]) + 20*(Src[r4]+Src[r5]);
		CLIP_STORE(4, acc);

		acc = bias - Src[r2] + 3*Src[r3] - 6*(Src[r4]+Src[r7]) + 20*(Src[r5]+Src[r6]) + 2*Src[r8];
		CLIP_STORE(5, acc);

		acc = bias - Src[r3] + 3*(Src[r4]-Src[r8]) - 6*Src[r5] + 20*Src[r6] + 19*Src[r7];
		CLIP_STORE(6, acc);

		acc = bias - Src[r4] + 3*Src[r5] - 7*Src[r6] + 23*Src[r7] + 14*Src[r8];
		CLIP_STORE(7, acc);

		/* next column */
		Src += 1;
		Dst += 1;
	}
}
#undef CLIP_STORE
/*
 * CLIP_STORE(i, C): clamp the accumulator C, average it with the source
 * sample one row below row i of the current column, and store the result
 * in row i of Dst.
 *
 *   clamp:    C < 0 -> 0;  C > (255<<5) -> 255;  otherwise C >> 5
 *   average:  C = (C + Src[BpS*i + BpS] + 1 - RND) >> 1
 *   store:    STORE(Dst[BpS*i], C)
 *
 * Uses Src, Dst, BpS and RND from the enclosing scope. C must be a
 * modifiable int lvalue and is overwritten. The body is wrapped in
 * do { } while (0) so the macro expands to exactly one statement, and the
 * row argument is parenthesized so that BpS multiplies the whole argument.
 */
#define CLIP_STORE(i,C) \
	do { \
		if ((C) < 0) (C) = 0; \
		else if ((C) > (255<<5)) (C) = 255; \
		else (C) = (C) >> 5; \
		(C) = ((C) + Src[BpS*(i)+BpS] + 1 - RND) >> 1; \
		STORE(Dst[BpS*(i)], C); \
	} while (0)
/*
 * Vertical filter pass over an 8-pixel-tall strip, averaged with the
 * source sample one row below.
 *
 * Here H counts columns: each iteration reads nine samples of one column
 * (Src[0], Src[BpS], ..., Src[8*BpS]) and forms the same eight weighted
 * sums as the plain vertical pass (bias 16-RND, weights adding up to 32).
 * CLIP_STORE, defined just above, then clamps sum k, averages it with
 * Src[BpS*k + BpS] and stores it at Dst[BpS*k]. Both pointers advance by
 * one byte after each column.
 */
void interpolate8x8_quarterpel_va_up(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
	/* Offsets of rows 1..8 relative to the current column start. */
	const int r1 = BpS;
	const int r2 = 2*BpS;
	const int r3 = 3*BpS;
	const int r4 = 4*BpS;
	const int r5 = 5*BpS;
	const int r6 = 6*BpS;
	const int r7 = 7*BpS;
	const int r8 = 8*BpS;
	const int bias = 16 - RND;	/* shared leading term of every sum */
	int32_t cols_left;

	for (cols_left = H; cols_left > 0; cols_left--) {
		int acc;

		acc = bias + 14*Src[0] + 23*Src[r1] - 7*Src[r2] + 3*Src[r3] - Src[r4];
		CLIP_STORE(0, acc);

		acc = bias - 3*(Src[0]-Src[r4]) + 19*Src[r1] + 20*Src[r2] - 6*Src[r3] - Src[r5];
		CLIP_STORE(1, acc);

		acc = bias + 2*Src[0] - 6*(Src[r1]+Src[r4]) + 20*(Src[r2]+Src[r3]) + 3*Src[r5] - Src[r6];
		CLIP_STORE(2, acc);

		acc = bias - (Src[0]+Src[r7]) + 3*(Src[r1]+Src[r6]) - 6*(Src[r2]+Src[r5]) + 20*(Src[r3]+Src[r4]);
		CLIP_STORE(3, acc);

		acc = bias - (Src[r1]+Src[r8]) + 3*(Src[r2]+Src[r7]) - 6*(Src[r3]+Src[r6]) + 20*(Src[r4]+Src[r5]);
		CLIP_STORE(4, acc);

		acc = bias - Src[r2] + 3*Src[r3] - 6*(Src[r4]+Src[r7]) + 20*(Src[r5]+Src[r6]) + 2*Src[r8];
		CLIP_STORE(5, acc);

		acc = bias - Src[r3] + 3*(Src[r4]-Src[r8]) - 6*Src[r5] + 20*Src[r6] + 19*Src[r7];
		CLIP_STORE(6, acc);

		acc = bias - Src[r4] + 3*Src[r5] - 7*Src[r6] + 23*Src[r7] + 14*Src[r8];
		CLIP_STORE(7, acc);

		/* next column */
		Src += 1;
		Dst += 1;
	}
}
void interpolate8x8_quarterpel(uint8_t * const cur,
uint8_t * const refn,
uint8_t * const refh,
uint8_t * const refv,
uint8_t * const refhv,
const uint32_t x, const uint32_t y,
const int32_t dx, const int dy,
const uint32_t stride,
const uint32_t rounding)
{
const uint8_t *src;
uint8_t *dst;
uint8_t *tmp;
int32_t quads;
int32_t x_int, y_int;
const int32_t xRef = (int)x*4 + dx;
const int32_t yRef = (int)y*4 + dy;
quads = (dx&3) | ((dy&3)<<2);
x_int = xRef >> 2;
y_int = yRef >> 2;
dst = cur + y * stride + x;
src = refn + y_int * (int)stride + x_int;
tmp = refh; /* we need at least a 16 x stride scratch block */
switch(quads)
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -