📄 gmc.c
字号:
vDst[i] = (uint8_t)f0;
}
uDst += dststride;
vDst += dststride;
}
}
/*
 * Average motion vector callback installed by generate_GMCparameters() when
 * at most one warp point is in use.
 *
 *   Dsp   warp parameters; only Uo and Vo are read
 *   mv    [output] receives the resulting vector
 *   x, y  block position; not used here, the result is the same for every block
 *   qpel  left-shift applied to Uo/Vo before the final RSHIFT by 3
 *
 * RSHIFT is a project macro (presumably a rounding right shift - confirm
 * against its definition).
 */
static void
get_average_mv_1pt_C(const NEW_GMC_DATA * const Dsp, VECTOR * const mv,
                     int x, int y, int qpel)
{
	/* position independent: keep the common callback signature quiet */
	(void)x;
	(void)y;

	mv->x = RSHIFT(Dsp->Uo << qpel, 3);
	mv->y = RSHIFT(Dsp->Vo << qpel, 3);
}
#if defined(ARCH_IS_IA32)
/* *************************************************************
* MMX core function
*/
/*
 * Pointer to the 8-pixel "linear" core used by Predict_16x16_mmx() and
 * Predict_8x8_mmx() when a run of 8 output pixels passes their linearity test.
 * It starts out null and is assigned by init_GMC(): the SSE2 routine when
 * XVID_CPU_SSE2 is set, the MMX routine otherwise.
 */
static
void (*GMC_Core_Lin_8)(uint8_t *Dst, const uint16_t * Offsets,
const uint8_t * const Src0, const int BpS, const int Rounder) = 0;
/* Implementations defined outside this file; same signature as the pointer above. */
extern void xvid_GMC_Core_Lin_8_mmx(uint8_t *Dst, const uint16_t * Offsets,
const uint8_t * const Src0, const int BpS, const int Rounder);
extern void xvid_GMC_Core_Lin_8_sse2(uint8_t *Dst, const uint16_t * Offsets,
const uint8_t * const Src0, const int BpS, const int Rounder);
/* *************************************************************/
/*
 * Plain C core producing 8 output pixels, each fetched from its own source
 * position. Used when the 8 samples cannot be treated as one linear run.
 *
 *   Dst        receives 8 bytes
 *   Offsets    Offsets[i] is the u coordinate and Offsets[i+16] the v
 *              coordinate of pixel i; the low 4 bits index MTab, the
 *              remaining bits are the integer sample position
 *   Src0       base of the source plane
 *   srcstride  distance between source rows
 *   Rounder    added before the final shift by 24
 *
 * For every pixel the 2x2 neighbourhood at (u>>4, v>>4) is read; each row's
 * two samples are packed into one 32-bit word (bits 0..7 and 16..23) so a
 * single multiply by the MTab entry processes both.
 */
static void GMC_Core_Non_Lin_8(uint8_t *Dst,
                               const uint16_t * Offsets,
                               const uint8_t * const Src0, const int srcstride,
                               const int Rounder)
{
	int px;

	for (px = 0; px < 8; ++px) {
		const uint32_t u = Offsets[px];
		const uint32_t v = Offsets[px + 16];

		/* table entries selected by the 4-bit fractions of u and v */
		const uint32_t wu = MTab[u & 0x0f];
		const uint32_t wv = MTab[v & 0x0f];

		/* top-left sample of the 2x2 neighbourhood */
		const uint8_t * const tap = Src0 + (u >> 4) + (v >> 4)*srcstride;

		/* two samples per word: left in bits 0..7, right in bits 16..23 */
		const uint32_t upper = (uint32_t)tap[0] | ((uint32_t)tap[1] << 16);
		const uint32_t lower = (uint32_t)tap[srcstride + 0] | ((uint32_t)tap[srcstride + 1] << 16);

		/* combine along u: upper row ends up in the low half, lower row in the high half */
		const uint32_t upper_mix = (wu * upper) >> 16;
		const uint32_t lower_mix = (wu * lower) & 0x0fff0000;

		/* combine along v, round, and keep the top byte */
		const uint32_t result = (wv * (upper_mix | lower_mix) + Rounder) >> 24;

		Dst[px] = (uint8_t)result;
	}
}
//////////////////////////////////////////////////////////
/*
 * 16x16 luma GMC predictor that init_GMC() installs on IA32 builds.
 *
 *   This       precomputed warp parameters; reads sW, sH, accuracy,
 *              dU[], dV[], Uo and Vo
 *   dst        top-left of the destination block, advanced by dststride per row
 *   src        source plane handed to the 8-pixel cores
 *   srcstride  source stride handed to the 8-pixel cores
 *   x, y       block position; scaled by 16 when locating the block's first sample
 *   rounding   rounding control, folded into Rounder
 *
 * Each of the 16 rows is produced as two 8-pixel halves. Offsets[0..15]
 * receives the u coordinates of the row and Offsets[16..31] the v coordinates;
 * every entry carries 4 fractional bits (the cores use value&0x0f as the
 * interpolation index and value>>4 as the sample position).
 */
static
void Predict_16x16_mmx(const NEW_GMC_DATA * const This,
uint8_t *dst, const uint8_t *src,
int dststride, int srcstride, int x, int y, int rounding)
{
const int W = This->sW;
const int H = This->sH;
const int rho = 3 - This->accuracy;
/* rounding term the cores add before their final shift */
const int Rounder = ( 128 - (rounding<<(2*rho)) ) << 16;
/* W and H rescaled so they can be compared directly against U and V.
 * NOTE(review): the shift is evaluated in int before the conversion to
 * uint32_t; for large sW/sH it may overflow a signed int - confirm the
 * supported picture sizes. */
const uint32_t W2 = W<<(16-rho);
const uint32_t H2 = H<<(16-rho);
/* per-pixel steps along a row (x) and from one row to the next (y) */
const int dUx = This->dU[0];
const int dVx = This->dV[0];
const int dUy = This->dU[1];
const int dVy = This->dV[1];
/* warped coordinates of the block's first sample */
int Uo = This->Uo + 16*(dUy*y + dUx*x);
int Vo = This->Vo + 16*(dVy*y + dVx*x);
int i, j;
DECLARE_ALIGNED_MATRIX(Offsets, 2,16, uint16_t, CACHE_LINE);
for(j=16; j>0; --j)
{
/* U,V walk along the current row; Uo,Vo move on to the next row */
int32_t U = Uo, V = Vo;
Uo += dUy; Vo += dVy;
/* Fast path when both ends of the row (pixel 0 and pixel 15) lie inside
 * [0,W2) x [0,H2). The uint32_t casts make negative values compare as
 * large numbers, so they fail the test as well. */
if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) &&
H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) )
{
uint32_t UV1, UV2;
/* no clipping needed: store the coordinates as they are */
for(i=0; i<16; ++i)
{
uint32_t u = ( U >> 16 ) << rho;
uint32_t v = ( V >> 16 ) << rho;
U += dUx; V += dVx;
Offsets[ i] = u;
Offsets[16+i] = v;
}
// batch 8 input pixels when linearity says it's ok
/* The 0xfff0fff0 mask drops the 4-bit fractions of u (low half) and
 * v (high half). UV1+7*16==UV2 then says that, from pixel 0 to pixel 7,
 * the integer part of u grew by exactly 7 while the integer part of v
 * stayed the same (ignoring a carry between the halves). In that case
 * GMC_Core_Lin_8 gets src already advanced to pixel 0's position. */
UV1 = (Offsets[0] | (Offsets[16]<<16)) & 0xfff0fff0U;
UV2 = (Offsets[7] | (Offsets[23]<<16)) & 0xfff0fff0U;
if (UV1+7*16==UV2)
GMC_Core_Lin_8(dst, Offsets, src + (Offsets[0]>>4) + (Offsets[16]>>4)*srcstride, srcstride, Rounder);
else
GMC_Core_Non_Lin_8(dst, Offsets, src, srcstride, Rounder);
/* same test for the right half of the row (pixels 8..15) */
UV1 = (Offsets[ 8] | (Offsets[24]<<16)) & 0xfff0fff0U;
UV2 = (Offsets[15] | (Offsets[31]<<16)) & 0xfff0fff0U;
if (UV1+7*16==UV2)
GMC_Core_Lin_8(dst+8, Offsets+8, src + (Offsets[8]>>4) + (Offsets[24]>>4)*srcstride, srcstride, Rounder);
else
GMC_Core_Non_Lin_8(dst+8, Offsets+8, src, srcstride, Rounder);
}
else
{
/* slow path: clamp every coordinate to [0,W] / [0,H] before storing it */
for(i=0; i<16; ++i)
{
int u = ( U >> 16 ) << rho;
int v = ( V >> 16 ) << rho;
U += dUx; V += dVx;
Offsets[ i] = (u<0) ? 0 : (u>=W) ? W : u;
Offsets[16+i] = (v<0) ? 0 : (v>=H) ? H : v;
}
// due to boundary clipping, we cannot infer the 8-pixels batchability
// simply by using the linearity. Oh well, not a big deal...
GMC_Core_Non_Lin_8(dst, Offsets, src, srcstride, Rounder);
GMC_Core_Non_Lin_8(dst+8, Offsets+8, src, srcstride, Rounder);
}
dst += dststride;
}
}
/*
 * 8x8 chroma GMC predictor that init_GMC() installs on IA32 builds.
 * Both chroma planes are produced in the same pass from one set of offsets.
 *
 *   This        precomputed warp parameters; reads sW, sH, accuracy,
 *               dU[], dV[], Uco and Vco
 *   uDst, vDst  top-left of the two destination blocks, advanced by
 *               dststride per row
 *   uSrc, vSrc  the two source planes handed to the 8-pixel cores
 *   srcstride   source stride handed to the 8-pixel cores
 *   x, y        block position; scaled by 8 when locating the first sample
 *   rounding    rounding control, folded into Rounder
 *
 * Per row, Offsets[0..7] receives the u coordinates and Offsets[16..23] the
 * v coordinates (4 fractional bits each); the remaining entries of the
 * 2x16 matrix are not written here.
 */
static
void Predict_8x8_mmx(const NEW_GMC_DATA * const This,
uint8_t *uDst, const uint8_t *uSrc,
uint8_t *vDst, const uint8_t *vSrc,
int dststride, int srcstride, int x, int y, int rounding)
{
/* clipping bounds are half of the values used for luma */
const int W = This->sW >> 1;
const int H = This->sH >> 1;
const int rho = 3-This->accuracy;
/* rounding term the cores add before their final shift */
const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16;
/* W and H rescaled so they can be compared directly against U and V */
const uint32_t W2 = W<<(16-rho);
const uint32_t H2 = H<<(16-rho);
/* per-pixel steps along a row (x) and from one row to the next (y) */
const int dUx = This->dU[0];
const int dVx = This->dV[0];
const int dUy = This->dU[1];
const int dVy = This->dV[1];
/* warped coordinates of the block's first chroma sample */
int Uo = This->Uco + 8*(dUy*y + dUx*x);
int Vo = This->Vco + 8*(dVy*y + dVx*x);
DECLARE_ALIGNED_MATRIX(Offsets, 2,16, uint16_t, CACHE_LINE);
int i, j;
for(j=8; j>0; --j)
{
/* U,V walk along the current row; Uo,Vo move on to the next row */
int32_t U = Uo, V = Vo;
Uo += dUy; Vo += dVy;
/* Fast path when the tested end points lie inside [0,W2) x [0,H2); the
 * uint32_t casts make negative values fail the test too.
 * NOTE(review): the far end point is taken at 15 steps although only 8
 * pixels are generated per row. This only makes the fast path more
 * conservative (rows that would fit fall through to the clamping path),
 * but 7 looks like the intended factor - confirm. */
if ( W2>(uint32_t)U && W2>(uint32_t)(U+15*dUx) &&
H2>(uint32_t)V && H2>(uint32_t)(V+15*dVx) )
{
uint32_t UV1, UV2;
/* no clipping needed: store the coordinates as they are */
for(i=0; i<8; ++i)
{
int32_t u = ( U >> 16 ) << rho;
int32_t v = ( V >> 16 ) << rho;
U += dUx; V += dVx;
Offsets[ i] = u;
Offsets[16+i] = v;
}
// batch 8 input pixels when linearity says it's ok
/* The 0xfff0fff0 mask drops the 4-bit fractions of u (low half) and
 * v (high half). UV1+7*16==UV2 then says that, from pixel 0 to pixel 7,
 * the integer part of u grew by exactly 7 while the integer part of v
 * stayed the same (ignoring a carry between the halves). */
UV1 = (Offsets[ 0] | (Offsets[16]<<16)) & 0xfff0fff0U;
UV2 = (Offsets[ 7] | (Offsets[23]<<16)) & 0xfff0fff0U;
if (UV1+7*16==UV2)
{
/* position of pixel 0, shared by the u and v planes */
const uint32_t Off = (Offsets[0]>>4) + (Offsets[16]>>4)*srcstride;
GMC_Core_Lin_8(uDst, Offsets, uSrc+Off, srcstride, Rounder);
GMC_Core_Lin_8(vDst, Offsets, vSrc+Off, srcstride, Rounder);
}
else {
GMC_Core_Non_Lin_8(uDst, Offsets, uSrc, srcstride, Rounder);
GMC_Core_Non_Lin_8(vDst, Offsets, vSrc, srcstride, Rounder);
}
}
else
{
/* slow path: clamp every coordinate to [0,W] / [0,H] before storing it */
for(i=0; i<8; ++i)
{
int u = ( U >> 16 ) << rho;
int v = ( V >> 16 ) << rho;
U += dUx; V += dVx;
Offsets[ i] = (u<0) ? 0 : (u>=W) ? W : u;
Offsets[16+i] = (v<0) ? 0 : (v>=H) ? H : v;
}
/* clamped offsets are always processed by the per-pixel core */
GMC_Core_Non_Lin_8(uDst, Offsets, uSrc, srcstride, Rounder);
GMC_Core_Non_Lin_8(vDst, Offsets, vSrc, srcstride, Rounder);
}
uDst += dststride;
vDst += dststride;
}
}
#endif /* ARCH_IS_IA32 */
/* *************************************************************
 * Initializes the internal predictor pointers.
 *
 * The C predictors are always installed first. On IA32 builds, if
 * cpu_flags reports any of MMX, MMXEXT, 3DNOW, 3DNOWEXT, SSE or SSE2,
 * they are replaced by the *_mmx predictors and GMC_Core_Lin_8 is bound
 * to the SSE2 core when XVID_CPU_SSE2 is set, to the MMX core otherwise.
 */
void init_GMC(const unsigned int cpu_flags)
{
	/* portable defaults */
	Predict_16x16_func = Predict_16x16_C;
	Predict_8x8_func = Predict_8x8_C;

#if defined(ARCH_IS_IA32)
	{
		/* any one of these capabilities enables the IA32 predictors */
		const unsigned int simd_caps =
			XVID_CPU_MMX | XVID_CPU_MMXEXT |
			XVID_CPU_3DNOW | XVID_CPU_3DNOWEXT |
			XVID_CPU_SSE | XVID_CPU_SSE2;

		if (cpu_flags & simd_caps) {
			Predict_16x16_func = Predict_16x16_mmx;
			Predict_8x8_func = Predict_8x8_mmx;

			GMC_Core_Lin_8 = (cpu_flags & XVID_CPU_SSE2)
				? xvid_GMC_Core_Lin_8_sse2
				: xvid_GMC_Core_Lin_8_mmx;
		}
	}
#endif
}
/* *************************************************************
 * Fills *gmc with the precomputed warp parameters and selects the
 * predictor / average-MV callbacks that match the number of warp
 * points actually needed.
 *
 *   nb_pts         number of warp points supplied by the caller
 *   accuracy       Warning! This is the accuracy, not the 'resolution';
 *                  rho = 3 - accuracy is derived from it below
 *   pts            warp point displacements; pts->duv[0..2] are read
 *   width, height  picture dimensions; stored as width<<4 and height<<4
 *   gmc            [output] parameter block
 *
 * log2bin() and RDIV() are project helpers not visible here (presumably
 * a bit-length function and a rounded division - confirm).
 */
void generate_GMCparameters( int nb_pts, const int accuracy,
const WARPPOINTS *const pts,
const int width, const int height,
NEW_GMC_DATA *const gmc)
{
gmc->sW = width << 4;
gmc->sH = height << 4;
gmc->accuracy = accuracy;
/* records the caller's count, before the reduction below changes nb_pts */
gmc->num_wp = nb_pts;
/* reduce the number of points, if possible */
/* Only all-zero trailing displacements are dropped: the count becomes 1 or 0
 * when duv[1] and duv[2] are both zero (or fewer than 2 points were given).
 * NOTE(review): the `else nb_pts = 2` arm looks unreachable - whenever the
 * outer test holds, the middle test holds as well.
 * NOTE(review): with nb_pts == 2 the outer test reads pts->duv[2];
 * presumably the caller zero-fills unused entries - confirm. */
if (nb_pts<2 || (pts->duv[2].x==0 && pts->duv[2].y==0 && pts->duv[1].x==0 && pts->duv[1].y==0 )) {
if (nb_pts<2 || (pts->duv[1].x==0 && pts->duv[1].y==0)) {
if (nb_pts<1 || (pts->duv[0].x==0 && pts->duv[0].y==0)) {
nb_pts = 0;
}
else nb_pts = 1;
}
else nb_pts = 2;
}
/* now, nb_pts stores the actual number of points required for interpolation */
if (nb_pts<=1)
{
if (nb_pts==1) {
/* store as 4b fixed point */
gmc->Uo = pts->duv[0].x << accuracy;
gmc->Vo = pts->duv[0].y << accuracy;
/* chroma offsets: half of the luma displacement, OR-ing the dropped bit back in */
gmc->Uco = ((pts->duv[0].x>>1) | (pts->duv[0].x&1)) << accuracy; /* DIV2RND() */
gmc->Vco = ((pts->duv[0].y>>1) | (pts->duv[0].y&1)) << accuracy; /* DIV2RND() */
}
else { /* zero points?! */
gmc->Uo = gmc->Vo = 0;
gmc->Uco = gmc->Vco = 0;
}
/* 0 or 1 point: use the dedicated 1-point routines */
gmc->predict_16x16 = Predict_1pt_16x16_C;
gmc->predict_8x8 = Predict_1pt_8x8_C;
gmc->get_average_mv = get_average_mv_1pt_C;
}
else { /* 2 or 3 points */
const int rho = 3 - accuracy; /* = {3,2,1,0} for Acc={0,1,2,3} */
/* Ws is a power of two derived from width-1 through log2bin() */
int Alpha = log2bin(width-1);
int Ws = 1 << Alpha;
gmc->dU[0] = 16*Ws + RDIV( 8*Ws*pts->duv[1].x, width ); /* dU/dx */
gmc->dV[0] = RDIV( 8*Ws*pts->duv[1].y, width ); /* dV/dx */
if (nb_pts==2) {
/* 2 points: the y gradients are the x gradients rotated by 90 degrees */
gmc->dU[1] = -gmc->dV[0]; /* -Sin */
gmc->dV[1] = gmc->dU[0] ; /* Cos */
}
else
{
/* 3 points: the y gradients come from duv[2], scaled by Hs */
const int Beta = log2bin(height-1);
const int Hs = 1<<Beta;
gmc->dU[1] = RDIV( 8*Hs*pts->duv[2].x, height ); /* dU/dy */
gmc->dV[1] = 16*Hs + RDIV( 8*Hs*pts->duv[2].y, height ); /* dV/dy */
/* bring both gradient pairs to the larger of the two scales; Alpha
 * then names that common scale (Ws is not read again after this) */
if (Beta>Alpha) {
gmc->dU[0] <<= (Beta-Alpha);
gmc->dV[0] <<= (Beta-Alpha);
Alpha = Beta;
Ws = Hs;
}
else {
gmc->dU[1] <<= Alpha - Beta;
gmc->dV[1] <<= Alpha - Beta;
}
}
/* upscale to 16b fixed-point */
gmc->dU[0] <<= (16-Alpha - rho);
gmc->dU[1] <<= (16-Alpha - rho);
gmc->dV[0] <<= (16-Alpha - rho);
gmc->dV[1] <<= (16-Alpha - rho);
/* luma origin: duv[0] shifted up by 16+accuracy bits, plus 1<<15 */
gmc->Uo = ( pts->duv[0].x <<(16+ accuracy)) + (1<<15);
gmc->Vo = ( pts->duv[0].y <<(16+ accuracy)) + (1<<15);
/* chroma origin: built from duv[0]-1, then combined with both gradients
 * and divided by 4 */
gmc->Uco = ((pts->duv[0].x-1)<<(17+ accuracy)) + (1<<17);
gmc->Vco = ((pts->duv[0].y-1)<<(17+ accuracy)) + (1<<17);
gmc->Uco = (gmc->Uco + gmc->dU[0] + gmc->dU[1])>>2;
gmc->Vco = (gmc->Vco + gmc->dV[0] + gmc->dV[1])>>2;
/* general predictors: whatever init_GMC() installed in these pointers */
gmc->predict_16x16 = Predict_16x16_func;
gmc->predict_8x8 = Predict_8x8_func;
gmc->get_average_mv = get_average_mv_C;
}
}
/* *******************************************************************
 * Quick and dirty whole-frame driver. Every macroblock gets its
 * sanitized average motion vector written to pMBs[] and its mcsel
 * cleared. When pGMC is non-NULL the warped luma and chroma blocks are
 * rendered into pGMC as well; when it is NULL only the vectors are
 * produced. emms() is called once after the last macroblock. */
void
generate_GMCimage(	const NEW_GMC_DATA *const gmc_data, /* [input] precalculated data */
					const IMAGE *const pRef,		/* [input] */
					const int mb_width,
					const int mb_height,
					const int stride,
					const int stride2,
					const int fcode, 				/* [input] some parameters... */
					const int32_t quarterpel,		/* [input] for rounding avgMV */
					const int reduced_resolution,	/* [input] ignored */
					const int32_t rounding,			/* [input] for rounding image data */
					MACROBLOCK *const pMBs, 		/* [output] average motion vectors */
					IMAGE *const pGMC)				/* [output] full warped image */
{
	const unsigned int rows = (unsigned int)mb_height;
	const unsigned int cols = (unsigned int)mb_width;
	unsigned int row, col;
	VECTOR avgMV;

	for (row = 0; row < rows; row++) {
		for (col = 0; col < cols; col++) {
			/* raster-order index of this macroblock */
			const int mbnum = row*mb_width+col;
			MACROBLOCK *const mb = &pMBs[mbnum];

			if (pGMC) {
				/* luma: one 16x16 block */
				gmc_data->predict_16x16(gmc_data,
						pGMC->y + row*16*stride + col*16, pRef->y,
						stride, stride, col, row, rounding);

				/* chroma: one 8x8 block in each of the u and v planes */
				gmc_data->predict_8x8(gmc_data,
						pGMC->u + row*8*stride2 + col*8, pRef->u,
						pGMC->v + row*8*stride2 + col*8, pRef->v,
						stride2, stride2, col, row, rounding);
			}

			gmc_data->get_average_mv(gmc_data, &avgMV, col, row, quarterpel);

			mb->amv.x = gmc_sanitize(avgMV.x, quarterpel, fcode);
			mb->amv.y = gmc_sanitize(avgMV.y, quarterpel, fcode);
			mb->mcsel = 0; /* until mode decision */
		}
	}

	emms();
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -