📄 block.c
字号:
paddd mm2,mm3
paddd mm2,mm5
psrad mm2,6
PACKSSDW mm0,mm2
PACKUSWB mm0,mm7
PUNPCKLbw mm0,mm7
movq [ecx],mm0
add eax,32
add ebx,8
add ecx,8
sub dx,1
jne loop1_2
//转秩
mov ebx,pt2
mov edi,pt3
movq mm0,[ebx]
movq mm1,[ebx+8]
movq mm2,[ebx+16]
movq mm3,[ebx+24]
movq mm4,mm0
movq mm5,mm1
movq mm6,mm2
movq mm7,mm3
PUNPCKLWD mm4,mm5
PUNPCKLWD mm6,mm7
movq mm5,mm4
PUNPCKLDQ mm4,mm6
PUNPCKHDQ mm5,mm6
PUNPCKHWD mm0,mm1
PUNPCKHWD mm2,mm7
movq mm7,mm0
PUNPCKLDQ mm0,mm2
PUNPCKHDQ mm7,mm2
movq mm6,mm0
pxor mm0,mm0
PACKUSWB mm4,mm0
PACKUSWB mm5,mm0
PACKUSWB mm6,mm0
PACKUSWB mm7,mm0
movd [edi],mm4
add edi,imgcw
movd [edi],mm5
add edi,imgcw
movd [edi],mm6
add edi,imgcw
movd [edi],mm7
emms
}
}
}
// Horizontal.
/* for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE)
{
for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE)
{
for (j=0; j < BLOCK_SIZE; j++)
{
for (i=0; i < BLOCK_SIZE; i++)
{
m5[i]=img->m7[n1+i][n2+j];
}
m6[0]=(m5[0]+m5[2]);
m6[1]=(m5[0]-m5[2]);
m6[2]=(m5[1]>>1)-m5[3];
m6[3]=m5[1]+(m5[3]>>1);
for (i=0; i < 2; i++)
{
i1=3-i;
img->m7[n1+i][n2+j]=m6[i]+m6[i1];
img->m7[n1+i1][n2+j]=m6[i]-m6[i1];
}
}
// Vertical.
for (i=0; i < BLOCK_SIZE; i++)
{
for (j=0; j < BLOCK_SIZE; j++)
{
m5[j]=img->m7[n1+i][n2+j];
}
m6[0]=(m5[0]+m5[2]);
m6[1]=(m5[0]-m5[2]);
m6[2]=(m5[1]>>1)-m5[3];
m6[3]=m5[1]+(m5[3]>>1);
for (j=0; j < 2; j++)
{
j2=3-j;
img->m7[n1+i][n2+j] =min(255,max(0,(m6[j]+m6[j2]+(img->mpr[n1+i][n2+j] <<DQ_BITS)+DQ_ROUND)>>DQ_BITS));
img->m7[n1+i][n2+j2]=min(255,max(0,(m6[j]-m6[j2]+(img->mpr[n1+i][n2+j2]<<DQ_BITS)+DQ_ROUND)>>DQ_BITS));
}
}
}
}
// Decoded block moved to memory
for (j=0; j < BLOCK_SIZE*2; j++)
for (i=0; i < BLOCK_SIZE*2; i++)
imgUV[uv][img->pix_c_y+j][img->pix_c_x+i]= img->m7[i][j];*/
return cr_cbp;
}
/*!
************************************************************************
* \brief
* The routine performs transform,quantization,inverse transform, adds the diff.
* to the prediction and writes the result to the decoded luma frame. Includes the
* RD constrained quantization also.
*
* \para Input:
* block_x,block_y: Block position inside a macro block (0,4,8,12).
*
* \para Output:
* nonzero: 0 if no levels are nonzero. 1 if there are nonzero levels. \n
* coeff_cost: Counter for nonzero coefficients, used to discard expensive levels.
*
*
************************************************************************
*/
int dct_luma_sp(int block_x,int block_y,int *coeff_cost)
{
  /*
   * Processing stages for one 4x4 luma block at (block_x, block_y):
   *   1. add the prediction (img->mpr) to img->m7 and keep a copy of the prediction,
   *   2. forward 4x4 transform of both img->m7 and the prediction copy,
   *   3. per coefficient: pick one of two candidate levels, record level/run,
   *      then re-quantize/de-quantize (reconstruction + prediction) with the SP quantizer,
   *   4. inverse 4x4 transform, clip to 0..255 and store into imgY.
   * Returns TRUE if at least one nonzero level was produced, FALSE otherwise.
   * *coeff_cost is increased for every nonzero level.
   */
  int sign(int a,int b);
  int i,j,i1,j1,ilev,m5[4],m6[4],coeff_ctr;
  int qp_const,level,scan_pos,run;
  int nonzero;
  int predicted_block[BLOCK_SIZE][BLOCK_SIZE],c_err,qp_const2;
  int qp_per,qp_rem,q_bits;
  int qp_per_sp,qp_rem_sp,q_bits_sp;
  int dq_scale,dq_scale_sp;          // 1<<qp_per and 1<<qp_per_sp, used instead of shifting signed values
  int pos_x = block_x/BLOCK_SIZE;
  int pos_y = block_y/BLOCK_SIZE;
  // b8: index of the 8x8 quadrant, b4: index of the 4x4 block inside that quadrant;
  // together they select the level/run arrays in img->cofAC.
  int b8 = 2*(pos_y/2) + (pos_x/2);
  int b4 = 2*(pos_y%2) + (pos_x%2);
  int* ACLevel = img->cofAC[b8][b4][0];
  int* ACRun = img->cofAC[b8][b4][1];
  // For encoding optimization
  int c_err1, c_err2, level1, level2;
  double D_dis1, D_dis2;
  int len, info;
  double lambda_mode = 0.85 * pow (2, img->qp/3.0) * 4;

  // Quantizer parameters derived from img->qp (residual) and img->qpsp (SP re-quantization).
  qp_per = (img->qp-MIN_QP)/6;
  qp_rem = (img->qp-MIN_QP)%6;
  q_bits = Q_BITS+qp_per;
  qp_per_sp = (img->qpsp-MIN_QP)/6;
  qp_rem_sp = (img->qpsp-MIN_QP)%6;
  q_bits_sp = Q_BITS+qp_per_sp;
  qp_const=(1<<q_bits)/6; // inter
  qp_const2=(1<<q_bits_sp)/2; //sp_pred

  // Left-shifting a negative int is undefined behaviour in C, and the values
  // scaled below are negative whenever the coefficient is negative. Multiplying
  // by these powers of two yields the same result wherever the shift was defined.
  dq_scale    = 1 << qp_per;
  dq_scale_sp = 1 << qp_per_sp;

  // Add the prediction to img->m7 and keep a separate copy of the prediction.
  for (j=0; j< BLOCK_SIZE; j++)
    for (i=0; i< BLOCK_SIZE; i++)
    {
      img->m7[i][j]+=img->mpr[i+block_x][j+block_y];
      predicted_block[i][j]=img->mpr[i+block_x][j+block_y];
    }

  // Horizontal transform of img->m7
  for (j=0; j < BLOCK_SIZE; j++)
  {
    for (i=0; i < 2; i++)
    {
      i1=3-i;
      m5[i]=img->m7[i][j]+img->m7[i1][j];
      m5[i1]=img->m7[i][j]-img->m7[i1][j];
    }
    img->m7[0][j]=(m5[0]+m5[1]);
    img->m7[2][j]=(m5[0]-m5[1]);
    img->m7[1][j]=m5[3]*2+m5[2];
    img->m7[3][j]=m5[3]-m5[2]*2;
  }

  // Vertical transform of img->m7
  for (i=0; i < BLOCK_SIZE; i++)
  {
    for (j=0; j < 2; j++)
    {
      j1=3-j;
      m5[j]=img->m7[i][j]+img->m7[i][j1];
      m5[j1]=img->m7[i][j]-img->m7[i][j1];
    }
    img->m7[i][0]=(m5[0]+m5[1]);
    img->m7[i][2]=(m5[0]-m5[1]);
    img->m7[i][1]=m5[3]*2+m5[2];
    img->m7[i][3]=m5[3]-m5[2]*2;
  }

  // Horizontal transform of the prediction copy
  for (j=0; j < BLOCK_SIZE; j++)
  {
    for (i=0; i < 2; i++)
    {
      i1=3-i;
      m5[i]=predicted_block[i][j]+predicted_block[i1][j];
      m5[i1]=predicted_block[i][j]-predicted_block[i1][j];
    }
    predicted_block[0][j]=(m5[0]+m5[1]);
    predicted_block[2][j]=(m5[0]-m5[1]);
    predicted_block[1][j]=m5[3]*2+m5[2];
    predicted_block[3][j]=m5[3]-m5[2]*2;
  }

  // Vertical transform of the prediction copy
  for (i=0; i < BLOCK_SIZE; i++)
  {
    for (j=0; j < 2; j++)
    {
      j1=3-j;
      m5[j]=predicted_block[i][j]+predicted_block[i][j1];
      m5[j1]=predicted_block[i][j]-predicted_block[i][j1];
    }
    predicted_block[i][0]=(m5[0]+m5[1]);
    predicted_block[i][2]=(m5[0]-m5[1]);
    predicted_block[i][1]=m5[3]*2+m5[2];
    predicted_block[i][3]=m5[3]-m5[2]*2;
  }

  // Quant
  nonzero=FALSE;
  run=-1;
  scan_pos=0;
  for (coeff_ctr=0;coeff_ctr < 16;coeff_ctr++) // all 16 coefficients, in scan order
  {
    if (img->field_picture || ( mb_adaptive && img->field_mode ))
    { // Alternate scan for field coding
      i=FIELD_SCAN[coeff_ctr][0];
      j=FIELD_SCAN[coeff_ctr][1];
    }
    else
    {
      i=SNGL_SCAN[coeff_ctr][0];
      j=SNGL_SCAN[coeff_ctr][1];
    }
    run++;
    ilev=0;

    // decide prediction
    // case 1: error measured against the prediction coefficient after it has been
    // quantized with the SP quantizer and scaled back
    level1 = (abs (predicted_block[i][j]) * quant_coef[qp_rem_sp][i][j] + qp_const2) >> q_bits_sp;
    level1 = (level1 << q_bits_sp) / quant_coef[qp_rem_sp][i][j];
    c_err1 = img->m7[i][j]-sign(level1, predicted_block[i][j]);
    level1 = (abs (c_err1) * quant_coef[qp_rem][i][j] + qp_const) >> q_bits;

    // case 2: error measured against the unquantized prediction coefficient
    c_err2=img->m7[i][j]-predicted_block[i][j];
    level2 = (abs (c_err2) * quant_coef[qp_rem][i][j] + qp_const) >> q_bits;

    // select prediction
    if ((level1 != level2) && (level1 != 0) && (level2 != 0))
    {
      // Both candidates are nonzero and differ: compare squared reconstruction
      // error plus lambda_mode times the code length reported by levrun_linfo_inter.
      // The >>6 on a possibly negative value is implementation-defined and kept as in the original.
      D_dis1 = img->m7[i][j] - ((sign(level1,c_err1)*dequant_coef[qp_rem][i][j]*A[i][j]*dq_scale) >>6) - predicted_block[i][j];
      levrun_linfo_inter(level1, run, &len, &info);
      D_dis1 = D_dis1*D_dis1 + lambda_mode * len;

      D_dis2 = img->m7[i][j] - ((sign(level2,c_err2)*dequant_coef[qp_rem][i][j]*A[i][j]*dq_scale) >>6) - predicted_block[i][j];
      levrun_linfo_inter(level2, run, &len, &info);
      D_dis2 = D_dis2 * D_dis2 + lambda_mode * len;

      if (D_dis1 == D_dis2)
      {
        // equal cost: prefer the smaller level
        level = (abs(level1) < abs(level2)) ? level1 : level2;
      }
      else
      {
        if (D_dis1 < D_dis2)
          level = level1;
        else
          level = level2;
      }
      // level1 != level2 here, so this comparison identifies the chosen candidate
      c_err = (level == level1) ? c_err1 : c_err2;
    }
    else if (level1 == level2)
    {
      level = level1;
      c_err = c_err1;
    }
    else
    {
      // The levels differ and at least one of them is zero: the zero level is taken.
      level = (level1 == 0) ? level1 : level2;
      c_err = (level1 == 0) ? c_err1 : c_err2;
    }

    if (level != 0)
    {
      nonzero=TRUE;
      if (level > 1)
        *coeff_cost += MAX_VALUE; // set high cost, shall not be discarded
      else
        *coeff_cost += COEFF_COST[run];
      ACLevel[scan_pos] = sign(level,c_err);
      ACRun [scan_pos] = run;
      ++scan_pos;
      run=-1; // reset zero level counter
      ilev=((sign(level,c_err)*dequant_coef[qp_rem][i][j]*A[i][j]*dq_scale) >>6);
    }

    // Reconstructed coefficient = dequantized level + prediction coefficient,
    // then quantized and dequantized again with the SP quantizer.
    ilev+=predicted_block[i][j] ;
    img->m7[i][j] = sign((abs(ilev) * quant_coef[qp_rem_sp][i][j] + qp_const2)>> q_bits_sp, ilev) * dequant_coef[qp_rem_sp][i][j] * dq_scale_sp;
  }
  ACLevel[scan_pos] = 0; // zero level terminates the list

  // IDCT.
  // horizontal
  for (j=0; j < BLOCK_SIZE; j++)
  {
    for (i=0; i < BLOCK_SIZE; i++)
    {
      m5[i]=img->m7[i][j];
    }
    m6[0]=(m5[0]+m5[2]);
    m6[1]=(m5[0]-m5[2]);
    m6[2]=(m5[1]>>1)-m5[3];
    m6[3]=m5[1]+(m5[3]>>1);
    for (i=0; i < 2; i++)
    {
      i1=3-i;
      img->m7[i][j]=m6[i]+m6[i1];
      img->m7[i1][j]=m6[i]-m6[i1];
    }
  }

  // vertical, with rounding, down-shift and clipping to 0..255
  for (i=0; i < BLOCK_SIZE; i++)
  {
    for (j=0; j < BLOCK_SIZE; j++)
    {
      m5[j]=img->m7[i][j];
    }
    m6[0]=(m5[0]+m5[2]);
    m6[1]=(m5[0]-m5[2]);
    m6[2]=(m5[1]>>1)-m5[3];
    m6[3]=m5[1]+(m5[3]>>1);
    for (j=0; j < 2; j++)
    {
      j1=3-j;
      img->m7[i][j] =min(255,max(0,(m6[j]+m6[j1]+DQ_ROUND)>>DQ_BITS));
      img->m7[i][j1]=min(255,max(0,(m6[j]-m6[j1]+DQ_ROUND)>>DQ_BITS));
    }
  }

  // Decoded block moved to frame memory
  for (j=0; j < BLOCK_SIZE; j++)
    for (i=0; i < BLOCK_SIZE; i++)
      imgY[img->pix_y+block_y+j][img->pix_x+block_x+i]=img->m7[i][j];

  return nonzero;
}
/*!
************************************************************************
* \brief
* Transform,quantization,inverse transform for chroma.
* The main reason why this is done in a separate routine is the
* additional 2x2 transform of DC-coeffs. This routine is called
* once for each of the chroma components.
*
* \para Input:
* uv : Make difference between the U and V chroma component \n
* cr_cbp: chroma coded block pattern
*
* \para Output:
* cr_cbp: Updated chroma coded block pattern.
************************************************************************
*/
int dct_chroma_sp(int uv,int cr_cbp)
{
int i,j,i1,j2,ilev,n2,n1,j1,mb_y,coeff_ctr,qp_const,c_err,level ,scan_pos,run;
int m1[BLOCK_SIZE],m5[BLOCK_SIZE],m6[BLOCK_SIZE];
int coeff_cost;
int cr_cbp_tmp;
int predicted_chroma_block[MB_BLOCK_SIZE/2][MB_BLOCK_SIZE/2],qp_const2,mp1[BLOCK_SIZE];
Macroblock *currMB = &img->mb_data[img->current_mb_nr];
int qp_per,qp_rem,q_bits;
int qp_per_sp,qp_rem_sp,q_bits_sp;
int b4;
int* DCLevel = img->cofDC[uv+1][0];
int* DCRun = img->cofDC[uv+1][1];
int* ACLevel;
int* ACRun;
int c_err1, c_err2, level1, level2;
int len, info;
double D_dis1, D_dis2;
double lambda_mode = 0.85 * pow (2, img->qp/3.0) * 4;
qp_per = ((img->qp<0?img->qp:QP_SCALE_CR[img->qp])-MIN_QP)/6;
qp_rem = ((img->qp<0?img->qp:QP_SCALE_CR[img->qp])-MIN_QP)%6;
q_bits = Q_BITS+qp_per;
qp_const=(1<<q_bits)/6; // inter
qp_per_sp = ((img->qpsp<0?img->qpsp:QP_SCALE_CR[img->qpsp])-MIN_QP)/6;
qp_rem_sp = ((img->qpsp<0?img->qpsp:QP_SCALE_CR[img->qpsp])-MIN_QP)%6;
q_bits_sp = Q_BITS+qp_per_sp;
qp_const2=(1<<q_bits_sp)/2; //sp_pred
for (j=0; j < MB_BLOCK_SIZE/2; j++)
for (i=0; i < MB_BLOCK_SIZE/2; i++)
{
img->m7[i][j]+=img->mpr[i][j];
predicted_chroma_block[i][j]=img->mpr[i][j];
}
for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE)
{
for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE)
{
// Horizontal transform.
for (j=0; j < BLOCK_SIZE; j++)
{
mb_y=n2+j;
for (i=0; i < 2; i++)
{
i1=3-i;
m5[i]=img->m7[i+n1][mb_y]+img->m7[i1+n1][mb_y];
m5[i1]=img->m7[i+n1][mb_y]-img->m7[i1+n1][mb_y];
}
img->m7[n1][mb_y] =(m5[0]+m5[1]);
img->m7[n1+2][mb_y]=(m5[0]-m5[1]);
img->m7[n1+1][mb_y]=m5[3]*2+m5[2];
img->m7[n1+3][mb_y]=m5[3]-m5[2]*2;
}
// Vertical transform.
for (i=0; i < BLOCK_SIZE; i++)
{
j1=n1+i;
for (j=0; j < 2; j++)
{
j2=3-j;
m5[j]=img->m7[j1][n2+j]+img->m7[j1][n2+j2];
m5[j2]=img->m7[j1][n2+j]-img->m7[j1][n2+j2];
}
img->m7[j1][n2+0]=(m5[0]+m5[1]);
img->m7[j1][n2+2]=(m5[0]-m5[1]);
img->m7[j1][n2+1]=m5[3]*2+m5[2];
img->m7[j1][n2+3]=m5[3]-m5[2]*2;
}
}
}
for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE)
{
for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE)
{
// Horizontal transform.
for (j=0; j < BLOCK_SIZE; j++)
{
mb_y=n2+j;
for (i=0; i < 2; i++)
{
i1=3-i;
m5[i]=predicted_chroma_block[i+n1][mb_y]+predicted_chroma_block[i1+n1][mb_y];
m5[i1]=predicted_chroma_block[i+n1][mb_y]-predicted_chroma_block[i1+n1][mb_y];
}
predicted_chroma_block[n1][mb_y] =(m5[0]+m5[1]);
predicted_chroma_block[n1+2][mb_y]=(m5[0]-m5[1]);
predicted_chroma_block[n1+1][mb_y]=m5[3]*2+m5[2];
predicted_chroma_block[n1+3][mb_y]=m5[3]-m5[2]*2;
}
// Vertical transform.
for (i=0; i < BLOCK_SIZE; i++)
{
j1=n1+i;
for (j=0; j < 2; j++)
{
j2=3-j;
m5[j]=predicted_chroma_block[j1][n2+j]+predicted_chroma_block[j1][n2+j2];
m5[j2]=predicted_chroma_block[j1][n2+j]-predicted_chroma_block[j1][n2+j2];
}
predicted_chroma_block[j1][n2+0]=(m5[0]+m5[1]);
predicted_chroma_block[j1][n2+2]=(m5[0]-m5[1]);
predicted_chroma_block[j1][n2+1]=m5[3]*2+m5[2];
predicted_chroma_block[j1][n2+3]=m5[3]-m5[2]*2;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -