📄 block.cpp
字号:
M3[2]=M0[i][mm][1][jj]-M0[i][mm][2][jj];
M3[3]=M0[i][mm][0][jj]-M0[i][mm][3][jj];
M0[i][mm][0][jj]=M3[0]+M3[1];
M0[i][mm][2][jj]=M3[0]-M3[1];
M0[i][mm][1][jj]=M3[2]+M3[3];
M0[i][mm][3][jj]=M3[3]-M3[2];
}
current_intra_sad_2 += abs(M0[1][mm][0][jj])
+abs(M0[2][mm][0][jj])+abs(M0[3][mm][0][jj])
+abs(M0[0][mm][1][jj])+abs(M0[1][mm][1][jj])
+abs(M0[2][mm][1][jj])+abs(M0[3][mm][1][jj])
+abs(M0[0][mm][2][jj])+abs(M0[1][mm][2][jj])
+abs(M0[2][mm][2][jj])+abs(M0[3][mm][2][jj])
+abs(M0[0][mm][3][jj])+abs(M0[1][mm][3][jj])
+abs(M0[2][mm][3][jj])+abs(M0[3][mm][3][jj]);
}
}
M4[0][0]=M0[0][0][0][0]/4; M4[1][0]=M0[0][1][0][0]/4;
M4[2][0]=M0[0][2][0][0]/4; M4[3][0]=M0[0][3][0][0]/4;
M4[0][1]=M0[0][0][0][1]/4; M4[1][1]=M0[0][1][0][1]/4;
M4[2][1]=M0[0][2][0][1]/4; M4[3][1]=M0[0][3][0][1]/4;
M4[0][2]=M0[0][0][0][2]/4; M4[1][2]=M0[0][1][0][2]/4;
M4[2][2]=M0[0][2][0][2]/4; M4[3][2]=M0[0][3][0][2]/4;
M4[0][3]=M0[0][0][0][3]/4; M4[1][3]=M0[0][1][0][3]/4;
M4[2][3]=M0[0][2][0][3]/4; M4[3][3]=M0[0][3][0][3]/4;
// Hadamard of DC koeff
for (j=0;j<4;j++)
{
M3[0]=M4[0][j]+M4[3][j];
M3[1]=M4[1][j]+M4[2][j];
M3[2]=M4[1][j]-M4[2][j];
M3[3]=M4[0][j]-M4[3][j];
M4[0][j]=M3[0]+M3[1];
M4[2][j]=M3[0]-M3[1];
M4[1][j]=M3[2]+M3[3];
M4[3][j]=M3[3]-M3[2];
}
for (i=0;i<4;i++)
{
M3[0]=M4[i][0]+M4[i][3];
M3[1]=M4[i][1]+M4[i][2];
M3[2]=M4[i][1]-M4[i][2];
M3[3]=M4[i][0]-M4[i][3];
M4[i][0]=M3[0]+M3[1];
M4[i][2]=M3[0]-M3[1];
M4[i][1]=M3[2]+M3[3];
M4[i][3]=M3[3]-M3[2];
}
current_intra_sad_2 += abs(M4[0][0])+abs(M4[1][0])+abs(M4[2][0])+abs(M4[3][0])
+abs(M4[0][1])+abs(M4[1][1])+abs(M4[2][1])+abs(M4[3][1])
+abs(M4[0][2])+abs(M4[1][2])+abs(M4[2][2])+abs(M4[3][2])
+abs(M4[0][3])+abs(M4[1][3])+abs(M4[2][3])+abs(M4[3][3]);
return current_intra_sad_2/2;
}
/************************************************************************
*
* Routine: dct_luma
*
* Description: The routine performs transform, quantization and inverse transform,
* adds the difference to the prediction and writes the result to the decoded
* luma frame. Includes the RD constrained quantization also.
*
*
* Input: block_x,block_y: Block position inside a macro block (0,4,8,12).
*
* Output: nonzero: 0 if no levels are nonzero. 1 if there are nonzero levels.
* coeff_cost: Counter for nonzero coefficients, used to discard expensive levels.
* (NOTE: coeff_cost is not a parameter of the dct_luma defined below.)
*
*
************************************************************************/
/*
 * 4-point transform helper macros.
 *
 * All of the *_DCT / *_IDCT / *_CVT / *_ICVT macros below expand to several
 * plain statements (no do{}while(0) wrapper), so they must not be used as the
 * body of an unbraced if/else/for.  They also refer to scratch variables by
 * name (D0..D3, q0..q3, p0..p3); those must be declared as assignable
 * integers in the scope where the macro is invoked.
 */

/*
 * HOR_DCT: forward 4-point butterfly, computed in place.
 *   scratch : D0..D3 (overwritten)
 *   result  : x0 = (x0+x3)+(x1+x2)      x2 = (x0+x3)-(x1+x2)
 *             x1 = (x1-x2)+2*(x0-x3)    x3 = (x0-x3)-2*(x1-x2)
 * The arguments must be lvalues because they are assigned to.
 */
#define HOR_DCT(x0,x1,x2,x3)\
D0=x0+x3;\
D3=x0-x3;\
D1=x1+x2;\
D2=x1-x2;\
x0=D0+D1;\
x2=D0-D1;\
x1=D2+(D3<<1);\
x3=D3-(D2<<1);
/*
 * VER_DCT: same arithmetic as HOR_DCT, but the arguments are only read.
 *   scratch : q0..q3 (overwritten)
 *   result  : written to p0..p3
 */
#define VER_DCT(x0,x1,x2,x3)\
q0=x0+x3;\
q3=x0-x3;\
q1=x1+x2;\
q2=x1-x2;\
p0=q0+q1;\
p2=q0-q1;\
p1=q2+(q3<<1);\
p3=q3-(q2<<1);
/*
 * HOR_IDCT: inverse 4-point butterfly; the arguments are only read.
 *   scratch : q0..q3 (overwritten)
 *   result  : written to p0..p3
 * The odd inputs x1 and x3 are each used once at full weight and once
 * right-shifted by one bit.
 */
#define HOR_IDCT(x0,x1,x2,x3)\
q0=x0+x2;\
q1=x0-x2;\
q2=(x1>>1)-x3;\
q3=x1+(x3>>1);\
p0=q0+q3;\
p3=q0-q3;\
p1=q1+q2;\
p2=q1-q2;
/*
 * VER_IDCT: same arithmetic as HOR_IDCT, but computed in place.
 *   scratch : q0..q3 (overwritten)
 *   result  : written back to x0..x3 (arguments must be lvalues)
 */
#define VER_IDCT(x0,x1,x2,x3)\
q0=x0+x2;\
q1=x0-x2;\
q2=(x1>>1)-x3;\
q3=x1+(x3>>1);\
x0=q0+q3;\
x3=q0-q3;\
x1=q1+q2;\
x2=q1-q2;
/*yummy*/
/*
 * VER_CVT: forward 4-point butterfly with all weights +/-1 (identical to
 * VER_DCT except that the <<1 factors are absent).
 *   scratch : q0..q3 (overwritten)
 *   result  : written to p0..p3; the arguments are only read
 */
#define VER_CVT(x0,x1,x2,x3)\
q0=x0+x3;\
q3=x0-x3;\
q1=x1+x2;\
q2=x1-x2;\
p0=q0+q1;\
p2=q0-q1;\
p1=q2+q3;\
p3=q3-q2;
/*
 * HOR_CVT: same arithmetic as VER_CVT, but computed in place.
 *   scratch : D0..D3 (overwritten)
 *   result  : written back to x0..x3 (arguments must be lvalues)
 */
#define HOR_CVT(x0,x1,x2,x3)\
D0=x0+x3;\
D3=x0-x3;\
D1=x1+x2;\
D2=x1-x2;\
x0=D0+D1;\
x2=D0-D1;\
x1=D2+D3;\
x3=D3-D2;
/*
 * HOR_ICVT: inverse counterpart of the *_CVT butterflies (identical to
 * HOR_IDCT except that the >>1 factors are absent).
 *   scratch : q0..q3 (overwritten)
 *   result  : written to p0..p3; the arguments are only read
 */
#define HOR_ICVT(x0,x1,x2,x3)\
q0=x0+x2;\
q1=x0-x2;\
q2=x1-x3;\
q3=x1+x3;\
p0=q0+q3;\
p3=q0-q3;\
p1=q1+q2;\
p2=q1-q2;
/*
 * VER_ICVT: same arithmetic as HOR_ICVT, but computed in place.
 *   scratch : q0..q3 (overwritten)
 *   result  : written back to x0..x3 (arguments must be lvalues)
 */
#define VER_ICVT(x0,x1,x2,x3)\
q0=x0+x2;\
q1=x0-x2;\
q2=x1-x3;\
q3=x1+x3;\
x0=q0+q3;\
x3=q0-q3;\
x1=q1+q2;\
x2=q1-q2;
/*
 * IDCT_SHIFT: combine a residual x with a prediction value y.
 * y is scaled up by DQ_BITS bits, x and DQ_ROUND are added, the sum is
 * shifted right by DQ_BITS, and the result is clamped to the range 0..255.
 * Relies on min()/max() being available where the macro is expanded.
 */
#define IDCT_SHIFT(x,y) min(255,max(0,((x)+((y) <<DQ_BITS)+DQ_ROUND)>>DQ_BITS))
/*
 * dct_luma: transform, quantize and reconstruct one 4x4 luma block.
 *
 *   ii       first source sample of the block; consecutive rows are
 *            (img->width + IMG_PAD_SIZE) samples apart.
 *   pp       16 prediction values, stored as 4 rows of 4 ints.
 *   block_x,
 *   block_y  block offset; divided by BLOCK_SIZE to select the slot in
 *            img->cof, and added to img->pix_x / img->pix_y to locate the
 *            destination in imgY.
 *   img      supplies qp, type, width, pix_x, pix_y and receives the
 *            (level, run) pairs in img->cof[pos_x][pos_y][..].
 *
 * Side effects: fills img->cof[pos_x][pos_y] with (level, run) pairs followed
 * by a terminating level of 0, and stores the reconstructed, clamped samples
 * into imgY.
 *
 * Returns TRUE if at least one quantized level is nonzero, FALSE otherwise.
 */
int dct_luma(byte *ii, int *pp, int block_x,int block_y,struct img_par *img)
{
  /*
   * Position in outD[] of the coefficient produced for column `col` of the
   * block (first index) at vertical output index 0..3 (second index).
   */
  static const int scan_idx[4][4] = {
    { 0,  2,  3,  9},
    { 1,  4,  8, 10},
    { 5,  7, 11, 14},
    { 6, 12, 13, 15}
  };

  int sign(int a,int b);

  int row, col, k;
  int rounding, quantized, rebuilt;
  int zero_run, n_coded;
  int any_nonzero;
  int qp_per, qp_rem, q_bits;
  int pos_x = block_x/BLOCK_SIZE;
  int pos_y = block_y/BLOCK_SIZE;

  /* Scratch/result variables referenced by name inside the transform macros. */
  int p0,p1,p2,p3;
  int q0,q1,q2,q3;
  int D0,D1,D2,D3;

  int width = img->width+IMG_PAD_SIZE;
  int blk[4][4];            /* blk[row][col] working copy of the block */
  int outD[16];

  qp_per = (img->qp-MIN_QP)/6;
  qp_rem = (img->qp-MIN_QP)%6;
  q_bits = Q_BITS+qp_per;

  /* Rounding offset: a third of the step for intra pictures, a sixth otherwise. */
  rounding = (1<<q_bits) / (img->type == INTRA_IMG ? 3 : 6);

  /* Residual (source minus prediction) and horizontal transform, row by row. */
  for (row = 0; row < 4; row++)
  {
    const byte *src  = ii + row*width;
    const int  *pred = pp + row*4;

    blk[row][0] = src[0]-pred[0];
    blk[row][1] = src[1]-pred[1];
    blk[row][2] = src[2]-pred[2];
    blk[row][3] = src[3]-pred[3];
    HOR_DCT(blk[row][0],blk[row][1],blk[row][2],blk[row][3]);
  }

  /* Vertical transform, column by column; results go to outD in scan order. */
  for (col = 0; col < 4; col++)
  {
    VER_DCT(blk[0][col],blk[1][col],blk[2][col],blk[3][col]);
    outD[scan_idx[col][0]] = p0;
    outD[scan_idx[col][1]] = p1;
    outD[scan_idx[col][2]] = p2;
    outD[scan_idx[col][3]] = p3;
  }

  /* Quantize each coefficient and replace it in outD by its dequantized value. */
  any_nonzero = FALSE;
  zero_run = -1;
  n_coded = 0;
  for (k = 0; k < 16; k++)
  {
    int coef = outD[k];

    zero_run++;
    quantized = (abs (coef) * quant_coef1[qp_rem][k] + rounding) >> q_bits;

    if (quantized == 0)
    {
      outD[k] = sign(0,coef);
      continue;
    }

    any_nonzero = TRUE;
    img->cof[pos_x][pos_y][n_coded][0] = sign(quantized,coef);
    img->cof[pos_x][pos_y][n_coded][1] = zero_run;
    n_coded++;
    zero_run = -1;            /* restart the count of skipped zero levels */

    rebuilt = quantized*dequant_coef1[qp_rem][k]<<qp_per;
    outD[k] = sign(rebuilt,coef);
  }
  img->cof[pos_x][pos_y][n_coded][0] = 0;   /* level 0 terminates the list */

  /* Inverse vertical transform: gather each column from scan order, in place. */
  for (col = 0; col < 4; col++)
  {
    blk[0][col] = outD[scan_idx[col][0]];
    blk[1][col] = outD[scan_idx[col][1]];
    blk[2][col] = outD[scan_idx[col][2]];
    blk[3][col] = outD[scan_idx[col][3]];
    VER_IDCT(blk[0][col],blk[1][col],blk[2][col],blk[3][col]);
  }

  /* Inverse horizontal transform, add prediction, round and clamp. */
  for (row = 0; row < 4; row++)
  {
    const int *pred = pp + row*4;
    int       *dst  = outD + row*4;

    HOR_IDCT(blk[row][0],blk[row][1],blk[row][2],blk[row][3]);
    dst[0] = IDCT_SHIFT(p0,pred[0]);
    dst[1] = IDCT_SHIFT(p1,pred[1]);
    dst[2] = IDCT_SHIFT(p2,pred[2]);
    dst[3] = IDCT_SHIFT(p3,pred[3]);
  }

  /* Store the reconstructed block into the decoded luma picture. */
  for (row = 0; row < BLOCK_SIZE; row++)
  {
    int base = (img->pix_y+block_y+row)*width+img->pix_x+block_x;

    for (col = 0; col < 4; col++)
      imgY[base+col] = outD[row*4+col];
  }

  return any_nonzero;
}
/*yummy*/
int dct_luma_16x16(byte *mm,int new_intra_mode,struct img_par *img)
{
int lx=img->width+IMG_PAD_SIZE;
int qp_const;
int i,j;
int ii,jj;
int M1[16][16];
int M4[4][4];
int M0[4][4][4][4];
int run,scan_pos,coeff_ctr,level;
int qp_per,qp_rem,q_bits;
int ac_coef = 0;
int incr=1;
int offset=0;
int ilev,pp;
int p0,p1,p2,p3;
int q0,q1,q2,q3;
int A0,A1,A2,A3;
int B0,B1,B2,B3;
int C0,C1,C2,C3;
int D0,D1,D2,D3;
int outD[16];
qp_per = (img->qp-MIN_QP)/6;
qp_rem = (img->qp-MIN_QP)%6;
q_bits = Q_BITS+qp_per;
qp_const = (1<<q_bits)/3;
for (j=0;j<16;j++)
{
jj=j>>2;
for (i=0;i<16;i+=4)
{
ii=i>>2;
M1[i][j] =mm[i] -img->mprr_2[new_intra_mode][i +j*16];
M1[i+1][j]=mm[i+1]-img->mprr_2[new_intra_mode][i+1+j*16];
M1[i+2][j]=mm[i+2]-img->mprr_2[new_intra_mode][i+2+j*16];
M1[i+3][j]=mm[i+3]-img->mprr_2[new_intra_mode][i+3+j*16];
M0[0][ii][j&0x03][jj]=M1[i][j];
M0[1][ii][j&0x03][jj]=M1[i+1][j];
M0[2][ii][j&0x03][jj]=M1[i+2][j];
M0[3][ii][j&0x03][jj]=M1[i+3][j];
}
mm +=lx;
}
for (jj=0;jj<4;jj++)
{
for (ii=0;ii<4;ii++)
{
p0=M0[0][ii][0][jj];
p1=M0[1][ii][0][jj];
p2=M0[2][ii][0][jj];
p3=M0[3][ii][0][jj];
HOR_DCT(p0,p1,p2,p3);
A0=p0;A1=p1;A2=p2;A3=p3;
p0=M0[0][ii][1][jj];
p1=M0[1][ii][1][jj];
p2=M0[2][ii][1][jj];
p3=M0[3][ii][1][jj];
HOR_DCT(p0,p1,p2,p3);
B0=p0;B1=p1;B2=p2;B3=p3;
p0=M0[0][ii][2][jj];
p1=M0[1][ii][2][jj];
p2=M0[2][ii][2][jj];
p3=M0[3][ii][2][jj];
HOR_DCT(p0,p1,p2,p3);
C0=p0;C1=p1;C2=p2;C3=p3;
p0=M0[0][ii][3][jj];
p1=M0[1][ii][3][jj];
p2=M0[2][ii][3][jj];
p3=M0[3][ii][3][jj];
HOR_DCT(p0,p1,p2,p3);
D0=p0;D1=p1;D2=p2;D3=p3;
// vertical
VER_DCT(A0,B0,C0,D0);
M0[0][ii][0][jj]=p0; M0[0][ii][1][jj]=p1;
M0[0][ii][2][jj]=p2; M0[0][ii][3][jj]=p3;
VER_DCT(A1,B1,C1,D1);
M0[1][ii][0][jj]=p0; M0[1][ii][1][jj]=p1;
M0[1][ii][2][jj]=p2; M0[1][ii][3][jj]=p3;
VER_DCT(A2,B2,C2,D2);
M0[2][ii][0][jj]=p0; M0[2][ii][1][jj]=p1;
M0[2][ii][2][jj]=p2; M0[2][ii][3][jj]=p3;
VER_DCT(A3,B3,C3,D3);
M0[3][ii][0][jj]=p0; M0[3][ii][1][jj]=p1;
M0[3][ii][2][jj]=p2; M0[3][ii][3][jj]=p3;
}
}
// pick out DC coeff
//horizontal
p0=M0[0][0][0][0]; p1=M0[0][1][0][0];
p2=M0[0][2][0][0]; p3=M0[0][3][0][0];
HOR_CVT(p0,p1,p2,p3);
A0=p0;A1=p1;A2=p2;A3=p3;
p0=M0[0][0][0][1]; p1=M0[0][1][0][1];
p2=M0[0][2][0][1]; p3=M0[0][3][0][1];
HOR_CVT(p0,p1,p2,p3);
B0=p0;B1=p1;B2=p2;B3=p3;
p0=M0[0][0][0][2]; p1=M0[0][1][0][2];
p2=M0[0][2][0][2]; p3=M0[0][3][0][2];
HOR_CVT(p0,p1,p2,p3);
C0=p0;C1=p1;C2=p2;C3=p3;
p0=M0[0][0][0][3]; p1=M0[0][1][0][3];
p2=M0[0][2][0][3]; p3=M0[0][3][0][3];
HOR_CVT(p0,p1,p2,p3);
D0=p0;D1=p1;D2=p2;D3=p3;
// vertical
VER_CVT(A0,B0,C0,D0);
outD[0]=p0>>1;outD[2]=p1>>1;outD[3]=p2>>1;outD[9]=p3>>1;
VER_CVT(A1,B1,C1,D1);
outD[1]=p0>>1;outD[4]=p1>>1;outD[8]=p2>>1;outD[10]=p3>>1;
VER_CVT(A2,B2,C2,D2);
outD[5]=p0>>1;outD[7]=p1>>1;outD[11]=p2>>1;outD[14]=p3>>1;
VER_CVT(A3,B3,C3,D3);
outD[6]=p0>>1;outD[12]=p1>>1;outD[13]=p2>>1;outD[15]=p3>>1;
// quant
run=-1;
scan_pos=0;
for (coeff_ctr=0; coeff_ctr < 16; coeff_ctr++)
{
pp = outD[coeff_ctr];
run++;
ilev=0;
level =(abs(pp) * quant_coef[qp_rem][0][0] + 2*qp_const) >> (q_bits+1);
if (level != 0)
{
img->cofy[scan_pos][0]=sign(level ,pp);
img->cofy[scan_pos][1]=run;
scan_pos++;
run=-1;
}
outD[coeff_ctr]=sign(level,pp);/*level,not ilev*/
}
img->cofy[scan_pos][0]=0;
// invers DC transform
p0=outD[0];p1=outD[2];p2=outD[3];p3=outD[9];
VER_ICVT(p0,p1,p2,p3);
A0=p0;A1=p1;A2=p2;A3=p3;
p0=outD[1];p1=outD[4];p2=outD[8];p3=outD[10];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -