📄 block.c
字号:
for (j=0; j < BLOCK_SIZE; j++)
for (i=0; i < BLOCK_SIZE; i++)
imgY[img->pix_y+block_y+j][img->pix_x+block_x+i]=img->m7[i][j];
return nonzero;
}
*/
//zdd
/*!
 ************************************************************************
 * \brief
 *    Forward 4x4 integer transform, quantization, dequantization,
 *    inverse transform and reconstruction of one 4x4 luma block.
 *    The transforms and the reconstruction are written as MSVC inline
 *    assembly (MMX, plus a few SSE2 instructions for the 16x16->32 bit
 *    multiply).
 *
 * \par Input:
 *    block_x, block_y : block offset; added to img->pix_x / img->pix_y
 *                       to address imgY, used to index img->mpr, and
 *                       divided by BLOCK_SIZE to select the cofAC entry.
 *    coeff_cost       : accumulator that is incremented for every
 *                       non-zero quantized coefficient.
 *    old_intra_mode   : not referenced anywhere in this function.
 *
 * \par Output:
 *    img->cofAC[b8][b4] : level/run pairs in SNGL_SCAN order, followed
 *                         by a level of 0.
 *    img->m7            : overwritten, first with the transform
 *                         coefficients and then with the dequantized
 *                         coefficients.
 *    imgY               : receives the 4x4 reconstructed samples.
 *
 * \return
 *    TRUE if at least one quantized level is non-zero, FALSE otherwise.
 *
 * \note
 *    The assembly hard-codes memory strides: 32 bytes between rows of
 *    img->m7 and img->mpr, 8 bytes between rows of quant_coef[qp_rem],
 *    and img->width bytes between rows of imgY. These are assumptions
 *    about declarations/allocations outside this function - TODO confirm.
 ************************************************************************
 */
int dct_luma(int block_x,int block_y,int *coeff_cost, int old_intra_mode)
{
int sign(int a,int b);
// NOTE(review): i1, j1, m5 and m6 are declared but never used below.
int i,j,i1,j1,ilev,m5[4],m6[4],coeff_ctr;
int qp_const,level,scan_pos,run;
int nonzero;
int qp_per,qp_rem,q_bits;
// Position of the block in 4x4-block units, and the derived
// 8x8-block index (b8) and 4x4-within-8x8 index (b4) used for cofAC.
int pos_x = block_x/BLOCK_SIZE;
int pos_y = block_y/BLOCK_SIZE;
int b8 = 2*(pos_y/2) + (pos_x/2);
int b4 = 2*(pos_y%2) + (pos_x%2);
int* ACLevel = img->cofAC[b8][b4][0];
int* ACRun = img->cofAC[b8][b4][1];
// --- begin: variables used by the inline-assembly version ("zdd") ---
// buff: scratch 4x4 block for the inverse transform / reconstruction.
_int16 buff[4][4];
// pointer: start of img->m7 (no block offset is applied).
_int16 *pointer=&(img->m7[0][0]);
// pt1: prediction samples for this block (indexed [block_x][block_y]).
_int16 *pt1=&(img->mpr[block_x][block_y]);
_int16 *pt2=&(buff[0][0]);
// pt3: top-left destination sample of this block in imgY.
byte *pt3=&(imgY[img->pix_y+block_y][img->pix_x+block_x]);
// dq: two 32-bit lanes holding 32 (0x20), added before the >>6 below.
static _int64 dq=0x0000002000000020;
// NOTE(review): f0f and ff are never referenced in this function.
static _int64 f0f=0x00ff00ff00ff00ff;
static _int64 ff=0xffffffffffff0000;
// imgw: byte step used to advance from one imgY row to the next.
int imgw=img->width;
// mulres: 32-bit products |coefficient| * quant_coef, filled by the asm.
_int32 mulres[4][4],*pt4=mulres[0];
_int16*pt5;
// --- end of assembly-related variables ---
// Split QP into a period (qp_per) and a remainder (qp_rem) modulo 6.
qp_per = (img->qp-MIN_QP)/6;
qp_rem = (img->qp-MIN_QP)%6;
q_bits = Q_BITS+qp_per;
// Rounding offset added before the right shift by q_bits.
if (img->type == INTRA_IMG)
qp_const=(1<<q_bits)/3; // intra
else
qp_const=(1<<q_bits)/6; // inter
// Quantization multipliers for this qp_rem.
pt5=quant_coef[qp_rem][0];
// Forward transform + |coef| * quant_coef.
_asm
{
// eax: source (m7), ebx: destination (also m7),
// edi: mulres, esi: quant_coef[qp_rem].
mov eax,pointer
mov ebx,eax
mov edi,pt4
mov esi,pt5
// Load the matrix: four rows of four 16-bit values, 32 bytes apart.
movq mm4,[eax]
movq mm5,[eax+32]
movq mm6,[eax+64]
movq mm7,[eax+96]
// Transpose: afterwards mm4..mm7 hold columns 0..3 of the loaded rows.
movq mm0,mm4
movq mm1,mm5
movq mm2,mm6
PUNPCKLWD mm4,mm5
PUNPCKLWD mm6,mm7
movq mm5,mm4
PUNPCKLDQ mm4,mm6
PUNPCKHDQ mm5,mm6
PUNPCKHWD mm0,mm1
PUNPCKHWD mm2,mm7
movq mm7,mm0
PUNPCKLDQ mm0,mm2
PUNPCKHDQ mm7,mm2
movq mm6,mm0
// First pass of the transform (a..d = mm4..mm7):
//   mm0 =  a +  b +  c +  d
//   mm1 = 2a +  b -  c - 2d
//   mm2 =  a -  b -  c +  d
//   mm3 =  a - 2b + 2c -  d
movq mm0,mm4
PADDW mm0,mm5
PADDW mm0,mm6
PADDW mm0,mm7
movq mm1,mm4
PSLLW mm1,1
PADDW mm1,mm5
PSUBW mm1,mm6
movq mm2,mm7
PSLLW mm2,1
PSUBW mm1,mm2
movq mm2,mm4
PADDW mm2,mm7
PSUBW mm2,mm5
PSUBW mm2,mm6
movq mm3,mm4
PSUBW mm3,mm7
PSLLW mm5,1
PSLLW mm6,1
PSUBW mm3,mm5
PADDW mm3,mm6
// Transpose the intermediate result (same shuffle as above).
movq mm4,mm0
movq mm5,mm1
movq mm6,mm2
movq mm7,mm3
PUNPCKLWD mm4,mm5
PUNPCKLWD mm6,mm7
movq mm5,mm4
PUNPCKLDQ mm4,mm6
PUNPCKHDQ mm5,mm6
PUNPCKHWD mm0,mm1
PUNPCKHWD mm2,mm7
movq mm7,mm0
PUNPCKLDQ mm0,mm2
PUNPCKHDQ mm7,mm2
movq mm6,mm0
// Second pass: same butterfly applied along the other dimension.
movq mm0,mm4
PADDW mm0,mm5
PADDW mm0,mm6
PADDW mm0,mm7
movq mm1,mm4
PSLLW mm1,1
PADDW mm1,mm5
PSUBW mm1,mm6
movq mm2,mm7
PSLLW mm2,1
PSUBW mm1,mm2
movq mm2,mm4
PADDW mm2,mm7
PSUBW mm2,mm5
PSUBW mm2,mm6
movq mm3,mm4
PSUBW mm3,mm7
PSLLW mm5,1
PSLLW mm6,1
PSUBW mm3,mm5
PADDW mm3,mm6
// Write the signed transform coefficients back over m7; the C
// quantization loop below reads their signs from there.
movq [ebx],mm0
movq [ebx+32],mm1
movq [ebx+64],mm2
movq [ebx+96],mm3
// NOTE(review): xmm7 is cleared here but not read later in this block.
pxor xmm7,xmm7
// Absolute value of each 16-bit lane: build a sign mask with an
// arithmetic shift by 15, then (x XOR mask) - mask.
MOVQ MM4, MM0
PSRAW MM4, 15
PXOR MM0, MM4
PSUBW MM0, MM4
MOVQ MM5, MM1
PSRAW MM5, 15
PXOR MM1, MM5
PSUBW MM1, MM5
MOVQ MM6, MM2
PSRAW MM6, 15
PXOR MM2, MM6
PSUBW MM2, MM6
MOVQ MM7, MM3
PSRAW MM7, 15
PXOR MM3, MM7
PSUBW MM3, MM7
// Move the four rows of absolute values into the low halves of xmm0..3.
movq2dq xmm0,mm0
movq2dq xmm1,mm1
movq2dq xmm2,mm2
movq2dq xmm3,mm3
// For each row: multiply by the matching row of quant_coef[qp_rem].
// PMULLW gives the low 16 bits and PMULHW the (signed) high 16 bits of
// each product; PUNPCKLWD interleaves them into four 32-bit products.
movq mm4,[esi]
movq2dq xmm4,mm4
MOVDQU xmm5,xmm0
PMULLW xmm0,xmm4
PMULHW xmm5,xmm4
PUNPCKLWD xmm0,xmm5
movq mm4,[esi+8]
movq2dq xmm4,mm4
MOVDQU xmm5,xmm1
PMULLW xmm1,xmm4
PMULHW xmm5,xmm4
PUNPCKLWD xmm1,xmm5
movq mm4,[esi+16]
movq2dq xmm4,mm4
MOVDQU xmm5,xmm2
PMULLW xmm2,xmm4
PMULHW xmm5,xmm4
PUNPCKLWD xmm2,xmm5
movq mm4,[esi+24]
movq2dq xmm4,mm4
MOVDQU xmm5,xmm3
PMULLW xmm3,xmm4
PMULHW xmm5,xmm4
PUNPCKLWD xmm3,xmm5
// Store the 32-bit products into mulres, one 16-byte row at a time.
MOVDQU [edi],xmm0
MOVDQU [edi+16],xmm1
MOVDQU [edi+32],xmm2
MOVDQU [edi+48],xmm3
// Leave MMX state before returning to C code.
emms
}
// Quant
nonzero=FALSE;
run=-1;
scan_pos=0;
// Visit the 16 coefficients in SNGL_SCAN order.
for (coeff_ctr=0;coeff_ctr < 16;coeff_ctr++)
{
i=SNGL_SCAN[coeff_ctr][0];
j=SNGL_SCAN[coeff_ctr][1];
// run counts the zero levels seen since the last non-zero level.
run++;
ilev=0;
// mulres already holds |coefficient| * quant_coef, so level is >= 0 here.
level = (mulres[i][j] + qp_const) >> q_bits;
if (level != 0)
{
nonzero=TRUE;
if (level > 1)
*coeff_cost += MAX_VALUE; // set high cost, shall not be discarded
else
*coeff_cost += COEFF_COST[run];
// sign() is defined elsewhere; presumably it returns its first argument
// carrying the sign of the second - TODO confirm.
ACLevel[scan_pos] = sign(level,img->m7[i][j]);
ACRun [scan_pos] = run;
++scan_pos;
run=-1; // reset zero level counter
// Dequantize; m7 still holds the signed transform coefficient, which
// supplies the sign before being overwritten.
ilev=level*dequant_coef[qp_rem][i][j]<<qp_per;
img->m7[i][j]=sign(ilev,img->m7[i][j]);
}
else
img->m7[i][j]=0;
}
// Write a zero level after the last coded coefficient.
ACLevel[scan_pos] = 0;
// IDCT.
_asm
{
// eax: dequantized coefficients (m7), ebx: scratch buffer (buff).
mov eax,pointer
mov ebx,pt2
// Load the matrix: four rows of four 16-bit values, 32 bytes apart.
movq mm4,[eax]
movq mm5,[eax+32]
movq mm6,[eax+64]
movq mm7,[eax+96]
// Transpose: afterwards mm4..mm7 hold columns 0..3 of the loaded rows.
movq mm0,mm4
movq mm1,mm5
movq mm2,mm6
PUNPCKLWD mm4,mm5
PUNPCKLWD mm6,mm7
movq mm5,mm4
PUNPCKLDQ mm4,mm6
PUNPCKHDQ mm5,mm6
PUNPCKHWD mm0,mm1
PUNPCKHWD mm2,mm7
movq mm7,mm0
PUNPCKLDQ mm0,mm2
PUNPCKHDQ mm7,mm2
movq mm6,mm0
// First pass of the inverse transform (a..d = mm4..mm7):
//   mm0 = a +  b     + c + (d>>1)
//   mm1 = a + (b>>1) - c -  d
//   mm2 = a - (b>>1) - c +  d
//   mm3 = a -  b     + c - (d>>1)
movq mm0,mm4
PADDW mm0,mm5
PADDW mm0,mm6
movq mm3,mm7
PSRAW mm3,1
PADDW mm0,mm3
movq mm1,mm4
movq mm3,mm5
PSRAW mm3,1
PADDW mm1,mm3
PSUBW mm1,mm6
PSUBW mm1,mm7
movq mm2,mm4
PADDW mm2,mm7
movq mm3,mm5
PSRAW mm3,1
PSUBW mm2,mm3
PSUBW mm2,mm6
movq mm3,mm4
PADDW mm3,mm6
PSUBW mm3,mm5
PSRAW mm7,1
PSUBW mm3,mm7
// Transpose the intermediate result (same shuffle as above).
movq mm4,mm0
movq mm5,mm1
movq mm6,mm2
movq mm7,mm3
PUNPCKLWD mm4,mm5
PUNPCKLWD mm6,mm7
movq mm5,mm4
PUNPCKLDQ mm4,mm6
PUNPCKHDQ mm5,mm6
PUNPCKHWD mm0,mm1
PUNPCKHWD mm2,mm7
movq mm7,mm0
PUNPCKLDQ mm0,mm2
PUNPCKHDQ mm7,mm2
movq mm6,mm0
// Second pass: same butterfly applied along the other dimension.
movq mm0,mm4
PADDW mm0,mm5
PADDW mm0,mm6
movq mm3,mm7
PSRAW mm3,1
PADDW mm0,mm3
movq mm1,mm4
movq mm3,mm5
PSRAW mm3,1
PADDW mm1,mm3
PSUBW mm1,mm6
PSUBW mm1,mm7
movq mm2,mm4
PADDW mm2,mm7
movq mm3,mm5
PSRAW mm3,1
PSUBW mm2,mm3
PSUBW mm2,mm6
movq mm3,mm4
PADDW mm3,mm6
PSUBW mm3,mm5
PSRAW mm7,1
PSUBW mm3,mm7
// Store the inverse-transformed residual into buff (8 bytes per row).
movq [ebx],mm0
movq [ebx+8],mm1
movq [ebx+16],mm2
movq [ebx+24],mm3
// Reconstruction: for each of the 4 rows compute
//   clip_0_255( ((prediction << 6) + residual + 32) >> 6 )
// eax walks img->mpr (32-byte step), ebx reads buff, ecx writes buff
// in place, dx is the row counter, mm5 holds the rounding constant 32.
mov eax,pt1
mov ebx,pt2
mov ecx,ebx
mov dx,4
movq mm5,dq
loop1_2:
pxor mm4,mm4
// mm6: four prediction words, mm7: four residual words.
movq mm6,[eax]
movq mm7,[ebx]
// Widen each pair of words to 32 bits; the shift left/arithmetic shift
// right by 16 sign-extends the value after the zero-interleave.
movq mm0,mm6
PUNPCKLWD mm0,mm4
pslld mm0,16
psrad mm0,16
movq mm1,mm7
PUNPCKLWD mm1,mm4
pslld mm1,16
psrad mm1,16
movq mm2,mm6
PUNPCKHWD mm2,mm4
pslld mm2,16
psrad mm2,16
movq mm3,mm7
PUNPCKHWD mm3,mm4
pslld mm3,16
psrad mm3,16
pxor mm7,mm7
// Low two samples: (pred << 6) + residual + 32, then >> 6.
PSLLD mm0,6
paddd mm0,mm1
paddd mm0,mm5
psrad mm0,6
// High two samples, same computation.
PSLLD mm2,6
paddd mm2,mm3
paddd mm2,mm5
psrad mm2,6
// Narrow to words, saturate to unsigned bytes (0..255), then widen the
// clipped bytes back to words so the row fits the 16-bit buff layout.
PACKSSDW mm0,mm2
PACKUSWB mm0,mm7
PUNPCKLbw mm0,mm7
movq [ecx],mm0
add eax,32
add ebx,8
add ecx,8
sub dx,1
jne loop1_2
// Transpose buff before writing it out. img->mpr is indexed
// [block_x][block_y] while imgY is indexed [y][x], which is presumably
// why the orientation has to be swapped here - TODO confirm.
mov ebx,pt2
mov edi,pt3
movq mm0,[ebx]
movq mm1,[ebx+8]
movq mm2,[ebx+16]
movq mm3,[ebx+24]
movq mm4,mm0
movq mm5,mm1
movq mm6,mm2
movq mm7,mm3
PUNPCKLWD mm4,mm5
PUNPCKLWD mm6,mm7
movq mm5,mm4
PUNPCKLDQ mm4,mm6
PUNPCKHDQ mm5,mm6
PUNPCKHWD mm0,mm1
PUNPCKHWD mm2,mm7
movq mm7,mm0
PUNPCKLDQ mm0,mm2
PUNPCKHDQ mm7,mm2
movq mm6,mm0
// Pack each transposed row of words down to four bytes.
pxor mm0,mm0
PACKUSWB mm4,mm0
PACKUSWB mm5,mm0
PACKUSWB mm6,mm0
PACKUSWB mm7,mm0
// Write four bytes per image line, stepping edi by imgw each time.
// NOTE(review): this assumes consecutive imgY rows are exactly
// img->width bytes apart in memory - verify against the allocator.
movd [edi],mm4
add edi,imgw
movd [edi],mm5
add edi,imgw
movd [edi],mm6
add edi,imgw
movd [edi],mm7
// Leave MMX state before returning to C code.
emms
}
return nonzero;
}
/*!
************************************************************************
* \brief
* Transform,quantization,inverse transform for chroma.
* The main reason why this is done in a separate routine is the
* additional 2x2 transform of DC-coeffs. This routine is called
* once for each of the chroma components.
*
* \par Input:
* uv : Make difference between the U and V chroma component \n
* cr_cbp: chroma coded block pattern
*
* \par Output:
* cr_cbp: Updated chroma coded block pattern.
************************************************************************
*/
int dct_chroma(int uv,int cr_cbp)
{
int i,j,i1,j2,ilev,n2,n1,j1,mb_y,coeff_ctr,qp_const,level ,scan_pos,run;
int m1[BLOCK_SIZE],m5[BLOCK_SIZE],m6[BLOCK_SIZE];
int coeff_cost;
int cr_cbp_tmp;
int nn0,nn1;
int DCcoded=0 ;
Macroblock *currMB = &img->mb_data[img->current_mb_nr];
int qp_per,qp_rem,q_bits;
int b4;
int* DCLevel = img->cofDC[uv+1][0];
int* DCRun = img->cofDC[uv+1][1];
int* ACLevel;
int* ACRun;
//zdd
_int16 *pointer;
_int16 buff[4][4];
_int16 *pt1;
_int16 *pt2=&(buff[0][0]);
byte *pt3;
static _int64 dq=0x0000002000000020;
static _int64 f0f=0x00ff00ff00ff00ff;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -