⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 block.c

📁 包含了从MPEG4的视频解码到H.264的视频编码部分的源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
  static _int64 ff=0xffffffffffff0000;
  int imgcw=img->width_cr;  

  _int32 mulres[8][8],*pt4;
  _int16*pt5;  
  
  //end

  qp_per    = QP_SCALE_CR[img->qp-MIN_QP]/6;
  qp_rem    = QP_SCALE_CR[img->qp-MIN_QP]%6;
  q_bits    = Q_BITS+qp_per;

  if (img->type == INTRA_IMG)
    qp_const=(1<<q_bits)/3;    // intra
  else
    qp_const=(1<<q_bits)/6;    // inter
  
  pt5=quant_coef[qp_rem][0];
  for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE)
  {
    for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE)
    {
		pointer=&(img->m7[n1][n2]);
		pt4=&(mulres[n1][n2]);		
     _asm
	 {		
		
			mov eax,pointer
			mov ebx,eax	
			mov edi,pt4
			mov esi,pt5
			
			//读入矩阵
			movq mm4,[eax]					
			movq mm5,[eax+32]							
			movq mm6,[eax+64]			
			movq mm7,[eax+96] 		
			
			//转置	
			movq mm0,mm4
			movq mm1,mm5
			movq mm2,mm6
			PUNPCKLWD mm4,mm5
			PUNPCKLWD mm6,mm7
			movq mm5,mm4
			PUNPCKLDQ mm4,mm6
			PUNPCKHDQ mm5,mm6

			PUNPCKHWD mm0,mm1
			PUNPCKHWD mm2,mm7
			movq mm7,mm0
			PUNPCKLDQ mm0,mm2
			PUNPCKHDQ mm7,mm2
			movq mm6,mm0
			
			//计算第一遍
			movq mm0,mm4
			PADDW mm0,mm5
			PADDW mm0,mm6						
			PADDW mm0,mm7				
			
			movq mm1,mm4
			PSLLW mm1,1			
			PADDW mm1,mm5
			PSUBW mm1,mm6
			movq mm2,mm7
			PSLLW mm2,1
			PSUBW mm1,mm2				
			
			movq mm2,mm4			
			PADDW mm2,mm7			
			PSUBW mm2,mm5			
			PSUBW mm2,mm6			
			
			movq mm3,mm4
			PSUBW mm3,mm7						
			PSLLW mm5,1
			PSLLW mm6,1
			PSUBW mm3,mm5
			PADDW mm3,mm6
			

			//转置	
			movq mm4,mm0
			movq mm5,mm1
			movq mm6,mm2
			movq mm7,mm3

			PUNPCKLWD mm4,mm5
			PUNPCKLWD mm6,mm7
			movq mm5,mm4
			PUNPCKLDQ mm4,mm6
			PUNPCKHDQ mm5,mm6

			PUNPCKHWD mm0,mm1
			PUNPCKHWD mm2,mm7
			movq mm7,mm0
			PUNPCKLDQ mm0,mm2
			PUNPCKHDQ mm7,mm2
			movq mm6,mm0

			//计算第二遍
			movq mm0,mm4
			PADDW mm0,mm5
			PADDW mm0,mm6						
			PADDW mm0,mm7				
			
			movq mm1,mm4
			PSLLW mm1,1			
			PADDW mm1,mm5
			PSUBW mm1,mm6
			movq mm2,mm7
			PSLLW mm2,1
			PSUBW mm1,mm2				
			
			movq mm2,mm4			
			PADDW mm2,mm7			
			PSUBW mm2,mm5			
			PSUBW mm2,mm6			
			
			movq mm3,mm4
			PSUBW mm3,mm7						
			PSLLW mm5,1
			PSLLW mm6,1
			PSUBW mm3,mm5
			PADDW mm3,mm6
		

			movq [ebx],mm0
			movq [ebx+32],mm1
			movq [ebx+64],mm2
			movq [ebx+96],mm3	
			
			//求绝对值
			MOVQ MM4, MM0 
			PSRAW MM4, 15 
			PXOR MM0, MM4 
			PSUBW MM0, MM4 
			MOVQ MM5, MM1 
			PSRAW MM5, 15 
			PXOR MM1, MM5 
			PSUBW MM1, MM5
			MOVQ MM6, MM2 
			PSRAW MM6, 15 
			PXOR MM2, MM6 
			PSUBW MM2, MM6 
			MOVQ MM7, MM3 
			PSRAW MM7, 15 
			PXOR MM3, MM7 
			PSUBW MM3, MM7 

			movq2dq xmm0,mm0
			movq2dq xmm1,mm1
			movq2dq xmm2,mm2
			movq2dq xmm3,mm3
			
			
			movq mm4,[esi]
			movq2dq xmm4,mm4
			MOVDQU xmm5,xmm0
			PMULLW xmm0,xmm4
			PMULHW xmm5,xmm4
			PUNPCKLWD xmm0,xmm5
			movq mm4,[esi+8]
			movq2dq xmm4,mm4
			MOVDQU xmm5,xmm1
			PMULLW xmm1,xmm4
			PMULHW xmm5,xmm4
			PUNPCKLWD xmm1,xmm5
			movq mm4,[esi+16]
			movq2dq xmm4,mm4
			MOVDQU xmm5,xmm2
			PMULLW xmm2,xmm4
			PMULHW xmm5,xmm4
			PUNPCKLWD xmm2,xmm5
			movq mm4,[esi+24]
			movq2dq xmm4,mm4
			MOVDQU xmm5,xmm3
			PMULLW xmm3,xmm4
			PMULHW xmm5,xmm4
			PUNPCKLWD xmm3,xmm5
			
			MOVDQU [edi],xmm0
			MOVDQU [edi+32],xmm1
			MOVDQU [edi+64],xmm2
			MOVDQU [edi+96],xmm3
			
			emms
		}
	}
  }
  /*for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE)
  {
    for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE)
    {

      //  Horizontal transform.
      for (j=0; j < BLOCK_SIZE; j++)
      {
        mb_y=n2+j;
        for (i=0; i < 2; i++)
        {
          i1=3-i;
          m5[i]=img->m7[i+n1][mb_y]+img->m7[i1+n1][mb_y];
          m5[i1]=img->m7[i+n1][mb_y]-img->m7[i1+n1][mb_y];
        }
        img->m7[n1][mb_y]  =(m5[0]+m5[1]);
        img->m7[n1+2][mb_y]=(m5[0]-m5[1]);
        img->m7[n1+1][mb_y]=m5[3]*2+m5[2];
        img->m7[n1+3][mb_y]=m5[3]-m5[2]*2;
      }

      //  Vertical transform.

      for (i=0; i < BLOCK_SIZE; i++)
      {
        j1=n1+i;
        for (j=0; j < 2; j++)
        {
          j2=3-j;
          m5[j]=img->m7[j1][n2+j]+img->m7[j1][n2+j2];
          m5[j2]=img->m7[j1][n2+j]-img->m7[j1][n2+j2];
        }
        img->m7[j1][n2+0]=(m5[0]+m5[1]);
        img->m7[j1][n2+2]=(m5[0]-m5[1]);
        img->m7[j1][n2+1]=m5[3]*2+m5[2];
        img->m7[j1][n2+3]=m5[3]-m5[2]*2;
      }
    }
  }*/

  //     2X2 transform of DC coeffs.
  m1[0]=(img->m7[0][0]+img->m7[4][0]+img->m7[0][4]+img->m7[4][4]);
  m1[1]=(img->m7[0][0]-img->m7[4][0]+img->m7[0][4]-img->m7[4][4]);
  m1[2]=(img->m7[0][0]+img->m7[4][0]-img->m7[0][4]-img->m7[4][4]);
  m1[3]=(img->m7[0][0]-img->m7[4][0]-img->m7[0][4]+img->m7[4][4]);

  //     Quant of chroma 2X2 coeffs.
  run=-1;
  scan_pos=0;

  for (coeff_ctr=0; coeff_ctr < 4; coeff_ctr++)
  {
    run++;
    ilev=0;

    level =(abs(m1[coeff_ctr]) * quant_coef[qp_rem][0][0] + 2*qp_const) >> (q_bits+1);

    if (level  != 0)
    {
      currMB->cbp_blk |= 0xf0000 << (uv << 2) ;    // if one of the 2x2-DC levels is != 0 set the
      cr_cbp=max(1,cr_cbp);                     // coded-bit all 4 4x4 blocks (bit 16-19 or 20-23)
      DCcoded = 1 ;
      DCLevel[scan_pos] = sign(level ,m1[coeff_ctr]);
      DCRun  [scan_pos] = run;
      scan_pos++;
      run=-1;
      ilev=level*dequant_coef[qp_rem][0][0]<<qp_per;
	  m1[coeff_ctr]=sign(ilev,m1[coeff_ctr]);
    }
	else
		m1[coeff_ctr]=0;
    
  }
  DCLevel[scan_pos] = 0;

  //  Inverse transform of 2x2 DC levels

  img->m7[0][0]=(m1[0]+m1[1]+m1[2]+m1[3])>>1;
  img->m7[4][0]=(m1[0]-m1[1]+m1[2]-m1[3])>>1;
  img->m7[0][4]=(m1[0]+m1[1]-m1[2]-m1[3])>>1;
  img->m7[4][4]=(m1[0]-m1[1]-m1[2]+m1[3])>>1;

  //     Quant of chroma AC-coeffs.
  coeff_cost=0;
  cr_cbp_tmp=0;

  for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE)
  {
    for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE)
    {
      b4      = 2*(n2/4) + (n1/4);
      ACLevel = img->cofAC[uv+4][b4][0];
      ACRun   = img->cofAC[uv+4][b4][1];
      run=-1;
      scan_pos=0;

      for (coeff_ctr=1; coeff_ctr < 16; coeff_ctr++)// start change rd_quant
      {

        
        i=SNGL_SCAN[coeff_ctr][0];
        j=SNGL_SCAN[coeff_ctr][1];
        
        ++run;
        ilev=0;

        //level=(abs(img->m7[n1+i][n2+j])*quant_coef[qp_rem][i][j]+qp_const)>>q_bits;
		level = (mulres[n1+i][n2+j]  + qp_const) >> q_bits;
        if (level  != 0)
        {
          currMB->cbp_blk |= 1 << (16 + (uv << 2) + ((n2 >> 1) + (n1 >> 2))) ;
          if (level > 1)
            coeff_cost += MAX_VALUE;                // set high cost, shall not be discarded
          else
            coeff_cost += COEFF_COST[run];

          cr_cbp_tmp=2;
          ACLevel[scan_pos] = sign(level,img->m7[n1+i][n2+j]);
          ACRun  [scan_pos] = run;
          ++scan_pos;
          run=-1;
          ilev=level*dequant_coef[qp_rem][i][j]<<qp_per;
		  img->m7[n1+i][n2+j]=sign(ilev,img->m7[n1+i][n2+j]); // for use in IDCT
        }
		else
			img->m7[n1+i][n2+j]=0; // for use in IDCT
        
      }
      ACLevel[scan_pos] = 0;
    }
  }

  // * reset chroma coeffs
  if(coeff_cost < _CHROMA_COEFF_COST_)
  {
    cr_cbp_tmp = 0 ;
    for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE)
    {
      for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE)
      {
        b4      = 2*(n2/4) + (n1/4);
        ACLevel = img->cofAC[uv+4][b4][0];
        ACRun   = img->cofAC[uv+4][b4][1];
        if( DCcoded == 0) currMB->cbp_blk &= ~(0xf0000 << (uv << 2));  // if no chroma DC's: then reset coded-bits of this chroma subblock
        nn0 = (n1>>2) + (uv<<1);
        nn1 = 4 + (n2>>2) ;
        ACLevel[0] = 0;
        for (coeff_ctr=1; coeff_ctr < 16; coeff_ctr++)// ac coeff
        {
          
            i=SNGL_SCAN[coeff_ctr][0];
            j=SNGL_SCAN[coeff_ctr][1];
          
          img->m7[n1+i][n2+j]=0;
          ACLevel[coeff_ctr] = 0;
        }
      }
    }
  }
  if(cr_cbp_tmp==2)
    cr_cbp = 2;
  //     IDCT.
    for (n2=0; n2 <= BLOCK_SIZE; n2 += BLOCK_SIZE)
  {
    for (n1=0; n1 <= BLOCK_SIZE; n1 += BLOCK_SIZE)
    {
		pointer=&(img->m7[n1][n2]);
		pt1=&(img->mpr[n1][n2]);   
		pt3=&(imgUV[uv][img->pix_c_y+n2][img->pix_c_x+n1]);
   _asm
	{		
		
			mov eax,pointer
			mov ebx,pt2		
			
			//读入矩阵
			movq mm4,[eax]					
			movq mm5,[eax+32]							
			movq mm6,[eax+64]			
			movq mm7,[eax+96] 		
			
			//转置	
			movq mm0,mm4
			movq mm1,mm5
			movq mm2,mm6
			PUNPCKLWD mm4,mm5
			PUNPCKLWD mm6,mm7
			movq mm5,mm4
			PUNPCKLDQ mm4,mm6
			PUNPCKHDQ mm5,mm6

			PUNPCKHWD mm0,mm1
			PUNPCKHWD mm2,mm7
			movq mm7,mm0
			PUNPCKLDQ mm0,mm2
			PUNPCKHDQ mm7,mm2
			movq mm6,mm0
			
			//计算第一遍
			movq mm0,mm4
			PADDW mm0,mm5
			PADDW mm0,mm6			
			movq mm3,mm7
			PSRAW mm3,1
			PADDW mm0,mm3				
			
			movq mm1,mm4			
			movq mm3,mm5
			PSRAW mm3,1
			PADDW mm1,mm3
			PSUBW mm1,mm6						
			PSUBW mm1,mm7				
			
			movq mm2,mm4			
			PADDW mm2,mm7			
			movq mm3,mm5
			PSRAW mm3,1
			PSUBW mm2,mm3			
			PSUBW mm2,mm6			
			
			movq mm3,mm4			
			PADDW mm3,mm6			
			PSUBW mm3,mm5			
			PSRAW mm7,1
			PSUBW mm3,mm7

			//转置	
			movq mm4,mm0
			movq mm5,mm1
			movq mm6,mm2
			movq mm7,mm3

			PUNPCKLWD mm4,mm5
			PUNPCKLWD mm6,mm7
			movq mm5,mm4
			PUNPCKLDQ mm4,mm6
			PUNPCKHDQ mm5,mm6

			PUNPCKHWD mm0,mm1
			PUNPCKHWD mm2,mm7
			movq mm7,mm0
			PUNPCKLDQ mm0,mm2
			PUNPCKHDQ mm7,mm2
			movq mm6,mm0

			//计算第二遍
			movq mm0,mm4
			PADDW mm0,mm5
			PADDW mm0,mm6			
			movq mm3,mm7
			PSRAW mm3,1
			PADDW mm0,mm3				
			
			movq mm1,mm4			
			movq mm3,mm5
			PSRAW mm3,1
			PADDW mm1,mm3
			PSUBW mm1,mm6						
			PSUBW mm1,mm7				
			
			movq mm2,mm4			
			PADDW mm2,mm7			
			movq mm3,mm5
			PSRAW mm3,1
			PSUBW mm2,mm3			
			PSUBW mm2,mm6			
			
			movq mm3,mm4			
			PADDW mm3,mm6			
			PSUBW mm3,mm5			
			PSRAW mm7,1
			PSUBW mm3,mm7
		

			movq [ebx],mm0
			movq [ebx+8],mm1
			movq [ebx+16],mm2
			movq [ebx+24],mm3

			mov eax,pt1
			mov ebx,pt2
			mov ecx,ebx			
			mov dx,4
			movq mm5,dq		

loop1_2:
		
			pxor mm4,mm4
			movq mm6,[eax]
			movq mm7,[ebx]
			
			movq mm0,mm6
			PUNPCKLWD mm0,mm4
			pslld mm0,16
			psrad mm0,16
			movq mm1,mm7
			PUNPCKLWD mm1,mm4
			pslld mm1,16
			psrad mm1,16
			movq mm2,mm6
			PUNPCKHWD mm2,mm4
			pslld mm2,16
			psrad mm2,16
			movq mm3,mm7
			PUNPCKHWD mm3,mm4
			pslld mm3,16
			psrad mm3,16
			pxor mm7,mm7
			PSLLD mm0,6
			paddd mm0,mm1
			paddd mm0,mm5
			psrad mm0,6
			PSLLD mm2,6

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -