⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dsp_mmx.cpp

📁 从FFMPEG转换而来的H264解码程序,VC下编译..
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/* Threshold-taking variant of the 8x8 SAD entry point.
 *
 *   ptr1, stride1 - first block and its row step, passed through unchanged
 *   ptr2, stride2 - second block and its row step, passed through unchanged
 *   thres         - accepted for interface compatibility; never consulted
 *
 * No early termination is implemented here: the call is forwarded to
 * sad8x8__mmx (defined outside this part of the file) and its result is
 * returned as-is.
 */
static ogg_uint32_t sad8x8_thres__mmx (unsigned char *ptr1, ogg_uint32_t stride1,
                                       unsigned char *ptr2, ogg_uint32_t stride2,
                                       ogg_uint32_t thres)
{
  (void) thres;   /* intentionally unused */

  const ogg_uint32_t sad = sad8x8__mmx (ptr1, stride1, ptr2, stride2);
  return sad;
}

/* Sum of absolute differences over 8 rows of 8 bytes, comparing the source
 * against the byte-wise average of two reference blocks.
 *
 *   SrcData     - first source row; stepped by SrcStride after every row
 *   RefDataPtr1 - first row of the first reference block
 *   RefDataPtr2 - first row of the second reference block
 *   RefStride   - row step applied to both reference pointers
 *   thres       - accepted for interface compatibility; never consulted
 *
 * Returns the low 16 bits of the folded accumulator.
 *
 * The SIMD helpers (movq, pand, psubusb, ...) are declared outside this
 * block.  The comments below assume they mirror the MMX instructions of the
 * same name with (source, destination) operand order, which is what the
 * load calls and the original "A - B" annotations imply.
 */
static ogg_uint32_t sad8x8_xy2_thres__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
                                           unsigned char *RefDataPtr1,
                                           unsigned char *RefDataPtr2, ogg_uint32_t RefStride,
                                           ogg_uint32_t thres)
{
  ogg_uint32_t  DiffVal = 0;
  __m64 mm6=_mm_setzero_si64(),mm7=mm6,mm0,mm1,mm2,mm3;

  /* Start mm5 from a defined value.  Comparing an uninitialised __m64 with
     itself yields all ones on the hardware, but it still reads an
     indeterminate object as far as the language is concerned. */
  __m64 mm5=mm6;
  pcmpeqd     (mm5, mm5);       /* ffffffffffffffff in mm5 */
  paddb       (mm5, mm5);       /* fefefefefefefefe in mm5 */

  (void) thres;                 /* no early-out is implemented */

  for (int i=0;i<8;i++,SrcData+=SrcStride,RefDataPtr1+=RefStride,RefDataPtr2+=RefStride)
    {
      movq        (SrcData, mm0);       /* take 8 bytes */

      /* Average of the two references without widening:
         (a & b) + (((a ^ b) & 0xfe) >> 1).  The 0xfe mask clears each
         byte's low bit so the 64-bit shift cannot leak into its neighbour. */
      movq        (RefDataPtr1, mm2);
      movq        (RefDataPtr2, mm3);
      movq        (mm2, mm1);
      pand        (mm3, mm1);
      pxor        (mm2, mm3);
      pand        (mm5, mm3);
      psrlq       (1, mm3 );
      paddb       (mm3, mm1);           /* mm1 = average */

      movq        (mm0, mm2);           /* keep a copy of the source bytes */

      psubusb     (mm1, mm0);           /* A - B, saturating at zero */
      psubusb     (mm2, mm1);           /* B - A, saturating at zero */
      por         (mm1, mm0);           /* one side is zero, so OR gives |A - B| */
      movq        (mm0, mm1);

      punpcklbw   (mm6, mm0);           /* unpack low four bytes to words */
      paddw       (mm0, mm7);           /* accumulate difference... */
      punpckhbw   (mm6, mm1);           /* unpack high four bytes to words */
      paddw       (mm1, mm7);           /* accumulate difference... */
    }

  /* Fold the four 16-bit partial sums of mm7 into its low word. */
  movq        (mm7, mm0);
  psrlq       (32, mm7 );
  paddw       (mm0, mm7);
  movq        (mm7, mm0);
  psrlq       (16, mm7 );
  paddw       (mm0, mm7);
  movd        (mm7, (int&)DiffVal);

  /* Only the low word holds the total; the upper word is fold residue. */
  return DiffVal&0xffff;
}

/* Intra error metric over 8 rows of 8 bytes.
 *
 *   DataPtr - first row of the block; stepped by Stride after every row
 *   Stride  - distance in bytes between consecutive rows
 *
 * Accumulates the sum of the 64 samples and the sum of their squares, then
 * returns 64*sum(x*x) - sum(x)*sum(x), i.e. the population variance scaled
 * by 64*64.
 *
 * The SIMD helpers are declared outside this block; comments assume they
 * mirror the same-named MMX instructions with (source, destination) order.
 */
static ogg_uint32_t intra8x8_err__mmx (unsigned char *DataPtr, ogg_uint32_t Stride)
{
  __m64 zero   = _mm_setzero_si64();
  __m64 sum    = zero;          /* four 16-bit partial sums of the samples */
  __m64 sqsum  = zero;          /* two 32-bit partial sums of the squares  */
  __m64 lo, hi, fold;

  for (int rows = 8; rows > 0; --rows)
    {
      movq      (DataPtr, lo);  /* take 8 bytes */
      movq      (lo, hi);

      /* low four bytes: widen, add to the sum, square, add to the squares */
      punpcklbw (zero, lo);
      paddw     (lo, sum);
      pmaddwd   (lo, lo);
      paddd     (lo, sqsum);

      /* high four bytes: same treatment */
      punpckhbw (zero, hi);
      paddw     (hi, sum);
      pmaddwd   (hi, hi);
      paddd     (hi, sqsum);

      DataPtr += Stride;
    }

  /* Fold the four word sums into the low word. */
  movq  (sum, fold);
  psrlq (32, sum);
  paddw (fold, sum);
  movq  (sum, fold);
  psrlq (16, sum);
  paddw (fold, sum);

  int sumBits;
  movd  (sum, sumBits);
  /* Narrowing keeps just the low word, standing in for the movsx of the
     assembly this was ported from. */
  ogg_int16_t sampleSum = (ogg_int16_t) sumBits;

  /* Fold the two dword sums of squares into the low dword. */
  movq  (sqsum, fold);
  psrlq (32, sqsum);
  paddd (fold, sqsum);

  int squareBits;
  movd  (sqsum, squareBits);
  ogg_uint32_t squareSum = (ogg_uint32_t) squareBits;

  /* Compute population variance (times 64*64) as mis-match metric. */
  return (squareSum << 6) - sampleSum * sampleSum;
}

/* Inter error metric between a source block and a reference block, both
 * 8 rows of 8 bytes.
 *
 *   SrcData, SrcStride    - first source row and its row step
 *   RefDataPtr, RefStride - first reference row and its row step
 *
 * For the 64 differences d = src - ref it accumulates sum(d) and sum(d*d)
 * and returns 64*sum(d*d) - sum(d)*sum(d), i.e. the population variance of
 * the differences scaled by 64*64.
 *
 * The SIMD helpers are declared outside this block; comments assume they
 * mirror the same-named MMX instructions with (source, destination) order.
 */
static ogg_uint32_t inter8x8_err__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
                                       unsigned char *RefDataPtr, ogg_uint32_t RefStride)
{
  __m64 zero   = _mm_setzero_si64();
  __m64 sum    = zero;          /* four 16-bit partial sums of the differences */
  __m64 sqsum  = zero;          /* two 32-bit partial sums of their squares    */
  __m64 srcLo, srcHi, refLo, refHi, fold;

  for (int rows = 8; rows > 0; --rows)
    {
      movq      (SrcData, srcLo);       /* take 8 bytes */
      movq      (RefDataPtr, refLo);
      movq      (srcLo, srcHi);
      movq      (refLo, refHi);

      /* low four samples: widen, subtract, accumulate sum and squares */
      punpcklbw (zero, srcLo);
      punpcklbw (zero, refLo);
      psubsw    (refLo, srcLo);         /* srcLo = src - ref */
      paddw     (srcLo, sum);
      pmaddwd   (srcLo, srcLo);
      paddd     (srcLo, sqsum);

      /* high four samples: same treatment */
      punpckhbw (zero, srcHi);
      punpckhbw (zero, refHi);
      psubsw    (refHi, srcHi);         /* srcHi = src - ref */
      paddw     (srcHi, sum);
      pmaddwd   (srcHi, srcHi);
      paddd     (srcHi, sqsum);

      SrcData    += SrcStride;
      RefDataPtr += RefStride;
    }

  /* Fold the four word sums into the low word. */
  movq  (sum, fold);
  psrlq (32, sum);
  paddw (fold, sum);
  movq  (sum, fold);
  psrlq (16, sum);
  paddw (fold, sum);

  int sumBits;
  movd  (sum, sumBits);
  /* Narrowing keeps just the low word, standing in for the movsx of the
     assembly this was ported from. */
  ogg_int16_t diffSum = (ogg_int16_t) sumBits;

  /* Fold the two dword sums of squares into the low dword. */
  movq  (sqsum, fold);
  psrlq (32, sqsum);
  paddd (fold, sqsum);

  int squareBits;
  movd  (sqsum, squareBits);
  ogg_uint32_t squareSum = (ogg_uint32_t) squareBits;

  /* Compute and return population variance (times 64*64) as mis-match metric. */
  return (squareSum << 6) - diffSum * diffSum;
}

/* Inter error metric between a source block and the byte-wise average of
 * two reference blocks, all 8 rows of 8 bytes.
 *
 *   SrcData, SrcStride - first source row and its row step
 *   RefDataPtr1        - first row of the first reference block
 *   RefDataPtr2        - first row of the second reference block
 *   RefStride          - row step applied to both reference pointers
 *
 * For the 64 differences d = src - avg(ref1, ref2) it accumulates sum(d)
 * and sum(d*d) and returns 64*sum(d*d) - sum(d)*sum(d), i.e. the population
 * variance of the differences scaled by 64*64.
 *
 * The SIMD helpers (movq, pand, psubsw, ...) are declared outside this
 * block.  The comments below assume they mirror the MMX instructions of the
 * same name with (source, destination) operand order.
 */
static ogg_uint32_t inter8x8_err_xy2__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
                                           unsigned char *RefDataPtr1,
                                           unsigned char *RefDataPtr2, ogg_uint32_t RefStride)
{
  ogg_int16_t XSum;
  ogg_uint32_t XXSum = 0;

  __m64 mm5=_mm_setzero_si64(),mm6=mm5,mm7=mm5,mm0,mm1,mm2,mm3;

  /* Start mm4 from a defined value.  Comparing an uninitialised __m64 with
     itself yields all ones on the hardware, but it still reads an
     indeterminate object as far as the language is concerned. */
  __m64 mm4=mm6;
  pcmpeqd     (mm4, mm4);       /* ffffffffffffffff in mm4 */
  paddb       (mm4, mm4);       /* fefefefefefefefe in mm4 */

  for (int i=0;i<8;i++,SrcData+=SrcStride,RefDataPtr1+=RefStride,RefDataPtr2+=RefStride)
    {
      movq        (SrcData, mm0);       /* take 8 bytes */

      /* Average of the two references without widening:
         (a & b) + (((a ^ b) & 0xfe) >> 1).  The 0xfe mask clears each
         byte's low bit so the 64-bit shift cannot leak into its neighbour. */
      movq        (RefDataPtr1, mm2);
      movq        (RefDataPtr2, mm3);
      movq        (mm2, mm1);
      pand        (mm3, mm1);
      pxor        (mm2, mm3);
      pand        (mm4, mm3);
      psrlq       (1, mm3 );
      paddb       (mm3, mm1);           /* mm1 = average */

      movq        (mm0, mm2);
      movq        (mm1, mm3);

      /* Widen source (mm0/mm2) and average (mm1/mm3) to 16-bit words. */
      punpcklbw   (mm6, mm0);
      punpcklbw   (mm6, mm1);
      punpckhbw   (mm6, mm2);
      punpckhbw   (mm6, mm3);

      psubsw      (mm1, mm0);           /* low four differences  */
      psubsw      (mm3, mm2);           /* high four differences */

      paddw       (mm0, mm5);           /* accumulate sum of differences */
      paddw       (mm2, mm5);

      pmaddwd     (mm0, mm0);           /* square and pair-wise add */
      pmaddwd     (mm2, mm2);

      paddd       (mm0, mm7);           /* accumulate sum of squares */
      paddd       (mm2, mm7);
    }

  /* Fold the four word sums of mm5 into its low word. */
  movq        (mm5, mm0);
  psrlq       (32, mm5 );
  paddw       (mm0, mm5);
  movq        (mm5, mm0);
  psrlq       (16, mm5 );
  paddw       (mm0, mm5);
  int edi;
  movd        (mm5, edi);
  /* Narrowing keeps just the low word, standing in for the movsx of the
     assembly this was ported from. */
  XSum=(ogg_int16_t)edi;

  /* Fold the two dword sums of squares of mm7 into its low dword. */
  movq        (mm7, mm0);
  psrlq       (32, mm7 );
  paddd       (mm0, mm7);
  movd        (mm7, (int&)XXSum);

  /* Compute and return population variance (times 64*64) as mis-match metric. */
  return (( (XXSum<<6) - XSum*XSum ));
}

/* Callback installed as DspFunctions::restore_fpu.
 *
 * Issues _mm_empty(), the intrinsic form of EMMS, which marks the MMX
 * registers as empty so that x87 floating-point code can run afterwards.
 */
static void restore_fpu (void)
{
  _mm_empty ();
}

/* Installs the MMX implementations into a DspFunctions table.
 *
 *   funcs - table to fill in.  It is dereferenced unconditionally, so it
 *           must not be null.  Every slot listed below is overwritten; no
 *           other member is touched.
 *
 * Exported with C linkage.
 */
extern "C"
void dsp_i386_mmx_init(DspFunctions *funcs)
{
  DspFunctions &table = *funcs;

  /* MMX state clean-up */
  table.restore_fpu      = restore_fpu;

  /* block subtraction */
  table.sub8x8           = sub8x8__mmx;
  table.sub8x8_128       = sub8x8_128__mmx;
  table.sub8x8avg2       = sub8x8avg2__mmx;

  /* sums of absolute differences */
  table.row_sad8         = row_sad8__mmx;
  table.col_sad8x8       = col_sad8x8__mmx;
  table.sad8x8           = sad8x8__mmx;
  table.sad8x8_thres     = sad8x8_thres__mmx;
  table.sad8x8_xy2_thres = sad8x8_xy2_thres__mmx;

  /* variance-style error metrics */
  table.intra8x8_err     = intra8x8_err__mmx;
  table.inter8x8_err     = inter8x8_err__mmx;
  table.inter8x8_err_xy2 = inter8x8_err_xy2__mmx;
}





⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -