⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dsp_mmx.c

📁 mediastreamer2是开源的网络传输媒体流的库
💻 C
📖 第 1 页 / 共 2 页
字号:
     : "r" (stride1),       "r" (stride2)     : "memory"  );  return DiffVal;}static ogg_uint32_t sad8x8_thres__mmx (unsigned char *ptr1, ogg_uint32_t stride1,		       		  unsigned char *ptr2, ogg_uint32_t stride2, 			   	  ogg_uint32_t thres){  return sad8x8__mmx (ptr1, stride1, ptr2, stride2);}static ogg_uint32_t sad8x8_xy2_thres__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,		                      unsigned char *RefDataPtr1,			              unsigned char *RefDataPtr2, ogg_uint32_t RefStride,			              ogg_uint32_t thres){  ogg_uint32_t  DiffVal;  __asm__ __volatile__ (    "  .balign 16                   \n\t"    "  pcmpeqd     %%mm5, %%mm5     \n\t"	/* fefefefefefefefe in mm5 */    "  paddb       %%mm5, %%mm5     \n\t"       "  pxor        %%mm6, %%mm6     \n\t"	/* zero out mm6 for unpack */    "  pxor        %%mm7, %%mm7     \n\t" 	/* mm7 contains the result */    "  mov         $8, %%edi        \n\t"	/* 8 rows */    "1:                             \n\t"    "  movq        (%1), %%mm0      \n\t"	/* take 8 bytes */    "  movq        (%2), %%mm2      \n\t"    "  movq        (%3), %%mm3      \n\t"	/* take average of mm2 and mm3 */    "  movq        %%mm2, %%mm1     \n\t"    "  pand        %%mm3, %%mm1     \n\t"    "  pxor        %%mm2, %%mm3     \n\t"    "  pand        %%mm5, %%mm3     \n\t"    "  psrlq       $1, %%mm3        \n\t"    "  paddb       %%mm3, %%mm1     \n\t"    "  movq        %%mm0, %%mm2     \n\t"    "  psubusb     %%mm1, %%mm0     \n\t" 	/* A - B */    "  psubusb     %%mm2, %%mm1     \n\t"	/* B - A */    "  por         %%mm1, %%mm0     \n\t"    	/* and or gives abs difference */    "  movq        %%mm0, %%mm1     \n\t"    "  punpcklbw   %%mm6, %%mm0     \n\t"	/* unpack to higher precision for accumulation */    "  paddw       %%mm0, %%mm7     \n\t"	/* accumulate difference... */    "  punpckhbw   %%mm6, %%mm1     \n\t"	/* unpack high four bytes to higher precision */    "  add         %4, %1           \n\t"	/* Inc pointer into the new data */    "  paddw       %%mm1, %%mm7     \n\t"	/* accumulate difference... */    "  add         %5, %2           \n\t"	/* Inc pointer into ref data */    "  add         %5, %3           \n\t"	/* Inc pointer into ref data */    "  dec         %%edi            \n\t"    "  jnz 1b                       \n\t"    "  movq        %%mm7, %%mm0     \n\t"    "  psrlq       $32, %%mm7       \n\t"    "  paddw       %%mm0, %%mm7     \n\t"    "  movq        %%mm7, %%mm0     \n\t"    "  psrlq       $16, %%mm7       \n\t"    "  paddw       %%mm0, %%mm7     \n\t"    "  movd        %%mm7, %0        \n\t"    "  andl        $0xffff, %0      \n\t"     : "=m" (DiffVal),       "+r" (SrcData),        "+r" (RefDataPtr1),        "+r" (RefDataPtr2)      : "m" (SrcStride),       "m" (RefStride)     : "edi", "memory"  );  return DiffVal;}static ogg_uint32_t intra8x8_err__mmx (unsigned char *DataPtr, ogg_uint32_t Stride){  ogg_uint32_t  XSum;  ogg_uint32_t  XXSum;  __asm__ __volatile__ (    "  .balign 16                   \n\t"    "  pxor        %%mm5, %%mm5     \n\t"    "  pxor        %%mm6, %%mm6     \n\t"    "  pxor        %%mm7, %%mm7     \n\t"    "  mov         $8, %%edi        \n\t"    "1:                             \n\t"    "  movq        (%2), %%mm0      \n\t"	/* take 8 bytes */    "  movq        %%mm0, %%mm2     \n\t"    "  punpcklbw   %%mm6, %%mm0     \n\t"    "  punpckhbw   %%mm6, %%mm2     \n\t"    "  paddw       %%mm0, %%mm5     \n\t"    "  paddw       %%mm2, %%mm5     \n\t"    "  pmaddwd     %%mm0, %%mm0     \n\t"    "  pmaddwd     %%mm2, %%mm2     \n\t"        "  paddd       %%mm0, %%mm7     \n\t"    "  paddd       %%mm2, %%mm7     \n\t"    "  add         %3, %2           \n\t"	/* Inc pointer into src data */    "  dec         %%edi            \n\t"    "  jnz 1b                       \n\t"    "  movq        %%mm5, %%mm0     \n\t"    "  psrlq       $32, %%mm5       \n\t"    "  paddw       %%mm0, %%mm5     \n\t"    "  movq        %%mm5, %%mm0     \n\t"    "  psrlq       $16, %%mm5       \n\t"    "  paddw       %%mm0, %%mm5     \n\t"    "  movd        %%mm5, %%edi     \n\t"    "  movsx       %%di, %%edi      \n\t"    "  movl        %%edi, %0        \n\t"    "  movq        %%mm7, %%mm0     \n\t"    "  psrlq       $32, %%mm7       \n\t"    "  paddd       %%mm0, %%mm7     \n\t"    "  movd        %%mm7, %1        \n\t"     : "=r" (XSum),       "=r" (XXSum),       "+r" (DataPtr)      : "r" (Stride)     : "edi", "memory"  );  /* Compute population variance as mis-match metric. */  return (( (XXSum<<6) - XSum*XSum ) );}static ogg_uint32_t inter8x8_err__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,		                 unsigned char *RefDataPtr, ogg_uint32_t RefStride){  ogg_uint32_t  XSum;  ogg_uint32_t  XXSum;  __asm__ __volatile__ (    "  .balign 16                   \n\t"    "  pxor        %%mm5, %%mm5     \n\t"    "  pxor        %%mm6, %%mm6     \n\t"    "  pxor        %%mm7, %%mm7     \n\t"    "  mov         $8, %%edi        \n\t"    "1:                             \n\t"    "  movq        (%2), %%mm0      \n\t"	/* take 8 bytes */    "  movq        (%3), %%mm1      \n\t"    "  movq        %%mm0, %%mm2     \n\t"    "  movq        %%mm1, %%mm3     \n\t"    "  punpcklbw   %%mm6, %%mm0     \n\t"    "  punpcklbw   %%mm6, %%mm1     \n\t"    "  punpckhbw   %%mm6, %%mm2     \n\t"    "  punpckhbw   %%mm6, %%mm3     \n\t"    "  psubsw      %%mm1, %%mm0     \n\t"    "  psubsw      %%mm3, %%mm2     \n\t"    "  paddw       %%mm0, %%mm5     \n\t"    "  paddw       %%mm2, %%mm5     \n\t"    "  pmaddwd     %%mm0, %%mm0     \n\t"    "  pmaddwd     %%mm2, %%mm2     \n\t"        "  paddd       %%mm0, %%mm7     \n\t"    "  paddd       %%mm2, %%mm7     \n\t"    "  add         %4, %2           \n\t"	/* Inc pointer into src data */    "  add         %5, %3           \n\t"	/* Inc pointer into ref data */    "  dec         %%edi            \n\t"    "  jnz 1b                       \n\t"    "  movq        %%mm5, %%mm0     \n\t"    "  psrlq       $32, %%mm5       \n\t"    "  paddw       %%mm0, %%mm5     \n\t"    "  movq        %%mm5, %%mm0     \n\t"    "  psrlq       $16, %%mm5       \n\t"    "  paddw       %%mm0, %%mm5     \n\t"    "  movd        %%mm5, %%edi     \n\t"    "  movsx       %%di, %%edi      \n\t"    "  movl        %%edi, %0        \n\t"    "  movq        %%mm7, %%mm0     \n\t"    "  psrlq       $32, %%mm7       \n\t"    "  paddd       %%mm0, %%mm7     \n\t"    "  movd        %%mm7, %1        \n\t"     : "=m" (XSum),       "=m" (XXSum),       "+r" (SrcData),        "+r" (RefDataPtr)      : "m" (SrcStride),       "m" (RefStride)     : "edi", "memory"  );  /* Compute and return population variance as mis-match metric. */  return (( (XXSum<<6) - XSum*XSum ));}static ogg_uint32_t inter8x8_err_xy2__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,		                     unsigned char *RefDataPtr1,				     unsigned char *RefDataPtr2, ogg_uint32_t RefStride){  ogg_uint32_t XSum;  ogg_uint32_t XXSum;  __asm__ __volatile__ (    "  .balign 16                   \n\t"    "  pcmpeqd     %%mm4, %%mm4     \n\t"	/* fefefefefefefefe in mm4 */    "  paddb       %%mm4, %%mm4     \n\t"    "  pxor        %%mm5, %%mm5     \n\t"    "  pxor        %%mm6, %%mm6     \n\t"    "  pxor        %%mm7, %%mm7     \n\t"    "  mov         $8, %%edi        \n\t"    "1:                             \n\t"    "  movq        (%2), %%mm0      \n\t"	/* take 8 bytes */    "  movq        (%3), %%mm2      \n\t"    "  movq        (%4), %%mm3      \n\t"	/* take average of mm2 and mm3 */    "  movq        %%mm2, %%mm1     \n\t"    "  pand        %%mm3, %%mm1     \n\t"    "  pxor        %%mm2, %%mm3     \n\t"    "  pand        %%mm4, %%mm3     \n\t"    "  psrlq       $1, %%mm3        \n\t"    "  paddb       %%mm3, %%mm1     \n\t"    "  movq        %%mm0, %%mm2     \n\t"    "  movq        %%mm1, %%mm3     \n\t"    "  punpcklbw   %%mm6, %%mm0     \n\t"    "  punpcklbw   %%mm6, %%mm1     \n\t"    "  punpckhbw   %%mm6, %%mm2     \n\t"    "  punpckhbw   %%mm6, %%mm3     \n\t"    "  psubsw      %%mm1, %%mm0     \n\t"    "  psubsw      %%mm3, %%mm2     \n\t"    "  paddw       %%mm0, %%mm5     \n\t"    "  paddw       %%mm2, %%mm5     \n\t"    "  pmaddwd     %%mm0, %%mm0     \n\t"    "  pmaddwd     %%mm2, %%mm2     \n\t"        "  paddd       %%mm0, %%mm7     \n\t"    "  paddd       %%mm2, %%mm7     \n\t"    "  add         %5, %2           \n\t"	/* Inc pointer into src data */    "  add         %6, %3           \n\t"	/* Inc pointer into ref data */    "  add         %6, %4           \n\t"	/* Inc pointer into ref data */    "  dec         %%edi            \n\t"    "  jnz 1b                       \n\t"    "  movq        %%mm5, %%mm0     \n\t"    "  psrlq       $32, %%mm5       \n\t"    "  paddw       %%mm0, %%mm5     \n\t"    "  movq        %%mm5, %%mm0     \n\t"    "  psrlq       $16, %%mm5       \n\t"    "  paddw       %%mm0, %%mm5     \n\t"    "  movd        %%mm5, %%edi     \n\t"    "  movsx       %%di, %%edi      \n\t"    "  movl        %%edi, %0        \n\t"    "  movq        %%mm7, %%mm0     \n\t"    "  psrlq       $32, %%mm7       \n\t"    "  paddd       %%mm0, %%mm7     \n\t"    "  movd        %%mm7, %1        \n\t"     : "=m" (XSum),       "=m" (XXSum),       "+r" (SrcData),        "+r" (RefDataPtr1),       "+r" (RefDataPtr2)      : "m" (SrcStride),       "m" (RefStride)     : "edi", "memory"  );  /* Compute and return population variance as mis-match metric. */  return (( (XXSum<<6) - XSum*XSum ));}static void restore_fpu (void){  __asm__ __volatile__ (    "  emms                         \n\t"  );}void dsp_mmx_init(DspFunctions *funcs){  TH_DEBUG("enabling accelerated x86_32 mmx dsp functions.\n");  funcs->restore_fpu = restore_fpu;  funcs->sub8x8 = sub8x8__mmx;  funcs->sub8x8_128 = sub8x8_128__mmx;  funcs->sub8x8avg2 = sub8x8avg2__mmx;  funcs->row_sad8 = row_sad8__mmx;  funcs->col_sad8x8 = col_sad8x8__mmx;  funcs->sad8x8 = sad8x8__mmx;  funcs->sad8x8_thres = sad8x8_thres__mmx;  funcs->sad8x8_xy2_thres = sad8x8_xy2_thres__mmx;  funcs->intra8x8_err = intra8x8_err__mmx;  funcs->inter8x8_err = inter8x8_err__mmx;  funcs->inter8x8_err_xy2 = inter8x8_err_xy2__mmx;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -