⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dsp_mmx.c

📁 mediastreamer2是开源的网络传输媒体流的库
💻 C
📖 第 1 页 / 共 4 页
字号:
    movq		mm1, mm0		;	

    punpcklbw		mm0, mm6		;	/* unpack to higher precision for accumulation */
    paddw		mm7, mm0		;	/* accumulate difference... */
    punpckhbw		mm1, mm6		;	/* unpack high four bytes to higher precision */
    add		ebx, SrcStride		;	/* Inc pointer into the new data */
    paddw		mm7, mm1		;	/* accumulate difference... */
    add		ecx, RefStride		;	/* Inc pointer into ref data */
    add		edx, RefStride		;	/* Inc pointer into ref data */

    dec		edi		;	
    jnz		loop_start		;	

    movq		mm0, mm7		;	
    psrlq		mm7, 32		;	
    paddw		mm7, mm0		;	
    movq		mm0, mm7		;	
    psrlq		mm7, 16		;	
    paddw		mm7, mm0		;	
    movd		eax, mm7		;	
    and		eax, 0xffff		;	

    mov DiffVal, eax
  };

  return DiffVal;

 

#endif
}

static ogg_uint32_t intra8x8_err__mmx (unsigned char *DataPtr, ogg_uint32_t Stride)
{
#if 0
  ogg_uint32_t  i;  ogg_uint32_t  XSum=0;  ogg_uint32_t  XXSum=0;  for (i=8; i; i--) {     /* Examine alternate pixel locations. */     XSum += DataPtr[0];     XXSum += DataPtr[0]*DataPtr[0];     XSum += DataPtr[1];     XXSum += DataPtr[1]*DataPtr[1];     XSum += DataPtr[2];     XXSum += DataPtr[2]*DataPtr[2];     XSum += DataPtr[3];     XXSum += DataPtr[3]*DataPtr[3];     XSum += DataPtr[4];     XXSum += DataPtr[4]*DataPtr[4];     XSum += DataPtr[5];     XXSum += DataPtr[5]*DataPtr[5];     XSum += DataPtr[6];     XXSum += DataPtr[6]*DataPtr[6];     XSum += DataPtr[7];     XXSum += DataPtr[7]*DataPtr[7];     /* Step to next row of block. */     DataPtr += Stride;   }   /* Compute population variance as mis-match metric. */   return (( (XXSum<<6) - XSum*XSum ) );
#else
  ogg_uint32_t  XSum;
  ogg_uint32_t  XXSum;

  __asm {
    align 16

        mov     ecx, DataPtr

    pxor		mm5, mm5		;	
    pxor		mm6, mm6		;	
    pxor		mm7, mm7		;	
    mov		edi, 8		;	
    loop_start:		
    movq		mm0, [ecx]		;	/* take 8 bytes */
    movq		mm2, mm0		;	

    punpcklbw		mm0, mm6		;	
    punpckhbw		mm2, mm6		;	

    paddw		mm5, mm0		;	
    paddw		mm5, mm2		;	

    pmaddwd		mm0, mm0		;	
    pmaddwd		mm2, mm2		;	
				    ;	
    paddd		mm7, mm0		;	
    paddd		mm7, mm2		;	

    add		ecx, Stride		;	/* Inc pointer into src data */

    dec		edi		;	
    jnz		loop_start		;	

    movq		mm0, mm5		;	
    psrlq		mm5, 32		;	
    paddw		mm5, mm0		;	
    movq		mm0, mm5		;	
    psrlq		mm5, 16		;	
    paddw		mm5, mm0		;	
    movd		edi, mm5		;	
    movsx		edi, di		;	
    mov		eax, edi		;	

    movq		mm0, mm7		;	
    psrlq		mm7, 32		;	
    paddd		mm7, mm0		;	
    movd		ebx, mm7		;	

        mov         XSum, eax
        mov         XXSum, ebx;

  };
    /* Compute population variance as mis-match metric. */
    return (( (XXSum<<6) - XSum*XSum ) );

 

#endif
}

static ogg_uint32_t inter8x8_err__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
		                 unsigned char *RefDataPtr, ogg_uint32_t RefStride)
{

#if 0
  ogg_uint32_t  i;  ogg_uint32_t  XSum=0;  ogg_uint32_t  XXSum=0;  ogg_int32_t   DiffVal;  for (i=8; i; i--) {    DiffVal = DSP_OP_DIFF (SrcData[0], RefDataPtr[0]);    XSum += DiffVal;    XXSum += DiffVal*DiffVal;    DiffVal = DSP_OP_DIFF (SrcData[1], RefDataPtr[1]);    XSum += DiffVal;    XXSum += DiffVal*DiffVal;    DiffVal = DSP_OP_DIFF (SrcData[2], RefDataPtr[2]);    XSum += DiffVal;    XXSum += DiffVal*DiffVal;    DiffVal = DSP_OP_DIFF (SrcData[3], RefDataPtr[3]);    XSum += DiffVal;    XXSum += DiffVal*DiffVal;            DiffVal = DSP_OP_DIFF (SrcData[4], RefDataPtr[4]);    XSum += DiffVal;    XXSum += DiffVal*DiffVal;            DiffVal = DSP_OP_DIFF (SrcData[5], RefDataPtr[5]);    XSum += DiffVal;    XXSum += DiffVal*DiffVal;            DiffVal = DSP_OP_DIFF (SrcData[6], RefDataPtr[6]);    XSum += DiffVal;    XXSum += DiffVal*DiffVal;            DiffVal = DSP_OP_DIFF (SrcData[7], RefDataPtr[7]);    XSum += DiffVal;    XXSum += DiffVal*DiffVal;            /* Step to next row of block. */    SrcData += SrcStride;    RefDataPtr += RefStride;  }  /* Compute and return population variance as mis-match metric. */  return (( (XXSum<<6) - XSum*XSum ));
#else
  ogg_uint32_t  XSum;
  ogg_uint32_t  XXSum;


  __asm {
    align 16

        mov     ecx, SrcData
        mov     edx, RefDataPtr

    pxor		mm5, mm5		;	
    pxor		mm6, mm6		;	
    pxor		mm7, mm7		;	
    mov		edi, 8		;	
    loop_start:				;	
    movq		mm0, [ecx]		;	/* take 8 bytes */
    movq		mm1, [edx]		;	
    movq		mm2, mm0		;	
    movq		mm3, mm1		;	

    punpcklbw		mm0, mm6		;	
    punpcklbw		mm1, mm6		;	
    punpckhbw		mm2, mm6		;	
    punpckhbw		mm3, mm6		;	

    psubsw		mm0, mm1		;	
    psubsw		mm2, mm3		;	

    paddw		mm5, mm0		;	
    paddw		mm5, mm2		;	

    pmaddwd		mm0, mm0		;	
    pmaddwd		mm2, mm2		;	
				    ;	
    paddd		mm7, mm0		;	
    paddd		mm7, mm2		;	

    add		ecx, SrcStride		;	/* Inc pointer into src data */
    add		edx, RefStride		;	/* Inc pointer into ref data */

    dec		edi		;	
    jnz		loop_start		;	

    movq		mm0, mm5		;	
    psrlq		mm5, 32		;	
    paddw		mm5, mm0		;	
    movq		mm0, mm5		;	
    psrlq		mm5, 16		;	
    paddw		mm5, mm0		;	
    movd		edi, mm5		;	
    movsx		edi, di		;	
    mov		eax, edi		;	

    movq		mm0, mm7		;	
    psrlq		mm7, 32		;	
    paddd		mm7, mm0		;	
    movd		ebx, mm7		;	

        mov     XSum, eax
        mov     XXSum, ebx

  };

  /* Compute and return population variance as mis-match metric. */
  return (( (XXSum<<6) - XSum*XSum ));

 
#endif
}

static ogg_uint32_t inter8x8_err_xy2__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
		                     unsigned char *RefDataPtr1,
				     unsigned char *RefDataPtr2, ogg_uint32_t RefStride)
{
#if 0
  ogg_uint32_t  i;  ogg_uint32_t  XSum=0;  ogg_uint32_t  XXSum=0;  ogg_int32_t   DiffVal;  for (i=8; i; i--) {    DiffVal = DSP_OP_DIFF(SrcData[0], DSP_OP_AVG (RefDataPtr1[0], RefDataPtr2[0]));    XSum += DiffVal;    XXSum += DiffVal*DiffVal;    DiffVal = DSP_OP_DIFF(SrcData[1], DSP_OP_AVG (RefDataPtr1[1], RefDataPtr2[1]));    XSum += DiffVal;    XXSum += DiffVal*DiffVal;    DiffVal = DSP_OP_DIFF(SrcData[2], DSP_OP_AVG (RefDataPtr1[2], RefDataPtr2[2]));    XSum += DiffVal;    XXSum += DiffVal*DiffVal;    DiffVal = DSP_OP_DIFF(SrcData[3], DSP_OP_AVG (RefDataPtr1[3], RefDataPtr2[3]));    XSum += DiffVal;    XXSum += DiffVal*DiffVal;    DiffVal = DSP_OP_DIFF(SrcData[4], DSP_OP_AVG (RefDataPtr1[4], RefDataPtr2[4]));    XSum += DiffVal;    XXSum += DiffVal*DiffVal;    DiffVal = DSP_OP_DIFF(SrcData[5], DSP_OP_AVG (RefDataPtr1[5], RefDataPtr2[5]));    XSum += DiffVal;    XXSum += DiffVal*DiffVal;    DiffVal = DSP_OP_DIFF(SrcData[6], DSP_OP_AVG (RefDataPtr1[6], RefDataPtr2[6]));    XSum += DiffVal;    XXSum += DiffVal*DiffVal;    DiffVal = DSP_OP_DIFF(SrcData[7], DSP_OP_AVG (RefDataPtr1[7], RefDataPtr2[7]));    XSum += DiffVal;    XXSum += DiffVal*DiffVal;    /* Step to next row of block. */    SrcData += SrcStride;    RefDataPtr1 += RefStride;    RefDataPtr2 += RefStride;  }  /* Compute and return population variance as mis-match metric. */  return (( (XXSum<<6) - XSum*XSum ));
#else
  ogg_uint32_t XSum;
  ogg_uint32_t XXSum;

  __asm {
    align 16

        mov ebx, SrcData
        mov ecx, RefDataPtr1
        mov edx, RefDataPtr2

    pcmpeqd		mm4, mm4		;	/* fefefefefefefefe in mm4 */
    paddb		mm4, mm4		;	
    pxor		mm5, mm5		;	
    pxor		mm6, mm6		;	
    pxor		mm7, mm7		;	
    mov		edi, 8		;	
    loop_start:				;	
    movq		mm0, [ebx]		;	/* take 8 bytes */

    movq		mm2, [ecx]		;	
    movq		mm3, [edx]		;	/* take average of mm2 and mm3 */
    movq		mm1, mm2		;	
    pand		mm1, mm3		;	
    pxor		mm3, mm2		;	
    pand		mm3, mm4		;	
    psrlq		mm3, 1		;	
    paddb		mm1, mm3		;	

    movq		mm2, mm0		;	
    movq		mm3, mm1		;	

    punpcklbw		mm0, mm6		;	
    punpcklbw		mm1, mm6		;	
    punpckhbw		mm2, mm6		;	
    punpckhbw		mm3, mm6		;	

    psubsw		mm0, mm1		;	
    psubsw		mm2, mm3		;	

    paddw		mm5, mm0		;	
    paddw		mm5, mm2		;	

    pmaddwd		mm0, mm0		;	
    pmaddwd		mm2, mm2		;	
				    ;	
    paddd		mm7, mm0		;	
    paddd		mm7, mm2		;	

    add		ebx, SrcStride		;	/* Inc pointer into src data */
    add		ecx, RefStride		;	/* Inc pointer into ref data */
    add		edx, RefStride		;	/* Inc pointer into ref data */

    dec		edi		;	
    jnz		loop_start		;	

    movq		mm0, mm5		;	
    psrlq		mm5, 32		;	
    paddw		mm5, mm0		;	
    movq		mm0, mm5		;	
    psrlq		mm5, 16		;	
    paddw		mm5, mm0		;	
    movd		edi, mm5		;	
    movsx		edi, di		;	
    mov         XSum, edi   ; /* movl		eax, edi		;	Modified for vc to resuse eax*/

    movq		mm0, mm7		;	
    psrlq		mm7, 32		;	
    paddd		mm7, mm0		;	
    movd        XXSum, mm7 ; /*movd		eax, mm7		; Modified for vc to reuse eax */
  };

    return (( (XXSum<<6) - XSum*XSum ));

#endif
}

static void restore_fpu (void)
{

    __asm {
        emms
    }

}

/*
 * dsp_mmx_init
 *
 * Points the entries of the given dispatch table at the MMX
 * implementations in this file and emits a debug message.
 *
 *   funcs  table to fill in; it is dereferenced unconditionally
 */
void dsp_mmx_init(DspFunctions *funcs)
{
  TH_DEBUG("enabling accelerated x86_32 mmx dsp functions.\n");

  /* sub8x8* entries. */
  funcs->sub8x8           = sub8x8__mmx;
  funcs->sub8x8_128       = sub8x8_128__mmx;
  funcs->sub8x8avg2       = sub8x8avg2__mmx;

  /* SAD entries. */
  funcs->row_sad8         = row_sad8__mmx;
  funcs->col_sad8x8       = col_sad8x8__mmx;
  funcs->sad8x8           = sad8x8__mmx;
  funcs->sad8x8_thres     = sad8x8_thres__mmx;
  funcs->sad8x8_xy2_thres = sad8x8_xy2_thres__mmx;

  /* *_err entries. */
  funcs->intra8x8_err     = intra8x8_err__mmx;
  funcs->inter8x8_err     = inter8x8_err__mmx;
  funcs->inter8x8_err_xy2 = inter8x8_err_xy2__mmx;

  /* Hook that issues emms. */
  funcs->restore_fpu      = restore_fpu;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -