⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dsp_mmx.c

📁 mediastreamer2是开源的网络传输媒体流的库
💻 C
📖 第 1 页 / 共 4 页
字号:
  for ( i = 0; i < 4; i++ ){    SadValue2[0] += abs(Src1[0] - Src2[0]);    SadValue2[1] += abs(Src1[1] - Src2[1]);    SadValue2[2] += abs(Src1[2] - Src2[2]);    SadValue2[3] += abs(Src1[3] - Src2[3]);    SadValue2[4] += abs(Src1[4] - Src2[4]);    SadValue2[5] += abs(Src1[5] - Src2[5]);    SadValue2[6] += abs(Src1[6] - Src2[6]);    SadValue2[7] += abs(Src1[7] - Src2[7]);        Src1 += stride;    Src2 += stride;  }      for ( i = 0; i < 8; i++ ){    if ( SadValue[i] > MaxSad )      MaxSad = SadValue[i];    if ( SadValue2[i] > MaxSad )      MaxSad = SadValue2[i];  }      return MaxSad;
#else
  ogg_uint32_t MaxSad;


    __asm {
        align       16
        mov         ebx, Src1
        mov         ecx, Src2

        pxor		mm3, mm3		;	/* zero out mm3 for unpack */
        pxor		mm4, mm4		;	/* mm4 low sum */
        pxor		mm5, mm5		;	/* mm5 high sum */
        pxor		mm6, mm6		;	/* mm6 low sum */
        pxor		mm7, mm7		;	/* mm7 high sum */
        mov		edi, 4		;	/* 4 rows */
        label_1:				;	
        movq		mm0, [ebx]		;	/* take 8 bytes */
        movq		mm1, [ecx]		;	/* take 8 bytes */

        movq		mm2, mm0		;	
        psubusb		mm0, mm1		;	/* A - B */
        psubusb		mm1, mm2		;	/* B - A */
        por		mm0, mm1		;	/* and or gives abs difference */
        movq		mm1, mm0		;	

        punpcklbw		mm0, mm3		;	/* unpack to higher precision for accumulation */
        paddw		mm4, mm0		;	/* accumulate difference... */
        punpckhbw		mm1, mm3		;	/* unpack high four bytes to higher precision */
        paddw		mm5, mm1		;	/* accumulate difference... */
        add		ebx, stride		;	/* Inc pointer into the new data */
        add		ecx, stride		;	/* Inc pointer into the new data */

        dec		edi		;	
        jnz		label_1		;	

        mov		edi, 4		;	/* 4 rows */
        label_2:				;	
        movq		mm0, [ebx]		;	/* take 8 bytes */
        movq		mm1, [ecx]		;	/* take 8 bytes */

        movq		mm2, mm0		;	
        psubusb		mm0, mm1		;	/* A - B */
        psubusb		mm1, mm2		;	/* B - A */
        por		mm0, mm1		;	/* and or gives abs difference */
        movq		mm1, mm0		;	

        punpcklbw		mm0, mm3		;	/* unpack to higher precision for accumulation */
        paddw		mm6, mm0		;	/* accumulate difference... */
        punpckhbw		mm1, mm3		;	/* unpack high four bytes to higher precision */
        paddw		mm7, mm1		;	/* accumulate difference... */
        add		ebx, stride		;	/* Inc pointer into the new data */
        add		ecx, stride		;	/* Inc pointer into the new data */

        dec		edi		;	
        jnz		label_2		;	

        psubusw		mm7, mm6		;	
        paddw		mm7, mm6		;	/* mm7 = max(mm7, mm6) */
        psubusw		mm5, mm4		;	
        paddw		mm5, mm4		;	/* mm5 = max(mm5, mm4) */
        psubusw		mm7, mm5		;	
        paddw		mm7, mm5		;	/* mm7 = max(mm5, mm7) */
        movq		mm6, mm7		;	
        psrlq		mm6, 32		;	
        psubusw		mm7, mm6		;	
        paddw		mm7, mm6		;	/* mm7 = max(mm5, mm7) */
        movq		mm6, mm7		;	
        psrlq		mm6, 16		;	
        psubusw		mm7, mm6		;	
        paddw		mm7, mm6		;	/* mm7 = max(mm5, mm7) */
        movd		eax, mm7		;	
        and		    eax, 0xffff		;

        mov         MaxSad, eax
    };

    return MaxSad;


#endif
}

/*
 * sad8x8__mmx
 *
 * Sum of absolute differences (SAD) between two blocks of 8 rows x 8 bytes,
 * computed with MMX instructions inside an MSVC-style __asm block.
 *
 *   ptr1, stride1 - first block and the byte step between its rows
 *   ptr2, stride2 - second block and the byte step between its rows
 *
 * Returns the SAD.  Only the low 16 bits of the reduced accumulator are
 * kept; the largest possible total, 8 * 8 * 255 = 16320, fits in them.
 *
 * Per row: the eight byte differences |A - B| are formed from two unsigned
 * saturating subtractions OR-ed together, widened to 16-bit words against
 * the zero register mm6, and added into the four word lanes of mm7.  Each
 * lane receives two values per row, so it holds at most 16 * 255 = 4080 and
 * cannot overflow.  After the eight unrolled rows the four lanes are folded
 * into the low word.
 *
 * The "#if 0" branch keeps the scalar reference implementation.
 *
 * NOTE(review): no emms is executed in this block, so the MMX state is
 * still active on return - confirm that callers restore the FPU state.
 */
static ogg_uint32_t sad8x8__mmx (unsigned char *ptr1, ogg_uint32_t stride1,
		       	    unsigned char *ptr2, ogg_uint32_t stride2)
{

#if 0
  ogg_uint32_t  i;  ogg_uint32_t  sad = 0;  for (i=8; i; i--) {    sad += DSP_OP_ABS_DIFF(ptr1[0], ptr2[0]);    sad += DSP_OP_ABS_DIFF(ptr1[1], ptr2[1]);    sad += DSP_OP_ABS_DIFF(ptr1[2], ptr2[2]);    sad += DSP_OP_ABS_DIFF(ptr1[3], ptr2[3]);    sad += DSP_OP_ABS_DIFF(ptr1[4], ptr2[4]);    sad += DSP_OP_ABS_DIFF(ptr1[5], ptr2[5]);    sad += DSP_OP_ABS_DIFF(ptr1[6], ptr2[6]);    sad += DSP_OP_ABS_DIFF(ptr1[7], ptr2[7]);    /* Step to next row of block. */    ptr1 += stride1;    ptr2 += stride2;  }  return sad;
#else
  /* Receives the final sum from eax at the end of the asm block. */
  ogg_uint32_t  DiffVal;

  __asm {
    align  16

    mov         ebx, ptr1
    mov         edx, ptr2

    pxor		mm6, mm6		;	/* mm6 = 0, the zero source for byte-to-word unpacking */
    pxor		mm7, mm7		;	/* mm7 = four 16-bit partial sums, starts at 0 */
    
    ; /* ITERATION 1 */
    movq		mm0, [ebx]		;	/* A = 8 bytes of the current ptr1 row */
    movq		mm1, [edx]		;	/* B = 8 bytes of the current ptr2 row */
    movq		mm2, mm0		;	/* keep a copy of A; mm0 is overwritten next */

    psubusb		mm0, mm1		;	/* A - B, saturated to 0 where B > A */
    psubusb		mm1, mm2		;	/* B - A, saturated to 0 where A > B */
    por		mm0, mm1		;	/* one side is 0 per byte, so OR gives |A - B| */
    movq		mm1, mm0		;	/* second copy for the high four bytes */

    punpcklbw		mm0, mm6		;	/* low four bytes widened to words */
    paddw		mm7, mm0		;	/* accumulate low-half differences */
    punpckhbw		mm1, mm6		;	/* high four bytes widened to words */
    add		ebx, stride1		;	/* step ptr1 to the next row */
    paddw		mm7, mm1		;	/* accumulate high-half differences */
    add		edx, stride2		;	/* step ptr2 to the next row */

    ; /* ITERATION 2 */
    movq		mm0, [ebx]		;	/* A = 8 bytes of the current ptr1 row */
    movq		mm1, [edx]		;	/* B = 8 bytes of the current ptr2 row */
    movq		mm2, mm0		;	/* keep a copy of A; mm0 is overwritten next */

    psubusb		mm0, mm1		;	/* A - B, saturated to 0 where B > A */
    psubusb		mm1, mm2		;	/* B - A, saturated to 0 where A > B */
    por		mm0, mm1		;	/* one side is 0 per byte, so OR gives |A - B| */
    movq		mm1, mm0		;	/* second copy for the high four bytes */

    punpcklbw		mm0, mm6		;	/* low four bytes widened to words */
    paddw		mm7, mm0		;	/* accumulate low-half differences */
    punpckhbw		mm1, mm6		;	/* high four bytes widened to words */
    add		ebx, stride1		;	/* step ptr1 to the next row */
    paddw		mm7, mm1		;	/* accumulate high-half differences */
    add		edx, stride2		;	/* step ptr2 to the next row */


    ; /* ITERATION 3 */
    movq		mm0, [ebx]		;	/* A = 8 bytes of the current ptr1 row */
    movq		mm1, [edx]		;	/* B = 8 bytes of the current ptr2 row */
    movq		mm2, mm0		;	/* keep a copy of A; mm0 is overwritten next */

    psubusb		mm0, mm1		;	/* A - B, saturated to 0 where B > A */
    psubusb		mm1, mm2		;	/* B - A, saturated to 0 where A > B */
    por		mm0, mm1		;	/* one side is 0 per byte, so OR gives |A - B| */
    movq		mm1, mm0		;	/* second copy for the high four bytes */

    punpcklbw		mm0, mm6		;	/* low four bytes widened to words */
    paddw		mm7, mm0		;	/* accumulate low-half differences */
    punpckhbw		mm1, mm6		;	/* high four bytes widened to words */
    add		ebx, stride1		;	/* step ptr1 to the next row */
    paddw		mm7, mm1		;	/* accumulate high-half differences */
    add		edx, stride2		;	/* step ptr2 to the next row */

    ; /* ITERATION 4 */
    movq		mm0, [ebx]		;	/* A = 8 bytes of the current ptr1 row */
    movq		mm1, [edx]		;	/* B = 8 bytes of the current ptr2 row */
    movq		mm2, mm0		;	/* keep a copy of A; mm0 is overwritten next */

    psubusb		mm0, mm1		;	/* A - B, saturated to 0 where B > A */
    psubusb		mm1, mm2		;	/* B - A, saturated to 0 where A > B */
    por		mm0, mm1		;	/* one side is 0 per byte, so OR gives |A - B| */
    movq		mm1, mm0		;	/* second copy for the high four bytes */

    punpcklbw		mm0, mm6		;	/* low four bytes widened to words */
    paddw		mm7, mm0		;	/* accumulate low-half differences */
    punpckhbw		mm1, mm6		;	/* high four bytes widened to words */
    add		ebx, stride1		;	/* step ptr1 to the next row */
    paddw		mm7, mm1		;	/* accumulate high-half differences */
    add		edx, stride2		;	/* step ptr2 to the next row */


    ; /* ITERATION 5 */
    movq		mm0, [ebx]		;	/* A = 8 bytes of the current ptr1 row */
    movq		mm1, [edx]		;	/* B = 8 bytes of the current ptr2 row */
    movq		mm2, mm0		;	/* keep a copy of A; mm0 is overwritten next */

    psubusb		mm0, mm1		;	/* A - B, saturated to 0 where B > A */
    psubusb		mm1, mm2		;	/* B - A, saturated to 0 where A > B */
    por		mm0, mm1		;	/* one side is 0 per byte, so OR gives |A - B| */
    movq		mm1, mm0		;	/* second copy for the high four bytes */

    punpcklbw		mm0, mm6		;	/* low four bytes widened to words */
    paddw		mm7, mm0		;	/* accumulate low-half differences */
    punpckhbw		mm1, mm6		;	/* high four bytes widened to words */
    add		ebx, stride1		;	/* step ptr1 to the next row */
    paddw		mm7, mm1		;	/* accumulate high-half differences */
    add		edx, stride2		;	/* step ptr2 to the next row */


    ; /* ITERATION 6 */
    movq		mm0, [ebx]		;	/* A = 8 bytes of the current ptr1 row */
    movq		mm1, [edx]		;	/* B = 8 bytes of the current ptr2 row */
    movq		mm2, mm0		;	/* keep a copy of A; mm0 is overwritten next */

    psubusb		mm0, mm1		;	/* A - B, saturated to 0 where B > A */
    psubusb		mm1, mm2		;	/* B - A, saturated to 0 where A > B */
    por		mm0, mm1		;	/* one side is 0 per byte, so OR gives |A - B| */
    movq		mm1, mm0		;	/* second copy for the high four bytes */

    punpcklbw		mm0, mm6		;	/* low four bytes widened to words */
    paddw		mm7, mm0		;	/* accumulate low-half differences */
    punpckhbw		mm1, mm6		;	/* high four bytes widened to words */
    add		ebx, stride1		;	/* step ptr1 to the next row */
    paddw		mm7, mm1		;	/* accumulate high-half differences */
    add		edx, stride2		;	/* step ptr2 to the next row */


    ; /* ITERATION 7 */
    movq		mm0, [ebx]		;	/* A = 8 bytes of the current ptr1 row */
    movq		mm1, [edx]		;	/* B = 8 bytes of the current ptr2 row */
    movq		mm2, mm0		;	/* keep a copy of A; mm0 is overwritten next */

    psubusb		mm0, mm1		;	/* A - B, saturated to 0 where B > A */
    psubusb		mm1, mm2		;	/* B - A, saturated to 0 where A > B */
    por		mm0, mm1		;	/* one side is 0 per byte, so OR gives |A - B| */
    movq		mm1, mm0		;	/* second copy for the high four bytes */

    punpcklbw		mm0, mm6		;	/* low four bytes widened to words */
    paddw		mm7, mm0		;	/* accumulate low-half differences */
    punpckhbw		mm1, mm6		;	/* high four bytes widened to words */
    add		ebx, stride1		;	/* step ptr1 to the next row */
    paddw		mm7, mm1		;	/* accumulate high-half differences */
    add		edx, stride2		;	/* step ptr2 to the next row */



    ; /* ITERATION 8 */
    movq		mm0, [ebx]		;	/* A = 8 bytes of the current ptr1 row */
    movq		mm1, [edx]		;	/* B = 8 bytes of the current ptr2 row */
    movq		mm2, mm0		;	/* keep a copy of A; mm0 is overwritten next */

    psubusb		mm0, mm1		;	/* A - B, saturated to 0 where B > A */
    psubusb		mm1, mm2		;	/* B - A, saturated to 0 where A > B */
    por		mm0, mm1		;	/* one side is 0 per byte, so OR gives |A - B| */
    movq		mm1, mm0		;	/* second copy for the high four bytes */

    punpcklbw		mm0, mm6		;	/* low four bytes widened to words */
    paddw		mm7, mm0		;	/* accumulate low-half differences */
    punpckhbw		mm1, mm6		;	/* high four bytes widened to words */
    add		ebx, stride1		;	/* step ptr1 past the last row (ebx is not read again) */
    paddw		mm7, mm1		;	/* accumulate high-half differences */
    add		edx, stride2		;	/* step ptr2 past the last row (edx is not read again) */



    ; /* ------ fold the four word lanes of mm7 into one total ------ */

    movq		mm0, mm7		;	/* mm0 = copy of lanes w3 w2 w1 w0 */
    psrlq		mm7, 32		;	/* move lanes w3,w2 down onto positions 1,0 */
    paddw		mm7, mm0		;	/* lane 0 = w0 + w2, lane 1 = w1 + w3 */
    movq		mm0, mm7		;	/* copy the two pair sums */
    psrlq		mm7, 16		;	/* move lane 1 down onto position 0 */
    paddw		mm7, mm0		;	/* lane 0 = w0 + w1 + w2 + w3 */
    movd		eax, mm7		;	/* low 32 bits: lane 0 plus leftover data in lane 1 */
    and		    eax, 0xffff		;	/* keep lane 0 only, the complete sum */

    mov         DiffVal, eax
  };

  return DiffVal;

 

#endif
}

/*
 * sad8x8_thres__mmx
 *
 * 8x8 sum of absolute differences with a caller-supplied threshold.
 *
 *   ptr1, stride1 - first block and the byte step between its rows
 *   ptr2, stride2 - second block and the byte step between its rows
 *   thres         - early-exit limit used by the scalar reference only
 *
 * The compiled branch forwards to sad8x8__mmx and does not read thres, so
 * it always returns the value computed over all eight rows.  The disabled
 * scalar reference stops adding rows once the running sum exceeds thres.
 */
static ogg_uint32_t sad8x8_thres__mmx (unsigned char *ptr1, ogg_uint32_t stride1,
                                       unsigned char *ptr2, ogg_uint32_t stride2,
                                       ogg_uint32_t thres)
{
#if 0
  /* Scalar reference: row-by-row SAD with an early exit. */
  ogg_uint32_t sad = 0;
  ogg_uint32_t rows_left = 8;

  while (rows_left != 0) {
    ogg_uint32_t col;

    /* Add the eight byte differences of the current row. */
    for (col = 0; col < 8; col++)
      sad += DSP_OP_ABS_DIFF(ptr1[col], ptr2[col]);

    /* Stop as soon as the running sum is above the threshold. */
    if (sad > thres)
      break;

    /* Step to next row of block. */
    ptr1 += stride1;
    ptr2 += stride2;
    rows_left--;
  }

  return sad;
#else
  return sad8x8__mmx (ptr1, stride1, ptr2, stride2);
#endif
}


static ogg_uint32_t sad8x8_xy2_thres__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
		                      unsigned char *RefDataPtr1,
			              unsigned char *RefDataPtr2, ogg_uint32_t RefStride,
			              ogg_uint32_t thres)
{
#if 0
  ogg_uint32_t  i;  ogg_uint32_t  sad = 0;  for (i=8; i; i--) {    sad += DSP_OP_ABS_DIFF(SrcData[0], DSP_OP_AVG (RefDataPtr1[0], RefDataPtr2[0]));    sad += DSP_OP_ABS_DIFF(SrcData[1], DSP_OP_AVG (RefDataPtr1[1], RefDataPtr2[1]));    sad += DSP_OP_ABS_DIFF(SrcData[2], DSP_OP_AVG (RefDataPtr1[2], RefDataPtr2[2]));    sad += DSP_OP_ABS_DIFF(SrcData[3], DSP_OP_AVG (RefDataPtr1[3], RefDataPtr2[3]));    sad += DSP_OP_ABS_DIFF(SrcData[4], DSP_OP_AVG (RefDataPtr1[4], RefDataPtr2[4]));    sad += DSP_OP_ABS_DIFF(SrcData[5], DSP_OP_AVG (RefDataPtr1[5], RefDataPtr2[5]));    sad += DSP_OP_ABS_DIFF(SrcData[6], DSP_OP_AVG (RefDataPtr1[6], RefDataPtr2[6]));    sad += DSP_OP_ABS_DIFF(SrcData[7], DSP_OP_AVG (RefDataPtr1[7], RefDataPtr2[7]));    if ( sad > thres )      break;    /* Step to next row of block. */    SrcData += SrcStride;    RefDataPtr1 += RefStride;    RefDataPtr2 += RefStride;  }  return sad;
#else
  ogg_uint32_t  DiffVal;

  __asm {
    align 16

        mov     ebx, SrcData
        mov     ecx, RefDataPtr1
        mov     edx, RefDataPtr2


    pcmpeqd		mm5, mm5		;	/* fefefefefefefefe in mm5 */
    paddb		mm5, mm5		;	
				    ;	
    pxor		mm6, mm6		;	/* zero out mm6 for unpack */
    pxor		mm7, mm7		;	/* mm7 contains the result */
    mov		edi, 8		;	/* 8 rows */
    loop_start:				;	
    movq		mm0, [ebx]		;	/* take 8 bytes */

    movq		mm2, [ecx]		;	
    movq		mm3, [edx]		;	/* take average of mm2 and mm3 */
    movq		mm1, mm2		;	
    pand		mm1, mm3		;	
    pxor		mm3, mm2		;	
    pand		mm3, mm5		;	
    psrlq		mm3, 1		;	
    paddb		mm1, mm3		;	

    movq		mm2, mm0		;	

    psubusb		mm0, mm1		;	/* A - B */
    psubusb		mm1, mm2		;	/* B - A */
    por		mm0, mm1		;	/* and or gives abs difference */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -