⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dsp_mmx.c

📁 mediastreamer2是开源的网络传输媒体流的库
💻 C
📖 第 1 页 / 共 4 页
字号:
        psubw		mm2, mm1		/* mm2 = FiltPtr - 128 */
        movq		[ebx], mm0		/* write answer out */
        movq		[8 + ebx], mm2		/* write answer out */
        /* Increment pointers */
        add		ebx, 16		
        add		eax, PixelsPerLine	

    };
 
#endif
}




static void sub8x8avg2__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr1,
                     unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr,
                     ogg_uint32_t PixelsPerLine,
                     ogg_uint32_t ReconPixelsPerLine) 
{

#if 0
  int i;  /* For each block row */  for (i=8; i; i--) {    DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], DSP_OP_AVG (ReconPtr1[0], ReconPtr2[0]));    DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], DSP_OP_AVG (ReconPtr1[1], ReconPtr2[1]));    DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], DSP_OP_AVG (ReconPtr1[2], ReconPtr2[2]));    DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], DSP_OP_AVG (ReconPtr1[3], ReconPtr2[3]));    DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], DSP_OP_AVG (ReconPtr1[4], ReconPtr2[4]));    DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], DSP_OP_AVG (ReconPtr1[5], ReconPtr2[5]));    DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], DSP_OP_AVG (ReconPtr1[6], ReconPtr2[6]));    DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], DSP_OP_AVG (ReconPtr1[7], ReconPtr2[7]));    /* Start next row */    FiltPtr += PixelsPerLine;    ReconPtr1 += ReconPixelsPerLine;    ReconPtr2 += ReconPixelsPerLine;    DctInputPtr += 8;  }
#else

    __asm {
        align 16

            pxor        mm7, mm7

        mov         eax, FiltPtr
        mov         ebx, ReconPtr1
        mov         ecx, ReconPtr2
        mov         edx, DctInputPtr

        /*  ITERATION 1 */	
        movq		mm0, [eax]		;	/* mm0 = FiltPtr */
        movq		mm1, [ebx]		;	/* mm1 = ReconPtr1 */
        movq		mm4, [ecx]		;	/* mm1 = ReconPtr2 */
        movq		mm2, mm0		;	/* dup to prepare for up conversion */
        movq		mm3, mm1		;	/* dup to prepare for up conversion */
        movq		mm5, mm4		;	/* dup to prepare for up conversion */
	        ;	/* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		;	/* mm0 = INT16(FiltPtr) */
        punpcklbw		mm1, mm7		;	/* mm1 = INT16(ReconPtr1) */
        punpcklbw		mm4, mm7		;	/* mm1 = INT16(ReconPtr2) */
        punpckhbw		mm2, mm7		;	/* mm2 = INT16(FiltPtr) */
        punpckhbw		mm3, mm7		;	/* mm3 = INT16(ReconPtr1) */
        punpckhbw		mm5, mm7		;	/* mm3 = INT16(ReconPtr2) */
	        ;	/* average ReconPtr1 and ReconPtr2 */
        paddw		mm1, mm4		;	/* mm1 = ReconPtr1 + ReconPtr2 */
        paddw		mm3, mm5		;	/* mm3 = ReconPtr1 + ReconPtr2 */
        psrlw		mm1, 1		;	/* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
        psrlw		mm3, 1		;	/* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
        psubw		mm0, mm1		;	/* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
        psubw		mm2, mm3		;	/* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
        movq		[edx], mm0		;	/* write answer out */
        movq		[8 + edx], mm2		;	/* write answer out */
	        ;	/* Increment pointers */
        add		edx, 16		;	
        add		eax, PixelsPerLine		;	
        add		ebx, ReconPixelsPerLine		;	
        add		ecx, ReconPixelsPerLine		;	
	

        /*  ITERATION 2 */	
        movq		mm0, [eax]		;	/* mm0 = FiltPtr */
        movq		mm1, [ebx]		;	/* mm1 = ReconPtr1 */
        movq		mm4, [ecx]		;	/* mm1 = ReconPtr2 */
        movq		mm2, mm0		;	/* dup to prepare for up conversion */
        movq		mm3, mm1		;	/* dup to prepare for up conversion */
        movq		mm5, mm4		;	/* dup to prepare for up conversion */
	        ;	/* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		;	/* mm0 = INT16(FiltPtr) */
        punpcklbw		mm1, mm7		;	/* mm1 = INT16(ReconPtr1) */
        punpcklbw		mm4, mm7		;	/* mm1 = INT16(ReconPtr2) */
        punpckhbw		mm2, mm7		;	/* mm2 = INT16(FiltPtr) */
        punpckhbw		mm3, mm7		;	/* mm3 = INT16(ReconPtr1) */
        punpckhbw		mm5, mm7		;	/* mm3 = INT16(ReconPtr2) */
	        ;	/* average ReconPtr1 and ReconPtr2 */
        paddw		mm1, mm4		;	/* mm1 = ReconPtr1 + ReconPtr2 */
        paddw		mm3, mm5		;	/* mm3 = ReconPtr1 + ReconPtr2 */
        psrlw		mm1, 1		;	/* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
        psrlw		mm3, 1		;	/* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
        psubw		mm0, mm1		;	/* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
        psubw		mm2, mm3		;	/* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
        movq		[edx], mm0		;	/* write answer out */
        movq		[8 + edx], mm2		;	/* write answer out */
	        ;	/* Increment pointers */
        add		edx, 16		;	
        add		eax, PixelsPerLine		;	
        add		ebx, ReconPixelsPerLine		;	
        add		ecx, ReconPixelsPerLine		;	


        /*  ITERATION 3 */	
        movq		mm0, [eax]		;	/* mm0 = FiltPtr */
        movq		mm1, [ebx]		;	/* mm1 = ReconPtr1 */
        movq		mm4, [ecx]		;	/* mm1 = ReconPtr2 */
        movq		mm2, mm0		;	/* dup to prepare for up conversion */
        movq		mm3, mm1		;	/* dup to prepare for up conversion */
        movq		mm5, mm4		;	/* dup to prepare for up conversion */
	        ;	/* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		;	/* mm0 = INT16(FiltPtr) */
        punpcklbw		mm1, mm7		;	/* mm1 = INT16(ReconPtr1) */
        punpcklbw		mm4, mm7		;	/* mm1 = INT16(ReconPtr2) */
        punpckhbw		mm2, mm7		;	/* mm2 = INT16(FiltPtr) */
        punpckhbw		mm3, mm7		;	/* mm3 = INT16(ReconPtr1) */
        punpckhbw		mm5, mm7		;	/* mm3 = INT16(ReconPtr2) */
	        ;	/* average ReconPtr1 and ReconPtr2 */
        paddw		mm1, mm4		;	/* mm1 = ReconPtr1 + ReconPtr2 */
        paddw		mm3, mm5		;	/* mm3 = ReconPtr1 + ReconPtr2 */
        psrlw		mm1, 1		;	/* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
        psrlw		mm3, 1		;	/* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
        psubw		mm0, mm1		;	/* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
        psubw		mm2, mm3		;	/* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
        movq		[edx], mm0		;	/* write answer out */
        movq		[8 + edx], mm2		;	/* write answer out */
	        ;	/* Increment pointers */
        add		edx, 16		;	
        add		eax, PixelsPerLine		;	
        add		ebx, ReconPixelsPerLine		;	
        add		ecx, ReconPixelsPerLine		;	


        /*  ITERATION 4 */	
        movq		mm0, [eax]		;	/* mm0 = FiltPtr */
        movq		mm1, [ebx]		;	/* mm1 = ReconPtr1 */
        movq		mm4, [ecx]		;	/* mm1 = ReconPtr2 */
        movq		mm2, mm0		;	/* dup to prepare for up conversion */
        movq		mm3, mm1		;	/* dup to prepare for up conversion */
        movq		mm5, mm4		;	/* dup to prepare for up conversion */
	        ;	/* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		;	/* mm0 = INT16(FiltPtr) */
        punpcklbw		mm1, mm7		;	/* mm1 = INT16(ReconPtr1) */
        punpcklbw		mm4, mm7		;	/* mm1 = INT16(ReconPtr2) */
        punpckhbw		mm2, mm7		;	/* mm2 = INT16(FiltPtr) */
        punpckhbw		mm3, mm7		;	/* mm3 = INT16(ReconPtr1) */
        punpckhbw		mm5, mm7		;	/* mm3 = INT16(ReconPtr2) */
	        ;	/* average ReconPtr1 and ReconPtr2 */
        paddw		mm1, mm4		;	/* mm1 = ReconPtr1 + ReconPtr2 */
        paddw		mm3, mm5		;	/* mm3 = ReconPtr1 + ReconPtr2 */
        psrlw		mm1, 1		;	/* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
        psrlw		mm3, 1		;	/* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
        psubw		mm0, mm1		;	/* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
        psubw		mm2, mm3		;	/* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
        movq		[edx], mm0		;	/* write answer out */
        movq		[8 + edx], mm2		;	/* write answer out */
	        ;	/* Increment pointers */
        add		edx, 16		;	
        add		eax, PixelsPerLine		;	
        add		ebx, ReconPixelsPerLine		;	
        add		ecx, ReconPixelsPerLine		;	


        /*  ITERATION 5 */	
        movq		mm0, [eax]		;	/* mm0 = FiltPtr */
        movq		mm1, [ebx]		;	/* mm1 = ReconPtr1 */
        movq		mm4, [ecx]		;	/* mm1 = ReconPtr2 */
        movq		mm2, mm0		;	/* dup to prepare for up conversion */
        movq		mm3, mm1		;	/* dup to prepare for up conversion */
        movq		mm5, mm4		;	/* dup to prepare for up conversion */
	        ;	/* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		;	/* mm0 = INT16(FiltPtr) */
        punpcklbw		mm1, mm7		;	/* mm1 = INT16(ReconPtr1) */
        punpcklbw		mm4, mm7		;	/* mm1 = INT16(ReconPtr2) */
        punpckhbw		mm2, mm7		;	/* mm2 = INT16(FiltPtr) */
        punpckhbw		mm3, mm7		;	/* mm3 = INT16(ReconPtr1) */
        punpckhbw		mm5, mm7		;	/* mm3 = INT16(ReconPtr2) */
	        ;	/* average ReconPtr1 and ReconPtr2 */
        paddw		mm1, mm4		;	/* mm1 = ReconPtr1 + ReconPtr2 */
        paddw		mm3, mm5		;	/* mm3 = ReconPtr1 + ReconPtr2 */
        psrlw		mm1, 1		;	/* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
        psrlw		mm3, 1		;	/* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
        psubw		mm0, mm1		;	/* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
        psubw		mm2, mm3		;	/* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
        movq		[edx], mm0		;	/* write answer out */
        movq		[8 + edx], mm2		;	/* write answer out */
	        ;	/* Increment pointers */
        add		edx, 16		;	
        add		eax, PixelsPerLine		;	
        add		ebx, ReconPixelsPerLine		;	
        add		ecx, ReconPixelsPerLine		;	


        /*  ITERATION 6 */	
        movq		mm0, [eax]		;	/* mm0 = FiltPtr */
        movq		mm1, [ebx]		;	/* mm1 = ReconPtr1 */
        movq		mm4, [ecx]		;	/* mm1 = ReconPtr2 */
        movq		mm2, mm0		;	/* dup to prepare for up conversion */
        movq		mm3, mm1		;	/* dup to prepare for up conversion */
        movq		mm5, mm4		;	/* dup to prepare for up conversion */
	        ;	/* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		;	/* mm0 = INT16(FiltPtr) */
        punpcklbw		mm1, mm7		;	/* mm1 = INT16(ReconPtr1) */
        punpcklbw		mm4, mm7		;	/* mm1 = INT16(ReconPtr2) */
        punpckhbw		mm2, mm7		;	/* mm2 = INT16(FiltPtr) */
        punpckhbw		mm3, mm7		;	/* mm3 = INT16(ReconPtr1) */
        punpckhbw		mm5, mm7		;	/* mm3 = INT16(ReconPtr2) */
	        ;	/* average ReconPtr1 and ReconPtr2 */
        paddw		mm1, mm4		;	/* mm1 = ReconPtr1 + ReconPtr2 */
        paddw		mm3, mm5		;	/* mm3 = ReconPtr1 + ReconPtr2 */
        psrlw		mm1, 1		;	/* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
        psrlw		mm3, 1		;	/* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
        psubw		mm0, mm1		;	/* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
        psubw		mm2, mm3		;	/* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
        movq		[edx], mm0		;	/* write answer out */
        movq		[8 + edx], mm2		;	/* write answer out */
	        ;	/* Increment pointers */
        add		edx, 16		;	
        add		eax, PixelsPerLine		;	
        add		ebx, ReconPixelsPerLine		;	
        add		ecx, ReconPixelsPerLine		;	


        /*  ITERATION 7 */	
        movq		mm0, [eax]		;	/* mm0 = FiltPtr */
        movq		mm1, [ebx]		;	/* mm1 = ReconPtr1 */
        movq		mm4, [ecx]		;	/* mm1 = ReconPtr2 */
        movq		mm2, mm0		;	/* dup to prepare for up conversion */
        movq		mm3, mm1		;	/* dup to prepare for up conversion */
        movq		mm5, mm4		;	/* dup to prepare for up conversion */
	        ;	/* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		;	/* mm0 = INT16(FiltPtr) */
        punpcklbw		mm1, mm7		;	/* mm1 = INT16(ReconPtr1) */
        punpcklbw		mm4, mm7		;	/* mm1 = INT16(ReconPtr2) */
        punpckhbw		mm2, mm7		;	/* mm2 = INT16(FiltPtr) */
        punpckhbw		mm3, mm7		;	/* mm3 = INT16(ReconPtr1) */
        punpckhbw		mm5, mm7		;	/* mm3 = INT16(ReconPtr2) */
	        ;	/* average ReconPtr1 and ReconPtr2 */
        paddw		mm1, mm4		;	/* mm1 = ReconPtr1 + ReconPtr2 */
        paddw		mm3, mm5		;	/* mm3 = ReconPtr1 + ReconPtr2 */
        psrlw		mm1, 1		;	/* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
        psrlw		mm3, 1		;	/* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
        psubw		mm0, mm1		;	/* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
        psubw		mm2, mm3		;	/* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
        movq		[edx], mm0		;	/* write answer out */
        movq		[8 + edx], mm2		;	/* write answer out */
	        ;	/* Increment pointers */
        add		edx, 16		;	
        add		eax, PixelsPerLine		;	
        add		ebx, ReconPixelsPerLine		;	
        add		ecx, ReconPixelsPerLine		;	


        /*  ITERATION 8 */	
        movq		mm0, [eax]		;	/* mm0 = FiltPtr */
        movq		mm1, [ebx]		;	/* mm1 = ReconPtr1 */
        movq		mm4, [ecx]		;	/* mm1 = ReconPtr2 */
        movq		mm2, mm0		;	/* dup to prepare for up conversion */
        movq		mm3, mm1		;	/* dup to prepare for up conversion */
        movq		mm5, mm4		;	/* dup to prepare for up conversion */
	        ;	/* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		;	/* mm0 = INT16(FiltPtr) */
        punpcklbw		mm1, mm7		;	/* mm1 = INT16(ReconPtr1) */
        punpcklbw		mm4, mm7		;	/* mm1 = INT16(ReconPtr2) */
        punpckhbw		mm2, mm7		;	/* mm2 = INT16(FiltPtr) */
        punpckhbw		mm3, mm7		;	/* mm3 = INT16(ReconPtr1) */
        punpckhbw		mm5, mm7		;	/* mm3 = INT16(ReconPtr2) */
	        ;	/* average ReconPtr1 and ReconPtr2 */
        paddw		mm1, mm4		;	/* mm1 = ReconPtr1 + ReconPtr2 */
        paddw		mm3, mm5		;	/* mm3 = ReconPtr1 + ReconPtr2 */
        psrlw		mm1, 1		;	/* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
        psrlw		mm3, 1		;	/* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
        psubw		mm0, mm1		;	/* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
        psubw		mm2, mm3		;	/* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
        movq		[edx], mm0		;	/* write answer out */
        movq		[8 + edx], mm2		;	/* write answer out */
	        ;	/* Increment pointers */
        add		edx, 16		;	
        add		eax, PixelsPerLine		;	
        add		ebx, ReconPixelsPerLine		;	
        add		ecx, ReconPixelsPerLine		;	

    };




 
#endif
}

static ogg_uint32_t row_sad8__mmx (unsigned char *Src1, unsigned char *Src2)
{

#if 0
  ogg_uint32_t SadValue;  ogg_uint32_t SadValue1;  SadValue    = DSP_OP_ABS_DIFF (Src1[0], Src2[0]) + 	        DSP_OP_ABS_DIFF (Src1[1], Src2[1]) +	        DSP_OP_ABS_DIFF (Src1[2], Src2[2]) +	        DSP_OP_ABS_DIFF (Src1[3], Src2[3]);  SadValue1   = DSP_OP_ABS_DIFF (Src1[4], Src2[4]) + 	        DSP_OP_ABS_DIFF (Src1[5], Src2[5]) +	        DSP_OP_ABS_DIFF (Src1[6], Src2[6]) +	        DSP_OP_ABS_DIFF (Src1[7], Src2[7]);  SadValue = ( SadValue > SadValue1 ) ? SadValue : SadValue1;  return SadValue;

#else
  ogg_uint32_t MaxSad;

  
  __asm {
    align       16
    mov         ebx, Src1
    mov         ecx, Src2


    pxor		mm6, mm6		;	/* zero out mm6 for unpack */
    pxor		mm7, mm7		;	/* zero out mm7 for unpack */
    movq		mm0, [ebx]		;	/* take 8 bytes */
    movq		mm1, [ecx]		;	

    movq		mm2, mm0		;	
    psubusb		mm0, mm1		;	/* A - B */
    psubusb		mm1, mm2		;	/* B - A */
    por		mm0, mm1		;	/* and or gives abs difference */

    movq		mm1, mm0		;	

    punpcklbw		mm0, mm6		;	/* ; unpack low four bytes to higher precision */
    punpckhbw		mm1, mm7		;	/* ; unpack high four bytes to higher precision */

    movq		mm2, mm0		;	
    movq		mm3, mm1		;	
    psrlq		mm2, 32		;	/* fold and add */
    psrlq		mm3, 32		;	
    paddw		mm0, mm2		;	
    paddw		mm1, mm3		;	
    movq		mm2, mm0		;	
    movq		mm3, mm1		;	
    psrlq		mm2, 16		;	
    psrlq		mm3, 16		;	
    paddw		mm0, mm2		;	
    paddw		mm1, mm3		;	

    psubusw		mm1, mm0		;	
    paddw		mm1, mm0		;	/* mm1 = max(mm1, mm0) */
    movd		eax, mm1		;

    and         eax, 0xffff
    mov         MaxSad, eax
  };
   return MaxSad;
  
  
  
 

#endif
}




static ogg_uint32_t col_sad8x8__mmx (unsigned char *Src1, unsigned char *Src2,
		                    ogg_uint32_t stride)
{

#if 0
  ogg_uint32_t SadValue[8] = {0,0,0,0,0,0,0,0};  ogg_uint32_t SadValue2[8] = {0,0,0,0,0,0,0,0};  ogg_uint32_t MaxSad = 0;  ogg_uint32_t i;  for ( i = 0; i < 4; i++ ){    SadValue[0] += abs(Src1[0] - Src2[0]);    SadValue[1] += abs(Src1[1] - Src2[1]);    SadValue[2] += abs(Src1[2] - Src2[2]);    SadValue[3] += abs(Src1[3] - Src2[3]);    SadValue[4] += abs(Src1[4] - Src2[4]);    SadValue[5] += abs(Src1[5] - Src2[5]);    SadValue[6] += abs(Src1[6] - Src2[6]);    SadValue[7] += abs(Src1[7] - Src2[7]);        Src1 += stride;    Src2 += stride;  }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -