⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dsp_mmx.c

📁 mediastreamer2是开源的网络传输媒体流的库
💻 C
📖 第 1 页 / 共 4 页
字号:
/********************************************************************
 *                                                                  *
 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003                *
 * by the Xiph.Org Foundation http://www.xiph.org/                  *
 *                                                                  *
 ********************************************************************

  function:
  last mod: $Id: mcomp.c,v 1.8 2003/12/03 08:59:41 arc Exp $

 ********************************************************************/

#include <stdlib.h>

#include "codec_internal.h"
#include "dsp.h"

#if 0
//These are to let me selectively enable the C versions, these are needed
#define DSP_OP_AVG(a,b) ((((int)(a)) + ((int)(b)))/2)
#define DSP_OP_DIFF(a,b) (((int)(a)) - ((int)(b)))
#define DSP_OP_ABS_DIFF(a,b) abs((((int)(a)) - ((int)(b))))
#endif


static const ogg_int64_t V128 = 0x0080008000800080LL;

static void sub8x8__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr,
                  ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine,
                  ogg_uint32_t ReconPixelsPerLine) 
{

    //Make non-zero to use the C-version
#if 0
  int i;  /* For each block row */  for (i=8; i; i--) {    DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], ReconPtr[0]);    DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], ReconPtr[1]);    DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], ReconPtr[2]);    DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], ReconPtr[3]);    DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], ReconPtr[4]);    DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], ReconPtr[5]);    DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], ReconPtr[6]);    DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], ReconPtr[7]);    /* Start next row */    FiltPtr += PixelsPerLine;    ReconPtr += ReconPixelsPerLine;    DctInputPtr += 8;  }
#else
    __asm {
        align 16

        pxor		mm7, mm7	

        mov     eax, FiltPtr
        mov     ebx, ReconPtr
        mov     edx, DctInputPtr

     /* You can't use rept in inline masm and macro parsing seems screwed with inline asm*/		
     
     /* ITERATION 1 */
        movq		mm0, [eax]		/* mm0 = FiltPtr */
        movq		mm1, [ebx]		/* mm1 = ReconPtr */
        movq		mm2, mm0		/* dup to prepare for up conversion */
        movq		mm3, mm1		/* dup to prepare for up conversion */
        /* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
        punpcklbw		mm1, mm7		/* mm1 = INT16(ReconPtr) */
        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
        punpckhbw		mm3, mm7		/* mm3 = INT16(ReconPtr) */
        /* start calculation */
        psubw		mm0, mm1		/* mm0 = FiltPtr - ReconPtr */
        psubw		mm2, mm3		/* mm2 = FiltPtr - ReconPtr */
        movq		[edx], mm0		/* write answer out */
        movq		[8 + edx], mm2		/* write answer out */
        /* Increment pointers */
        add		edx, 16		
        add		eax, PixelsPerLine		
        add		ebx, ReconPixelsPerLine


     /* ITERATION 2 */
        movq		mm0, [eax]		/* mm0 = FiltPtr */
        movq		mm1, [ebx]		/* mm1 = ReconPtr */
        movq		mm2, mm0		/* dup to prepare for up conversion */
        movq		mm3, mm1		/* dup to prepare for up conversion */
        /* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
        punpcklbw		mm1, mm7		/* mm1 = INT16(ReconPtr) */
        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
        punpckhbw		mm3, mm7		/* mm3 = INT16(ReconPtr) */
        /* start calculation */
        psubw		mm0, mm1		/* mm0 = FiltPtr - ReconPtr */
        psubw		mm2, mm3		/* mm2 = FiltPtr - ReconPtr */
        movq		[edx], mm0		/* write answer out */
        movq		[8 + edx], mm2		/* write answer out */
        /* Increment pointers */
        add		edx, 16		
        add		eax, PixelsPerLine		
        add		ebx, ReconPixelsPerLine


     /* ITERATION 3 */
        movq		mm0, [eax]		/* mm0 = FiltPtr */
        movq		mm1, [ebx]		/* mm1 = ReconPtr */
        movq		mm2, mm0		/* dup to prepare for up conversion */
        movq		mm3, mm1		/* dup to prepare for up conversion */
        /* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
        punpcklbw		mm1, mm7		/* mm1 = INT16(ReconPtr) */
        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
        punpckhbw		mm3, mm7		/* mm3 = INT16(ReconPtr) */
        /* start calculation */
        psubw		mm0, mm1		/* mm0 = FiltPtr - ReconPtr */
        psubw		mm2, mm3		/* mm2 = FiltPtr - ReconPtr */
        movq		[edx], mm0		/* write answer out */
        movq		[8 + edx], mm2		/* write answer out */
        /* Increment pointers */
        add		edx, 16		
        add		eax, PixelsPerLine		
        add		ebx, ReconPixelsPerLine


     /* ITERATION 4 */
        movq		mm0, [eax]		/* mm0 = FiltPtr */
        movq		mm1, [ebx]		/* mm1 = ReconPtr */
        movq		mm2, mm0		/* dup to prepare for up conversion */
        movq		mm3, mm1		/* dup to prepare for up conversion */
        /* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
        punpcklbw		mm1, mm7		/* mm1 = INT16(ReconPtr) */
        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
        punpckhbw		mm3, mm7		/* mm3 = INT16(ReconPtr) */
        /* start calculation */
        psubw		mm0, mm1		/* mm0 = FiltPtr - ReconPtr */
        psubw		mm2, mm3		/* mm2 = FiltPtr - ReconPtr */
        movq		[edx], mm0		/* write answer out */
        movq		[8 + edx], mm2		/* write answer out */
        /* Increment pointers */
        add		edx, 16		
        add		eax, PixelsPerLine		
        add		ebx, ReconPixelsPerLine


     /* ITERATION 5 */
        movq		mm0, [eax]		/* mm0 = FiltPtr */
        movq		mm1, [ebx]		/* mm1 = ReconPtr */
        movq		mm2, mm0		/* dup to prepare for up conversion */
        movq		mm3, mm1		/* dup to prepare for up conversion */
        /* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
        punpcklbw		mm1, mm7		/* mm1 = INT16(ReconPtr) */
        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
        punpckhbw		mm3, mm7		/* mm3 = INT16(ReconPtr) */
        /* start calculation */
        psubw		mm0, mm1		/* mm0 = FiltPtr - ReconPtr */
        psubw		mm2, mm3		/* mm2 = FiltPtr - ReconPtr */
        movq		[edx], mm0		/* write answer out */
        movq		[8 + edx], mm2		/* write answer out */
        /* Increment pointers */
        add		edx, 16		
        add		eax, PixelsPerLine		
        add		ebx, ReconPixelsPerLine


     /* ITERATION 6 */
        movq		mm0, [eax]		/* mm0 = FiltPtr */
        movq		mm1, [ebx]		/* mm1 = ReconPtr */
        movq		mm2, mm0		/* dup to prepare for up conversion */
        movq		mm3, mm1		/* dup to prepare for up conversion */
        /* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
        punpcklbw		mm1, mm7		/* mm1 = INT16(ReconPtr) */
        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
        punpckhbw		mm3, mm7		/* mm3 = INT16(ReconPtr) */
        /* start calculation */
        psubw		mm0, mm1		/* mm0 = FiltPtr - ReconPtr */
        psubw		mm2, mm3		/* mm2 = FiltPtr - ReconPtr */
        movq		[edx], mm0		/* write answer out */
        movq		[8 + edx], mm2		/* write answer out */
        /* Increment pointers */
        add		edx, 16		
        add		eax, PixelsPerLine		
        add		ebx, ReconPixelsPerLine


     /* ITERATION 7 */
        movq		mm0, [eax]		/* mm0 = FiltPtr */
        movq		mm1, [ebx]		/* mm1 = ReconPtr */
        movq		mm2, mm0		/* dup to prepare for up conversion */
        movq		mm3, mm1		/* dup to prepare for up conversion */
        /* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
        punpcklbw		mm1, mm7		/* mm1 = INT16(ReconPtr) */
        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
        punpckhbw		mm3, mm7		/* mm3 = INT16(ReconPtr) */
        /* start calculation */
        psubw		mm0, mm1		/* mm0 = FiltPtr - ReconPtr */
        psubw		mm2, mm3		/* mm2 = FiltPtr - ReconPtr */
        movq		[edx], mm0		/* write answer out */
        movq		[8 + edx], mm2		/* write answer out */
        /* Increment pointers */
        add		edx, 16		
        add		eax, PixelsPerLine		
        add		ebx, ReconPixelsPerLine


     /* ITERATION 8 */
        movq		mm0, [eax]		/* mm0 = FiltPtr */
        movq		mm1, [ebx]		/* mm1 = ReconPtr */
        movq		mm2, mm0		/* dup to prepare for up conversion */
        movq		mm3, mm1		/* dup to prepare for up conversion */
        /* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
        punpcklbw		mm1, mm7		/* mm1 = INT16(ReconPtr) */
        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
        punpckhbw		mm3, mm7		/* mm3 = INT16(ReconPtr) */
        /* start calculation */
        psubw		mm0, mm1		/* mm0 = FiltPtr - ReconPtr */
        psubw		mm2, mm3		/* mm2 = FiltPtr - ReconPtr */
        movq		[edx], mm0		/* write answer out */
        movq		[8 + edx], mm2		/* write answer out */
        /* Increment pointers */
        add		edx, 16		
        add		eax, PixelsPerLine		
        add		ebx, ReconPixelsPerLine



     

    };
 
#endif
}

static void sub8x8_128__mmx (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr,
                      ogg_uint32_t PixelsPerLine) 
{

#if 0
  int i;  /* For each block row */  for (i=8; i; i--) {    /* INTRA mode so code raw image data */    /* We convert the data to 8 bit signed (by subtracting 128) as       this reduces the internal precision requirments in the DCT       transform. */    DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], 128);    DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], 128);    DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], 128);    DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], 128);    DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], 128);    DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], 128);    DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], 128);    DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], 128);    /* Start next row */    FiltPtr += PixelsPerLine;    DctInputPtr += 8;  }

#else
    __asm {
        align 16

        pxor		mm7, mm7		

        mov         eax, FiltPtr
        mov         ebx, DctInputPtr

        movq		mm1, V128

        /*  ITERATION 1 */		
        movq		mm0, [eax]		/* mm0 = FiltPtr */
        movq		mm2, mm0		/* dup to prepare for up conversion */
        /* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
        /* start calculation */
        psubw		mm0, mm1		/* mm0 = FiltPtr - 128 */
        psubw		mm2, mm1		/* mm2 = FiltPtr - 128 */
        movq		[ebx], mm0		/* write answer out */
        movq		[8 + ebx], mm2		/* write answer out */
        /* Increment pointers */
        add		ebx, 16		
        add		eax, PixelsPerLine	


        /*  ITERATION 2 */		
        movq		mm0, [eax]		/* mm0 = FiltPtr */
        movq		mm2, mm0		/* dup to prepare for up conversion */
        /* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
        /* start calculation */
        psubw		mm0, mm1		/* mm0 = FiltPtr - 128 */
        psubw		mm2, mm1		/* mm2 = FiltPtr - 128 */
        movq		[ebx], mm0		/* write answer out */
        movq		[8 + ebx], mm2		/* write answer out */
        /* Increment pointers */
        add		ebx, 16		
        add		eax, PixelsPerLine	


        /*  ITERATION 3 */		
        movq		mm0, [eax]		/* mm0 = FiltPtr */
        movq		mm2, mm0		/* dup to prepare for up conversion */
        /* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
        /* start calculation */
        psubw		mm0, mm1		/* mm0 = FiltPtr - 128 */
        psubw		mm2, mm1		/* mm2 = FiltPtr - 128 */
        movq		[ebx], mm0		/* write answer out */
        movq		[8 + ebx], mm2		/* write answer out */
        /* Increment pointers */
        add		ebx, 16		
        add		eax, PixelsPerLine	


        /*  ITERATION 4 */		
        movq		mm0, [eax]		/* mm0 = FiltPtr */
        movq		mm2, mm0		/* dup to prepare for up conversion */
        /* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
        /* start calculation */
        psubw		mm0, mm1		/* mm0 = FiltPtr - 128 */
        psubw		mm2, mm1		/* mm2 = FiltPtr - 128 */
        movq		[ebx], mm0		/* write answer out */
        movq		[8 + ebx], mm2		/* write answer out */
        /* Increment pointers */
        add		ebx, 16		
        add		eax, PixelsPerLine	


        /*  ITERATION 5 */		
        movq		mm0, [eax]		/* mm0 = FiltPtr */
        movq		mm2, mm0		/* dup to prepare for up conversion */
        /* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
        /* start calculation */
        psubw		mm0, mm1		/* mm0 = FiltPtr - 128 */
        psubw		mm2, mm1		/* mm2 = FiltPtr - 128 */
        movq		[ebx], mm0		/* write answer out */
        movq		[8 + ebx], mm2		/* write answer out */
        /* Increment pointers */
        add		ebx, 16		
        add		eax, PixelsPerLine	


        /*  ITERATION 6 */		
        movq		mm0, [eax]		/* mm0 = FiltPtr */
        movq		mm2, mm0		/* dup to prepare for up conversion */
        /* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
        /* start calculation */
        psubw		mm0, mm1		/* mm0 = FiltPtr - 128 */
        psubw		mm2, mm1		/* mm2 = FiltPtr - 128 */
        movq		[ebx], mm0		/* write answer out */
        movq		[8 + ebx], mm2		/* write answer out */
        /* Increment pointers */
        add		ebx, 16		
        add		eax, PixelsPerLine	


        /*  ITERATION 7 */		
        movq		mm0, [eax]		/* mm0 = FiltPtr */
        movq		mm2, mm0		/* dup to prepare for up conversion */
        /* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
        /* start calculation */
        psubw		mm0, mm1		/* mm0 = FiltPtr - 128 */
        psubw		mm2, mm1		/* mm2 = FiltPtr - 128 */
        movq		[ebx], mm0		/* write answer out */
        movq		[8 + ebx], mm2		/* write answer out */
        /* Increment pointers */
        add		ebx, 16		
        add		eax, PixelsPerLine	


        /*  ITERATION 8 */		
        movq		mm0, [eax]		/* mm0 = FiltPtr */
        movq		mm2, mm0		/* dup to prepare for up conversion */
        /* convert from UINT8 to INT16 */
        punpcklbw		mm0, mm7		/* mm0 = INT16(FiltPtr) */
        punpckhbw		mm2, mm7		/* mm2 = INT16(FiltPtr) */
        /* start calculation */
        psubw		mm0, mm1		/* mm0 = FiltPtr - 128 */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -