/*
 * mmxmotio.c
 *
 * Code-viewer page header: from "Helix player source code for Symbian";
 * C language source; 1,081 lines in total; page 1 of 2.
 */
/* ***** BEGIN LICENSE BLOCK *****
 * Source last modified: $Id: mmxmotio.c,v 1.1.1.1.42.1 2004/07/09 01:56:22 hubbe Exp $
 * 
 * Portions Copyright (c) 1995-2004 RealNetworks, Inc. All Rights Reserved.
 * 
 * The contents of this file, and the files included with this file,
 * are subject to the current version of the RealNetworks Public
 * Source License (the "RPSL") available at
 * http://www.helixcommunity.org/content/rpsl unless you have licensed
 * the file under the current version of the RealNetworks Community
 * Source License (the "RCSL") available at
 * http://www.helixcommunity.org/content/rcsl, in which case the RCSL
 * will apply. You may also obtain the license terms directly from
 * RealNetworks.  You may not use this file except in compliance with
 * the RPSL or, if you have a valid RCSL with RealNetworks applicable
 * to this file, the RCSL.  Please see the applicable RPSL or RCSL for
 * the rights, obligations and limitations governing use of the
 * contents of the file.
 * 
 * Alternatively, the contents of this file may be used under the
 * terms of the GNU General Public License Version 2 or later (the
 * "GPL") in which case the provisions of the GPL are applicable
 * instead of those above. If you wish to allow use of your version of
 * this file only under the terms of the GPL, and not to allow others
 * to use your version of this file under the terms of either the RPSL
 * or RCSL, indicate your decision by deleting the provisions above
 * and replace them with the notice and other provisions required by
 * the GPL. If you do not delete the provisions above, a recipient may
 * use your version of this file under the terms of any one of the
 * RPSL, the RCSL or the GPL.
 * 
 * This file is part of the Helix DNA Technology. RealNetworks is the
 * developer of the Original Code and owns the copyrights in the
 * portions it created.
 * 
 * This file, and the files included with this file, is distributed
 * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
 * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
 * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
 * ENJOYMENT OR NON-INFRINGEMENT.
 * 
 * Technology Compatibility Kit Test Suite(s) Location:
 *    http://www.helixcommunity.org/content/tck
 * 
 * Contributor(s):
 * 
 * ***** END LICENSE BLOCK ***** */

#include "dllindex.h"
#include "h261defs.h"
#include "h261func.h"

/* Globals defined in another translation unit. None of them is referenced
 * by the routines visible in this part of the file. */
extern int  g_saveMbHor;      // Hold on to horizontal size of Mb array (used by distUmv,
extern int  g_saveMbVert;     //  TryInter4V and its subroutines); vertical counterpart
extern int  g_saveUmvMode;    // Hold on to unrestrictedMv (used by RefineMotionVector)
extern int  g_savePointOutside;   // Hold on to pointOutside (used by rmvDist)
extern int  g_distPixels;     // Number of pixels used in squared error computations





/* 64-bit byte masks, one MMX register wide. Byte 0 is the least significant
 * byte. The masks are not used by the routines visible in this part of the
 * file. */
static const __int64 const_0x00ff00ff00ff00ff = 0x00ff00ff00ff00ff;		// keeps bytes 0, 2, 4, 6 (low byte of every 16-bit lane)
static const __int64 const_0x000000ff000000ff = 0x000000ff000000ff;// keeps bytes 0 and 4
static const __int64 const_0x00ff000000ff0000 = 0x00ff000000ff0000;// keeps bytes 2 and 6
static const __int64 const_0x00ff00ff000000ff = 0x00ff00ff000000ff;	// keeps bytes 0, 4 and 6 (original note: "== shift 4 8 10" - meaning not evident here, TODO confirm)

/* Four 16-bit lanes each holding the same small integer. */
static const __int64 const_0x0001000100010001 = 0x0001000100010001;	// 1 per lane: the "+1" rounding term of the two-tap averages below
static const __int64 const_0x0002000200020002 = 0x0002000200020002;	// 2 per lane: presumably the rounding term of a four-tap average - verify against its users


/*-- mcomp.c -----------------------------------------------------------------------------------*/

/*
 * mc16pelsNoInterpolMMX - copy a block that is 16 bytes wide and vSize rows
 * high, without any interpolation.
 *
 *   inpix   address of the first byte of the top source row
 *   outpix  address of the first byte of the top destination row
 *   hdim    byte distance between consecutive rows; the same value is used
 *           for the source and the destination
 *   vSize   number of rows to copy; nothing is copied when vSize <= 0
 *
 * Reference behaviour in C (PIXEL is presumably one byte - confirm in
 * h261defs.h):
 *
 *     while (vSize > 0) {
 *         copy 16 bytes from in to out;
 *         in += hdim;  out += hdim;  --vSize;
 *     }
 */
static void mc16pelsNoInterpolMMX( PIXEL *inpix, PIXEL *outpix, int hdim, int vSize )
{
	/* The assembly loop tests its counter at the bottom, so on its own it
	 * would copy one row even for vSize <= 0. Bail out first to match the
	 * reference loop and mc4pelsNoInterpolMMX. */
	if (vSize <= 0)
		return;

__asm {
	mov		ecx, vSize			;// ecx = rows still to copy
	mov		esi, inpix			;// esi = source base
	mov		edi, outpix			;// edi = destination base
	xor		ebx, ebx			;// ebx = byte offset of the current row
mc16pelsNoInterpolMMX_loop:
		movq	mm0, [esi + ebx]		;// source bytes 0..7
		movq	mm1, [esi + ebx + 8]	;// source bytes 8..15
		movq	[edi + ebx], mm0
		movq	[edi + ebx + 8], mm1
		add		ebx, hdim			;// step to the next row
		dec		ecx					;// sets the flags tested by jg
	jg	mc16pelsNoInterpolMMX_loop
	emms							;// leave MMX state so x87 code can run afterwards
    }
}


/*
 * mc8pelsNoInterpolMMX - copy a block that is 8 bytes wide and vSize rows
 * high, without any interpolation.
 *
 *   inpix   address of the first byte of the top source row
 *   outpix  address of the first byte of the top destination row
 *   hdim    byte distance between consecutive rows; the same value is used
 *           for the source and the destination
 *   vSize   number of rows to copy; nothing is copied when vSize <= 0
 *
 * Reference behaviour in C (PIXEL is presumably one byte - confirm in
 * h261defs.h):
 *
 *     while (vSize > 0) {
 *         copy 8 bytes from in to out;
 *         in += hdim;  out += hdim;  --vSize;
 *     }
 */
static void mc8pelsNoInterpolMMX( PIXEL *inpix, PIXEL *outpix, int hdim, int vSize )
{
	/* The assembly loop tests its counter at the bottom, so on its own it
	 * would copy one row even for vSize <= 0. Bail out first to match the
	 * reference loop and mc4pelsNoInterpolMMX. */
	if (vSize <= 0)
		return;

__asm {
	mov		ecx, vSize			;// ecx = rows still to copy
	mov		esi, inpix			;// esi = source base
	mov		edi, outpix			;// edi = destination base
	xor		ebx, ebx			;// ebx = byte offset of the current row
mc8pelsNoInterpolMMX_loop:
		movq	mm0, [esi + ebx]		;// one whole 8-byte row
		movq	[edi + ebx], mm0
		add		ebx, hdim			;// step to the next row
		dec		ecx					;// sets the flags tested by jg
	jg	mc8pelsNoInterpolMMX_loop
	emms							;// leave MMX state so x87 code can run afterwards
    }
}


/*
 * mc4pelsNoInterpolMMX - copy a block that is one U32 wide and vSize rows
 * high, without any interpolation. Despite its name this routine is plain C.
 *
 *   inpix   first source pixel of the top row
 *   outpix  first destination pixel of the top row
 *   hdim    distance, in PIXEL units, between consecutive rows; the same
 *           value is used for the source and the destination
 *   vSize   number of rows to copy; nothing is copied when vSize <= 0
 *
 * Each row is moved with a single U32 load and store.
 */
static void mc4pelsNoInterpolMMX( PIXEL *inpix, PIXEL *outpix, int hdim, int vSize )
{
	int offset;
	int rowsLeft;

	for (offset = 0, rowsLeft = vSize; rowsLeft > 0; --rowsLeft, offset += hdim) {
		const U32 *src = (const U32 *)(inpix + offset);
		U32 *dst = (U32 *)(outpix + offset);

		*dst = *src;
	}
}


/*
 * mc16pelsHorInterpolMMX - horizontal two-tap average of a block that is
 * 16 bytes wide and vSize rows high.
 *
 * For every row, treating in/out as byte arrays:
 *
 *     out[i] = (in[i] + in[i + 1] + 1) >> 1      for i = 0 .. 15
 *
 *   inpix   address of the first byte of the top source row
 *   outpix  address of the first byte of the top destination row
 *   hdim    byte distance between consecutive rows; the same value is used
 *           for the source and the destination
 *   vSize   number of rows to produce; nothing is done when vSize <= 0
 *
 * Exactly 17 source bytes (in[0] .. in[16]) are read per row. The right-hand
 * neighbours are fetched with loads at offsets +1 and +9 instead of being
 * assembled from a further 8-byte load at +16, which would touch
 * in[17] .. in[23] - bytes the formula above never uses.
 */
static void mc16pelsHorInterpolMMX( PIXEL const *inpix, PIXEL *outpix, int hdim, int vSize )
{
	/* The assembly loop tests its counter at the bottom; without this guard
	 * a row would be read and written even for vSize <= 0. */
	if (vSize <= 0)
		return;

__asm {
	xor		ebx, ebx			;// ebx = byte offset of the current row
	mov		ecx, vSize			;// ecx = rows still to produce
	mov		esi, inpix
	mov		edi, outpix
	pxor	mm0, mm0			;// mm0 = 0, used to widen bytes to words
	movq	mm7, const_0x0001000100010001	;// mm7 = rounding term, 1 per word
mc16pelsHorInterpolMMX_loop:
		;// Fetch every input of this row before anything is stored.
		movq		mm1, [esi + ebx]		;// in[0..7]
		movq		mm3, [esi + ebx + 1]	;// in[1..8]
		movq		mm5, [esi + ebx + 8]	;// in[8..15]
		movq		mm6, [esi + ebx + 9]	;// in[9..16]

		;// out[0..7]
		movq		mm2, mm1
		movq		mm4, mm3
		punpcklbw	mm1, mm0	;// in[0..3] as words
		punpckhbw	mm2, mm0	;// in[4..7] as words
		punpcklbw	mm3, mm0	;// in[1..4] as words
		punpckhbw	mm4, mm0	;// in[5..8] as words
		paddw		mm1, mm7	;// + 1
		paddw		mm2, mm7
		paddw		mm1, mm3	;// in[i] + in[i+1] + 1
		paddw		mm2, mm4
		psrlw		mm1, 1		;// / 2
		psrlw		mm2, 1
		packuswb	mm1, mm2	;// back to 8 bytes; values are <= 255, no clamping occurs
		movq		[edi + ebx], mm1

		;// out[8..15]; mm1 and mm2 are free again
		movq		mm1, mm5
		movq		mm2, mm6
		punpcklbw	mm5, mm0	;// in[8..11] as words
		punpckhbw	mm1, mm0	;// in[12..15] as words
		punpcklbw	mm6, mm0	;// in[9..12] as words
		punpckhbw	mm2, mm0	;// in[13..16] as words
		paddw		mm5, mm7
		paddw		mm1, mm7
		paddw		mm5, mm6
		paddw		mm1, mm2
		psrlw		mm5, 1
		psrlw		mm1, 1
		packuswb	mm5, mm1
		movq		[edi + ebx + 8], mm5

		add		ebx, hdim		;// step to the next row
		dec		ecx				;// sets the flags tested by jg
	jg	mc16pelsHorInterpolMMX_loop
	emms						;// leave MMX state so x87 code can run afterwards
}
}

/*
 * mc8pelsHorInterpolMMX - horizontal two-tap average of a block that is
 * 8 bytes wide and vSize rows high.
 *
 * For every row, treating in/out as byte arrays:
 *
 *     out[i] = (in[i] + in[i + 1] + 1) >> 1      for i = 0 .. 7
 *
 *   inpix   address of the first byte of the top source row
 *   outpix  address of the first byte of the top destination row
 *   hdim    byte distance between consecutive rows; the same value is used
 *           for the source and the destination
 *   vSize   number of rows to produce; nothing is done when vSize <= 0
 *
 * Exactly 9 source bytes (in[0] .. in[8]) are read per row. The right-hand
 * neighbours come from a load at offset +1 rather than from an 8-byte load
 * at +8, which would touch in[9] .. in[15] - bytes the formula never uses.
 */
static void mc8pelsHorInterpolMMX( PIXEL const *inpix, PIXEL *outpix, int hdim, int vSize )
{
	/* The assembly loop tests its counter at the bottom; without this guard
	 * a row would be read and written even for vSize <= 0. */
	if (vSize <= 0)
		return;

__asm {
	xor		ebx, ebx			;// ebx = byte offset of the current row
	mov		ecx, vSize			;// ecx = rows still to produce
	mov		esi, inpix
	mov		edi, outpix
	pxor	mm0, mm0			;// mm0 = 0, used to widen bytes to words
	movq	mm7, const_0x0001000100010001	;// mm7 = rounding term, 1 per word
mc8pelsHorInterpolMMX_loop:
		movq		mm1, [esi + ebx]		;// in[0..7]
		movq		mm3, [esi + ebx + 1]	;// in[1..8]
		movq		mm2, mm1
		movq		mm4, mm3
		punpcklbw	mm1, mm0	;// in[0..3] as words
		punpckhbw	mm2, mm0	;// in[4..7] as words
		punpcklbw	mm3, mm0	;// in[1..4] as words
		punpckhbw	mm4, mm0	;// in[5..8] as words

		paddw		mm1, mm7	;// + 1
		paddw		mm2, mm7
		paddw		mm1, mm3	;// in[i] + in[i+1] + 1
		paddw		mm2, mm4

		psrlw		mm1, 1		;// / 2
		psrlw		mm2, 1
		packuswb	mm1, mm2	;// back to 8 bytes; values are <= 255, no clamping occurs

		movq		[edi + ebx], mm1
		add		ebx, hdim		;// step to the next row
		dec		ecx				;// sets the flags tested by jg
	jg	mc8pelsHorInterpolMMX_loop
	emms						;// leave MMX state so x87 code can run afterwards
	}
}


/*
 * mc4pelsHorInterpolMMX - horizontal two-tap average of a block that is
 * 4 bytes wide and vSize rows high.
 *
 * For every row, treating in/out as byte arrays:
 *
 *     out[i] = (in[i] + in[i + 1] + 1) >> 1      for i = 0 .. 3
 *
 *   inpix   address of the first byte of the top source row
 *   outpix  address of the first byte of the top destination row
 *   hdim    byte distance between consecutive rows; the same value is used
 *           for the source and the destination
 *   vSize   number of rows to produce; nothing is done when vSize <= 0
 *
 * Exactly 5 source bytes (in[0] .. in[4]) are read per row, using two
 * 4-byte loads at offsets 0 and +1. A single 8-byte load would also touch
 * in[5] .. in[7], which the formula never uses.
 */
static void mc4pelsHorInterpolMMX( PIXEL const *inpix, PIXEL *outpix, int hdim, int vSize )
{
	/* The assembly loop tests its counter at the bottom; without this guard
	 * a row would be read and written even for vSize <= 0. */
	if (vSize <= 0)
		return;

__asm {
	xor		ebx, ebx			;// ebx = byte offset of the current row
	mov		ecx, vSize			;// ecx = rows still to produce
	mov		esi, inpix
	mov		edi, outpix
	pxor	mm0, mm0			;// mm0 = 0, used to widen bytes to words
	movq	mm7, const_0x0001000100010001	;// mm7 = rounding term, 1 per word
mc4pelsHorInterpolMMX_loop:
		movd		mm1, [esi + ebx]		;// in[0..3], upper half of mm1 cleared
		movd		mm3, [esi + ebx + 1]	;// in[1..4], upper half of mm3 cleared
		punpcklbw	mm1, mm0	;// in[0..3] as words
		punpcklbw	mm3, mm0	;// in[1..4] as words

		paddw		mm1, mm7	;// + 1
		paddw		mm1, mm3	;// in[i] + in[i+1] + 1

		psrlw		mm1, 1		;// / 2
		packuswb	mm1, mm0	;// results land in the low 4 bytes

		movd		[edi + ebx], mm1		;// store only those 4 bytes
		add		ebx, hdim		;// step to the next row
		dec		ecx				;// sets the flags tested by jg
	jg	mc4pelsHorInterpolMMX_loop
	emms						;// leave MMX state so x87 code can run afterwards
	}
}


/*
 * mc16pelsVertInterpolMMX - vertical two-tap average of a block that is
 * 16 bytes wide and vSize rows high.
 *
 * For every row, treating in/out as byte arrays:
 *
 *     out[i] = (in[i] + in[hdim + i] + 1) >> 1      for i = 0 .. 15
 *
 *   inpix   address of the first byte of the top source row
 *   outpix  address of the first byte of the top destination row
 *   hdim    byte distance between consecutive rows; the same value is used
 *           for the source and the destination
 *   vSize   number of rows to produce; vSize + 1 source rows are read.
 *           Nothing is done when vSize <= 0.
 */
static void mc16pelsVertInterpolMMX( PIXEL const *inpix, PIXEL *outpix, int hdim, int vSize )
{
	/* The assembly loop tests its counter at the bottom; without this guard
	 * two source rows would be read and one output row written even for
	 * vSize <= 0. */
	if (vSize <= 0)
		return;

__asm {
	xor		ebx, ebx			;// ebx = byte offset of the current source row
	mov		ecx, vSize			;// ecx = rows still to produce
	mov		esi, inpix

	mov		edi, outpix
	sub		edi, hdim		;// edi = outpix - hdim: ebx is advanced one row before each store

	pxor	mm0, mm0	;// mm0 = 0, used to widen bytes to words
	movq		mm7, const_0x0001000100010001	;// mm7 = rounding term, 1 per word
mc16pelsVertInterpolMMX_loop:
		movq			mm1, [esi + ebx]		;// upper row, bytes 0..7
		movq			mm2, mm1
		movq			mm5, [esi + ebx + 8]	;// upper row, bytes 8..15
		punpcklbw	mm1, mm0	;// upper[0..3] as words
		movq			mm6, mm5
		punpckhbw	mm2, mm0	;// upper[4..7] as words

		add		ebx, hdim		;// ebx now addresses the lower row
		punpcklbw	mm5, mm0	;// upper[8..11] as words
		paddw		mm1, mm7	;// + 1
		punpckhbw	mm6, mm0	;// upper[12..15] as words


		movq			mm3, [esi + ebx]		;// lower row, bytes 0..7
		paddw		mm2, mm7	;// + 1
		movq			mm4, mm3
		punpcklbw	mm3, mm0	;// lower[0..3] as words
		punpckhbw	mm4, mm0	;// lower[4..7] as words

		paddw		mm1, mm3	;// upper + lower + 1
		paddw		mm2, mm4

		psrlw			mm1, 1		;// / 2
		psrlw			mm2, 1
		movq			mm3, [esi + ebx + 8]	;// lower row, bytes 8..15 (fetched before the first store)
		packuswb	mm1, mm2	;// out[0..7]

		movq			mm4, mm3
		movq	[edi + ebx], mm1	;// edi + ebx = outpix + offset of the upper row

		punpcklbw	mm3, mm0	;// lower[8..11] as words
		paddw		mm5, mm7	;// + 1
		punpckhbw	mm4, mm0	;// lower[12..15] as words

		paddw		mm6, mm7	;// + 1

		paddw		mm5, mm3	;// upper + lower + 1
		paddw		mm6, mm4

		psrlw			mm5, 1		;// / 2
		psrlw			mm6, 1
		packuswb	mm5, mm6	;// out[8..15]

		dec		ecx				;// sets the flags tested by jg (movq leaves flags alone)
		movq	[edi + ebx + 8], mm5

	jg	mc16pelsVertInterpolMMX_loop
	emms						;// leave MMX state so x87 code can run afterwards
	}
}


/*
 * mc8pelsVertInterpolMMX - vertical two-tap average of a block that is
 * 8 bytes wide and vSize rows high.
 *
 * For every row, treating in/out as byte arrays:
 *
 *     out[i] = (in[i] + in[hdim + i] + 1) >> 1      for i = 0 .. 7
 *
 *   inpix   address of the first byte of the top source row
 *   outpix  address of the first byte of the top destination row
 *   hdim    byte distance between consecutive rows; the same value is used
 *           for the source and the destination
 *   vSize   number of rows to produce; vSize + 1 source rows are read.
 *           Nothing is done when vSize <= 0.
 */
static void mc8pelsVertInterpolMMX( PIXEL const *inpix, PIXEL *outpix, int hdim, int vSize )
{
	/* The assembly loop tests its counter at the bottom; without this guard
	 * two source rows would be read and one output row written even for
	 * vSize <= 0. */
	if (vSize <= 0)
		return;

__asm {
	xor		ebx, ebx			;// ebx = byte offset of the current source row
	mov		ecx, vSize			;// ecx = rows still to produce
	mov		esi, inpix

	mov		edi, outpix
	sub		edi, hdim		;// edi = outpix - hdim: ebx is advanced one row before the store

	pxor	mm0, mm0	;// mm0 = 0, used to widen bytes to words
	movq		mm7, const_0x0001000100010001	;// mm7 = rounding term, 1 per word
mc8pelsVertInterpolMMX_loop:
		movq			mm1, [esi + ebx]	;// upper row, bytes 0..7
		add		ebx, hdim		;// ebx now addresses the lower row
		movq			mm2, mm1
		punpcklbw	mm1, mm0	;// upper[0..3] as words
		movq			mm3, [esi + ebx]	;// lower row, bytes 0..7
		punpckhbw	mm2, mm0	;// upper[4..7] as words

		movq			mm4, mm3
		punpcklbw	mm3, mm0	;// lower[0..3] as words
		punpckhbw	mm4, mm0	;// lower[4..7] as words

		paddw		mm1, mm7	;// + 1
		paddw		mm2, mm7

		paddw		mm1, mm3	;// upper + lower + 1
		paddw		mm2, mm4

		psrlw			mm1, 1		;// / 2
		psrlw			mm2, 1
		dec		ecx				;// sets the flags tested by jg (the MMX ops below leave flags alone)
		packuswb	mm1, mm2	;// out[0..7]

		movq	[edi + ebx], mm1	;// edi + ebx = outpix + offset of the upper row
	jg	mc8pelsVertInterpolMMX_loop
	emms						;// leave MMX state so x87 code can run afterwards
	}
}


static void mc4pelsVertInterpolMMX( PIXEL const *inpix, PIXEL *outpix, int hdim, int vSize )
{
__asm {
	xor		ebx, ebx
	mov		ecx, vSize
	mov		esi, inpix

	mov		edi, outpix
	sub		edi, hdim		;//edi = output - hdim

	pxor	mm0, mm0	;// mm0 = 0
	movq		mm7, const_0x0001000100010001
mc4pelsVertInterpolMMX_loop:
		movq			mm1, [esi + ebx]
		punpcklbw	mm1, mm0	;//expand lower 4 pix
		add		ebx, hdim

		movq			mm3, [esi + ebx]
		punpcklbw	mm3, mm0	;//expand lower 4 pix

		paddw		mm1, mm7

		paddw		mm1, mm3

		psrlw			mm1, 1			
		packuswb	mm1, mm0

		movd	[edi + ebx], mm1
		dec		ecx
	jg	mc4pelsVertInterpolMMX_loop
	emms		
	}

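/*
 * Code-viewer page footer - keyboard shortcuts:
 *   copy code           Ctrl + C
 *   search code         Ctrl + F
 *   full-screen mode    F11
 *   larger font         Ctrl + =
 *   smaller font        Ctrl + -
 *   show shortcuts      the question-mark key
 */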