mmxmotio.c
来自「symbian 下的helix player源代码」· C语言 代码 · 共 1,081 行 · 第 1/2 页
C
1,081 行
/* ***** BEGIN LICENSE BLOCK *****
* Source last modified: $Id: mmxmotio.c,v 1.1.1.1.42.1 2004/07/09 01:56:22 hubbe Exp $
*
* Portions Copyright (c) 1995-2004 RealNetworks, Inc. All Rights Reserved.
*
* The contents of this file, and the files included with this file,
* are subject to the current version of the RealNetworks Public
* Source License (the "RPSL") available at
* http://www.helixcommunity.org/content/rpsl unless you have licensed
* the file under the current version of the RealNetworks Community
* Source License (the "RCSL") available at
* http://www.helixcommunity.org/content/rcsl, in which case the RCSL
* will apply. You may also obtain the license terms directly from
* RealNetworks. You may not use this file except in compliance with
* the RPSL or, if you have a valid RCSL with RealNetworks applicable
* to this file, the RCSL. Please see the applicable RPSL or RCSL for
* the rights, obligations and limitations governing use of the
* contents of the file.
*
* Alternatively, the contents of this file may be used under the
* terms of the GNU General Public License Version 2 or later (the
* "GPL") in which case the provisions of the GPL are applicable
* instead of those above. If you wish to allow use of your version of
* this file only under the terms of the GPL, and not to allow others
* to use your version of this file under the terms of either the RPSL
* or RCSL, indicate your decision by deleting the provisions above
* and replace them with the notice and other provisions required by
* the GPL. If you do not delete the provisions above, a recipient may
* use your version of this file under the terms of any one of the
* RPSL, the RCSL or the GPL.
*
* This file is part of the Helix DNA Technology. RealNetworks is the
* developer of the Original Code and owns the copyrights in the
* portions it created.
*
* This file, and the files included with this file, is distributed
* and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
* KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
* ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
* ENJOYMENT OR NON-INFRINGEMENT.
*
* Technology Compatibility Kit Test Suite(s) Location:
* http://www.helixcommunity.org/content/tck
*
* Contributor(s):
*
* ***** END LICENSE BLOCK ***** */
#include "dllindex.h"
#include "h261defs.h"
#include "h261func.h"
// State defined in another translation unit and only declared here.
extern int g_saveMbHor; // Hold on to size of Mb array (used by distUmv and
extern int g_saveMbVert; // TryInter4V and its subroutines)
extern int g_saveUmvMode; // Hold on to unrestrictedMv (used by RefineMotionVector)
extern int g_savePointOutside; // Hold on to pointOutside (used by rmvDist)
extern int g_distPixels; // Number of pixels used in squared error computations
// 64-bit constants meant to be loaded into MMX registers with movq.
// Byte positions below count from the least significant byte (byte 0).
static const __int64 const_0x00ff00ff00ff00ff = 0x00ff00ff00ff00ff; // 0xff in bytes 0, 2, 4, 6: keeps the low byte of each 16-bit word
static const __int64 const_0x000000ff000000ff = 0x000000ff000000ff; // 0xff in bytes 0 and 4: keeps the low byte of each 32-bit dword
static const __int64 const_0x00ff000000ff0000 = 0x00ff000000ff0000; // 0xff in bytes 2 and 6
static const __int64 const_0x00ff00ff000000ff = 0x00ff00ff000000ff; // 0xff in bytes 0, 4, 6; original note: "== shift 4 8 10" (NOTE(review): meaning of that note is not evident here)
// 1 in each of the four 16-bit words; the half-pel interpolation routines in this
// file load it into mm7 and add it before the >> 1 so the average rounds up.
static const __int64 const_0x0001000100010001 = 0x0001000100010001;
// 2 in each of the four 16-bit words (its use is not visible in this part of the file).
static const __int64 const_0x0002000200020002 = 0x0002000200020002;
/*-- mcomp.c -----------------------------------------------------------------------------------*/
/*
 * mc16pelsNoInterpolMMX - copy a block 16 bytes wide and vSize rows high,
 * without interpolation (one byte per pixel is assumed, as in the C reference
 * that moved four 32-bit words per row).
 *
 *   inpix   first byte of the top source row
 *   outpix  first byte of the top destination row
 *   hdim    row stride in bytes, applied to both source and destination
 *   vSize   number of rows; nothing is done when vSize <= 0
 *
 * Reference C, per row:
 *   out[0..15] = in[0..15];  in += hdim;  out += hdim;
 *
 * MSVC inline assembly; uses mm0/mm1 and ends with emms.
 */
static void mc16pelsNoInterpolMMX( PIXEL *inpix, PIXEL *outpix, int hdim, int vSize )
{
    // The asm loop tests its counter at the bottom and would therefore copy one
    // row even for vSize <= 0; the reference loop is while (vSize > 0).
    if (vSize <= 0)
        return;
    __asm {
        mov ecx, vSize                      ;// ecx = rows remaining
        mov ebx, hdim
        mov esi, inpix
        neg ebx                             ;// ebx = -hdim so the first add gives offset 0
        mov edi, outpix
    mc16pelsNoInterpolMMX_loop:
        add ebx, hdim                       ;// ebx = byte offset of the current row
        dec ecx                             ;// flags for jg below; movq does not modify flags
        movq mm0, [esi + ebx]               ;// in[0..7]
        movq mm1, [esi + ebx + 8]           ;// in[8..15]
        movq [edi + ebx], mm0
        movq [edi + ebx + 8], mm1
        jg mc16pelsNoInterpolMMX_loop       ;// repeat while rows remain
        emms                                ;// reset MMX state before returning to C code
    }
}
/*
 * mc8pelsNoInterpolMMX - copy a block 8 bytes wide and vSize rows high,
 * without interpolation (one byte per pixel is assumed, as in the C reference
 * that moved two 32-bit words per row).
 *
 *   inpix   first byte of the top source row
 *   outpix  first byte of the top destination row
 *   hdim    row stride in bytes, applied to both source and destination
 *   vSize   number of rows; nothing is done when vSize <= 0
 *
 * Reference C, per row:
 *   out[0..7] = in[0..7];  in += hdim;  out += hdim;
 *
 * MSVC inline assembly; uses mm0 and ends with emms.
 */
static void mc8pelsNoInterpolMMX( PIXEL *inpix, PIXEL *outpix, int hdim, int vSize )
{
    // The asm loop tests its counter at the bottom and would therefore copy one
    // row even for vSize <= 0; the reference loop is while (vSize > 0).
    if (vSize <= 0)
        return;
    __asm {
        mov ecx, vSize                      ;// ecx = rows remaining
        mov ebx, hdim
        mov esi, inpix
        neg ebx                             ;// ebx = -hdim so the first add gives offset 0
        mov edi, outpix
    mc8pelsNoInterpolMMX_loop:
        add ebx, hdim                       ;// ebx = byte offset of the current row
        dec ecx                             ;// flags for jg below; movq does not modify flags
        movq mm0, [esi + ebx]               ;// in[0..7]
        movq [edi + ebx], mm0
        jg mc8pelsNoInterpolMMX_loop        ;// repeat while rows remain
        emms                                ;// reset MMX state before returning to C code
    }
}
/*
 * mc4pelsNoInterpolMMX - copy vSize rows, moving one U32 per row from inpix
 * to outpix, without interpolation. Plain C; no MMX instructions are used
 * despite the name.
 *
 *   inpix   start of the top source row
 *   outpix  start of the top destination row
 *   hdim    row stride, in PIXEL units, applied to both source and destination
 *   vSize   number of rows; nothing is done when vSize <= 0
 */
static void mc4pelsNoInterpolMMX( PIXEL *inpix, PIXEL *outpix, int hdim, int vSize )
{
    int rowsLeft;
    int offset = 0;     /* offset of the current row from the top row */

    for (rowsLeft = vSize; rowsLeft > 0; --rowsLeft) {
        U32 *dst = (U32 *)(outpix + offset);
        const U32 *src = (const U32 *)(inpix + offset);

        *dst = *src;
        offset += hdim;
    }
}
/*
 * mc16pelsHorInterpolMMX - horizontal half-pel interpolation of a block
 * 16 bytes wide and vSize rows high (one byte per pixel is assumed).
 *
 *   inpix   first byte of the top source row; bytes in[0..16] of each row are read
 *   outpix  first byte of the top destination row; out[0..15] of each row are written
 *   hdim    row stride in bytes, applied to both source and destination
 *   vSize   number of rows; nothing is done when vSize <= 0
 *
 * Reference C, per row:
 *   for (i = 0; i < 16; i++) out[i] = (in[i] + in[i+1] + 1) >> 1;
 *   in += hdim;  out += hdim;
 *
 * MSVC inline assembly. Bytes are widened to 16-bit words against mm0 = 0 so
 * the sums cannot overflow, then packed back to bytes. Uses eax, mm0-mm7 and
 * ends with emms.
 */
static void mc16pelsHorInterpolMMX( PIXEL const *inpix, PIXEL *outpix, int hdim, int vSize )
{
    // The asm loop tests its counter at the bottom and would therefore process
    // one row even for vSize <= 0; the reference loop is while (vSize > 0).
    if (vSize <= 0)
        return;
    __asm {
        xor ebx, ebx                        ;// ebx = byte offset of the current row
        mov ecx, vSize                      ;// ecx = rows remaining
        mov esi, inpix
        mov edi, outpix
        pxor mm0, mm0                       ;// mm0 = 0, used to widen bytes to words
        movq mm7, const_0x0001000100010001  ;// mm7 = rounding term, 1 in each word
    mc16pelsHorInterpolMMX_loop:
        ;// ---- first half: out[0..7] ----
        movq mm1, [esi + ebx]               ;// in[0..7]
        movq mm4, [esi + ebx + 8]           ;// in[8..15]
        movq mm3, mm1
        movq mm5, mm4                       ;// keep in[8..15] for the second half
        psrlq mm3, 8                        ;// in[1..7], top byte cleared
        movq mm2, mm1
        psllq mm4, 56                       ;// in[8] alone, in the top byte
        punpcklbw mm1, mm0                  ;// words in[0..3]
        pxor mm3, mm4                       ;// in[1..8], same as a load from in+1
        punpckhbw mm2, mm0                  ;// words in[4..7]
        movq mm4, mm3
        punpcklbw mm3, mm0                  ;// words in[1..4]
        paddw mm1, mm7                      ;// + 1
        punpckhbw mm4, mm0                  ;// words in[5..8]
        paddw mm2, mm7                      ;// + 1
        paddw mm1, mm3                      ;// in[i] + in[i+1] + 1, i = 0..3
        paddw mm2, mm4                      ;// in[i] + in[i+1] + 1, i = 4..7
        psrlw mm1, 1
        movq mm6, mm5
        psrlw mm2, 1
        packuswb mm1, mm2                   ;// back to 8 bytes
        movq mm3, mm5
        punpcklbw mm5, mm0                  ;// words in[8..11]
        movq [edi + ebx], mm1               ;// out[0..7]
        ;// ---- second half: out[8..15] ----
        punpckhbw mm6, mm0                  ;// words in[12..15]
        ;// Only in[16] is needed from the next group. Load that single byte
        ;// instead of 8 bytes so nothing beyond in[16] is read.
        movzx eax, byte ptr [esi + ebx + 16]
        movd mm4, eax                       ;// in[16] in the low byte, rest zero
        psrlq mm3, 8                        ;// in[9..15], top byte cleared
        paddw mm5, mm7                      ;// + 1
        psllq mm4, 56                       ;// in[16] alone, in the top byte
        paddw mm6, mm7                      ;// + 1
        pxor mm3, mm4                       ;// in[9..16], same as a load from in+9
        movq mm4, mm3
        punpcklbw mm3, mm0                  ;// words in[9..12]
        punpckhbw mm4, mm0                  ;// words in[13..16]
        paddw mm5, mm3                      ;// in[i] + in[i+1] + 1, i = 8..11
        paddw mm6, mm4                      ;// in[i] + in[i+1] + 1, i = 12..15
        psrlw mm5, 1
        psrlw mm6, 1
        packuswb mm5, mm6
        movq [edi + ebx + 8], mm5           ;// out[8..15]
        add ebx, hdim                       ;// next row
        dec ecx
        jg mc16pelsHorInterpolMMX_loop      ;// repeat while rows remain
        emms                                ;// reset MMX state before returning to C code
    }
}
/*
 * mc8pelsHorInterpolMMX - horizontal half-pel interpolation of a block
 * 8 bytes wide and vSize rows high (one byte per pixel is assumed).
 *
 *   inpix   first byte of the top source row; bytes in[0..8] of each row are read
 *   outpix  first byte of the top destination row; out[0..7] of each row are written
 *   hdim    row stride in bytes, applied to both source and destination
 *   vSize   number of rows; nothing is done when vSize <= 0
 *
 * Reference C, per row:
 *   for (i = 0; i < 8; i++) out[i] = (in[i] + in[i+1] + 1) >> 1;
 *   in += hdim;  out += hdim;
 *
 * MSVC inline assembly. Bytes are widened to 16-bit words against mm0 = 0 so
 * the sums cannot overflow, then packed back to bytes. Uses eax, mm0-mm4, mm7
 * and ends with emms.
 */
static void mc8pelsHorInterpolMMX( PIXEL const *inpix, PIXEL *outpix, int hdim, int vSize )
{
    // The asm loop tests its counter at the bottom and would therefore process
    // one row even for vSize <= 0; the reference loop is while (vSize > 0).
    if (vSize <= 0)
        return;
    __asm {
        xor ebx, ebx                        ;// ebx = byte offset of the current row
        mov ecx, vSize                      ;// ecx = rows remaining
        mov esi, inpix
        mov edi, outpix
        pxor mm0, mm0                       ;// mm0 = 0, used to widen bytes to words
        movq mm7, const_0x0001000100010001  ;// mm7 = rounding term, 1 in each word
    mc8pelsHorInterpolMMX_loop:
        movq mm1, [esi + ebx]               ;// in[0..7]
        ;// Only in[8] is needed from the next group. Load that single byte
        ;// instead of 8 bytes so nothing beyond in[8] is read.
        movzx eax, byte ptr [esi + ebx + 8]
        movd mm4, eax                       ;// in[8] in the low byte, rest zero
        movq mm3, mm1
        psllq mm4, 56                       ;// in[8] alone, in the top byte
        movq mm2, mm1
        psrlq mm3, 8                        ;// in[1..7], top byte cleared
        punpcklbw mm1, mm0                  ;// words in[0..3]
        pxor mm3, mm4                       ;// in[1..8], same as a load from in+1
        punpckhbw mm2, mm0                  ;// words in[4..7]
        movq mm4, mm3
        punpcklbw mm3, mm0                  ;// words in[1..4]
        paddw mm1, mm7                      ;// + 1
        punpckhbw mm4, mm0                  ;// words in[5..8]
        paddw mm2, mm7                      ;// + 1
        paddw mm1, mm3                      ;// in[i] + in[i+1] + 1, i = 0..3
        paddw mm2, mm4                      ;// in[i] + in[i+1] + 1, i = 4..7
        psrlw mm1, 1
        psrlw mm2, 1
        packuswb mm1, mm2                   ;// back to 8 bytes
        movq [edi + ebx], mm1               ;// out[0..7]
        add ebx, hdim                       ;// next row
        dec ecx
        jg mc8pelsHorInterpolMMX_loop       ;// repeat while rows remain
        emms                                ;// reset MMX state before returning to C code
    }
}
/*
 * mc4pelsHorInterpolMMX - horizontal half-pel interpolation of a block
 * 4 bytes wide and vSize rows high (one byte per pixel is assumed).
 *
 *   inpix   first byte of the top source row; bytes in[0..4] of each row are read
 *   outpix  first byte of the top destination row; out[0..3] of each row are written
 *   hdim    row stride in bytes, applied to both source and destination
 *   vSize   number of rows; nothing is done when vSize <= 0
 *
 * Reference C, per row:
 *   for (i = 0; i < 4; i++) out[i] = (in[i] + in[i+1] + 1) >> 1;
 *   in += hdim;  out += hdim;
 *
 * MSVC inline assembly. Bytes are widened to 16-bit words against mm0 = 0 so
 * the sums cannot overflow, then packed back to bytes. Uses eax, mm0, mm1,
 * mm3, mm4, mm7 and ends with emms.
 */
static void mc4pelsHorInterpolMMX( PIXEL const *inpix, PIXEL *outpix, int hdim, int vSize )
{
    // The asm loop tests its counter at the bottom and would therefore process
    // one row even for vSize <= 0; the reference loop is while (vSize > 0).
    if (vSize <= 0)
        return;
    __asm {
        xor ebx, ebx                        ;// ebx = byte offset of the current row
        mov ecx, vSize                      ;// ecx = rows remaining
        mov esi, inpix
        mov edi, outpix
        pxor mm0, mm0                       ;// mm0 = 0, used to widen bytes to words
        movq mm7, const_0x0001000100010001  ;// mm7 = rounding term, 1 in each word
    mc4pelsHorInterpolMMX_loop:
        ;// Read exactly in[0..4]: one dword plus one byte, rather than 8 bytes,
        ;// so nothing beyond in[4] is touched.
        movd mm1, dword ptr [esi + ebx]     ;// in[0..3], upper half zero
        movzx eax, byte ptr [esi + ebx + 4] ;// in[4]
        movq mm3, mm1
        movd mm4, eax                       ;// in[4] in the low byte, rest zero
        psrlq mm3, 8                        ;// in[1..3], byte 3 cleared
        psllq mm4, 24                       ;// in[4] moved to byte 3
        punpcklbw mm1, mm0                  ;// words in[0..3]
        por mm3, mm4                        ;// in[1..4]
        punpcklbw mm3, mm0                  ;// words in[1..4]
        paddw mm1, mm7                      ;// + 1
        paddw mm1, mm3                      ;// in[i] + in[i+1] + 1, i = 0..3
        psrlw mm1, 1
        packuswb mm1, mm0                   ;// back to bytes; upper 4 bytes are zero
        movd [edi + ebx], mm1               ;// out[0..3]
        add ebx, hdim                       ;// next row
        dec ecx
        jg mc4pelsHorInterpolMMX_loop       ;// repeat while rows remain
        emms                                ;// reset MMX state before returning to C code
    }
}
/*
 * mc16pelsVertInterpolMMX - vertical half-pel interpolation of a block
 * 16 bytes wide and vSize rows high (one byte per pixel is assumed).
 *
 *   inpix   first byte of the top source row; vSize + 1 source rows are read
 *   outpix  first byte of the top destination row
 *   hdim    row stride in bytes, applied to both source and destination
 *   vSize   number of output rows; nothing is done when vSize <= 0
 *
 * Reference C, per row:
 *   for (i = 0; i < 16; i++) out[i] = (in[i] + in[hdim+i] + 1) >> 1;
 *   in += hdim;  out += hdim;
 *
 * MSVC inline assembly. Bytes are widened to 16-bit words against mm0 = 0 so
 * the sums cannot overflow, then packed back to bytes. Uses mm0-mm7 and ends
 * with emms.
 */
static void mc16pelsVertInterpolMMX( PIXEL const *inpix, PIXEL *outpix, int hdim, int vSize )
{
    // The asm loop tests its counter at the bottom and would therefore process
    // one row even for vSize <= 0; the reference loop is while (vSize > 0).
    if (vSize <= 0)
        return;
    __asm {
        xor ebx, ebx                        ;// ebx = byte offset of the current source row
        mov ecx, vSize                      ;// ecx = rows remaining
        mov esi, inpix
        mov edi, outpix
        sub edi, hdim                       ;// edi = outpix - hdim: ebx is already one row
                                            ;// ahead when the stores are issued
        pxor mm0, mm0                       ;// mm0 = 0, used to widen bytes to words
        movq mm7, const_0x0001000100010001  ;// mm7 = rounding term, 1 in each word
    mc16pelsVertInterpolMMX_loop:
        movq mm1, [esi + ebx]               ;// upper row, in[0..7]
        movq mm2, mm1
        movq mm5, [esi + ebx + 8]           ;// upper row, in[8..15]
        punpcklbw mm1, mm0                  ;// words in[0..3]
        movq mm6, mm5
        punpckhbw mm2, mm0                  ;// words in[4..7]
        add ebx, hdim                       ;// ebx now addresses the lower row
        punpcklbw mm5, mm0                  ;// words in[8..11]
        paddw mm1, mm7                      ;// + 1
        punpckhbw mm6, mm0                  ;// words in[12..15]
        movq mm3, [esi + ebx]               ;// lower row, in[hdim+0..7]
        paddw mm2, mm7                      ;// + 1
        movq mm4, mm3
        punpcklbw mm3, mm0                  ;// words in[hdim+0..3]
        punpckhbw mm4, mm0                  ;// words in[hdim+4..7]
        paddw mm1, mm3                      ;// upper + lower + 1, columns 0..3
        paddw mm2, mm4                      ;// upper + lower + 1, columns 4..7
        psrlw mm1, 1
        psrlw mm2, 1
        movq mm3, [esi + ebx + 8]           ;// lower row, in[hdim+8..15]
        packuswb mm1, mm2                   ;// back to 8 bytes
        movq mm4, mm3
        movq [edi + ebx], mm1               ;// out[0..7] of the current output row
        punpcklbw mm3, mm0                  ;// words in[hdim+8..11]
        paddw mm5, mm7                      ;// + 1
        punpckhbw mm4, mm0                  ;// words in[hdim+12..15]
        paddw mm6, mm7                      ;// + 1
        paddw mm5, mm3                      ;// upper + lower + 1, columns 8..11
        paddw mm6, mm4                      ;// upper + lower + 1, columns 12..15
        psrlw mm5, 1
        psrlw mm6, 1
        packuswb mm5, mm6
        dec ecx                             ;// flags for jg below; movq does not modify flags
        movq [edi + ebx + 8], mm5           ;// out[8..15] of the current output row
        jg mc16pelsVertInterpolMMX_loop     ;// repeat while rows remain
        emms                                ;// reset MMX state before returning to C code
    }
}
/*
 * mc8pelsVertInterpolMMX - vertical half-pel interpolation of a block
 * 8 bytes wide and vSize rows high (one byte per pixel is assumed).
 *
 *   inpix   first byte of the top source row; vSize + 1 source rows are read
 *   outpix  first byte of the top destination row
 *   hdim    row stride in bytes, applied to both source and destination
 *   vSize   number of output rows; nothing is done when vSize <= 0
 *
 * Reference C, per row:
 *   for (i = 0; i < 8; i++) out[i] = (in[i] + in[hdim+i] + 1) >> 1;
 *   in += hdim;  out += hdim;
 *
 * MSVC inline assembly. Bytes are widened to 16-bit words against mm0 = 0 so
 * the sums cannot overflow, then packed back to bytes. Uses mm0-mm4, mm7 and
 * ends with emms.
 */
static void mc8pelsVertInterpolMMX( PIXEL const *inpix, PIXEL *outpix, int hdim, int vSize )
{
    // The asm loop tests its counter at the bottom and would therefore process
    // one row even for vSize <= 0; the reference loop is while (vSize > 0).
    if (vSize <= 0)
        return;
    __asm {
        xor ebx, ebx                        ;// ebx = byte offset of the current source row
        mov ecx, vSize                      ;// ecx = rows remaining
        mov esi, inpix
        mov edi, outpix
        sub edi, hdim                       ;// edi = outpix - hdim: ebx is already one row
                                            ;// ahead when the store is issued
        pxor mm0, mm0                       ;// mm0 = 0, used to widen bytes to words
        movq mm7, const_0x0001000100010001  ;// mm7 = rounding term, 1 in each word
    mc8pelsVertInterpolMMX_loop:
        movq mm1, [esi + ebx]               ;// upper row, in[0..7]
        add ebx, hdim                       ;// ebx now addresses the lower row
        movq mm2, mm1
        punpcklbw mm1, mm0                  ;// words in[0..3]
        movq mm3, [esi + ebx]               ;// lower row, in[hdim+0..7]
        punpckhbw mm2, mm0                  ;// words in[4..7]
        movq mm4, mm3
        punpcklbw mm3, mm0                  ;// words in[hdim+0..3]
        punpckhbw mm4, mm0                  ;// words in[hdim+4..7]
        paddw mm1, mm7                      ;// + 1
        paddw mm2, mm7                      ;// + 1
        paddw mm1, mm3                      ;// upper + lower + 1, columns 0..3
        paddw mm2, mm4                      ;// upper + lower + 1, columns 4..7
        psrlw mm1, 1
        psrlw mm2, 1
        dec ecx                             ;// flags for jg below; the MMX ops do not modify flags
        packuswb mm1, mm2                   ;// back to 8 bytes
        movq [edi + ebx], mm1               ;// out[0..7] of the current output row
        jg mc8pelsVertInterpolMMX_loop      ;// repeat while rows remain
        emms                                ;// reset MMX state before returning to C code
    }
}
static void mc4pelsVertInterpolMMX( PIXEL const *inpix, PIXEL *outpix, int hdim, int vSize )
{
__asm {
xor ebx, ebx
mov ecx, vSize
mov esi, inpix
mov edi, outpix
sub edi, hdim ;//edi = output - hdim
pxor mm0, mm0 ;// mm0 = 0
movq mm7, const_0x0001000100010001
mc4pelsVertInterpolMMX_loop:
movq mm1, [esi + ebx]
punpcklbw mm1, mm0 ;//expand lower 4 pix
add ebx, hdim
movq mm3, [esi + ebx]
punpcklbw mm3, mm0 ;//expand lower 4 pix
paddw mm1, mm7
paddw mm1, mm3
psrlw mm1, 1
packuswb mm1, mm0
movd [edi + ebx], mm1
dec ecx
jg mc4pelsVertInterpolMMX_loop
emms
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?