📄 mmxmotio.c
字号:
/* ***** BEGIN LICENSE BLOCK *****
 * Source last modified: $Id: mmxmotio.c,v 1.1.1.1.42.1 2004/07/09 01:56:22 hubbe Exp $
 *
 * Portions Copyright (c) 1995-2004 RealNetworks, Inc. All Rights Reserved.
 *
 * The contents of this file, and the files included with this file,
 * are subject to the current version of the RealNetworks Public
 * Source License (the "RPSL") available at
 * http://www.helixcommunity.org/content/rpsl unless you have licensed
 * the file under the current version of the RealNetworks Community
 * Source License (the "RCSL") available at
 * http://www.helixcommunity.org/content/rcsl, in which case the RCSL
 * will apply. You may also obtain the license terms directly from
 * RealNetworks. You may not use this file except in compliance with
 * the RPSL or, if you have a valid RCSL with RealNetworks applicable
 * to this file, the RCSL. Please see the applicable RPSL or RCSL for
 * the rights, obligations and limitations governing use of the
 * contents of the file.
 *
 * Alternatively, the contents of this file may be used under the
 * terms of the GNU General Public License Version 2 or later (the
 * "GPL") in which case the provisions of the GPL are applicable
 * instead of those above. If you wish to allow use of your version of
 * this file only under the terms of the GPL, and not to allow others
 * to use your version of this file under the terms of either the RPSL
 * or RCSL, indicate your decision by deleting the provisions above
 * and replace them with the notice and other provisions required by
 * the GPL. If you do not delete the provisions above, a recipient may
 * use your version of this file under the terms of any one of the
 * RPSL, the RCSL or the GPL.
 *
 * This file is part of the Helix DNA Technology. RealNetworks is the
 * developer of the Original Code and owns the copyrights in the
 * portions it created.
 *
 * This file, and the files included with this file, is distributed
 * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
 * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
 * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
 * ENJOYMENT OR NON-INFRINGEMENT.
 *
 * Technology Compatibility Kit Test Suite(s) Location:
 * http://www.helixcommunity.org/content/tck
 *
 * Contributor(s):
 *
 * ***** END LICENSE BLOCK ***** */

#include "dllindex.h"
#include "h261defs.h"
#include "h261func.h"

extern int g_saveMbHor;         // Hold on to size of Mb array (used by distUmv and
extern int g_saveMbVert;        // TryInter4V and its subroutines)
extern int g_saveUmvMode;       // Hold on to unrestrictedMv (used by RefineMotionVector)
extern int g_savePointOutside;  // Hold on to pointOutside (used by rmvDist)
extern int g_distPixels;        // Number of pixels used in squared error computations

/* 64-bit constants loaded into MMX registers by the inline assembly in this file. */
static const __int64 const_0x00ff00ff00ff00ff = 0x00ff00ff00ff00ff; //0 2 4 6
static const __int64 const_0x000000ff000000ff = 0x000000ff000000ff; //0 4
static const __int64 const_0x00ff000000ff0000 = 0x00ff000000ff0000; //2 6
static const __int64 const_0x00ff00ff000000ff = 0x00ff00ff000000ff; //0 4 6 == shift 4 8 10
static const __int64 const_0x0001000100010001 = 0x0001000100010001; // four 16-bit words, each 1 (rounding term)
static const __int64 const_0x0002000200020002 = 0x0002000200020002; // four 16-bit words, each 2

/*-- mcomp.c -----------------------------------------------------------------------------------*/

/*
 * General notes for the MMX routines below
 *  - ebx holds the running row offset (advanced by hdim each row), ecx the
 *    remaining row count, esi/edi the input/output base pointers.
 *  - The loops test the row count only after a row has been processed
 *    ("dec ecx ... jg"); the MMX instructions between dec and jg do not
 *    modify the flags.  Each routine therefore returns early when
 *    vSize <= 0 so that, like mc4pelsNoInterpolMMX, nothing is touched
 *    for an empty block.
 *  - Every routine executes emms before leaving the asm block so the x87
 *    state is usable again.
 *  - "pel" counts below assume PIXEL is one byte (the code unpacks bytes to
 *    words with punpcklbw/punpckhbw) -- TODO confirm against h261defs.h.
 */

/*
 * Copy a block 16 bytes wide and vSize rows high from inpix to outpix.
 * Both buffers use the same row stride, hdim.
 *
 * Equivalent C, per row: copy 16 bytes from in to out, then advance both
 * pointers by hdim.
 */
static void mc16pelsNoInterpolMMX( PIXEL *inpix, PIXEL *outpix, int hdim, int vSize )
{
    if (vSize <= 0) {
        return;     /* asm loop below would otherwise copy one row */
    }
    __asm {
        mov         ecx, vSize
        mov         ebx, hdim
        mov         esi, inpix
        neg         ebx                     ;// ebx = -hdim, becomes 0 on first pass
        mov         edi, outpix
mc16pelsNoInterpolMMX_loop:
        add         ebx, hdim               ;// offset of current row
        dec         ecx                     ;// sets flags for the jg below
        movq        mm0, [esi + ebx]
        movq        mm1, [esi + ebx + 8]
        movq        [edi + ebx], mm0
        movq        [edi + ebx + 8], mm1
        jg          mc16pelsNoInterpolMMX_loop
        emms
    }
}

/*
 * Copy a block 8 bytes wide and vSize rows high from inpix to outpix.
 * Both buffers use the same row stride, hdim.
 *
 * Equivalent C, per row: copy 8 bytes from in to out, then advance both
 * pointers by hdim.
 */
static void mc8pelsNoInterpolMMX( PIXEL *inpix, PIXEL *outpix, int hdim, int vSize )
{
    if (vSize <= 0) {
        return;     /* asm loop below would otherwise copy one row */
    }
    __asm {
        mov         ecx, vSize
        mov         ebx, hdim
        mov         esi, inpix
        neg         ebx                     ;// ebx = -hdim, becomes 0 on first pass
        mov         edi, outpix
mc8pelsNoInterpolMMX_loop:
        add         ebx, hdim               ;// offset of current row
        dec         ecx                     ;// sets flags for the jg below
        movq        mm0, [esi + ebx]
        movq        [edi + ebx], mm0
        jg          mc8pelsNoInterpolMMX_loop
        emms
    }
}

/*
 * Copy a block 4 bytes wide and vSize rows high from inpix to outpix,
 * one U32 per row.  Both buffers use the same row stride, hdim.
 * Does nothing when vSize <= 0.
 */
static void mc4pelsNoInterpolMMX( PIXEL *inpix, PIXEL *outpix, int hdim, int vSize )
{
    int rowcount = 0;   /* offset of the current row from the block start */

    while (vSize > 0) {
        *((U32 *)(outpix + rowcount)) = *((U32 *)(inpix + rowcount));
        rowcount += hdim;
        vSize--;
    }
}

/*
 * Horizontal averaging of a 16-pel-wide block, vSize rows high.
 *
 * Equivalent C, per row:
 *     out[i] = (in[i] + in[i+1] + 1) >> 1,   i = 0..15
 * then in and out advance by hdim.
 *
 * Note: each row is fetched with three 8-byte loads at offsets +0, +8 and
 * +16.  Only the byte at +16 of the last load is used, but bytes +17..+23
 * of every row must still be readable.
 */
static void mc16pelsHorInterpolMMX( PIXEL const *inpix, PIXEL *outpix, int hdim, int vSize )
{
    if (vSize <= 0) {
        return;     /* asm loop below would otherwise process one row */
    }
    __asm {
        xor         ebx, ebx
        mov         ecx, vSize
        mov         esi, inpix
        mov         edi, outpix
        pxor        mm0, mm0                ;// mm0 = 0
        movq        mm7, const_0x0001000100010001
mc16pelsHorInterpolMMX_loop:
        movq        mm1, [esi + ebx]
        movq        mm4, [esi + ebx +8]
        movq        mm3, mm1                ;//save
        movq        mm5, mm4                ;//save
        psrlq       mm3, 8                  ;//shift one byte
        movq        mm2, mm1
        psllq       mm4, 56                 ;//shift 7 bytes, LSB is in top pos of mm4
        punpcklbw   mm1, mm0                ;//expand lower 4 pix     pos: + 0
        pxor        mm3, mm4                ;//equivalent to read from +1
        punpckhbw   mm2, mm0                ;//expand higher 4 pix    pos: + 4
        movq        mm4, mm3
        punpcklbw   mm3, mm0                ;//expand lower 4 pix     pos: + 1
        paddw       mm1, mm7                ;//add 1
        punpckhbw   mm4, mm0                ;//expand higher 4 pix    pos: + 5
        paddw       mm2, mm7                ;//add 1
        paddw       mm1, mm3                ;//pos: +0/+1 add
        paddw       mm2, mm4                ;//pos: +4/+5 add
        psrlw       mm1, 1                  ;//pos: +0/+1 /2
        movq        mm6, mm5
        psrlw       mm2, 1                  ;//pos: +4/+5 /2
        packuswb    mm1, mm2
        movq        mm3, mm5                ;//save
        punpcklbw   mm5, mm0                ;//expand lower 4 pix     pos: + 8
        movq        [edi + ebx], mm1
        punpckhbw   mm6, mm0                ;//expand higher 4 pix    pos: + 12
        movq        mm4, [esi + ebx + 16]
        psrlq       mm3, 8                  ;//shift one byte
        paddw       mm5, mm7                ;//add 1
        psllq       mm4, 56                 ;//shift 7 bytes, LSB is in top pos of mm4
        paddw       mm6, mm7                ;//add 1
        pxor        mm3, mm4                ;//equivalent to read from +9
        movq        mm4, mm3
        punpcklbw   mm3, mm0                ;//expand lower 4 pix     pos: + 9
        punpckhbw   mm4, mm0                ;//expand higher 4 pix    pos: + 13
        paddw       mm5, mm3                ;//pos: +8/+9 add
        paddw       mm6, mm4                ;//pos: +12/+13 add
        psrlw       mm5, 1                  ;//pos: +8/+9 /2
        psrlw       mm6, 1                  ;//pos: +12/+13 /2
        packuswb    mm5, mm6
        movq        [edi + ebx + 8], mm5
        add         ebx, hdim
        dec         ecx
        jg          mc16pelsHorInterpolMMX_loop
        emms
    }
}

/*
 * Horizontal averaging of an 8-pel-wide block, vSize rows high.
 *
 * Equivalent C, per row:
 *     out[i] = (in[i] + in[i+1] + 1) >> 1,   i = 0..7
 * then in and out advance by hdim.
 *
 * Note: each row is fetched with two 8-byte loads at offsets +0 and +8.
 * Only the byte at +8 of the second load is used, but bytes +9..+15 of
 * every row must still be readable.
 */
static void mc8pelsHorInterpolMMX( PIXEL const *inpix, PIXEL *outpix, int hdim, int vSize )
{
    if (vSize <= 0) {
        return;     /* asm loop below would otherwise process one row */
    }
    __asm {
        xor         ebx, ebx
        mov         ecx, vSize
        mov         esi, inpix
        mov         edi, outpix
        pxor        mm0, mm0                ;// mm0 = 0
        movq        mm7, const_0x0001000100010001
mc8pelsHorInterpolMMX_loop:
        movq        mm1, [esi + ebx]
        movq        mm4, [esi + ebx +8]
        movq        mm3, mm1
        psllq       mm4, 56                 ;//shift 7 bytes, LSB is in top pos of mm4
        movq        mm2, mm1
        psrlq       mm3, 8                  ;//shift one byte
        punpcklbw   mm1, mm0                ;//expand lower 4 pix
        pxor        mm3, mm4                ;// mm3 = bytes +1..+8 (halves do not overlap)
        punpckhbw   mm2, mm0                ;//expand higher 4 pix
        movq        mm4, mm3
        punpcklbw   mm3, mm0                ;//expand lower 4 pix
        paddw       mm1, mm7                ;// rounding term
        punpckhbw   mm4, mm0                ;//expand higher 4 pix
        paddw       mm2, mm7                ;// rounding term
        paddw       mm1, mm3
        paddw       mm2, mm4
        psrlw       mm1, 1
        psrlw       mm2, 1
        packuswb    mm1, mm2
        movq        [edi + ebx], mm1
        add         ebx, hdim
        dec         ecx
        jg          mc8pelsHorInterpolMMX_loop
        emms
    }
}

/*
 * Horizontal averaging of a 4-pel-wide block, vSize rows high.
 *
 * Equivalent C, per row:
 *     out[i] = (in[i] + in[i+1] + 1) >> 1,   i = 0..3
 * then in and out advance by hdim.
 *
 * Note: each row is fetched with one 8-byte load although only bytes
 * +0..+4 are used; bytes +5..+7 of every row must still be readable.
 * Only 4 bytes are written per row (movd).
 */
static void mc4pelsHorInterpolMMX( PIXEL const *inpix, PIXEL *outpix, int hdim, int vSize )
{
    if (vSize <= 0) {
        return;     /* asm loop below would otherwise process one row */
    }
    __asm {
        xor         ebx, ebx
        mov         ecx, vSize
        mov         esi, inpix
        mov         edi, outpix
        pxor        mm0, mm0                ;// mm0 = 0
        movq        mm7, const_0x0001000100010001
mc4pelsHorInterpolMMX_loop:
        movq        mm1, [esi + ebx]
        movq        mm3, mm1
        punpcklbw   mm1, mm0                ;//expand lower 4 pix
        psrlq       mm3, 8                  ;//shift one byte
        punpcklbw   mm3, mm0                ;//expand lower 4 pix
        paddw       mm1, mm7                ;// rounding term
        paddw       mm1, mm3
        psrlw       mm1, 1
        packuswb    mm1, mm0
        movd        [edi + ebx], mm1
        add         ebx, hdim
        dec         ecx
        jg          mc4pelsHorInterpolMMX_loop
        emms
    }
}

/*
 * Vertical averaging of a 16-pel-wide block, vSize rows high.
 *
 * Equivalent C, per row:
 *     out[i] = (in[i] + in[hdim + i] + 1) >> 1,   i = 0..15
 * then in and out advance by hdim.  Input row vSize (one past the block)
 * is therefore read.
 *
 * ebx is advanced to the next row in the middle of the loop body, so edi
 * is pre-biased by -hdim to make [edi + ebx] address the current output row.
 */
static void mc16pelsVertInterpolMMX( PIXEL const *inpix, PIXEL *outpix, int hdim, int vSize )
{
    if (vSize <= 0) {
        return;     /* asm loop below would otherwise process one row */
    }
    __asm {
        xor         ebx, ebx
        mov         ecx, vSize
        mov         esi, inpix
        mov         edi, outpix
        sub         edi, hdim               ;//edi = output - hdim
        pxor        mm0, mm0                ;// mm0 = 0
        movq        mm7, const_0x0001000100010001
mc16pelsVertInterpolMMX_loop:
        movq        mm1, [esi + ebx]
        movq        mm2, mm1
        movq        mm5, [esi + ebx + 8]
        punpcklbw   mm1, mm0                ;//expand lower 4 pix
        movq        mm6, mm5
        punpckhbw   mm2, mm0                ;//expand higher 4 pix
        add         ebx, hdim               ;// ebx now addresses the row below
        punpcklbw   mm5, mm0                ;//expand lower 4 pix
        paddw       mm1, mm7                ;// rounding term
        punpckhbw   mm6, mm0                ;//expand higher 4 pix
        movq        mm3, [esi + ebx]
        paddw       mm2, mm7                ;// rounding term
        movq        mm4, mm3
        punpcklbw   mm3, mm0                ;//expand lower 4 pix
        punpckhbw   mm4, mm0                ;//expand higher 4 pix
        paddw       mm1, mm3
        paddw       mm2, mm4
        psrlw       mm1, 1
        psrlw       mm2, 1
        movq        mm3, [esi + ebx + 8]
        packuswb    mm1, mm2
        movq        mm4, mm3
        movq        [edi + ebx], mm1
        punpcklbw   mm3, mm0                ;//expand lower 4 pix
        paddw       mm5, mm7                ;// rounding term
        punpckhbw   mm4, mm0                ;//expand higher 4 pix
        paddw       mm6, mm7                ;// rounding term
        paddw       mm5, mm3
        paddw       mm6, mm4
        psrlw       mm5, 1
        psrlw       mm6, 1
        packuswb    mm5, mm6
        dec         ecx                     ;// sets flags for the jg below
        movq        [edi + ebx + 8], mm5
        jg          mc16pelsVertInterpolMMX_loop
        emms
    }
}

/*
 * Vertical averaging of an 8-pel-wide block, vSize rows high.
 *
 * Equivalent C, per row:
 *     out[i] = (in[i] + in[hdim + i] + 1) >> 1,   i = 0..7
 * then in and out advance by hdim.  Input row vSize (one past the block)
 * is therefore read.
 *
 * ebx is advanced to the next row in the middle of the loop body, so edi
 * is pre-biased by -hdim to make [edi + ebx] address the current output row.
 */
static void mc8pelsVertInterpolMMX( PIXEL const *inpix, PIXEL *outpix, int hdim, int vSize )
{
    if (vSize <= 0) {
        return;     /* asm loop below would otherwise process one row */
    }
    __asm {
        xor         ebx, ebx
        mov         ecx, vSize
        mov         esi, inpix
        mov         edi, outpix
        sub         edi, hdim               ;//edi = output - hdim
        pxor        mm0, mm0                ;// mm0 = 0
        movq        mm7, const_0x0001000100010001
mc8pelsVertInterpolMMX_loop:
        movq        mm1, [esi + ebx]
        add         ebx, hdim               ;// ebx now addresses the row below
        movq        mm2, mm1
        punpcklbw   mm1, mm0                ;//expand lower 4 pix
        movq        mm3, [esi + ebx]
        punpckhbw   mm2, mm0                ;//expand higher 4 pix
        movq        mm4, mm3
        punpcklbw   mm3, mm0                ;//expand lower 4 pix
        punpckhbw   mm4, mm0                ;//expand higher 4 pix
        paddw       mm1, mm7                ;// rounding term
        paddw       mm2, mm7                ;// rounding term
        paddw       mm1, mm3
        paddw       mm2, mm4
        psrlw       mm1, 1
        psrlw       mm2, 1
        dec         ecx                     ;// sets flags for the jg below
        packuswb    mm1, mm2
        movq        [edi + ebx], mm1
        jg          mc8pelsVertInterpolMMX_loop
        emms
    }
}

/* Storage-class specifier of the definition that starts on the following source line. */
static
void mc4pelsVertInterpolMMX( PIXEL const *inpix, PIXEL *outpix, int hdim, int vSize ){__asm { xor ebx, ebx mov ecx, vSize mov esi, inpix mov edi, outpix sub edi, hdim ;//edi = output - hdim pxor mm0, mm0 ;// mm0 = 0 movq mm7, const_0x0001000100010001mc4pelsVertInterpolMMX_loop: movq mm1, [esi + ebx] punpcklbw mm1, mm0 ;//expand lower 4 pix add ebx, hdim movq mm3, [esi + ebx] punpcklbw mm3, mm0 ;//expand lower 4 pix paddw mm1, mm7 paddw mm1, mm3 psrlw mm1, 1 packuswb mm1, mm0 movd [edi + ebx], mm1 dec ecx jg mc4pelsVertInterpolMMX_loop emms }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -