/* mmxdeblo.c
 * From "Helix player source code for Symbian" - C source, 542 lines.
 * (Header left over from the web page this listing was copied from.)
 */
/* ***** BEGIN LICENSE BLOCK *****
* Source last modified: $Id: Mmxdeblo.c,v 1.1.1.1.42.1 2004/07/09 01:56:22 hubbe Exp $
*
* Portions Copyright (c) 1995-2004 RealNetworks, Inc. All Rights Reserved.
*
* The contents of this file, and the files included with this file,
* are subject to the current version of the RealNetworks Public
* Source License (the "RPSL") available at
* http://www.helixcommunity.org/content/rpsl unless you have licensed
* the file under the current version of the RealNetworks Community
* Source License (the "RCSL") available at
* http://www.helixcommunity.org/content/rcsl, in which case the RCSL
* will apply. You may also obtain the license terms directly from
* RealNetworks. You may not use this file except in compliance with
* the RPSL or, if you have a valid RCSL with RealNetworks applicable
* to this file, the RCSL. Please see the applicable RPSL or RCSL for
* the rights, obligations and limitations governing use of the
* contents of the file.
*
* Alternatively, the contents of this file may be used under the
* terms of the GNU General Public License Version 2 or later (the
* "GPL") in which case the provisions of the GPL are applicable
* instead of those above. If you wish to allow use of your version of
* this file only under the terms of the GPL, and not to allow others
* to use your version of this file under the terms of either the RPSL
* or RCSL, indicate your decision by deleting the provisions above
* and replace them with the notice and other provisions required by
* the GPL. If you do not delete the provisions above, a recipient may
* use your version of this file under the terms of any one of the
* RPSL, the RCSL or the GPL.
*
* This file is part of the Helix DNA Technology. RealNetworks is the
* developer of the Original Code and owns the copyrights in the
* portions it created.
*
* This file, and the files included with this file, is distributed
* and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
* KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
* ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
* ENJOYMENT OR NON-INFRINGEMENT.
*
* Technology Compatibility Kit Test Suite(s) Location:
* http://www.helixcommunity.org/content/tck
*
* Contributor(s):
*
* ***** END LICENSE BLOCK ***** */
/*
#ifdef __ICL
#pragma message ("Attention: Intel Compiler")
#else
#pragma message ("Attention: Non Intel Compiler")
#endif
*/
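//disable the "function has no EMMS instruction" warning (C4799):
//the MMX routines in this file do not execute EMMS themselves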
#pragma warning(disable:4799)
//#include <string.h>
#include "dllindex.h"
#include "h261defs.h"
#include "h261func.h"
#include "h263plus.h"
#if defined(COMPILE_MMX)
#if (_MSC_VER>=1100)
// Packed constants used by the MMX deblocking filters below to clamp the
// filter delta to [-qp, +qp]. Each is a 64-bit value treated as 4 * short
// (four 16-bit lanes); they are defined elsewhere.
// NOTE(review): "max" is presumably the largest signed 16-bit value
// (0x7FFF), since the filters add g_max_qp with signed saturation - confirm
// against the code that initialises these.
extern __int64 g_qp; // qp, subtracted last to re-centre the clamped value
extern __int64 g_max_qp; // max - qp
extern __int64 g_max_2qp; // max - 2 * qp
/*
 * ApplyHorizontalDeblockingFilterMMX
 *
 * Filters 8 horizontally adjacent pixels across the edge between an upper
 * and a lower block, using MMX (MSVC inline assembly).
 *
 * Four rows of 8 bytes each are read:
 *   A = next_to_top        (top + 6*offset)
 *   B = top                (top + 7*offset)
 *   C = bottom             (bottom)
 *   D = next_after_bottom  (bottom + offset)
 * and rows B and C are rewritten:
 *   d  = (3*(A - D) - 8*(B - C)) >> 4          (arithmetic shift)
 *   d1 = limit(-qp, 2*d, qp) - limit(-qp, d, qp)
 *   B' = clip_0_255(B + d1)
 *   C' = clip_0_255(C - d1)
 *
 * Parameters:
 *   top    - pointer into the upper block; the code advances it by
 *            7*offset before use, so the caller passes the row that lies
 *            seven rows above the edge row.
 *   bottom - pointer to the first row below the edge.
 *   offset - distance, in PIXEL elements, between consecutive rows.
 *
 * Notes:
 *   - Each row is accessed with 8-byte movq loads/stores and unpacked
 *     byte -> word, i.e. the code assumes PIXEL is an 8-bit type
 *     (TODO confirm against the PIXEL typedef).
 *   - The rows at `bottom` and `top` are temporarily overwritten with
 *     16-bit intermediate data (see "park top" below) and only receive
 *     valid pixels again at the very end of the routine.
 *   - eax, ebx, ecx and mm0-mm7 are overwritten. No EMMS is executed here
 *     (warning 4799 is disabled for this file); presumably the caller is
 *     responsible for it - verify.
 *   - Lines tagged `//--` appear to be instructions interleaved from a
 *     neighbouring step, presumably for scheduling; their position relative
 *     to the register uses around them matters.
 */
void ApplyHorizontalDeblockingFilterMMX( PIXEL * top, PIXEL * bottom, int offset)
{
PIXEL *next_to_top;
PIXEL *next_after_bottom;
// Move `top` to the last row of the upper block (the row touching the edge).
top += 7*offset;
// A: row above the edge row; D: row below the first row of the lower block.
next_to_top = top - offset;
next_after_bottom = bottom + offset;
__asm {
// ---- load A (next_to_top) and D (next_after_bottom), widen to words ----
// A -> mm0 (low 4 pixels), mm1 (high 4 pixels)
// D -> mm4 (low 4 pixels), mm5 (high 4 pixels)
mov eax, next_to_top
pxor mm7, mm7; // mm7 == 0, used as the zero source for byte -> word unpack
movq mm0, [eax]
mov eax, next_after_bottom;//--
movq mm1, mm0
punpcklbw mm0, mm7
movq mm4, [eax];//--
punpckhbw mm1, mm7
movq mm5, mm4
punpcklbw mm4, mm7
mov ebx, bottom;//--
punpckhbw mm5, mm7
// ---- mm0,mm1 = A - D (next_to_top - next_after_bottom) ----
psubw mm0, mm4
psubw mm1, mm5
// ---- mm0,mm1 = 3*(A - D), computed as x + (x << 1) ----
movq mm2, mm0
movq mm3, mm1
psllw mm2, 1
psllw mm3, 1
paddw mm0, mm2
paddw mm1, mm3
// ---- load C (bottom) and B (top), widen to words ----
// C -> mm4 (low), mm5 (high); B -> mm2 (low), mm3 (high)
// ebx = bottom row address, ecx = top row address from here on.
movq mm4, [ebx]
mov ecx, top;//--
movq mm5, mm4
punpcklbw mm4, mm7
movq mm2, [ecx];//--
punpckhbw mm5, mm7
movq mm3, mm2
punpcklbw mm2, mm7
punpckhbw mm3, mm7
// ---- park top ----
// The widened B values are needed again at the end but all registers are
// in use, so they are stored into the image itself: low 4 words over the
// bottom row ([ebx]), high 4 words over the top row ([ecx]). Both rows are
// rewritten with final pixel data before the routine ends.
movq [ebx], mm2
movq [ecx], mm3
// C stays in mm4,mm5
// ---- mm2,mm3 = 8*(B - C) ----
psubw mm2, mm4
psubw mm3, mm5
psllw mm2, 3
psllw mm3, 3
// ---- mm0,mm1 = 3*(A - D) - 8*(B - C) ----
psubw mm0, mm2
psubw mm1, mm3
// ---- d = that value >> 4 (arithmetic shift, i.e. rounds toward -inf) ----
psraw mm0, 4
psraw mm1, 4
/** DiffCutoff **/
/** d1 = limit(-qp, 2*d, qp) - limit(-qp, d, qp) **/
// mm2,mm3 keep d (single argument); mm0,mm1 become 2*d (double argument)
movq mm2, mm0
movq mm3, mm1
psllw mm0, 1;//--
movq mm6, g_max_qp
psllw mm1, 1
/* limit all four registers to [-qp, +qp] */
// step 1: add (max-qp) with signed saturation -> enforces the upper limit
paddsw mm0, mm6
paddsw mm2, mm6
movq mm7, g_max_2qp
paddsw mm1, mm6
paddsw mm3, mm6
// step 2: subtract (max-2qp) with unsigned saturation -> value is now
// (x + qp) floored at 0, which enforces the lower limit
psubusw mm0, mm7
psubusw mm2, mm7
movq mm6, g_qp
psubusw mm1, mm7
psubusw mm3, mm7
// step 3: subtract qp to remove the bias left by steps 1 and 2
psubw mm0, mm6
psubw mm2, mm6
psubw mm1, mm6
psubw mm3, mm6
// ---- d1 = limit(2*d) - limit(d) -> mm0 (low), mm1 (high) ----
psubw mm0, mm2
psubw mm1, mm3
// ---- C' = C - d1 (C is still in mm4,mm5) ----
psubw mm4, mm0
psubw mm5, mm1
// B' low half = d1 + parked B low words (must be read before [ebx] is
// overwritten with the final bottom row below)
paddw mm0, [ebx];//--
// clip C' to [0,255] and pack to 8 bytes
packuswb mm4, mm5;//--
// B' high half = d1 + parked B high words
paddw mm1, [ecx]
// write back bottom row
movq [ebx], mm4;//--
// clip B' to [0,255]: word -> unsigned byte with saturation
packuswb mm0, mm1
// write back top row
movq [ecx], mm0
}
}
////////////////////////////////////////////////////////////////////////////////
/*
 * ApplyVerticalDeblockingFilterMMX
 *
 * Filters 8 vertically adjacent pixels across the edge between a left and
 * a right block, using MMX (MSVC inline assembly).
 *
 * For each of 8 rows (named a..h below) the four pixels straddling the edge
 * are used. With L = left + 7 (the adjusted `left` pointer):
 *   A = L[-1], B = L[0], C = right[0], D = right[1]
 * The 8 rows are gathered into four 64-bit temporaries, transposed in
 * registers into four 8-pixel vectors (one per column), and then filtered
 * with the same arithmetic as the horizontal filter:
 *   d  = (3*(A - D) - 8*(B - C)) >> 4          (arithmetic shift)
 *   d1 = limit(-qp, 2*d, qp) - limit(-qp, d, qp)
 *   B' = clip_0_255(B + d1)
 *   C' = clip_0_255(C - d1)
 * Finally column B (L[0]) and column C (right[0]) are written back.
 *
 * Parameters:
 *   left   - pointer into the left block; the code advances it by 7, so the
 *            caller passes the pixel seven columns left of the edge column.
 *   right  - pointer to the first pixel right of the edge, same row.
 *   offset - distance, in PIXEL elements, between consecutive rows.
 *
 * Notes:
 *   - Pixels are gathered with 16/32-bit accesses and processed as bytes,
 *     i.e. the code assumes PIXEL is an 8-bit type (TODO confirm).
 *   - qw0/qw1 are reused: first as gather buffers for the left columns,
 *     then as storage for the widened B values, and finally as the output
 *     buffers (qw0 = C' for right[0], qw1 = B' for L[0]).
 *   - mm0-mm7 are overwritten. No EMMS is executed here (warning 4799 is
 *     disabled for this file); presumably the caller is responsible for it
 *     - verify.
 */
void ApplyVerticalDeblockingFilterMMX( PIXEL * left, PIXEL * right, int offset)
{
//pixels read after the `left += 7` adjustment below:
//left[-1, 0, -1+offset, 0+offset, ..., -1+7*offset, 0+7*offset]
//right[0, +1, 0+offset, +1+offset, ..., 0+7*offset, +1+7*offset]
__int64 qw0, qw1; // left-side gather buffers, later scratch / output
__int64 qw2, qw3; // right-side gather buffers, qw2 later holds column C
PIXEL *tleft_1, *t2left, *tright, *t2right;
S32 ii;
// Move `left` to the last column of the left block (touching the edge).
left += 7;
tleft_1 = left - 1; // read cursor: pixel pair (L[-1], L[0]) of each row
t2left = left; // write cursor for column B (L[0])
tright = right; // read cursor: pixel pair (right[0], right[1])
t2right = right; // write cursor for column C (right[0])
//rearranging data into vectors of 8 bytes
//Each 16-bit word x1x0 holds the two adjacent pixels of row x
//(x0 = lower address). Word order is chosen so that the unpack sequence
//in the asm block below yields the rows in order a..h.
//For rows a, b and c a 32-bit copy is used; the upper 16 bits it writes
//(words 1 or 3) are overwritten by the 16-bit stores for rows e, f and g.
//arrange qw1 == h1h0 d1d0 f1f0 b1b0 and qw0 == g1g0 c1c0 e1e0 a1a0
*((U32 *)&(((U16 *)&qw0)[0])) = *((U32 *)(tleft_1));//a
tleft_1 += offset;
*((U32 *)&(((U16 *)&qw1)[0])) = *((U32 *)(tleft_1));//b
tleft_1 += offset;
*((U32 *)&(((U16 *)&qw0)[2])) = *((U32 *)(tleft_1));//c
tleft_1 += offset;
*((U16 *)&(((U16 *)&qw1)[2])) = *((U16 *)(tleft_1));//d
tleft_1 += offset;
*((U16 *)&(((U16 *)&qw0)[1])) = *((U16 *)(tleft_1));//e
tleft_1 += offset;
*((U16 *)&(((U16 *)&qw1)[1])) = *((U16 *)(tleft_1));//f
tleft_1 += offset;
*((U16 *)&(((U16 *)&qw0)[3])) = *((U16 *)(tleft_1));//g
tleft_1 += offset;
*((U16 *)&(((U16 *)&qw1)[3])) = *((U16 *)(tleft_1));//h
//same layout for the right-hand pixel pairs:
//arrange qw3 == h1h0 d1d0 f1f0 b1b0 and qw2 == g1g0 c1c0 e1e0 a1a0
*((U32 *)&(((U16 *)&qw2)[0])) = *((U32 *)(tright));//a
tright += offset;
*((U32 *)&(((U16 *)&qw3)[0])) = *((U32 *)(tright));//b
tright += offset;
*((U32 *)&(((U16 *)&qw2)[2])) = *((U32 *)(tright));//c
tright += offset;
*((U16 *)&(((U16 *)&qw3)[2])) = *((U16 *)(tright));//d
tright += offset;
*((U16 *)&(((U16 *)&qw2)[1])) = *((U16 *)(tright));//e
tright += offset;
*((U16 *)&(((U16 *)&qw3)[1])) = *((U16 *)(tright));//f
tright += offset;
*((U16 *)&(((U16 *)&qw2)[3])) = *((U16 *)(tright));//g
tright += offset;
*((U16 *)&(((U16 *)&qw3)[3])) = *((U16 *)(tright));//h
__asm {
/////////////////////////////////////////////////
// Transpose the right-hand pairs into two 8-pixel column vectors.
movq mm0, [qw2]
movq mm1, [qw3]
movq mm4, mm0
//mm1==h1h0d1d0f1f0b1b0 and mm0==g1g0c1c0e1e0a1a0
// to
//mm4==h1g1h0g0d1c1d0c0 and mm0==f1e1f0e0b1a1b0a0
punpcklbw mm0, mm1
punpckhbw mm4, mm1
//mm4==h1g1h0g0d1c1d0c0 and mm0==f1e1f0e0b1a1b0a0
// to
//mm1==h1g1f1e1h0g0f0e0 and mm0==d1c1b1a1d0c0b0a0
movq mm1, mm0
punpcklwd mm0, mm4
punpckhwd mm1, mm4
//mm1==h1g1f1e1h0g0f0e0 and mm0==d1c1b1a1d0c0b0a0
// to
//mm4==h1g1f1e1d1c1b1a1==next_after_bottom (column right[1])
//mm0==h0g0f0e0d0c0b0a0==bottom (column right[0])
movq mm4, mm0
punpckldq mm0, mm1
punpckhdq mm4, mm1
// park column C in qw2; it is reloaded once mm0 has been consumed
movq [qw2], mm0;//save bottom
/////////////////////////////////////////////////
// Transpose the left-hand pairs the same way.
movq mm0, [qw0]
movq mm1, [qw1]
movq mm6, mm0
//mm1==h1h0d1d0f1f0b1b0 and mm0==g1g0c1c0e1e0a1a0
// to
//mm6==h1g1h0g0d1c1d0c0 and mm0==f1e1f0e0b1a1b0a0
punpcklbw mm0, mm1
punpckhbw mm6, mm1
//mm6==h1g1h0g0d1c1d0c0 and mm0==f1e1f0e0b1a1b0a0
// to
//mm1==h1g1f1e1h0g0f0e0 and mm0==d1c1b1a1d0c0b0a0
movq mm1, mm0
punpcklwd mm0, mm6
punpckhwd mm1, mm6
//mm1==h1g1f1e1h0g0f0e0 and mm0==d1c1b1a1d0c0b0a0
// to
//mm6==h1g1f1e1d1c1b1a1==top (column L[0])
//mm0==h0g0f0e0d0c0b0a0==next_to_top (column L[-1])
movq mm6, mm0
punpckldq mm0, mm1
punpckhdq mm6, mm1
/////////////////////////////////////////////////
// From here on the arithmetic mirrors the horizontal filter, with
// A = mm0 (next_to_top), B = mm6 (top), C = [qw2] (bottom),
// D = mm4 (next_after_bottom).
// ---- widen A -> mm0 (low), mm1 (high); D -> mm4 (low), mm5 (high) ----
pxor mm7, mm7; // mm7 == 0, zero source for byte -> word unpack
movq mm1, mm0
punpcklbw mm0, mm7
punpckhbw mm1, mm7
movq mm5, mm4
punpcklbw mm4, mm7
punpckhbw mm5, mm7
// ---- mm0,mm1 = A - D (next_to_top - next_after_bottom) ----
psubw mm0, mm4
psubw mm1, mm5
// ---- mm0,mm1 = 3*(A - D), computed as x + (x << 1) ----
movq mm2, mm0
movq mm3, mm1
psllw mm2, 1
psllw mm3, 1
paddw mm0, mm2
paddw mm1, mm3
// ---- reload C from qw2 and widen -> mm4 (low), mm5 (high) ----
movq mm4, [qw2]
movq mm5, mm4
punpcklbw mm4, mm7
punpckhbw mm5, mm7
// ---- widen B -> mm6 (low), mm3 (high) ----
movq mm3, mm6
punpcklbw mm6, mm7
punpckhbw mm3, mm7
// ---- park widened B: qw0 = low 4 words, qw1 = high 4 words ----
// (the gathered left-hand data in qw0/qw1 is no longer needed)
movq [qw0], mm6
movq [qw1], mm3
// C stays in mm4,mm5
// ---- mm6,mm3 = 8*(B - C) ----
psubw mm6, mm4
psubw mm3, mm5
psllw mm6, 3
psllw mm3, 3
// ---- mm0,mm1 = 3*(A - D) - 8*(B - C) ----
psubw mm0, mm6
psubw mm1, mm3
// ---- d = that value >> 4 (arithmetic shift, i.e. rounds toward -inf) ----
psraw mm0, 4
psraw mm1, 4
/** DiffCutoff **/
/** d1 = limit(-qp, 2*d, qp) - limit(-qp, d, qp) **/
// mm2,mm3 keep d (single argument); mm0,mm1 become 2*d (double argument)
movq mm2, mm0
movq mm3, mm1
psllw mm0, 1;//--
movq mm6, g_max_qp
psllw mm1, 1
/* limit all four registers to [-qp, +qp] */
// step 1: add (max-qp) with signed saturation -> enforces the upper limit
paddsw mm0, mm6
paddsw mm2, mm6
movq mm7, g_max_2qp
paddsw mm1, mm6
paddsw mm3, mm6
// step 2: subtract (max-2qp) with unsigned saturation -> value is now
// (x + qp) floored at 0, which enforces the lower limit
psubusw mm0, mm7
psubusw mm2, mm7
movq mm6, g_qp
psubusw mm1, mm7
psubusw mm3, mm7
// step 3: subtract qp to remove the bias left by steps 1 and 2
psubw mm0, mm6
psubw mm2, mm6
psubw mm1, mm6
psubw mm3, mm6
// ---- d1 = limit(2*d) - limit(d) -> mm0 (low), mm1 (high) ----
psubw mm0, mm2
psubw mm1, mm3
// ---- C' = C - d1 (C is still in mm4,mm5) ----
psubw mm4, mm0
psubw mm5, mm1
// B' low half = d1 + parked B low words (must be read before qw0 is
// overwritten with the packed C' below)
paddw mm0, [qw0];//--
// clip C' to [0,255] and pack to 8 bytes
packuswb mm4, mm5;//--
// B' high half = d1 + parked B high words
paddw mm1, [qw1]
// qw0 now holds the 8 filtered bytes of column C (right[0])
movq [qw0], mm4;//--
// clip B' to [0,255]: word -> unsigned byte with saturation
packuswb mm0, mm1
// qw1 now holds the 8 filtered bytes of column B (L[0])
movq [qw1], mm0
}
//rearrange data back: scatter byte ii of qw0 to right[0] and byte ii of
//qw1 to L[0] of row ii. Two rows are written per iteration; the cursors are
//advanced once inside the body and once in the for-increment.
for(ii=0; ii<8; ii+=2, t2left+=offset, t2right+=offset) {
*t2right = ((U8 *)&qw0)[ii];
*t2left = ((U8 *)&qw1)[ii];
//unroll loop
t2left+=offset;
t2right+=offset;
*t2right = ((U8 *)&qw0)[ii+1];
*t2left = ((U8 *)&qw1)[ii+1];
}
}
#endif
#endif
//restore default handling of the "function has no EMMS instruction" warning (C4799)
#pragma warning(default:4799)
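/* Keyboard shortcuts (footer left over from the web page this listing was copied from):
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 *   Show shortcuts       ?
 */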