📄 deblockopt.cpp
字号:
//==========================================================================
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
// PURPOSE.
//
// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
//
//--------------------------------------------------------------------------
/****************************************************************************
*
* Module Title : DeblockOpt.c
*
* Description : Optimized functions for deblocking
*
*****************************************************************************
*/
/****************************************************************************
* Header Frames
*****************************************************************************
*/
#ifdef _MSC_VER
#pragma warning(disable:4799)
#pragma warning(disable:4731)
#endif
#include <stdio.h>
#include <stdlib.h>
#include "inttypes.h"
#include "codec_internal.h"
#include "simd.h"
/****************************************************************************
* Module constants.
*****************************************************************************
*/
/****************************************************************************
* Explicit Imports
*****************************************************************************
*/
extern "C" uint32_t LoopFilterLimitValuesV2[];
extern unsigned char FragDeblockingFlag[];
/****************************************************************************
* Exported Global Variables
*****************************************************************************
*/
/****************************************************************************
* Exported Functions
*****************************************************************************
*/
/****************************************************************************
* Module Statics
*****************************************************************************
*/
/****************************************************************************
*
* ROUTINE : DeblockLoopFilteredBand_MMX
*
* INPUTS : None
*
* OUTPUTS : None
*
* RETURNS : None
*
* FUNCTION : Filter both horizontal and vertical edge in a band
*
* SPECIAL NOTES :
*
* REFERENCE :
*
* ERRORS : None.
*
****************************************************************************/
extern "C" void DeblockLoopFilteredBand_MMX(
PB_INSTANCE *pbi,
uint8_t *SrcPtr,
uint8_t *DesPtr,
uint32_t PlaneLineStep,
uint32_t FragAcross,
uint32_t StartFrag,
uint32_t *QuantScale
)
{
uint32_t j;
uint32_t CurrentFrag=StartFrag;
uint32_t QStep;
uint32_t LoopFLimit;
uint8_t *Src, *Des;
uint32_t Var1, Var2;
__m64 QStepMmx;
__m64 FLimitMmx;
__align16(short,Rows[80]);
__align16(short,NewRows[64]);
__align16(unsigned char,Variance11[8]);
__align16(unsigned char,Variance21[8]);
static __align16(const short ,FourFours[])= {4, 4, 4, 4};
static __align16(const unsigned char, Eight128c[]) = {128, 128, 128, 128,128, 128, 128, 128 };
while(CurrentFrag < StartFrag + FragAcross )
{
Src=SrcPtr+8*(CurrentFrag-StartFrag);
Des=DesPtr+8*(CurrentFrag-StartFrag);
QStep = QuantScale[ pbi->FragQIndex[CurrentFrag+FragAcross]];
/* Calculate the FLimit and store FLimit and QStep */
/* Copy the data to the intermediate buffer */
uint32_t eax1=QStep;
int edx=0;;/* clear edx */
__m64 mm5,mm6,mm7,mm0,mm1,mm4,mm2,mm3;
uint32_t ecx=PlaneLineStep ;/* ecx = Pitch */
movd (mm5, eax1);
uint8_t *eax= Src;/* eax = Src */
punpcklwd (mm5, mm5 );
uint8_t *esi= (uint8_t*)NewRows;/* esi = NewRows */
punpckldq (mm5, mm5);
edx-= ecx ;/* edx = - Pitch */
movq (mm6, mm5 );/* Q Q Q Q */
paddw (mm6, mm5 );
paddw (mm6, mm5 );/* 3Q3Q3Q3Q */
packuswb (mm5, mm5 );/* QQQQQQQQ */
movq (QStepMmx, mm5);
psraw (mm6, 2 );/* F F F F */
packuswb (mm6, mm6 );/* FFFFFFFF */
uint8_t *edi= (uint8_t*)Rows; /* edi = Rows */
pxor (mm7, mm7 );/* Clear mm7 */
psubb (mm6, Eight128c );/* Eight (F-128)s */
eax= eax + edx * 4 ;/* eax = Src - 4*Pitch */
movq (mm0, eax + edx );/* mm0 = Src-5*Pitch */
movq (mm1, mm0 );/* mm1 = mm0 */
punpcklbw (mm0, mm7 );/* Lower Four -5 */
movq (mm4, mm1 );/* mm4 = Src-5*Pitch */
movq ( FLimitMmx, mm6 );/* FFFF FFFF */
movq( mm2, eax );/* mm2 = Src-4*Pitch */
punpckhbw( mm1, mm7 );/* Higher Four -5 */
movq ( edi, mm0 );/* Write Lower Four of -5 */
movq (mm5, mm2 );/* mm5 = S_4 */
movq (mm3, mm2 );/* mm3 = S_4 */
movq ( edi+8, mm1 );/* Write Higher Four of -5 */
movq (mm0, eax + ecx );/* mm0 = Src-3*Pitch */
psubusb (mm5, mm4 );/* S_4 - S_5 */
psubusb (mm4, mm2 );/* S_5 - S_4 */
punpcklbw (mm2, mm7 );/* Lower Four -4 */
por (mm4, mm5 );/* abs(S_4-S_5) */
movq (edi+16, mm2 );/* Write Lower -4 */
movq (mm6, mm3 );/* mm6 = S_4 */
punpckhbw (mm3, mm7 );/* higher Four -4 */
movq (edi+24, mm3 );/* write hight -4 */
movq (mm1, mm0 );/* mm1 = S_3 */
punpcklbw (mm0, mm7 );/* lower four -3 */
movq (edi+32, mm0 );/* write Lower -3 */
movq (mm2, eax + ecx *2 );/* mm2 = Src-2*Pitch */
movq (mm5, mm1 );/* mm5 = S_3 */
psubusb (mm5, mm6 );/* S_3 - S_4 */
psubusb (mm6, mm1 );/* S_4 - S_3 */
por (mm5, mm6 );/* abs(S_4-S_3) */
movq (mm6, mm1 );/* mm6 = S_3 */
punpckhbw (mm1, mm7 );/* higher four -3 */
movq (mm3, mm2 );/* mm3 = S_2 */
movq (edi+40, mm1 );/* write Higher -3 */
paddusb (mm4, mm5 );/* abs(S_5-S_4)+abs(S_4-S_3) */
movq (mm5, mm2 );/* mm5 = S_2 */
psubusb (mm5, mm6 );/* S_2 - S_3 */
psubusb (mm6, mm2 );/* S_3 - S_2 */
por ( mm5, mm6 );/* abs(S_3 - S_2) */
movq ( mm6, mm2 );/* mm6 = S_2 */
punpcklbw (mm2, mm7 );/* lower four -2 */
eax= eax + ecx *4;/* eax = Src */
punpckhbw (mm3, mm7 );/* higher four -2 */
movq (mm0, eax + edx );/* mm2 = Src-Pitch */
movq (edi+48, mm2 );/* lower -2 */
paddusb ( mm4, mm5 );/* abs(S_5-S_4)+abs(S_4-S_3)+abs(S_3-S_2) */
movq ( mm5, mm0 );/* mm5 = S_1 */
movq ( edi+56, mm3 );/* higher -2 */
movq ( mm1, mm0 );/* mm1 = S_1 */
psubusb (mm5, mm6 );/* S_1 - S_2 */
psubusb (mm6, mm1 );/* S_2 - S_1 */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -