// File: deringopt.cpp
//==========================================================================
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
// KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
// PURPOSE.
//
// Copyright (c) 1999 - 2001 On2 Technologies Inc. All Rights Reserved.
//
//--------------------------------------------------------------------------
/****************************************************************************
*
* Module Title : DeRingingOpt.c
*
* Description : Optimized functions for PostProcessor
*
*****************************************************************************
*/
#pragma warning(disable:4799)
#pragma warning(disable:4731)
/****************************************************************************
* Header Files
*****************************************************************************
*/
#include <stdio.h>
#include <stdlib.h>
#include "inttypes.h"
#include "codec_internal.h"
#include "simd.h"
/****************************************************************************
* Module constants.
*****************************************************************************
*/
#pragma warning(disable:4305)
/****************************************************************************
* Explicit Imports
*****************************************************************************
*/
extern "C" uint32_t SharpenModifier[];
/*******************************************************************************/
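/****************************************************************************
*
* ROUTINE : DeringBlockStrong_MMX()
*
* INPUTS : SrcPtr - pointer to the source pixels of the block
* DstPtr - pointer to the destination pixels
* Pitch - step, in bytes, from one row of pixels to the next
* FragQIndex - index used to look up QuantScale[] and SharpenModifier[]
* QuantScale - table of quantizer values, indexed by FragQIndex
*
* OUTPUTS : None listed (presumably the filtered block is written through
* DstPtr - confirm against the full routine)
*
* RETURNS : None
*
* FUNCTION : Filters a block for deringing purposes
*
* SPECIAL NOTES :
*
* ERRORS : None.
*
****************************************************************************/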
/* Packed constant tables used as 64-bit MMX operands by the deringing code.
 * Every table is declared through the project's __align16 macro and holds
 * exactly 8 bytes of data (four 16-bit lanes or eight 8-bit lanes). */

/* Four 16-bit lanes. */
static __align16(const unsigned short, Four128s[]) = {128, 128, 128, 128};
static __align16(const unsigned short, Four64s[] ) = { 64, 64, 64, 64};

/* Eight 8-bit lanes whose values fit in a signed byte. */
static __align16(const char, eight64s [] )= { 64,64,64,64,64,64,64,64};
static __align16(const char, eight32s [] )= { 32,32,32,32,32,32,32,32};
static __align16(const char, eight127s [])= { 127, 127, 127, 127, 127, 127, 127, 127};

/* Eight 8-bit lanes whose values need the full unsigned byte range.
 * eight128s is the 0x80-per-byte mask that is XORed into unsigned bytes so
 * they can be compared with the signed pcmpgtb.  128 is not representable in
 * a plain char on targets where char is signed, so initialising a char array
 * with it is a narrowing conversion; the table is therefore unsigned char,
 * like eight223s and eight231s.  The stored bytes (0x80) are unchanged. */
static __align16(const unsigned char, eight128s [])= { 128, 128, 128, 128, 128, 128, 128, 128};
static __align16(const unsigned char ,eight223s[]) = { 223,223,223,223,223,223,223,223};
static __align16(const unsigned char ,eight231s[]) = { 231,231,231,231,231,231,231,231};
extern "C" void DeringBlockStrong_MMX(
const uint8_t *SrcPtr,
uint8_t *DstPtr,
const int32_t Pitch,
uint32_t FragQIndex,
const uint32_t *QuantScale)
{
__align16(short, UDMod[72]);
__align16(short, LRMod[128]);
unsigned int PlaneLineStep = Pitch;
const unsigned char * Src = SrcPtr;
unsigned char * Des = DstPtr;
short * UDPointer = UDMod;
short * LRPointer = LRMod;
uint32_t QStep = QuantScale[FragQIndex];
int32_t Sharpen = SharpenModifier[FragQIndex];
const uint8_t * esi= Src; /* Source Pointer */
uint8_t * edi= (uint8_t*)UDPointer ;/* UD modifier pointer */
unsigned int ecx= PlaneLineStep ;/* Pitch Step */
int edx=0;
int eax_= QStep; /* QValue */
int ebx_= Sharpen; /* Sharpen */
__m64 mm0,mm1,mm2,mm3,mm4,mm5,mm6,mm7;
movd ( mm0, eax_ );/* QValue */
movd ( mm2, ebx_ );/* sharpen */
punpcklbw ( mm0, mm0 );/* 00 00 00 QQ */
edx-= ecx;/* Negative Pitch */
punpcklbw ( mm2, mm2 );/* 00 00 00 SS */
pxor ( mm7, mm7 );/* clear mm7 for unpacks */
punpcklbw ( mm0, mm0 );/* 00 00 qq qq */
uint8_t *eax= (uint8_t*)LRPointer;/* Left and Right Modifier */
punpcklbw ( mm2, mm2 );/* 00 00 ss ss */
const uint8_t *ebx= esi+ecx*8;/* Source Pointer of last row */
punpcklbw ( mm0, mm0 );/* qq qq qq qq */
movq ( mm1, mm0 );/* make a copy */
punpcklbw ( mm2, mm2 );/* ss ss ss ss */
paddb ( mm1, mm0 );/* QValue * 2 */
paddb ( mm1, mm0 );/* High = 3 * Qvalue */
paddusb ( mm1, eight223s );/* clamping high to 32 */
paddb ( mm0, eight32s );/* 32+QValues */
psubusb ( mm1, eight223s );/* Get the real value back */
movq ( mm3, eight127s );/* 7f 7f 7f 7f 7f 7f 7f 7f */
pandn ( mm1, mm3 );/* ClampHigh */
/* mm0,mm1,mm2,mm7 are in use */
/* mm0---> QValue+32 */
/* mm1---> ClampHigh */
/* mm2---> Sharpen */
/* mm7---> Cleared for unpack */
FillModLoop1:
movq ( mm3, esi );/* read 8 pixels p */
movq ( mm4, esi+edx );/* Pixels on top pu */
movq ( mm5, mm3 );/* make a copy of p */
psubusb( mm3, mm4 );/* p-pu */
psubusb( mm4, mm5 );/* pu-p */
por ( mm3, mm4 );/* abs(p-pu) */
movq ( mm6, mm0 );/* 32+QValues */
movq (mm4, mm0 );/* 32+QValues */
psubusb ( mm6, mm3 );/* zero clampled TmpMod */
movq ( mm5, eight128s );/* 80 80 80 80 80 80 80 80 */
paddb ( mm4, eight64s );/* 32+QValues + 64 */
pxor ( mm4, mm5 );/* convert to a sign number */
pxor ( mm3, mm5 );/* convert to a sign number */
pcmpgtb ( mm3, mm4 );/* 32+QValue- 2*abs(p-pu) <-64 ? */
pand ( mm3, mm2 );/* use sharpen */
paddsb ( mm6, mm1 );/* clamping to high */
psubsb ( mm6, mm1 );/* offset back */
por ( mm6, mm3 );/* Mod value to be stored */
pxor ( mm5, mm5 );/* clear mm5 */
pxor ( mm4, mm4 );/* clear mm4 */
punpcklbw ( mm5, mm6 );/* 03 xx 02 xx 01 xx 00 xx */
psraw ( mm5, 8 );/* sign extended */
movq ( edi, mm5 );/* writeout UDmod, low four */
punpckhbw ( mm4, mm6);
psraw ( mm4, 8);
movq ( edi+8, mm4 );/* writeout UDmod, high four */
/* left Mod */
movq ( mm3, esi );/* read 8 pixels p */
movq ( mm4, esi-1 );/* Pixels on top pu */
movq ( mm5, mm3 );/* make a copy of p */
psubusb ( mm3, mm4 );/* p-pu */
psubusb ( mm4, mm5 );/* pu-p */
por ( mm3, mm4 );/* abs(p-pu) */
movq ( mm6, mm0 );/* 32+QValues */
movq ( mm4, mm0 );/* 32+QValues */
psubusb( mm6, mm3 );/* zero clampled TmpMod */
movq ( mm5, eight128s );/* 80 80 80 80 80 80 80 80 */
paddb ( mm4, eight64s );/* 32+QValues + 64 */
pxor ( mm4, mm5 );/* convert to a sign number */
pxor ( mm3, mm5 );/* convert to a sign number */
pcmpgtb( mm3, mm4 );/* 32+QValue- 2*abs(p-pu) <-64 ? */
pand ( mm3, mm2 );/* use sharpen */
paddsb ( mm6, mm1 );/* clamping to high */
psubsb ( mm6, mm1 );/* offset back */
por ( mm6, mm3 );/* Mod value to be stored */
pxor ( mm5, mm5 );/* clear mm5 */
pxor ( mm4, mm4 );/* clear mm4 */
punpcklbw( mm5, mm6 );/* 03 xx 02 xx 01 xx 00 xx */
psraw ( mm5, 8 );/* sign extended */
movq ( eax, mm5 );/* writeout UDmod, low four */
punpckhbw( mm4, mm6);
psraw ( mm4, 8);
movq ( eax+8, mm4 );/* writeout UDmod, high four */
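/* Source-viewer page text, not program code. Keyboard shortcuts: copy code
 * Ctrl + C; search code Ctrl + F; full screen F11; toggle theme
 * Ctrl + Shift + D; show shortcuts ?; increase font size Ctrl + =;
 * decrease font size Ctrl + -. */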