/* sbrhfgen.c
 *
 * Archive description: H.264 decoder program converted from FFmpeg, builds under VC.
 * Language: C
 * Listing page 1 of 2 (continued on the next page).
 */
/* ***** BEGIN LICENSE BLOCK *****
 *
 * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved.
 *
 * The contents of this file, and the files included with this file,
 * are subject to the current version of the RealNetworks Public
 * Source License (the "RPSL") available at
 * http://www.helixcommunity.org/content/rpsl unless you have licensed
 * the file under the current version of the RealNetworks Community
 * Source License (the "RCSL") available at
 * http://www.helixcommunity.org/content/rcsl, in which case the RCSL
 * will apply. You may also obtain the license terms directly from
 * RealNetworks.  You may not use this file except in compliance with
 * the RPSL or, if you have a valid RCSL with RealNetworks applicable
 * to this file, the RCSL.  Please see the applicable RPSL or RCSL for
 * the rights, obligations and limitations governing use of the
 * contents of the file.
 *
 * This file is part of the Helix DNA Technology. RealNetworks is the
 * developer of the Original Code and owns the copyrights in the
 * portions it created.
 *
 * This file, and the files included with this file, is distributed
 * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
 * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
 * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
 * ENJOYMENT OR NON-INFRINGEMENT.
 *
 * Technology Compatibility Kit Test Suite(s) Location:
 *    http://www.helixcommunity.org/content/tck
 *
 * Contributor(s):
 *
 * ***** END LICENSE BLOCK ***** */

/**************************************************************************************
 * Fixed-point HE-AAC decoder
 * Jon Recker (jrecker@real.com)
 * February 2005
 *
 * sbrhfgen.c - high frequency generation for SBR
 **************************************************************************************/

#include "sbr.h"
#include "assembly.h"

/* number of fraction bits used for LP coefficients: Q29, i.e. a range of (-4, 4) */
#define FBITS_LPCOEFS	29
/* the value 16 expressed with (2*FBITS_LPCOEFS - 32) = 26 fraction bits, i.e. 16 in Q26 */
#define MAG_16			(16 * (1 << (2*FBITS_LPCOEFS - 32)))
/* 1.0 / (1.0 + 1e-6) in Q31 */
#define RELAX_COEF		0x7ffff79c

/* newBWTab[prev invfMode][curr invfMode] (table 4.158), every entry in Q31.
 * The four non-zero levels are given names so the rows are easy to compare;
 * the names exist only for the duration of the table definition.
 * Sample file which uses all of these: al_sbr_sr_64_2_fsaac32.aac
 */
#define NEWBW_Q31_0_60	0x4ccccccd	/* ~0.60 */
#define NEWBW_Q31_0_75	0x60000000	/*  0.75 */
#define NEWBW_Q31_0_90	0x73333333	/* ~0.90 */
#define NEWBW_Q31_0_98	0x7d70a3d7	/* ~0.98 */
static const int newBWTab[4][4] = {
	/* curr = 0    curr = 1         curr = 2         curr = 3       */
	{ 0x00000000,     NEWBW_Q31_0_60, NEWBW_Q31_0_90, NEWBW_Q31_0_98 },	/* prev = 0 */
	{ NEWBW_Q31_0_60, NEWBW_Q31_0_75, NEWBW_Q31_0_90, NEWBW_Q31_0_98 },	/* prev = 1 */
	{ 0x00000000,     NEWBW_Q31_0_75, NEWBW_Q31_0_90, NEWBW_Q31_0_98 },	/* prev = 2 */
	{ 0x00000000,     NEWBW_Q31_0_75, NEWBW_Q31_0_90, NEWBW_Q31_0_98 },	/* prev = 3 */
};
#undef NEWBW_Q31_0_60
#undef NEWBW_Q31_0_75
#undef NEWBW_Q31_0_90
#undef NEWBW_Q31_0_98

/**************************************************************************************
 * Function:    CVKernel1
 *
 * Description: accumulation kernel for the covariance terms p01, p11, p12 and p22
 *
 * Inputs:      XBuf   - low-freq samples stored as (re, im) int pairs, starting at
 *                         time index = 0, freq index = patch subband; successive
 *                         time samples are read 2*64 ints apart
 *
 * Outputs:     accBuf - twelve ints holding the (lo32, hi32) halves of the 64-bit
 *                         accumulators, in the order
 *                         p01re, p01im, p11re, p12re, p12im, p22re
 *
 * Return:      none
 *
 * Notes:       this C body is only compiled for non-ARM targets; ARM builds declare
 *                the function here and take the implementation from sbrcov.s
 *              with prev/cur being two consecutive samples, each loop step adds
 *                cur * conj(prev) to p01 and |prev|^2 to p11
 *              p12 and p22 are the same sums taken one sample earlier, so they are
 *                obtained from p01/p11 by adding the first term and removing the last
 **************************************************************************************/
#if (defined (__arm) && defined (__ARMCC_VERSION)) || (defined (_WIN32) && defined (_WIN32_WCE) && defined (ARM)) || (defined(__GNUC__) && defined(__arm__))
#ifdef __cplusplus
extern "C"
#endif
void CVKernel1(int *XBuf, int *accBuf);
#else
void CVKernel1(int *XBuf, int *accBuf)
{
	enum { kSampleStride = 2*64 };	/* ints between consecutive time samples */
	U64 sum01Re, sum01Im, sum11Re, sum12Re, sum12Im, sum22Re;
	int prevRe, prevIm, curRe, curIm;
	int remaining;

	sum01Re.w64 = 0;
	sum01Im.w64 = 0;
	sum11Re.w64 = 0;
	sum12Re.w64 = 0;
	sum12Im.w64 = 0;
	sum22Re.w64 = 0;

	/* prime the two-sample window: prev = time sample 0, cur = time sample 1 */
	prevRe = XBuf[0];
	prevIm = XBuf[1];
	curRe  = XBuf[kSampleStride + 0];
	curIm  = XBuf[kSampleStride + 1];
	XBuf  += 2 * kSampleStride;

	/* leading term that belongs to p12/p22 but not to p01/p11 */
	sum12Re.w64 = MADD64(sum12Re.w64,  curRe,  prevRe);
	sum12Re.w64 = MADD64(sum12Re.w64,  curIm,  prevIm);
	sum12Im.w64 = MADD64(sum12Im.w64,  prevRe, curIm);
	sum12Im.w64 = MADD64(sum12Im.w64, -prevIm, curRe);
	sum22Re.w64 = MADD64(sum22Re.w64,  prevRe, prevRe);
	sum22Re.w64 = MADD64(sum22Re.w64,  prevIm, prevIm);

	remaining = NUM_TIME_SLOTS*SAMPLES_PER_SLOT + 6;
	while (remaining != 0) {
		/* slide the window forward by one time sample */
		prevRe = curRe;
		prevIm = curIm;
		curRe  = XBuf[0];
		curIm  = XBuf[1];
		XBuf  += kSampleStride;

		sum01Re.w64 = MADD64(sum01Re.w64,  curRe,  prevRe);
		sum01Re.w64 = MADD64(sum01Re.w64,  curIm,  prevIm);
		sum01Im.w64 = MADD64(sum01Im.w64,  prevRe, curIm);
		sum01Im.w64 = MADD64(sum01Im.w64, -prevIm, curRe);
		sum11Re.w64 = MADD64(sum11Re.w64,  prevRe, prevRe);
		sum11Re.w64 = MADD64(sum11Re.w64,  prevIm, prevIm);

		remaining--;
	}

	/* p12/p22 = leading term + p01/p11 - trailing term; prev/cur still hold the
	 * final pair, so the trailing term is cancelled by accumulating its negation
	 */
	sum12Re.w64 += sum01Re.w64;
	sum12Re.w64 = MADD64(sum12Re.w64, curRe, -prevRe);
	sum12Re.w64 = MADD64(sum12Re.w64, curIm, -prevIm);

	sum12Im.w64 += sum01Im.w64;
	sum12Im.w64 = MADD64(sum12Im.w64, prevRe, -curIm);
	sum12Im.w64 = MADD64(sum12Im.w64, prevIm,  curRe);

	sum22Re.w64 += sum11Re.w64;
	sum22Re.w64 = MADD64(sum22Re.w64, prevRe, -prevRe);
	sum22Re.w64 = MADD64(sum22Re.w64, prevIm, -prevIm);

	/* hand each accumulator back as a (lo32, hi32) pair */
	accBuf[0]  = sum01Re.r.lo32;
	accBuf[1]  = sum01Re.r.hi32;
	accBuf[2]  = sum01Im.r.lo32;
	accBuf[3]  = sum01Im.r.hi32;
	accBuf[4]  = sum11Re.r.lo32;
	accBuf[5]  = sum11Re.r.hi32;
	accBuf[6]  = sum12Re.r.lo32;
	accBuf[7]  = sum12Re.r.hi32;
	accBuf[8]  = sum12Im.r.lo32;
	accBuf[9]  = sum12Im.r.hi32;
	accBuf[10] = sum22Re.r.lo32;
	accBuf[11] = sum22Re.r.hi32;
}
#endif

/**************************************************************************************
 * Function:    CalcCovariance1
 *
 * Description: calculate covariance matrix for p01, p12, p11, p22 (4.6.18.6.2)
 *
 * Inputs:      buffer of low-freq samples, starting at time index 0,
 *                freq index = patch subband
 *
 * Outputs:     complex covariance elements p01re, p01im, p12re, p12im, p11re, p22re
 *                (p11im = p22im = 0)
 *              format = 32-bit signed integer * 2^N, all six sharing one scalefactor N
 *
 * Return:      scalefactor N, equal to (n - 30 - 2*FBITS_OUT_QMFA) where n is the
 *                number of significant bits in the largest-magnitude accumulator;
 *                N is signed (it includes the -2*FBITS_OUT_QMFA term)
 *
 * Notes:       outputs are normalized so the largest magnitude uses at most 30 bits
 *                (sign in at least top 2 bits)
 *              relies on >> of a negative int being an arithmetic shift
 **************************************************************************************/
static int CalcCovariance1(int *XBuf, int *p01reN, int *p01imN, int *p12reN, int *p12imN, int *p11reN, int *p22reN)
{
	int accBuf[2*6];
	int n, z, s, loShift, hiShift, gbMask;
	U64 p01re, p01im, p12re, p12im, p11re, p22re;

	/* the kernel returns each 64-bit accumulator as a (lo32, hi32) pair */
	CVKernel1(XBuf, accBuf);
	p01re.r.lo32 = accBuf[0];	p01re.r.hi32 = accBuf[1];
	p01im.r.lo32 = accBuf[2];	p01im.r.hi32 = accBuf[3];
	p11re.r.lo32 = accBuf[4];	p11re.r.hi32 = accBuf[5];
	p12re.r.lo32 = accBuf[6];	p12re.r.hi32 = accBuf[7];
	p12im.r.lo32 = accBuf[8];	p12im.r.hi32 = accBuf[9];
	p22re.r.lo32 = accBuf[10];	p22re.r.hi32 = accBuf[11];

	/* 64-bit accumulators now have 2*FBITS_OUT_QMFA fraction bits
	 * want to scale them down to integers (32-bit signed, Q0)
	 *   with scale factor of 2^n
	 * leave 2 GB's for calculating determinant, so take top 30 non-zero bits
	 *
	 * hi32 ^ (hi32 >> 31) is zero only when hi32 is 0 or -1, so the first mask
	 *   tells whether every high word is nothing but sign extension
	 */
	gbMask  = ((p01re.r.hi32) ^ (p01re.r.hi32 >> 31)) | ((p01im.r.hi32) ^ (p01im.r.hi32 >> 31));
	gbMask |= ((p12re.r.hi32) ^ (p12re.r.hi32 >> 31)) | ((p12im.r.hi32) ^ (p12im.r.hi32 >> 31));
	gbMask |= ((p11re.r.hi32) ^ (p11re.r.hi32 >> 31)) | ((p22re.r.hi32) ^ (p22re.r.hi32 >> 31));
	if (gbMask == 0) {
		/* all magnitudes live in the low words: s is the sign mask (0 or -1) and
		 *   (lo32 ^ s) - s negates the low word when the accumulator is negative
		 */
		s = p01re.r.hi32 >> 31; gbMask  = (p01re.r.lo32 ^ s) - s;
		s = p01im.r.hi32 >> 31; gbMask |= (p01im.r.lo32 ^ s) - s;
		s = p12re.r.hi32 >> 31; gbMask |= (p12re.r.lo32 ^ s) - s;
		s = p12im.r.hi32 >> 31; gbMask |= (p12im.r.lo32 ^ s) - s;
		s = p11re.r.hi32 >> 31; gbMask |= (p11re.r.lo32 ^ s) - s;
		s = p22re.r.hi32 >> 31; gbMask |= (p22re.r.lo32 ^ s) - s;
		z = 32 + CLZ(gbMask);
	} else {
		/* at least one high word carries magnitude, so the high words alone decide */
		gbMask  = FASTABS(p01re.r.hi32) | FASTABS(p01im.r.hi32);
		gbMask |= FASTABS(p12re.r.hi32) | FASTABS(p12im.r.hi32);
		gbMask |= FASTABS(p11re.r.hi32) | FASTABS(p22re.r.hi32);
		z = CLZ(gbMask);
	}

	n = 64 - z;	/* number of non-zero bits in bottom of 64-bit word */

	if (n <= 30) {
		/* everything fits in the low word with room to spare: scale up */
		loShift = (30 - n);
		*p01reN = p01re.r.lo32 << loShift;	*p01imN = p01im.r.lo32 << loShift;
		*p12reN = p12re.r.lo32 << loShift;	*p12imN = p12im.r.lo32 << loShift;
		*p11reN = p11re.r.lo32 << loShift;	*p22reN = p22re.r.lo32 << loShift;
		return -(loShift + 2*FBITS_OUT_QMFA);
	}

	if (n < 32 + 30) {
		/* result straddles both words: out = acc >> loShift, 1 <= loShift <= 31 */
		loShift = (n - 30);
		hiShift = 32 - loShift;
		*p01reN = (p01re.r.hi32 << hiShift) | (p01re.r.lo32 >> loShift);
		*p01imN = (p01im.r.hi32 << hiShift) | (p01im.r.lo32 >> loShift);
		*p12reN = (p12re.r.hi32 << hiShift) | (p12re.r.lo32 >> loShift);
		*p12imN = (p12im.r.hi32 << hiShift) | (p12im.r.lo32 >> loShift);
		*p11reN = (p11re.r.hi32 << hiShift) | (p11re.r.lo32 >> loShift);
		*p22reN = (p22re.r.hi32 << hiShift) | (p22re.r.lo32 >> loShift);
		return (loShift - 2*FBITS_OUT_QMFA);
	}

	/* n >= 62: out = hi32 >> hiShift = acc >> (32 + hiShift), so the scalefactor
	 *   grows with hiShift, matching n - 30 - 2*FBITS_OUT_QMFA in the other branches
	 */
	hiShift = n - (32 + 30);
	*p01reN = p01re.r.hi32 >> hiShift;	*p01imN = p01im.r.hi32 >> hiShift;
	*p12reN = p12re.r.hi32 >> hiShift;	*p12imN = p12im.r.hi32 >> hiShift;
	*p11reN = p11re.r.hi32 >> hiShift;	*p22reN = p22re.r.hi32 >> hiShift;
	return (32 + hiShift - 2*FBITS_OUT_QMFA);
}

/**************************************************************************************
 * Function:    CVKernel2
 *
 * Description: accumulation kernel for the covariance term p02
 *
 * Inputs:      XBuf   - low-freq samples stored as (re, im) int pairs, starting at
 *                         time index = 0, freq index = patch subband; successive
 *                         time samples are read 2*64 ints apart
 *
 * Outputs:     accBuf - four ints holding the (lo32, hi32) halves of the 64-bit
 *                         accumulators, in the order p02re, p02im
 *
 * Return:      none
 *
 * Notes:       this C body is only compiled for non-ARM targets; ARM builds declare
 *                the function here and take the implementation from sbrcov.s
 *              each loop step adds cur * conj(lag) to p02, where lag is the sample
 *                two time indices before cur
 **************************************************************************************/
#if (defined (__arm) && defined (__ARMCC_VERSION)) || (defined (_WIN32) && defined (_WIN32_WCE) && defined (ARM)) || (defined(__GNUC__) && defined(__arm__))
#ifdef __cplusplus
extern "C"
#endif
void CVKernel2(int *XBuf, int *accBuf);
#else
void CVKernel2(int *XBuf, int *accBuf)
{
	enum { kSampleStride = 2*64 };	/* ints between consecutive time samples */
	U64 sumRe, sumIm;
	const int *lagPtr = XBuf;						/* sample at time index k     */
	const int *curPtr = XBuf + 2 * kSampleStride;	/* sample at time index k + 2 */
	int lagRe, lagIm, curRe, curIm;
	int remaining;

	sumRe.w64 = 0;
	sumIm.w64 = 0;

	for (remaining = (NUM_TIME_SLOTS*SAMPLES_PER_SLOT + 6); remaining != 0; remaining--) {
		lagRe = lagPtr[0];
		lagIm = lagPtr[1];
		curRe = curPtr[0];
		curIm = curPtr[1];

		sumRe.w64 = MADD64(sumRe.w64,  curRe, lagRe);
		sumRe.w64 = MADD64(sumRe.w64,  curIm, lagIm);
		sumIm.w64 = MADD64(sumIm.w64,  lagRe, curIm);
		sumIm.w64 = MADD64(sumIm.w64, -lagIm, curRe);

		/* both taps advance together, keeping a fixed distance of two samples */
		lagPtr += kSampleStride;
		curPtr += kSampleStride;
	}

	/* hand each accumulator back as a (lo32, hi32) pair */
	accBuf[0] = sumRe.r.lo32;
	accBuf[1] = sumRe.r.hi32;
	accBuf[2] = sumIm.r.lo32;
	accBuf[3] = sumIm.r.hi32;
}
#endif

/**************************************************************************************
 * Function:    CalcCovariance2
 *
 * Description: calculate covariance matrix for p02 (4.6.18.6.2)
 *
 * Inputs:      buffer of low-freq samples, starting at time index = 0,
 *                freq index = patch subband
 *
 * Outputs:     complex covariance element p02re, p02im
 *              format = integer (Q0) * 2^N, with scalefactor N >= 0
 *
 * Return:      scalefactor N
 *
 * Notes:       outputs are normalized to have 1 GB (sign in at least top 2 bits)
 **************************************************************************************/
static int CalcCovariance2(int *XBuf, int *p02reN, int *p02imN)
{
	U64 p02re, p02im;
	int n, z, s, loShift, hiShift, gbMask;
	int accBuf[2*2];

	CVKernel2(XBuf, accBuf);
	p02re.r.lo32 = accBuf[0];
	p02re.r.hi32 = accBuf[1];
	p02im.r.lo32 = accBuf[2];
	p02im.r.hi32 = accBuf[3];
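/* [End of listing page 1 of 2 - the source continues on the next page.]
 *
 * Viewer keyboard shortcuts:
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Switch theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */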