📄 sbrqmf.c

📁 从FFMPEG转换而来的H264解码程序,VC下编译..
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* ***** BEGIN LICENSE BLOCK *****
 *
 * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved.
 *
 * The contents of this file, and the files included with this file,
 * are subject to the current version of the RealNetworks Public
 * Source License (the "RPSL") available at
 * http://www.helixcommunity.org/content/rpsl unless you have licensed
 * the file under the current version of the RealNetworks Community
 * Source License (the "RCSL") available at
 * http://www.helixcommunity.org/content/rcsl, in which case the RCSL
 * will apply. You may also obtain the license terms directly from
 * RealNetworks.  You may not use this file except in compliance with
 * the RPSL or, if you have a valid RCSL with RealNetworks applicable
 * to this file, the RCSL.  Please see the applicable RPSL or RCSL for
 * the rights, obligations and limitations governing use of the
 * contents of the file.
 *
 * This file is part of the Helix DNA Technology. RealNetworks is the
 * developer of the Original Code and owns the copyrights in the
 * portions it created.
 *
 * This file, and the files included with this file, is distributed
 * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
 * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
 * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
 * ENJOYMENT OR NON-INFRINGEMENT.
 *
 * Technology Compatibility Kit Test Suite(s) Location:
 *    http://www.helixcommunity.org/content/tck
 *
 * Contributor(s):
 *
 * ***** END LICENSE BLOCK ***** */

/**************************************************************************************
 * Fixed-point HE-AAC decoder
 * Jon Recker (jrecker@real.com)
 * February 2005
 *
 * sbrqmf.c - analysis and synthesis QMF filters for SBR
 **************************************************************************************/

#include "sbr.h"
#include "assembly.h"

/* PreMultiply64() twiddle table
 * format = Q30
 * reordered for sequential access: PreMultiply64() consumes four consecutive
 *   entries per loop pass, in the order
 *     (cos+sin, sin) for the low-end sample pair,
 *     (cos+sin, sin) for the mirrored high-end sample pair
 *
 * Generator (pseudo-code): every "x = ..." line below emits one table entry,
 *   and nmdct = 64, giving 16 passes * 4 entries = 64 entries
 *
 * for (i = 0; i < 64/4; i++) {
 *   angle = (i + 0.25) * M_PI / nmdct;
 *   x = (cos(angle) + sin(angle));
 *   x =  sin(angle);
 *
 *   angle = (nmdct/2 - 1 - i + 0.25) * M_PI / nmdct;
 *   x = (cos(angle) + sin(angle));
 *   x =  sin(angle);
 * }
 *
 * (cos - sin) is not stored; it is rebuilt at run time as (cos+sin) - 2*sin.
 */
static const int cos4sin4tab64[64] = {
	0x40c7d2bd, 0x00c90e90, 0x424ff28f, 0x3ff4e5e0, 0x43cdd89a, 0x03ecadcf, 0x454149fc, 0x3fc395f9,
	0x46aa0d6d, 0x070de172, 0x4807eb4b, 0x3f6af2e3, 0x495aada2, 0x0a2abb59, 0x4aa22036, 0x3eeb3347,
	0x4bde1089, 0x0d415013, 0x4d0e4de2, 0x3e44a5ef, 0x4e32a956, 0x104fb80e, 0x4f4af5d1, 0x3d77b192,
	0x50570819, 0x135410c3, 0x5156b6d9, 0x3c84d496, 0x5249daa2, 0x164c7ddd, 0x53304df6, 0x3b6ca4c4,
	0x5409ed4b, 0x19372a64, 0x54d69714, 0x3a2fcee8, 0x55962bc0, 0x1c1249d8, 0x56488dc5, 0x38cf1669,
	0x56eda1a0, 0x1edc1953, 0x57854ddd, 0x374b54ce, 0x580f7b19, 0x2192e09b, 0x588c1404, 0x35a5793c,
	0x58fb0568, 0x2434f332, 0x595c3e2a, 0x33de87de, 0x59afaf4c, 0x26c0b162, 0x59f54bee, 0x31f79948,
	0x5a2d0957, 0x29348937, 0x5a56deec, 0x2ff1d9c7, 0x5a72c63b, 0x2b8ef77d, 0x5a80baf6, 0x2dce88aa,
};

/* PostMultiply64() twiddle table
 * format = Q30
 * reordered for sequential access: stored as 17 consecutive (cos+sin, sin) pairs
 *
 * Generator (pseudo-code): every "x = ..." line below emits one table entry,
 *   giving 17 angles * 2 entries = 34 entries
 *
 * for (i = 0; i <= (32/2); i++) {
 *   angle = i * M_PI / 64;
 *   x = (cos(angle) + sin(angle));
 *   x = sin(angle);
 * }
 *
 * PostMultiply64() reads pair 0 before its loop and one further pair per pass,
 *   so a full 16-pass run (nSampsOut = 64) consumes exactly all 17 pairs.
 * (cos - sin) is not stored; it is rebuilt at run time as (cos+sin) - 2*sin.
 */
static const int cos1sin1tab64[34] = {
	0x40000000, 0x00000000, 0x43103085, 0x0323ecbe, 0x45f704f7, 0x0645e9af, 0x48b2b335, 0x09640837,
	0x4b418bbe, 0x0c7c5c1e, 0x4da1fab5, 0x0f8cfcbe, 0x4fd288dc, 0x1294062f, 0x51d1dc80, 0x158f9a76,
	0x539eba45, 0x187de2a7, 0x553805f2, 0x1b5d100a, 0x569cc31b, 0x1e2b5d38, 0x57cc15bc, 0x20e70f32,
	0x58c542c5, 0x238e7673, 0x5987b08a, 0x261feffa, 0x5a12e720, 0x2899e64a, 0x5a6690ae, 0x2afad269,
	0x5a82799a, 0x2d413ccd,
};

/**************************************************************************************
 * Function:    PreMultiply64
 *
 * Description: pre-twiddle stage of the 64-point DCT-IV, performed in place
 *
 * Inputs:      zbuf1 - buffer of 64 samples
 *
 * Outputs:     twiddled samples, written back to the same buffer
 *
 * Return:      none
 *
 * Notes:       guard-bit contract (unchanged): minimum 1 GB in, 2 GB out,
 *                gains 2 int bits, gbOut = gbIn + 1
 *              output is limited to sqrt(2)/2 plus GB in full GB
 *              each complex rotation costs 3 multiplies and 3 adds rather than
 *                4 multiplies and 2 adds:
 *                  t            = sin * (re + im)
 *                  (cos-sin)*re + t  ==  cos*re + sin*im
 *                  (cos+sin)*im - t  ==  cos*im - sin*re
 *              MULSHIFT32 comes from assembly.h; it is presumed to return the
 *                high 32 bits of the 64-bit signed product - confirm there
 **************************************************************************************/
static void PreMultiply64(int *zbuf1)
{
	const int *tw = cos4sin4tab64;	/* 4 Q30 twiddle entries consumed per pass */
	int lo, hi;

	/* walk inward from both ends of the buffer, two samples from each end per pass:
	 * lo = 0, 2, ..., 30 and hi = 63, 61, ..., 33 (16 passes)
	 */
	for (lo = 0, hi = 63; lo < 32; lo += 2, hi -= 2, tw += 4) {
		const int cpsLo = tw[0];		/* cos+sin for the low-end rotation  */
		const int sinLo = tw[1];		/* sin     for the low-end rotation  */
		const int cpsHi = tw[2];		/* cos+sin for the high-end rotation */
		const int sinHi = tw[3];		/* sin     for the high-end rotation */
		const int cmsLo = cpsLo - 2*sinLo;	/* cos-sin, rebuilt from the table   */
		const int cmsHi = cpsHi - 2*sinHi;

		/* fetch all four inputs before storing anything (in-place operation);
		 * real parts come from even positions, imaginary parts from the mirrored end
		 */
		const int reLo = zbuf1[lo];
		const int imHi = zbuf1[lo + 1];
		const int imLo = zbuf1[hi];
		const int reHi = zbuf1[hi - 1];
		int cross;

		/* multiplying by Q30 gains 2 int bits; the worst-case per-sample gain is
		 * MAX(sin+cos) = 1.414, which eats one of them in the add
		 */
		cross = MULSHIFT32(sinLo, reLo + imLo);
		zbuf1[lo]     = MULSHIFT32(cmsLo, reLo) + cross;	/* cos*reLo + sin*imLo */
		zbuf1[lo + 1] = MULSHIFT32(cpsLo, imLo) - cross;	/* cos*imLo - sin*reLo */

		cross = MULSHIFT32(sinHi, reHi + imHi);
		zbuf1[hi]     = MULSHIFT32(cpsHi, imHi) - cross;	/* cos*imHi - sin*reHi */
		zbuf1[hi - 1] = MULSHIFT32(cmsHi, reHi) + cross;	/* cos*reHi + sin*imHi */
	}
}

/**************************************************************************************
 * Function:    PostMultiply64
 *
 * Description: post-twiddle stage of the 64-point type-IV DCT, performed in place
 *
 * Inputs:      fft1      - buffer of 64 samples
 *              nSampsOut - number of output samples to calculate
 *
 * Outputs:     twiddled samples, written back to the same buffer
 *
 * Return:      none
 *
 * Notes:       guard-bit contract (unchanged): minimum 1 GB in, 2 GB out,
 *                gains 2 int bits, gbOut = gbIn + 1
 *              output is limited to sqrt(2)/2 plus GB in full GB
 *              nSampsOut is rounded up to the next multiple of 4, because each
 *                pass produces 4 samples (2 at the front, 2 at the back)
 *              pass j uses twiddle pairs j and j+1 of cos1sin1tab64, so
 *                neighbouring passes share one pair
 *              MULSHIFT32 comes from assembly.h; it is presumed to return the
 *                high 32 bits of the 64-bit signed product - confirm there
 **************************************************************************************/
static void PostMultiply64(int *fft1, int nSampsOut)
{
	const int *tw = cos1sin1tab64;		/* sliding window: tw[0..1] = pair j, tw[2..3] = pair j+1 */
	int *head = fft1;			/* advances from the start of the buffer */
	int *tail = fft1 + 64 - 1;		/* retreats from the last sample */
	int passes = (nSampsOut + 3) >> 2;	/* 4 outputs per pass */

	while (passes != 0) {
		const int cpsA = tw[0];
		const int sinA = tw[1];
		const int cmsA = cpsA - 2*sinA;	/* cos-sin, rebuilt from the table */
		const int cpsB = tw[2];
		const int sinB = tw[3];
		const int cmsB = cpsB - 2*sinB;

		/* fetch all four inputs before storing anything (in-place operation) */
		const int reA = head[0];
		const int imA = head[1];
		const int reB = tail[-1];
		const int imB = -tail[0];	/* second rotation uses the negated imaginary part */
		int cross;

		/* gain 2 int bits (multiplying by Q30), max gain = sqrt(2) */
		cross = MULSHIFT32(sinA, reA + imA);
		tail[0]  = cross - MULSHIFT32(cpsA, imA);	/* sin*reA - cos*imA */
		head[0]  = cross + MULSHIFT32(cmsA, reA);	/* cos*reA + sin*imA */

		cross = MULSHIFT32(sinB, reB + imB);
		tail[-1] = cross - MULSHIFT32(cpsB, imB);	/* sin*reB - cos*imB */
		head[1]  = cross + MULSHIFT32(cmsB, reB);	/* cos*reB + sin*imB */

		head += 2;
		tail -= 2;
		tw   += 2;	/* pair j+1 becomes pair j of the next pass */
		passes--;
	}
}

/**************************************************************************************
 * Function:    QMFAnalysisConv
 *
 * Description: convolution kernel for analysis QMF
 *
 * Inputs:      pointer to coefficient table, reordered for sequential access
 *              delay buffer of size 32*10 = 320 real-valued PCM samples
 *              index for delay ring buffer (range = [0, 9])
 *
 * Outputs:     64 consecutive 32-bit samples
 *
 * Return:      none
 *
 * Notes:       this is carefully written to be efficient on ARM
 *              use the assembly code version in sbrqmfak.s when building for ARM!
 **************************************************************************************/
#if (defined (__arm) && defined (__ARMCC_VERSION)) || (defined (_WIN32) && defined (_WIN32_WCE) && defined (ARM)) || (defined(__GNUC__) && defined(__arm__))
#ifdef __cplusplus
extern "C"
#endif
void QMFAnalysisConv(int *cTab, int *delay, int dIdx, int *uBuf);
#else
void QMFAnalysisConv(int *cTab, int *delay, int dIdx, int *uBuf)
{
	int k, dOff;
	int *cPtr0, *cPtr1;
	U64 u64lo, u64hi;

	dOff = dIdx*32 + 31;
	cPtr0 = cTab;
	cPtr1 = cTab + 33*5 - 1;

	/* special first pass since we need to flip sign to create cTab[384], cTab[512] */
	u64lo.w64 = 0;
	u64hi.w64 = 0;
	u64lo.w64 = MADD64(u64lo.w64,  *cPtr0++,   delay[dOff]);	dOff -= 32; if (dOff < 0) {dOff += 320;}
	u64hi.w64 = MADD64(u64hi.w64,  *cPtr0++,   delay[dOff]);	dOff -= 32; if (dOff < 0) {dOff += 320;}
	u64lo.w64 = MADD64(u64lo.w64,  *cPtr0++,   delay[dOff]);	dOff -= 32; if (dOff < 0) {dOff += 320;}
	u64hi.w64 = MADD64(u64hi.w64,  *cPtr0++,   delay[dOff]);	dOff -= 32; if (dOff < 0) {dOff += 320;}
	u64lo.w64 = MADD64(u64lo.w64,  *cPtr0++,   delay[dOff]);	dOff -= 32; if (dOff < 0) {dOff += 320;}
	u64hi.w64 = MADD64(u64hi.w64,  *cPtr1--,   delay[dOff]);	dOff -= 32; if (dOff < 0) {dOff += 320;}
	u64lo.w64 = MADD64(u64lo.w64, -(*cPtr1--), delay[dOff]);	dOff -= 32; if (dOff < 0) {dOff += 320;}
	u64hi.w64 = MADD64(u64hi.w64,  *cPtr1--,   delay[dOff]);	dOff -= 32; if (dOff < 0) {dOff += 320;}
	u64lo.w64 = MADD64(u64lo.w64, -(*cPtr1--), delay[dOff]);	dOff -= 32; if (dOff < 0) {dOff += 320;}
	u64hi.w64 = MADD64(u64hi.w64,  *cPtr1--,   delay[dOff]);	dOff -= 32; if (dOff < 0) {dOff += 320;}

	uBuf[0]  = u64lo.r.hi32;
	uBuf[32] = u64hi.r.hi32;
	uBuf++;
	dOff--;

	/* max gain for any sample in uBuf, after scaling by cTab, ~= 0.99
	 * so we can just sum the uBuf values with no overflow problems
	 */
	for (k = 1; k <= 31; k++) {
		u64lo.w64 = 0;
		u64hi.w64 = 0;
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -