📄 imdct.c
字号:
/* ***** BEGIN LICENSE BLOCK ***** * Version: RCSL 1.0/RPSL 1.0 * * Portions Copyright (c) 1995-2002 RealNetworks, Inc. All Rights Reserved. * * The contents of this file, and the files included with this file, are * subject to the current version of the RealNetworks Public Source License * Version 1.0 (the "RPSL") available at * http://www.helixcommunity.org/content/rpsl unless you have licensed * the file under the RealNetworks Community Source License Version 1.0 * (the "RCSL") available at http://www.helixcommunity.org/content/rcsl, * in which case the RCSL will apply. You may also obtain the license terms * directly from RealNetworks. You may not use this file except in * compliance with the RPSL or, if you have a valid RCSL with RealNetworks * applicable to this file, the RCSL. Please see the applicable RPSL or * RCSL for the rights, obligations and limitations governing use of the * contents of the file. * * This file is part of the Helix DNA Technology. RealNetworks is the * developer of the Original Code and owns the copyrights in the portions * it created. * * This file, and the files included with this file, is distributed and made * available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * * Technology Compatibility Kit Test Suite(s) Location: * http://www.helixcommunity.org/content/tck * * Contributor(s): * * ***** END LICENSE BLOCK ***** */ /************************************************************************************** * Fixed-point MP3 decoder * Jon Recker (jrecker@real.com), Ken Cooke (kenc@real.com) * June 2003 * * imdct.c - antialias, inverse transform (short/long/mixed), windowing, * overlap-add, frequency inversion **************************************************************************************/#include <hxmp3/coder.h>#include <hxmp3/assembly.h>/************************************************************************************** * Function: AntiAlias * * Description: smooth transition across DCT block boundaries (every 18 coefficients) * * Inputs: vector of dequantized coefficients, length = (nBfly+1) * 18 * number of "butterflies" to perform (one butterfly means one * inter-block smoothing operation) * * Outputs: updated coefficient vector x * * Return: none * * Notes: weighted average of opposite bands (pairwise) from the 8 samples * before and after each block boundary * nBlocks = (nonZeroBound + 7) / 18, since nZB is the first ZERO sample * above which all other samples are also zero * max gain per sample = 1.372 * MAX(i) (abs(csa[i][0]) + abs(csa[i][1])) * bits gained = 0 * assume at least 1 guard bit in x[] to avoid overflow * (should be guaranteed from dequant, and max gain from stproc * max * gain from AntiAlias < 2.0) **************************************************************************************/static void AntiAlias(int *x, int nBfly){ int k, a0, b0, c0, c1; const int *c; /* csa = Q31 */ for (k = nBfly; k > 0; k--) { c = csa[0]; x += 18; a0 = x[-1]; c0 = *c; c++; b0 = x[0]; c1 = *c; c++; x[-1] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) << 1; x[0] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) << 1; a0 = x[-2]; c0 = *c; c++; b0 = x[1]; c1 = *c; c++; x[-2] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) << 1; x[1] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) << 1; a0 = x[-3]; c0 = *c; c++; b0 = x[2]; c1 = *c; c++; x[-3] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) << 1; x[2] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) << 1; a0 = x[-4]; c0 = *c; c++; b0 = x[3]; c1 = *c; c++; x[-4] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) << 1; x[3] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) << 1; a0 = x[-5]; c0 = *c; c++; b0 = x[4]; c1 = *c; c++; x[-5] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) << 1; x[4] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) << 1; a0 = x[-6]; c0 = *c; c++; b0 = x[5]; c1 = *c; c++; x[-6] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) << 1; x[5] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) << 1; a0 = x[-7]; c0 = *c; c++; b0 = x[6]; c1 = *c; c++; x[-7] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) << 1; x[6] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) << 1; a0 = x[-8]; c0 = *c; c++; b0 = x[7]; c1 = *c; c++; x[-8] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) << 1; x[7] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) << 1; }}/************************************************************************************** * Function: WinPrevious * * Description: apply specified window to second half of previous IMDCT (overlap part) * * Inputs: vector of 9 coefficients (xPrev) * * Outputs: 18 windowed output coefficients (gain 1 integer bit) * window type (0, 1, 2, 3) * * Return: none * * Notes: produces 9 output samples from 18 input samples via symmetry * all blocks gain at least 1 guard bit via window (long blocks get extra * sign bit, short blocks can have one addition but max gain < 1.0) **************************************************************************************/static void WinPrevious(int *xPrev, int *xPrevWin, int btPrev){ int i, x, *xp, *xpwLo, *xpwHi, wLo, wHi; const int *wpLo, *wpHi; xp = xPrev; /* mapping (see IMDCT12x3): xPrev[0-2] = sum[6-8], xPrev[3-8] = sum[12-17] */ if (btPrev == 2) { /* this could be reordered for minimum loads/stores */ wpLo = imdctWin[btPrev]; xPrevWin[ 0] = MULSHIFT32(wpLo[ 6], xPrev[2]) + MULSHIFT32(wpLo[0], xPrev[6]); xPrevWin[ 1] = MULSHIFT32(wpLo[ 7], xPrev[1]) + MULSHIFT32(wpLo[1], xPrev[7]); xPrevWin[ 2] = MULSHIFT32(wpLo[ 8], xPrev[0]) + MULSHIFT32(wpLo[2], xPrev[8]); xPrevWin[ 3] = MULSHIFT32(wpLo[ 9], xPrev[0]) + MULSHIFT32(wpLo[3], xPrev[8]); xPrevWin[ 4] = MULSHIFT32(wpLo[10], xPrev[1]) + MULSHIFT32(wpLo[4], xPrev[7]); xPrevWin[ 5] = MULSHIFT32(wpLo[11], xPrev[2]) + MULSHIFT32(wpLo[5], xPrev[6]); xPrevWin[ 6] = MULSHIFT32(wpLo[ 6], xPrev[5]); xPrevWin[ 7] = MULSHIFT32(wpLo[ 7], xPrev[4]); xPrevWin[ 8] = MULSHIFT32(wpLo[ 8], xPrev[3]); xPrevWin[ 9] = MULSHIFT32(wpLo[ 9], xPrev[3]); xPrevWin[10] = MULSHIFT32(wpLo[10], xPrev[4]); xPrevWin[11] = MULSHIFT32(wpLo[11], xPrev[5]); xPrevWin[12] = xPrevWin[13] = xPrevWin[14] = xPrevWin[15] = xPrevWin[16] = xPrevWin[17] = 0; } else { /* use ARM-style pointers (*ptr++) so that ADS compiles well */ wpLo = imdctWin[btPrev] + 18; wpHi = wpLo + 17; xpwLo = xPrevWin; xpwHi = xPrevWin + 17; for (i = 9; i > 0; i--) { x = *xp++; wLo = *wpLo++; wHi = *wpHi--; *xpwLo++ = MULSHIFT32(wLo, x); *xpwHi-- = MULSHIFT32(wHi, x); } }}/************************************************************************************** * Function: FreqInvertRescale * * Description: do frequency inversion (odd samples of odd blocks) and rescale * if necessary (extra guard bits added before IMDCT) * * Inputs: output vector y (18 new samples, spaced NBANDS apart) * previous sample vector xPrev (9 samples) * index of current block * number of extra shifts added before IMDCT (usually 0) * * Outputs: inverted and rescaled (as necessary) outputs * rescaled (as necessary) previous samples * * Return: updated mOut (from new outputs y) **************************************************************************************/static int FreqInvertRescale(int *y, int *xPrev, int blockIdx, int es){ int i, d, mOut; int y0, y1, y2, y3, y4, y5, y6, y7, y8; if (es == 0) { /* fast case - frequency invert only (no rescaling) - can fuse into overlap-add for speed, if desired */ if (blockIdx & 0x01) { y += NBANDS; y0 = *y; y += 2*NBANDS; y1 = *y; y += 2*NBANDS; y2 = *y; y += 2*NBANDS; y3 = *y; y += 2*NBANDS; y4 = *y; y += 2*NBANDS; y5 = *y; y += 2*NBANDS; y6 = *y; y += 2*NBANDS; y7 = *y; y += 2*NBANDS; y8 = *y; y += 2*NBANDS; y -= 18*NBANDS; *y = -y0; y += 2*NBANDS; *y = -y1; y += 2*NBANDS; *y = -y2; y += 2*NBANDS; *y = -y3; y += 2*NBANDS; *y = -y4; y += 2*NBANDS; *y = -y5; y += 2*NBANDS; *y = -y6; y += 2*NBANDS; *y = -y7; y += 2*NBANDS; *y = -y8; y += 2*NBANDS; } return 0; } else { /* undo pre-IMDCT scaling, clipping if necessary */ mOut = 0; if (blockIdx & 0x01) { /* frequency invert */ for (i = 0; i < 18; i+=2) { d = *y; CLIP_2N(d, 31 - es); *y = d << es; mOut |= FASTABS(*y); y += NBANDS; d = -*y; CLIP_2N(d, 31 - es); *y = d << es; mOut |= FASTABS(*y); y += NBANDS; d = *xPrev; CLIP_2N(d, 31 - es); *xPrev++ = d << es; } } else { for (i = 0; i < 18; i+=2) { d = *y; CLIP_2N(d, 31 - es); *y = d << es; mOut |= FASTABS(*y); y += NBANDS; d = *y; CLIP_2N(d, 31 - es); *y = d << es; mOut |= FASTABS(*y); y += NBANDS; d = *xPrev; CLIP_2N(d, 31 - es); *xPrev++ = d << es; } } return mOut; }}/* format = Q31 * #define M_PI 3.14159265358979323846 * double u = 2.0 * M_PI / 9.0; * float c0 = sqrt(3.0) / 2.0; * float c1 = cos(u); * float c2 = cos(2*u); * float c3 = sin(u); * float c4 = sin(2*u); */static const int c9_0 = 0x6ed9eba1;static const int c9_1 = 0x620dbe8b;static const int c9_2 = 0x163a1a7e;static const int c9_3 = 0x5246dd49;static const int c9_4 = 0x7e0e2e32;/* format = Q31 * cos(((0:8) + 0.5) * (pi/18)) */static const int c18[9] = { 0x7f834ed0, 0x7ba3751d, 0x7401e4c1, 0x68d9f964, 0x5a82799a, 0x496af3e2, 0x36185aee, 0x2120fb83, 0x0b27eb5c, };/* require at least 3 guard bits in x[] to ensure no overflow */static __inline void idct9(int *x){ int a1, a2, a3, a4, a5, a6, a7, a8, a9; int a10, a11, a12, a13, a14, a15, a16, a17, a18; int a19, a20, a21, a22, a23, a24, a25, a26, a27; int m1, m3, m5, m6, m7, m8, m9, m10, m11, m12; int x0, x1, x2, x3, x4, x5, x6, x7, x8; x0 = x[0]; x1 = x[1]; x2 = x[2]; x3 = x[3]; x4 = x[4]; x5 = x[5]; x6 = x[6]; x7 = x[7]; x8 = x[8]; a1 = x0 - x6; a2 = x1 - x5; a3 = x1 + x5; a4 = x2 - x4; a5 = x2 + x4; a6 = x2 + x8; a7 = x1 + x7; a8 = a6 - a5; /* ie x[8] - x[4] */ a9 = a3 - a7; /* ie x[5] - x[7] */ a10 = a2 - x7; /* ie x[1] - x[5] - x[7] */ a11 = a4 - x8; /* ie x[2] - x[4] - x[8] */ /* do the << 1 as constant shifts where mX is actually used (free, no stall or extra inst.) */ m1 = MULSHIFT32(c9_0, x3); m3 = MULSHIFT32(c9_0, a10); m5 = MULSHIFT32(c9_1, a5); m6 = MULSHIFT32(c9_2, a6); m7 = MULSHIFT32(c9_1, a8); m8 = MULSHIFT32(c9_2, a5); m9 = MULSHIFT32(c9_3, a9); m10 = MULSHIFT32(c9_4, a7); m11 = MULSHIFT32(c9_3, a3); m12 = MULSHIFT32(c9_4, a9); a12 = x[0] + (x[6] >> 1); a13 = a12 + ( m1 << 1); a14 = a12 - ( m1 << 1); a15 = a1 + ( a11 >> 1); a16 = ( m5 << 1) + (m6 << 1); a17 = ( m7 << 1) - (m8 << 1); a18 = a16 + a17; a19 = ( m9 << 1) + (m10 << 1); a20 = (m11 << 1) - (m12 << 1); a21 = a20 - a19; a22 = a13 + a16; a23 = a14 + a16; a24 = a14 + a17; a25 = a13 + a17; a26 = a14 - a18; a27 = a13 - a18; x0 = a22 + a19; x[0] = x0; x1 = a15 + (m3 << 1); x[1] = x1; x2 = a24 + a20; x[2] = x2; x3 = a26 - a21; x[3] = x3; x4 = a1 - a11; x[4] = x4; x5 = a27 + a21; x[5] = x5; x6 = a25 - a20; x[6] = x6; x7 = a15 - (m3 << 1); x[7] = x7; x8 = a23 - a19; x[8] = x8;}/* let c(j) = cos(M_PI/36 * ((j)+0.5)), s(j) = sin(M_PI/36 * ((j)+0.5)) * then fastWin[2*j+0] = c(j)*(s(j) + c(j)), j = [0, 8] * fastWin[2*j+1] = c(j)*(s(j) - c(j)) * format = Q30 */static const int fastWin36[18] = { 0x42aace8b, 0xc2e92724, 0x47311c28, 0xc95f619a, 0x4a868feb, 0xd0859d8c, 0x4c913b51, 0xd8243ea0, 0x4d413ccc, 0xe0000000, 0x4c913b51, 0xe7dbc161, 0x4a868feb, 0xef7a6275, 0x47311c28, 0xf6a09e67, 0x42aace8b, 0xfd16d8dd,};/************************************************************************************** * Function: IMDCT36 * * Description: 36-point modified DCT, with windowing and overlap-add (50% overlap) * * Inputs: vector of 18 coefficients (N/2 inputs produces N outputs, by symmetry) * overlap part of last IMDCT (9 samples - see output comments) * window type (0,1,2,3) of current and previous block * current block index (for deciding whether to do frequency inversion) * number of guard bits in input vector * * Outputs: 18 output samples, after windowing and overlap-add with last frame * second half of (unwindowed) 36-point IMDCT - save for next time * only save 9 xPrev samples, using symmetry (see WinPrevious()) * * Notes: this is Ken's hyper-fast algorithm, including symmetric sin window * optimization, if applicable * total number of multiplies, general case: * 2*10 (idct9) + 9 (last stage imdct) + 36 (for windowing) = 65 * total number of multiplies, btCurr == 0 && btPrev == 0: * 2*10 (idct9) + 9 (last stage imdct) + 18 (for windowing) = 47 * * blockType == 0 is by far the most common case, so it should be * possible to use the fast path most of the time * this is the fastest known algorithm for performing * long IMDCT + windowing + overlap-add in MP3 * * Return: mOut (OR of abs(y) for all y calculated here) * * TODO: optimize for ARM (reorder window coefs, ARM-style pointers in C, * inline asm may or may not be helpful) **************************************************************************************/static int IMDCT36(int *xCurr, int *xPrev, int *y, int btCurr, int btPrev, int blockIdx, int gb){ int i, es, xBuf[18], xPrevWin[18]; int acc1, acc2, s, d, t, mOut; int xo, xe, c, *xp, yLo, yHi; const int *cp, *wp; acc1 = acc2 = 0; xCurr += 17; /* 7 gb is always adequate for antialias + accumulator loop + idct9 */ if (gb < 7) { /* rarely triggered - 5% to 10% of the time on normal clips (with Q25 input) */ es = 7 - gb; for (i = 8; i >= 0; i--) { acc1 = ((*xCurr--) >> es) - acc1; acc2 = acc1 - acc2; acc1 = ((*xCurr--) >> es) - acc1; xBuf[i+9] = acc2; /* odd */ xBuf[i+0] = acc1; /* even */ xPrev[i] >>= es; } } else {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -