📄 imdct.c
字号:
es = 0; /* max gain = 18, assume adequate guard bits */ for (i = 8; i >= 0; i--) { acc1 = (*xCurr--) - acc1; acc2 = acc1 - acc2; acc1 = (*xCurr--) - acc1; xBuf[i+9] = acc2; /* odd */ xBuf[i+0] = acc1; /* even */ } } /* xEven[0] and xOdd[0] scaled by 0.5 */ xBuf[9] >>= 1; xBuf[0] >>= 1; /* do 9-point IDCT on even and odd */ idct9(xBuf+0); /* even */ idct9(xBuf+9); /* odd */ xp = xBuf + 8; cp = c18 + 8; mOut = 0; if (btPrev == 0 && btCurr == 0) { /* fast path - use symmetry of sin window to reduce windowing multiplies to 18 (N/2) */ wp = fastWin36; for (i = 0; i < 9; i++) { /* do ARM-style pointer arithmetic (i still needed for y[] indexing - compiler spills if 2 y pointers) */ c = *cp--; xo = *(xp + 9); xe = *xp--; /* gain 2 int bits here */ xo = MULSHIFT32(c, xo); /* 2*c18*xOdd (mul by 2 implicit in scaling) */ xe >>= 2; s = -(*xPrev); /* sum from last block (always at least 2 guard bits) */ d = -(xe - xo); /* gain 2 int bits, don't shift xo (effective << 1 to eat sign bit, << 1 for mul by 2) */ (*xPrev++) = xe + xo; /* symmetry - xPrev[i] = xPrev[17-i] for long blocks */ t = s - d; yLo = (d + (MULSHIFT32(t, *wp++) << 2)); yHi = (s + (MULSHIFT32(t, *wp++) << 2)); y[(i)*NBANDS] = yLo; y[(17-i)*NBANDS] = yHi; mOut |= FASTABS(yLo); mOut |= FASTABS(yHi); } } else { /* slower method - either prev or curr is using window type != 0 so do full 36-point window * output xPrevWin has at least 3 guard bits (xPrev has 2, gain 1 in WinPrevious) */ WinPrevious(xPrev, xPrevWin, btPrev); wp = imdctWin[btCurr]; for (i = 0; i < 9; i++) { c = *cp--; xo = *(xp + 9); xe = *xp--; /* gain 2 int bits here */ xo = MULSHIFT32(c, xo); /* 2*c18*xOdd (mul by 2 implicit in scaling) */ xe >>= 2; d = xe - xo; (*xPrev++) = xe + xo; /* symmetry - xPrev[i] = xPrev[17-i] for long blocks */ yLo = (xPrevWin[i] + MULSHIFT32(d, wp[i])) << 2; yHi = (xPrevWin[17-i] + MULSHIFT32(d, wp[17-i])) << 2; y[(i)*NBANDS] = yLo; y[(17-i)*NBANDS] = yHi; mOut |= FASTABS(yLo); mOut |= FASTABS(yHi); } } xPrev -= 9; mOut |= FreqInvertRescale(y, xPrev, blockIdx, es); return mOut;}static const int c3_0 = 0x6ed9eba1; /* format = Q31, cos(pi/6) */static const int c6[3] = { 0x7ba3751d, 0x5a82799a, 0x2120fb83 }; /* format = Q31, cos(((0:2) + 0.5) * (pi/6)) *//* 12-point inverse DCT, used in IMDCT12x3() * 4 input guard bits will ensure no overflow */static __inline void imdct12 (int *x, int *out){ int a0, a1, a2; int x0, x1, x2, x3, x4, x5; x0 = *x; x+=3; x1 = *x; x+=3; x2 = *x; x+=3; x3 = *x; x+=3; x4 = *x; x+=3; x5 = *x; x+=3; x4 -= x5; x3 -= x4; x2 -= x3; x3 -= x5; x1 -= x2; x0 -= x1; x1 -= x3; x0 >>= 1; x1 >>= 1; a0 = MULSHIFT32(c3_0, x2) << 1; a1 = x0 + (x4 >> 1); a2 = x0 - x4; x0 = a1 + a0; x2 = a2; x4 = a1 - a0; a0 = MULSHIFT32(c3_0, x3) << 1; a1 = x1 + (x5 >> 1); a2 = x1 - x5; /* cos window odd samples, mul by 2, eat sign bit */ x1 = MULSHIFT32(c6[0], a1 + a0) << 2; x3 = MULSHIFT32(c6[1], a2) << 2; x5 = MULSHIFT32(c6[2], a1 - a0) << 2; *out = x0 + x1; out++; *out = x2 + x3; out++; *out = x4 + x5; out++; *out = x4 - x5; out++; *out = x2 - x3; out++; *out = x0 - x1;}/************************************************************************************** * Function: IMDCT12x3 * * Description: three 12-point modified DCT's for short blocks, with windowing, * short block concatenation, and overlap-add * * Inputs: 3 interleaved vectors of 6 samples each * (block0[0], block1[0], block2[0], block0[1], block1[1]....) * overlap part of last IMDCT (9 samples - see output comments) * window type (0,1,2,3) of previous block * current block index (for deciding whether to do frequency inversion) * number of guard bits in input vector * * Outputs: updated sample vector x, net gain of 1 integer bit * second half of (unwindowed) IMDCT's - save for next time * only save 9 xPrev samples, using symmetry (see WinPrevious()) * * Return: mOut (OR of abs(y) for all y calculated here) * * TODO: optimize for ARM **************************************************************************************/static int IMDCT12x3(int *xCurr, int *xPrev, int *y, int btPrev, int blockIdx, int gb){ int i, es, mOut, yLo, xBuf[18], xPrevWin[18]; /* need temp buffer for reordering short blocks */ const int *wp; es = 0; /* 7 gb is always adequate for accumulator loop + idct12 + window + overlap */ if (gb < 7) { es = 7 - gb; for (i = 0; i < 18; i+=2) { xCurr[i+0] >>= es; xCurr[i+1] >>= es; *xPrev++ >>= es; } xPrev -= 9; } /* requires 4 input guard bits for each imdct12 */ imdct12(xCurr + 0, xBuf + 0); imdct12(xCurr + 1, xBuf + 6); imdct12(xCurr + 2, xBuf + 12); /* window previous from last time */ WinPrevious(xPrev, xPrevWin, btPrev); /* could unroll this for speed, minimum loads (short blocks usually rare, so doesn't make much overall difference) * xPrevWin[i] << 2 still has 1 gb always, max gain of windowed xBuf stuff also < 1.0 and gain the sign bit * so y calculations won't overflow */ wp = imdctWin[2]; mOut = 0; for (i = 0; i < 3; i++) { yLo = (xPrevWin[ 0+i] << 2); mOut |= FASTABS(yLo); y[( 0+i)*NBANDS] = yLo; yLo = (xPrevWin[ 3+i] << 2); mOut |= FASTABS(yLo); y[( 3+i)*NBANDS] = yLo; yLo = (xPrevWin[ 6+i] << 2) + (MULSHIFT32(wp[0+i], xBuf[3+i])); mOut |= FASTABS(yLo); y[( 6+i)*NBANDS] = yLo; yLo = (xPrevWin[ 9+i] << 2) + (MULSHIFT32(wp[3+i], xBuf[5-i])); mOut |= FASTABS(yLo); y[( 9+i)*NBANDS] = yLo; yLo = (xPrevWin[12+i] << 2) + (MULSHIFT32(wp[6+i], xBuf[2-i]) + MULSHIFT32(wp[0+i], xBuf[(6+3)+i])); mOut |= FASTABS(yLo); y[(12+i)*NBANDS] = yLo; yLo = (xPrevWin[15+i] << 2) + (MULSHIFT32(wp[9+i], xBuf[0+i]) + MULSHIFT32(wp[3+i], xBuf[(6+5)-i])); mOut |= FASTABS(yLo); y[(15+i)*NBANDS] = yLo; } /* save previous (unwindowed) for overlap - only need samples 6-8, 12-17 */ for (i = 6; i < 9; i++) *xPrev++ = xBuf[i] >> 2; for (i = 12; i < 18; i++) *xPrev++ = xBuf[i] >> 2; xPrev -= 9; mOut |= FreqInvertRescale(y, xPrev, blockIdx, es); return mOut;}/************************************************************************************** * Function: HybridTransform * * Description: IMDCT's, windowing, and overlap-add on long/short/mixed blocks * * Inputs: vector of input coefficients, length = nBlocksTotal * 18) * vector of overlap samples from last time, length = nBlocksPrev * 9) * buffer for output samples, length = MAXNSAMP * SideInfoSub struct for this granule/channel * BlockCount struct with necessary info * number of non-zero input and overlap blocks * number of long blocks in input vector (rest assumed to be short blocks) * number of blocks which use long window (type) 0 in case of mixed block * (bc->currWinSwitch, 0 for non-mixed blocks) * * Outputs: transformed, windowed, and overlapped sample buffer * does frequency inversion on odd blocks * updated buffer of samples for overlap * * Return: number of non-zero IMDCT blocks calculated in this call * (including overlap-add) * * TODO: examine mixedBlock/winSwitch logic carefully (test he_mode.bit) **************************************************************************************/static int HybridTransform(int *xCurr, int *xPrev, int y[BLOCK_SIZE][NBANDS], SideInfoSub *sis, BlockCount *bc){ int xPrevWin[18], currWinIdx, prevWinIdx; int i, j, nBlocksOut, nonZero, mOut; int fiBit, xp; ASSERT(bc->nBlocksLong <= NBANDS); ASSERT(bc->nBlocksTotal <= NBANDS); ASSERT(bc->nBlocksPrev <= NBANDS); mOut = 0; /* do long blocks, if any */ for(i = 0; i < bc->nBlocksLong; i++) { /* currWinIdx picks the right window for long blocks (if mixed, long blocks use window type 0) */ currWinIdx = sis->blockType; if (sis->mixedBlock && i < bc->currWinSwitch) currWinIdx = 0; prevWinIdx = bc->prevType; if (i < bc->prevWinSwitch) prevWinIdx = 0; /* do 36-point IMDCT, including windowing and overlap-add */ mOut |= IMDCT36(xCurr, xPrev, &(y[0][i]), currWinIdx, prevWinIdx, i, bc->gbIn); xCurr += 18; xPrev += 9; } /* do short blocks (if any) */ for ( ; i < bc->nBlocksTotal; i++) { ASSERT(sis->blockType == 2); prevWinIdx = bc->prevType; if (i < bc->prevWinSwitch) prevWinIdx = 0; mOut |= IMDCT12x3(xCurr, xPrev, &(y[0][i]), prevWinIdx, i, bc->gbIn); xCurr += 18; xPrev += 9; } nBlocksOut = i; /* window and overlap prev if prev longer that current */ for ( ; i < bc->nBlocksPrev; i++) { prevWinIdx = bc->prevType; if (i < bc->prevWinSwitch) prevWinIdx = 0; WinPrevious(xPrev, xPrevWin, prevWinIdx); nonZero = 0; fiBit = i << 31; for (j = 0; j < 9; j++) { xp = xPrevWin[2*j+0] << 2; /* << 2 temp for scaling */ nonZero |= xp; y[2*j+0][i] = xp; mOut |= FASTABS(xp); /* frequency inversion on odd blocks/odd samples (flip sign if i odd, j odd) */ xp = xPrevWin[2*j+1] << 2; xp = (xp ^ (fiBit >> 31)) + (i & 0x01); nonZero |= xp; y[2*j+1][i] = xp; mOut |= FASTABS(xp); xPrev[j] = 0; } xPrev += 9; if (nonZero) nBlocksOut = i; } /* clear rest of blocks */ for ( ; i < 32; i++) { for (j = 0; j < 18; j++) y[j][i] = 0; } bc->gbOut = CLZ(mOut) - 1; return nBlocksOut;}/************************************************************************************** * Function: IMDCT * * Description: do alias reduction, inverse MDCT, overlap-add, and frequency inversion * * Inputs: MP3DecInfo structure filled by UnpackFrameHeader(), UnpackSideInfo(), * UnpackScaleFactors(), and DecodeHuffman() (for this granule, channel) * includes PCM samples in overBuf (from last call to IMDCT) for OLA * index of current granule and channel * * Outputs: PCM samples in outBuf, for input to subband transform * PCM samples in overBuf, for OLA next time * updated hi->nonZeroBound index for this channel * * Return: 0 on success, -1 if null input pointers **************************************************************************************/int IMDCT(MP3DecInfo *mp3DecInfo, int gr, int ch){ int nBfly, blockCutoff; FrameHeader *fh; SideInfo *si; HuffmanInfo *hi; IMDCTInfo *mi; BlockCount bc; /* validate pointers */ if (!mp3DecInfo || !mp3DecInfo->FrameHeaderPS || !mp3DecInfo->SideInfoPS || !mp3DecInfo->HuffmanInfoPS || !mp3DecInfo->IMDCTInfoPS) return -1; /* si is an array of up to 4 structs, stored as gr0ch0, gr0ch1, gr1ch0, gr1ch1 */ fh = (FrameHeader *)(mp3DecInfo->FrameHeaderPS); si = (SideInfo *)(mp3DecInfo->SideInfoPS); hi = (HuffmanInfo*)(mp3DecInfo->HuffmanInfoPS); mi = (IMDCTInfo *)(mp3DecInfo->IMDCTInfoPS); /* anti-aliasing done on whole long blocks only * for mixed blocks, nBfly always 1, except 3 for 8 kHz MPEG 2.5 (see sfBandTab) * nLongBlocks = number of blocks with (possibly) non-zero power * nBfly = number of butterflies to do (nLongBlocks - 1, unless no long blocks) */ blockCutoff = fh->sfBand->l[(fh->ver == MPEG1 ? 8 : 6)] / 18; /* same as 3* num short sfb's in spec */ if (si->sis[gr][ch].blockType != 2) { /* all long transforms */ bc.nBlocksLong = MIN((hi->nonZeroBound[ch] + 7) / 18 + 1, 32); nBfly = bc.nBlocksLong - 1; } else if (si->sis[gr][ch].blockType == 2 && si->sis[gr][ch].mixedBlock) { /* mixed block - long transforms until cutoff, then short transforms */ bc.nBlocksLong = blockCutoff; nBfly = bc.nBlocksLong - 1; } else { /* all short transforms */ bc.nBlocksLong = 0; nBfly = 0; } AntiAlias(hi->huffDecBuf[ch], nBfly); hi->nonZeroBound[ch] = MAX(hi->nonZeroBound[ch], (nBfly * 18) + 8); ASSERT(hi->nonZeroBound[ch] <= MAX_NSAMP); /* for readability, use a struct instead of passing a million parameters to HybridTransform() */ bc.nBlocksTotal = (hi->nonZeroBound[ch] + 17) / 18; bc.nBlocksPrev = mi->numPrevIMDCT[ch]; bc.prevType = mi->prevType[ch]; bc.prevWinSwitch = mi->prevWinSwitch[ch]; bc.currWinSwitch = (si->sis[gr][ch].mixedBlock ? blockCutoff : 0); /* where WINDOW switches (not nec. transform) */ bc.gbIn = hi->gb[ch]; mi->numPrevIMDCT[ch] = HybridTransform(hi->huffDecBuf[ch], mi->overBuf[ch], mi->outBuf[ch], &si->sis[gr][ch], &bc); mi->prevType[ch] = si->sis[gr][ch].blockType; mi->prevWinSwitch[ch] = bc.currWinSwitch; /* 0 means not a mixed block (either all short or all long) */ mi->gb[ch] = bc.gbOut; ASSERT(mi->numPrevIMDCT[ch] <= NBANDS); /* output has gained 2 int bits */ return 0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -