📄 sbrqmf.c
字号:
/* ten multiply-accumulates per pass, alternating between the u64lo and u64hi accumulators */
        u64hi.w64 = 0;
        u64lo.w64 = MADD64(u64lo.w64, *cPtr0++, delay[dOff]);    dOff -= 32; if (dOff < 0) {dOff += 320;}
        u64hi.w64 = MADD64(u64hi.w64, *cPtr0++, delay[dOff]);    dOff -= 32; if (dOff < 0) {dOff += 320;}
        u64lo.w64 = MADD64(u64lo.w64, *cPtr0++, delay[dOff]);    dOff -= 32; if (dOff < 0) {dOff += 320;}
        u64hi.w64 = MADD64(u64hi.w64, *cPtr0++, delay[dOff]);    dOff -= 32; if (dOff < 0) {dOff += 320;}
        u64lo.w64 = MADD64(u64lo.w64, *cPtr0++, delay[dOff]);    dOff -= 32; if (dOff < 0) {dOff += 320;}
        u64hi.w64 = MADD64(u64hi.w64, *cPtr1--, delay[dOff]);    dOff -= 32; if (dOff < 0) {dOff += 320;}
        u64lo.w64 = MADD64(u64lo.w64, *cPtr1--, delay[dOff]);    dOff -= 32; if (dOff < 0) {dOff += 320;}
        u64hi.w64 = MADD64(u64hi.w64, *cPtr1--, delay[dOff]);    dOff -= 32; if (dOff < 0) {dOff += 320;}
        u64lo.w64 = MADD64(u64lo.w64, *cPtr1--, delay[dOff]);    dOff -= 32; if (dOff < 0) {dOff += 320;}
        u64hi.w64 = MADD64(u64hi.w64, *cPtr1--, delay[dOff]);    dOff -= 32; if (dOff < 0) {dOff += 320;}

        /* keep only the high 32 bits of each 64-bit accumulator */
        uBuf[0]  = u64lo.r.hi32;
        uBuf[32] = u64hi.r.hi32;
        uBuf++;
        dOff--;
    }
}
#endif

/**************************************************************************************
 * Function:    QMFAnalysis
 *
 * Description: 32-subband analysis QMF (4.6.18.4.1)
 *
 * Inputs:      inbuf     - 32 consecutive samples of decoded 32-bit PCM,
 *                            format = Q(fBitsIn)
 *              delay     - delay buffer of size 32*10 = 320 PCM samples
 *              XBuf      - output/work buffer of 64*2 = 128 ints
 *              fBitsIn   - number of fraction bits in input PCM
 *              delayIdx  - index for delay ring buffer (range = [0, 9])
 *              qmfaBands - number of subbands to calculate (range = [0, 32])
 *
 * Outputs:     qmfaBands complex subband samples, format = Q(FBITS_OUT_QMFA)
 *              updated delay buffer
 *              updated delay index
 *
 * Return:      guard bit mask (OR of FASTABS() of every output word written)
 *
 * Notes:       output stored as RE{X0}, IM{X0}, RE{X1}, IM{X1}, ... RE{X31}, IM{X31}
 *              output stored in int buffer of size 64*2 = 128
 *                (zero-filled from XBuf[2*qmfaBands] to XBuf[127])
 *              no range checking is done on delayIdx or qmfaBands; the caller
 *                must respect the ranges above (the upper half of XBuf is used
 *                as scratch while the lower half is being written)
 **************************************************************************************/
int QMFAnalysis(int *inbuf, int *delay, int *XBuf, int fBitsIn, int *delayIdx, int qmfaBands)
{
    int n, y, shift, gbMask;
    int *delayPtr, *uBuf, *tBuf;

    /* use XBuf[128] as temp buffer for reordering */
    uBuf = XBuf;        /* first 64 samples */
    tBuf = XBuf + 64;   /* second 64 samples */

    /* overwrite oldest PCM with new PCM
     * delay[n] has 1 GB after shifting (either << or >>)
     */
    delayPtr = delay + (*delayIdx * 32);
    if (fBitsIn > FBITS_IN_QMFA) {
        /* input has more fraction bits than the filterbank wants: shift down */
        shift = MIN(fBitsIn - FBITS_IN_QMFA, 31);
        for (n = 32; n != 0; n--) {
            y = (*inbuf) >> shift;
            inbuf++;
            *delayPtr++ = y;
        }
    } else {
        /* input has fewer (or equal) fraction bits: scale up via CLIP_2N_SHIFT30 */
        shift = MIN(FBITS_IN_QMFA - fBitsIn, 30);
        for (n = 32; n != 0; n--) {
            y = *inbuf++;
            CLIP_2N_SHIFT30(y, shift);
            *delayPtr++ = y;
        }
    }

    QMFAnalysisConv((int *)cTabA, delay, *delayIdx, uBuf);

    /* uBuf has at least 2 GB right now (1 from clipping to Q(FBITS_IN_QMFA), one from
     *   the scaling by cTab (MULSHIFT32(*delayPtr--, *cPtr++), with net gain of < 1.0)
     * TODO - fuse with QMFAnalysisConv to avoid separate reordering
     */
    tBuf[2*0 + 0] = uBuf[0];
    tBuf[2*0 + 1] = uBuf[1];
    for (n = 1; n < 31; n++) {
        tBuf[2*n + 0] = -uBuf[64-n];
        tBuf[2*n + 1] =  uBuf[n+1];
    }
    tBuf[2*31 + 1] =  uBuf[32];
    tBuf[2*31 + 0] = -uBuf[33];

    /* fast in-place DCT-IV - only need 2*qmfaBands output samples */
    PreMultiply64(tBuf);                /* 2 GB in, 3 GB out */
    FFT32C(tBuf);                       /* 3 GB in, 1 GB out */
    PostMultiply64(tBuf, qmfaBands*2);  /* 1 GB in, 2 GB out */

    /* TODO - roll into PostMultiply (if enough registers) */
    gbMask = 0;
    for (n = 0; n < qmfaBands; n++) {
        XBuf[2*n+0] =  tBuf[ n + 0];    /* implicit scaling of 2 in our output Q format */
        gbMask |= FASTABS(XBuf[2*n+0]);
        XBuf[2*n+1] = -tBuf[63 - n];
        gbMask |= FASTABS(XBuf[2*n+1]);
    }

    /* fill top section with zeros for HF generation */
    for (    ; n < 64; n++) {
        XBuf[2*n+0] = 0;
        XBuf[2*n+1] = 0;
    }

    /* advance the ring-buffer slot, wrapping after the last one */
    *delayIdx = (*delayIdx == NUM_QMF_DELAY_BUFS - 1 ? 0 : *delayIdx + 1);

    /* minimum of 2 GB in output */
    return gbMask;
}

/* lose FBITS_LOST_DCT4_64 in DCT4, gain 6 for implicit scaling by 1/64, lose 1 for cTab multiply (Q31) */
#define FBITS_OUT_QMFS  (FBITS_IN_QMFS - FBITS_LOST_DCT4_64 + 6 - 1)
#define RND_VAL         (1 << (FBITS_OUT_QMFS-1))

/**************************************************************************************
 * Function:    QMFSynthesisConv
 *
 * Description: final convolution kernel for synthesis QMF
 *
 * Inputs:      cPtr   - pointer to coefficient table, reordered for sequential
 *                         access (10 coefficients consumed per output sample)
 *              delay  - delay buffer of size 64*10 = 640 complex samples (1280 ints)
 *              dIdx   - index for delay ring buffer (range = [0, 9])
 *              nChans - number of channels (stride between output samples)
 *
 * Outputs:     64 consecutive 16-bit PCM samples, interleaved by factor of nChans
 *
 * Return:      none
 *
 * Notes:       this is carefully written to be efficient on ARM
 *              use the assembly code version in sbrqmfsk.s when building for ARM!
 **************************************************************************************/
#if (defined (__arm) && defined (__ARMCC_VERSION)) || (defined (_WIN32) && defined (_WIN32_WCE) && defined (ARM)) || (defined(__GNUC__) && defined(__arm__))
#ifdef __cplusplus
extern "C"
#endif
void QMFSynthesisConv(int *cPtr, int *delay, int dIdx, short *outbuf, int nChans);
#else
void QMFSynthesisConv(int *cPtr, int *delay, int dIdx, short *outbuf, int nChans)
{
    int k, dOff0, dOff1;
    U64 sum64;

    /* dOff0 walks upward from the start of slot dIdx, dOff1 walks downward from
     * the word just before it (wrapping inside the 1280-int ring)
     */
    dOff0 = (dIdx)*128;
    dOff1 = dOff0 - 1;
    if (dOff1 < 0)
        dOff1 += 1280;

    /* scaling note: total gain of coefs (cPtr[0]-cPtr[9] for any k) is < 2.0, so 1 GB in delay values is adequate */
    for (k = 0; k <= 63; k++) {
        sum64.w64 = 0;
        sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff0]);    dOff0 -= 256; if (dOff0 < 0) {dOff0 += 1280;}
        sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff1]);    dOff1 -= 256; if (dOff1 < 0) {dOff1 += 1280;}
        sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff0]);    dOff0 -= 256; if (dOff0 < 0) {dOff0 += 1280;}
        sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff1]);    dOff1 -= 256; if (dOff1 < 0) {dOff1 += 1280;}
        sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff0]);    dOff0 -= 256; if (dOff0 < 0) {dOff0 += 1280;}
        sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff1]);    dOff1 -= 256; if (dOff1 < 0) {dOff1 += 1280;}
        sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff0]);    dOff0 -= 256; if (dOff0 < 0) {dOff0 += 1280;}
        sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff1]);    dOff1 -= 256; if (dOff1 < 0) {dOff1 += 1280;}
        sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff0]);    dOff0 -= 256; if (dOff0 < 0) {dOff0 += 1280;}
        sum64.w64 = MADD64(sum64.w64, *cPtr++, delay[dOff1]);    dOff1 -= 256; if (dOff1 < 0) {dOff1 += 1280;}

        /* five steps of 256 bring each offset back to where it started
         * (5*256 = 1280), so these move on to the next output sample
         */
        dOff0++;
        dOff1--;

        /* round, drop the fraction bits and saturate to 16 bits */
        *outbuf = CLIPTOSHORT((sum64.r.hi32 + RND_VAL) >> FBITS_OUT_QMFS);
        outbuf += nChans;
    }
}
#endif

/**************************************************************************************
 * Function:    QMFSynthesis
 *
 * Description: 64-subband synthesis QMF (4.6.18.4.2)
 *
 * Inputs:      inbuf     - 64 consecutive complex subband QMF samples,
 *                            format = Q(FBITS_IN_QMFS), stored as re, im pairs
 *              delay     - delay buffer of size 64*10 = 640 complex samples
 *                            (1280 ints)
 *              delayIdx  - index for delay ring buffer (range = [0, 9])
 *              qmfsBands - number of QMF subbands to process (range = [0, 64])
 *              nChans    - number of channels
 *
 * Outputs:     64 consecutive 16-bit PCM samples, interleaved by factor of nChans
 *              updated delay buffer
 *              updated delay index
 *
 * Return:      none
 *
 * Notes:       assumes MIN_GBITS_IN_QMFS guard bits in input, either from
 *                QMFAnalysis (if upsampling only) or from MapHF (if SBR on)
 *              no range checking is done on delayIdx or qmfsBands; the caller
 *                must respect the ranges above
 **************************************************************************************/
void QMFSynthesis(int *inbuf, int *delay, int *delayIdx, int qmfsBands, short *outbuf, int nChans)
{
    int n, a0, a1, b0, b1, dOff0, dOff1, dIdx;
    int *tBufLo, *tBufHi;

    /* the 128-int slot for this call is filled from both ends:
     * first words of each pair ascending from +0, second words descending from +127
     */
    dIdx = *delayIdx;
    tBufLo = delay + dIdx*128 + 0;
    tBufHi = delay + dIdx*128 + 127;

    /* reorder inputs to DCT-IV, only use first qmfsBands (complex) samples
     * TODO - fuse with PreMultiply64 to avoid separate reordering steps
     */
    for (n = 0; n < qmfsBands >> 1; n++) {
        a0 = *inbuf++;
        b0 = *inbuf++;
        a1 = *inbuf++;
        b1 = *inbuf++;
        *tBufLo++ = a0;
        *tBufLo++ = a1;
        *tBufHi-- = b0;
        *tBufHi-- = b1;
    }
    if (qmfsBands & 0x01) {
        /* odd band count: take the last sample and pad its partner with zero */
        a0 = *inbuf++;
        b0 = *inbuf++;
        *tBufLo++ = a0;
        *tBufHi-- = b0;
        *tBufLo++ = 0;
        *tBufHi-- = 0;
        n++;
    }
    /* zero the unused bands */
    for (     ; n < 32; n++) {
        *tBufLo++ = 0;
        *tBufHi-- = 0;
        *tBufLo++ = 0;
        *tBufHi-- = 0;
    }

    tBufLo = delay + dIdx*128 + 0;
    tBufHi = delay + dIdx*128 + 64;

    /* 2 GB in, 3 GB out */
    PreMultiply64(tBufLo);
    PreMultiply64(tBufHi);

    /* 3 GB in, 1 GB out */
    FFT32C(tBufLo);
    FFT32C(tBufHi);

    /* 1 GB in, 2 GB out */
    PostMultiply64(tBufLo, 64);
    PostMultiply64(tBufHi, 64);

    /* could fuse with PostMultiply64 to avoid separate pass */
    dOff0 = dIdx*128;
    dOff1 = dIdx*128 + 64;
    for (n = 32; n != 0; n--) {
        a0 =  (*tBufLo++);
        a1 =  (*tBufLo++);
        b0 =  (*tBufHi++);
        b1 = -(*tBufHi++);

        /* in-place butterfly: differences go to the low half, sums to the high half */
        delay[dOff0++] = (b0 - a0);
        delay[dOff0++] = (b1 - a1);
        delay[dOff1++] = (b0 + a0);
        delay[dOff1++] = (b1 + a1);
    }

    QMFSynthesisConv((int *)cTabS, delay, dIdx, outbuf, nChans);

    /* advance the ring-buffer slot, wrapping after the last one */
    *delayIdx = (*delayIdx == NUM_QMF_DELAY_BUFS - 1 ? 0 : *delayIdx + 1);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -