📄 nb_celp.c
字号:
ABS(4*nol_pitch[i]-ol_pitch)<=4 || ABS(5*nol_pitch[i]-ol_pitch)<=5)) { /*ol_pitch_coef=nol_pitch_coef[i];*/ ol_pitch = nol_pitch[i]; } } /*if (ol_pitch>50) ol_pitch/=2;*/ /*ol_pitch_coef = sqrt(ol_pitch_coef);*/ } else { ol_pitch=0; ol_pitch_coef=0; } /*Compute "real" excitation*/ SPEEX_COPY(st->exc, st->winBuf, diff); SPEEX_COPY(st->exc+diff, in, st->frameSize-diff); fir_mem16(st->exc, interp_lpc, st->exc, st->frameSize, st->lpcSize, st->mem_exc, stack); /* Compute open-loop excitation gain */ { spx_word16_t g = compute_rms16(st->exc, st->frameSize); if (st->submodeID!=1 && ol_pitch>0) ol_gain = MULT16_16(g, MULT16_16_Q14(QCONST16(1.1,14), spx_sqrt(QCONST32(1.,28)-MULT16_32_Q15(QCONST16(.8,15),SHL32(MULT16_16(ol_pitch_coef,ol_pitch_coef),16))))); else ol_gain = SHL32(EXTEND32(g),SIG_SHIFT); } }#ifdef VORBIS_PSYCHO SPEEX_MOVE(st->psy_window, st->psy_window+st->frameSize, 256-st->frameSize); SPEEX_COPY(&st->psy_window[256-st->frameSize], in, st->frameSize); compute_curve(st->psy, st->psy_window, st->curve); /*print_vec(st->curve, 128, "curve");*/ if (st->first) SPEEX_COPY(st->old_curve, st->curve, 128);#endif /*VBR stuff*/#ifndef DISABLE_VBR if (st->vbr && (st->vbr_enabled||st->vad_enabled)) { float lsp_dist=0; for (i=0;i<st->lpcSize;i++) lsp_dist += (st->old_lsp[i] - lsp[i])*(st->old_lsp[i] - lsp[i]); lsp_dist /= LSP_SCALING*LSP_SCALING; if (st->abr_enabled) { float qual_change=0; if (st->abr_drift2 * st->abr_drift > 0) { /* Only adapt if long-term and short-term drift are the same sign */ qual_change = -.00001*st->abr_drift/(1+st->abr_count); if (qual_change>.05) qual_change=.05; if (qual_change<-.05) qual_change=-.05; } st->vbr_quality += qual_change; if (st->vbr_quality>10) st->vbr_quality=10; if (st->vbr_quality<0) st->vbr_quality=0; } st->relative_quality = vbr_analysis(st->vbr, in, st->frameSize, ol_pitch, GAIN_SCALING_1*ol_pitch_coef); /*if (delta_qual<0)*/ /* delta_qual*=.1*(3+st->vbr_quality);*/ if (st->vbr_enabled) { spx_int32_t mode; int choice=0; float min_diff=100; mode = 8; while (mode) { int v1; float thresh; v1=(int)floor(st->vbr_quality); if (v1==10) thresh = vbr_nb_thresh[mode][v1]; else thresh = (st->vbr_quality-v1)*vbr_nb_thresh[mode][v1+1] + (1+v1-st->vbr_quality)*vbr_nb_thresh[mode][v1]; if (st->relative_quality > thresh && st->relative_quality-thresh<min_diff) { choice = mode; min_diff = st->relative_quality-thresh; } mode--; } mode=choice; if (mode==0) { if (st->dtx_count==0 || lsp_dist>.05 || !st->dtx_enabled || st->dtx_count>20) { mode=1; st->dtx_count=1; } else { mode=0; st->dtx_count++; } } else { st->dtx_count=0; } speex_encoder_ctl(state, SPEEX_SET_MODE, &mode); if (st->vbr_max>0) { spx_int32_t rate; speex_encoder_ctl(state, SPEEX_GET_BITRATE, &rate); if (rate > st->vbr_max) { rate = st->vbr_max; speex_encoder_ctl(state, SPEEX_SET_BITRATE, &rate); } } if (st->abr_enabled) { spx_int32_t bitrate; speex_encoder_ctl(state, SPEEX_GET_BITRATE, &bitrate); st->abr_drift+=(bitrate-st->abr_enabled); st->abr_drift2 = .95*st->abr_drift2 + .05*(bitrate-st->abr_enabled); st->abr_count += 1.0; } } else { /*VAD only case*/ int mode; if (st->relative_quality<2) { if (st->dtx_count==0 || lsp_dist>.05 || !st->dtx_enabled || st->dtx_count>20) { st->dtx_count=1; mode=1; } else { mode=0; st->dtx_count++; } } else { st->dtx_count = 0; mode=st->submodeSelect; } /*speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);*/ st->submodeID=mode; } } else { st->relative_quality = -1; }#endif /* #ifndef DISABLE_VBR */ if (st->encode_submode) { /* First, transmit a zero for narrowband */ speex_bits_pack(bits, 0, 1); /* Transmit the sub-mode we use for this frame */ speex_bits_pack(bits, st->submodeID, NB_SUBMODE_BITS); } /* If null mode (no transmission), just set a couple things to zero*/ if (st->submodes[st->submodeID] == NULL) { for (i=0;i<st->frameSize;i++) st->exc[i]=st->sw[i]=VERY_SMALL; for (i=0;i<st->lpcSize;i++) st->mem_sw[i]=0; st->first=1; st->bounded_pitch = 1; SPEEX_COPY(st->winBuf, in+2*st->frameSize-st->windowSize, st->windowSize-st->frameSize); /* Clear memory (no need to really compute it) */ for (i=0;i<st->lpcSize;i++) st->mem_sp[i] = 0; return 0; } /* LSP Quantization */ if (st->first) { for (i=0;i<st->lpcSize;i++) st->old_lsp[i] = lsp[i]; } /*Quantize LSPs*/#if 1 /*0 for unquantized*/ SUBMODE(lsp_quant)(lsp, qlsp, st->lpcSize, bits);#else for (i=0;i<st->lpcSize;i++) qlsp[i]=lsp[i];#endif /*If we use low bit-rate pitch mode, transmit open-loop pitch*/ if (SUBMODE(lbr_pitch)!=-1) { speex_bits_pack(bits, ol_pitch-st->min_pitch, 7); } if (SUBMODE(forced_pitch_gain)) { int quant; /* This just damps the pitch a bit, because it tends to be too aggressive when forced */ ol_pitch_coef = MULT16_16_Q15(QCONST16(.9,15), ol_pitch_coef);#ifdef FIXED_POINT quant = PSHR16(MULT16_16_16(15, ol_pitch_coef),GAIN_SHIFT);#else quant = (int)floor(.5+15*ol_pitch_coef*GAIN_SCALING_1);#endif if (quant>15) quant=15; if (quant<0) quant=0; speex_bits_pack(bits, quant, 4); ol_pitch_coef=MULT16_16_P15(QCONST16(0.066667,15),SHL16(quant,GAIN_SHIFT)); } /*Quantize and transmit open-loop excitation gain*/#ifdef FIXED_POINT { int qe = scal_quant32(ol_gain, ol_gain_table, 32); /*ol_gain = exp(qe/3.5)*SIG_SCALING;*/ ol_gain = MULT16_32_Q15(28406,ol_gain_table[qe]); speex_bits_pack(bits, qe, 5); }#else { int qe = (int)(floor(.5+3.5*log(ol_gain*1.0/SIG_SCALING))); if (qe<0) qe=0; if (qe>31) qe=31; ol_gain = exp(qe/3.5)*SIG_SCALING; speex_bits_pack(bits, qe, 5); }#endif /* Special case for first frame */ if (st->first) { for (i=0;i<st->lpcSize;i++) st->old_qlsp[i] = qlsp[i]; } /* Target signal */ ALLOC(target, st->subframeSize, spx_word16_t); ALLOC(innov, st->subframeSize, spx_sig_t); ALLOC(exc32, st->subframeSize, spx_word32_t); ALLOC(ringing, st->subframeSize, spx_word16_t); ALLOC(syn_resp, st->subframeSize, spx_word16_t); ALLOC(real_exc, st->subframeSize, spx_word16_t); ALLOC(mem, st->lpcSize, spx_mem_t); /* Loop on sub-frames */ for (sub=0;sub<st->nbSubframes;sub++) { int offset; spx_word16_t *sw; spx_word16_t *exc; int pitch; int response_bound = st->subframeSize; /* Offset relative to start of frame */ offset = st->subframeSize*sub; /* Excitation */ exc=st->exc+offset; /* Weighted signal */ sw=st->sw+offset; /* LSP interpolation (quantized and unquantized) */ lsp_interpolate(st->old_lsp, lsp, interp_lsp, st->lpcSize, sub, st->nbSubframes); lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, st->lpcSize, sub, st->nbSubframes); /* Make sure the filters are stable */ lsp_enforce_margin(interp_lsp, st->lpcSize, LSP_MARGIN); lsp_enforce_margin(interp_qlsp, st->lpcSize, LSP_MARGIN); /* Compute interpolated LPCs (quantized and unquantized) */ lsp_to_lpc(interp_lsp, interp_lpc, st->lpcSize,stack); lsp_to_lpc(interp_qlsp, interp_qlpc, st->lpcSize, stack); /* Compute analysis filter gain at w=pi (for use in SB-CELP) */ { spx_word32_t pi_g=LPC_SCALING; for (i=0;i<st->lpcSize;i+=2) { /*pi_g += -st->interp_qlpc[i] + st->interp_qlpc[i+1];*/ pi_g = ADD32(pi_g, SUB32(EXTEND32(interp_qlpc[i+1]),EXTEND32(interp_qlpc[i]))); } st->pi_gain[sub] = pi_g; }#ifdef VORBIS_PSYCHO { float curr_curve[128]; float fact = ((float)sub+1.0f)/st->nbSubframes; for (i=0;i<128;i++) curr_curve[i] = (1.0f-fact)*st->old_curve[i] + fact*st->curve[i]; curve_to_lpc(st->psy, curr_curve, bw_lpc1, bw_lpc2, 10); }#else /* Compute bandwidth-expanded (unquantized) LPCs for perceptual weighting */ bw_lpc(st->gamma1, interp_lpc, bw_lpc1, st->lpcSize); if (st->gamma2>=0) bw_lpc(st->gamma2, interp_lpc, bw_lpc2, st->lpcSize); else { for (i=0;i<st->lpcSize;i++) bw_lpc2[i]=0; } /*print_vec(st->bw_lpc1, 10, "bw_lpc");*/#endif /*FIXME: This will break if we change the window size */ speex_assert(st->windowSize-st->frameSize == st->subframeSize); if (sub==0) { for (i=0;i<st->subframeSize;i++) real_exc[i] = sw[i] = st->winBuf[i]; } else { for (i=0;i<st->subframeSize;i++) real_exc[i] = sw[i] = in[i+((sub-1)*st->subframeSize)]; } fir_mem16(real_exc, interp_qlpc, real_exc, st->subframeSize, st->lpcSize, st->mem_exc2, stack); if (st->complexity==0) response_bound >>= 1; compute_impulse_response(interp_qlpc, bw_lpc1, bw_lpc2, syn_resp, response_bound, st->lpcSize, stack); for (i=response_bound;i<st->subframeSize;i++) syn_resp[i]=VERY_SMALL; /* Compute zero response of A(z/g1) / ( A(z/g2) * A(z) ) */ for (i=0;i<st->lpcSize;i++) mem[i]=SHL32(st->mem_sp[i],1); for (i=0;i<st->subframeSize;i++) ringing[i] = VERY_SMALL;#ifdef SHORTCUTS2 iir_mem16(ringing, interp_qlpc, ringing, response_bound, st->lpcSize, mem, stack); for (i=0;i<st->lpcSize;i++) mem[i]=SHL32(st->mem_sw[i],1); filter_mem16(ringing, st->bw_lpc1, st->bw_lpc2, ringing, response_bound, st->lpcSize, mem, stack); SPEEX_MEMSET(&ringing[response_bound], 0, st->subframeSize-response_bound);#else iir_mem16(ringing, interp_qlpc, ringing, st->subframeSize, st->lpcSize, mem, stack); for (i=0;i<st->lpcSize;i++) mem[i]=SHL32(st->mem_sw[i],1); filter_mem16(ringing, bw_lpc1, bw_lpc2, ringing, st->subframeSize, st->lpcSize, mem, stack);#endif /* Compute weighted signal */ for (i=0;i<st->lpcSize;i++) mem[i]=st->mem_sw[i]; filter_mem16(sw, bw_lpc1, bw_lpc2, sw, st->subframeSize, st->lpcSize, mem, stack); if (st->complexity==0) for (i=0;i<st->lpcSize;i++) st->mem_sw[i]=mem[i]; /* Compute target signal (saturation prevents overflows on clipped input speech) */ for (i=0;i<st->subframeSize;i++) target[i]=EXTRACT16(SATURATE(SUB32(sw[i],PSHR32(ringing[i],1)),32767)); /* Reset excitation */ SPEEX_MEMSET(exc, 0, st->subframeSize); /* If we have a long-term predictor (otherwise, something's wrong) */ speex_assert (SUBMODE(ltp_quant)); { int pit_min, pit_max;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -