📄 rda.c
字号:
for (i = 0; i < FREQBANDS; i++)
{
/* Update Background Noise Estimate */
/* E. Chen: if(e_mem->frame_energy_sm[i] < e_mem->band_noise_sm[i]) {
* e_mem->band_noise_sm[i] = e_mem->frame_energy_sm[i]; */
if (e_mem->frame_energy_sm[i] < e_mem->band_noise_sm[i])
{
e_mem->band_noise_sm[i] = e_mem->frame_energy_sm[i];
}
}
for (i = 0; i < FREQBANDS; i++)
{
if (e_mem->band_noise_sm[i] > HIGH_THRESH_LIM)
{
e_mem->band_noise_sm[i] = HIGH_THRESH_LIM;
}
if (e_mem->band_noise_sm[i] < LOWEST_LEVEL[i])
{
e_mem->band_noise_sm[i] = LOWEST_LEVEL[i];
}
}
/* end of updating background noise energy */
/***************************************************************/
/***************************************************************/
/* Section 2.4.4.2.2 Updating Signal Energy Estimate */
/* pitchrun keeps track of the number of frames in a row with periodicity */
if (beta > NACF_SOLID_VOICED)
{
/* E. Chen: e_mem->pitchrun += 1; */
e_mem->pitchrun = add(e_mem->pitchrun, 1);
}
else
{
e_mem->pitchrun = 0;
}
if (e_mem->pitchrun > STATVOICED)
{ /* decrease the signal energy */
/* if we're in a voiced segment */
e_mem->snr_stat_once = 1; /* confidence that we have seen speech */
/* indicates a stationary voiced segment */
for (i = 0; i < FREQBANDS; i++)
{
/* E. Chen: e_mem->signal_energy[i] *= SCALE_DOWN_ENERGY; *//* *= 0.97 */
e_mem->signal_energy[i] = L_mpy_ls(e_mem->signal_energy[i],
SCALE_DOWN_ENERGY);
}
}
/* assume that the highest energy sounds are speech signals and */
/* thus the highest energy frames contain speech and can be used */
/* to define the SNR */
for (i = 0; i < FREQBANDS; i++)
{
/* Update Signal Energy Estimate */
if (e_mem->frame_energy_sm[i] > e_mem->signal_energy[i])
{
e_mem->signal_energy[i] = e_mem->frame_energy_sm[i];
}
}
/* end updating signal energy estimate */
/************************************************************************/
/* This is now done near the top of select_mode1() */
/* update SNR estimates */
for (i = 0; i < FREQBANDS; i++)
{
/* E. Chen: if(e_mem->band_noise_sm[i] > 0.0) { */
if (e_mem->band_noise_sm[i] > 0)
{
/* E. Chen: e_mem->snr[i] = e_mem->signal_energy[i]/e_mem->band_noise_sm[i]; */
/* E. Chen: assumed that e_mem->signal_energy and e_mem->band_noise_sm have */
/* bits in the highest word. this allows 'norm_l(x)' to be */
/* replaced by 'norm_s(extract_h(x))' */
n_shiftcnt = sub(norm_l(e_mem->signal_energy[i]), 1);
d_shiftcnt = norm_l(e_mem->band_noise_sm[i]);
snr_temp = L_divide(L_shl(e_mem->signal_energy[i], n_shiftcnt), L_shl(e_mem->band_noise_sm[i], d_shiftcnt));
e_mem->snr[i] = L_shr(snr_temp, sub(23, sub(d_shiftcnt, n_shiftcnt)));
}
else
{
/* E. Chen: e_mem->snr[i] = 100000000.0; *//* a very high snr if noise =0.0 */
e_mem->snr[i] = 256000000;
}
}
if (e_mem->snr_stat_once != 0)
{
/* we have seen some speech and are confident in our SNR measure */
e_mem->snr_stat_once = 1;
/* UPDATE THIS FOR GENERAL SNR MAPS! */
for (i = 0; i < FREQBANDS; i++)
{
/* E. Chen: if(e_mem->snr[i] > 0.0){ */
if (e_mem->snr[i] > 0)
{
/* E. Chen: e_mem->snr_map[i] = (int) rint((10*log10(e_mem->snr[i])-20.0)/5.0); */
snr_temp = L_sub(L_add(fnLog10(e_mem->snr[i]), 626255212), 161614248);
e_mem->snr_map[i] = extract_h(L_add(L_sub(L_shr(
snr_temp, 15 - 5 - 1), 4 * 65536), 32768));
}
else
{
e_mem->snr_map[i] = 0;
}
if (e_mem->snr_map[i] < 0)
e_mem->snr_map[i] = 0;
if (e_mem->snr_map[i] > 7)
e_mem->snr_map[i] = 7;
}
}
else
{
/* we haven't seen speech, aren't confident in our estimate of the */
/* signal energy and will use a nominal energy of */
/* VOICE_INITIAL dB (-18dbm0) for our signal energy */
for (i = 0; i < FREQBANDS; i++)
{
/* E. Chen: if(e_mem->band_noise_sm[i] > 0.0){ */
if (e_mem->band_noise_sm[i] > 0)
{
if (i == 0)
{
/* E. Chen: e_mem->snr_map[i]= (int) rint((VOICE_INITIAL- 10*log10(e_mem->band_noise_sm[i])-20.0)/5.0); */
snr_temp = L_add(fnLog10(e_mem->band_noise_sm[i]), 626255212);
e_mem->snr_map[i] = extract_h(L_add(L_sub((((VOICE_INITIAL - 20) * 65536) / 5),
L_shr(snr_temp, 15 - 5 - 1)), 32768));
}
else
{
/* E. Chen: e_mem->snr_map[i]= (int) rint((VOICE_INITIAL_HI- 10*log10(e_mem->band_noise_sm[i])-20.0)/5.0); */
snr_temp = L_add(fnLog10(e_mem->band_noise_sm[i]), 626255212);
e_mem->snr_map[i] = extract_h(L_add(L_sub((((VOICE_INITIAL_HI - 20) * 65536) / 5),
L_shr(snr_temp, 15 - 5 - 1)), 32768));
}
}
else
{
e_mem->snr_map[i] = 7;
}
if (e_mem->snr_map[i] < 0)
e_mem->snr_map[i] = 0;
if (e_mem->snr_map[i] > 7)
e_mem->snr_map[i] = 7;
}
}
e_mem->last_rate = rate;
e_mem->last_rate_2nd_stage = rate;
}
/**********************************************************************/
/* Select rate. */
/**********************************************************************/
short select_rate(
long *R_interp, /* E. Chen: float* */
short max_rate,
short min_rate,
short beta /* E. Chen: float */
)
{
static short first = 0;
short rate;
ENCODER_MEM *e_mem;
short i; /* E. Chen: int */
short k; /* E. Chen: int */
e_mem = &rate_mem; /* done for rate integration with AT&T simulation */
if (first == 0)
{
/*do initialization of rate_mem */
initialize_rate(e_mem);
first = 1;
}
/* this is the acf of the noise reducing prediction filter */
/* now filter the input speech by bandpass filters and */
/* derive the necessary band energies */
band_energy_fcn(R_interp, e_mem->band_power, e_mem);
/* now our threshold comparison and background noise estimation */
/* is done for all frequency bands */
/* rate thresholds are calculated below, but not saved */
for (k = 0; k < FREQBANDS; k++)
{
e_mem->band_rate[k] = EIGHTH;
for (i = 0; i < 2; i++)
{
/* rate_thresh[k][i] = THRESH_SNR[k][e_mem->snr_map[k]][i]* e_mem->band_noise_sm[k]; */
/* since we changed SMSNR to 0.6 from 0.8 */
/* E. Chen: if (e_mem->band_power[k]>THRESH_SNR[k][e_mem->snr_map[k]][i]* e_mem->band_noise_sm[k]) */
if (L_shl(e_mem->band_power[k], 3) > L_shl(L_mpy_ls(e_mem->band_noise_sm[k], THRESH_SNR[k][e_mem->snr_map[k]][i]), 8 - 1))
{
if (e_mem->band_rate[k] == EIGHTH)
{
e_mem->band_rate[k] = HALFRATE_VOICED;
}
else if (e_mem->band_rate[k] == HALFRATE_VOICED)
{
e_mem->band_rate[k] = FULLRATE_VOICED;
}
}
}
} /* do threshold comparisons for full and half rates */
/* use 10log10(e_mem->band_power[k]/band_noise_sm[k]) */
/* the maximum rate for all bands is chosen as the rate */
rate = e_mem->band_rate[0];
for (k = 1; k < FREQBANDS; k++)
{
if (e_mem->band_rate[k] > rate)
rate = e_mem->band_rate[k];
}
/* Section 2.4.4.1.3 Calculating Hangover Frames as Function of SNR */
if ((e_mem->num_full_frames > FULL_THRESH) || e_mem->hangover_in_progress == 1)
{
/* only use hangover after so many full rate frames in a row */
if (rate != FULLRATE_VOICED)
{
/* E. Chen: e_mem->hangover += 1; */
e_mem->hangover = add(e_mem->hangover, 1);
if (e_mem->hangover <= hangover[e_mem->snr_map[0]])
{
rate = FULLRATE_VOICED;
e_mem->hangover_in_progress = 1;
}
else
{
e_mem->hangover_in_progress = 0;
}
}
else
{
e_mem->hangover_in_progress = 0;
}
}
if ((rate == FULLRATE_VOICED) && (e_mem->hangover_in_progress == 0))
{
e_mem->hangover = 0;
/* E. Chen: e_mem->num_full_frames+=1; */
e_mem->num_full_frames = add(e_mem->num_full_frames, 1);
}
else
{
e_mem->num_full_frames = 0;
}
/* end of hangover algorithm */
/* assuming only 1/8,1/2,and full rates pluse some 1/2 and full rates modes */
/* if last rate is full force 1/2 rate before going to 1/8 */
/* Section 2.4.4.1.4 Constraints on Rate Selection */
if ((e_mem->last_rate_1st_stage == FULLRATE_VOICED) && rate == EIGHTH)
{
rate = HALFRATE_VOICED;
}
/* allows fast convergence for EIGHTH rate */
if (e_mem->frame_num == 0)
{
rate = FULLRATE_VOICED;
}
if (rate > max_rate)
{
rate = max_rate;
}
if (rate < min_rate)
{
rate = min_rate;
}
e_mem->last_rate_1st_stage = rate;
e_mem->last_rate = rate;
/* update background noise and signal energy estimates */
update_background(rate, e_mem, beta);
/* E. Chen: e_mem->frame_num += 1; */
e_mem->frame_num = add(e_mem->frame_num, 1);
return (rate);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -