📄 wb_vad.c
字号:
{
test();
if ((st->stat_count == 0))
{
alpha_up = ALPHA_UP2; move16();
alpha_down = ALPHA_DOWN2; move16();
} else
{
alpha_up = 0; move16();
alpha_down = ALPHA3; move16();
bckr_add = 0; move16();
}
}
/* Update noise estimate (bckr_est) */
for (i = 0; i < COMPLEN; i++)
{
Word16 temp;
temp = sub(st->old_level[i], st->bckr_est[i]);
test();
if (temp < 0)
{ /* update downwards */
st->bckr_est[i] = add(-2, add(st->bckr_est[i],
mult_r(alpha_down, temp))); move16();
/* limit minimum value of the noise estimate to NOISE_MIN */
test();
if (sub(st->bckr_est[i], NOISE_MIN) < 0)
{
st->bckr_est[i] = NOISE_MIN; move16();
}
} else
{ /* update upwards */
st->bckr_est[i] = add(bckr_add, add(st->bckr_est[i],
mult_r(alpha_up, temp))); move16();
/* limit maximum value of the noise estimate to NOISE_MAX */
test();
if (sub(st->bckr_est[i], NOISE_MAX) > 0)
{
st->bckr_est[i] = NOISE_MAX; move16();
}
}
}
/* Update signal levels of the previous frame (old_level) */
for (i = 0; i < COMPLEN; i++)
{
st->old_level[i] = level[i]; move16();
}
}
/******************************************************************************
*
* Function : vad_decision
* Purpose : Calculates VAD_flag
*
*/
static Word16 vad_decision( /* return value : VAD_flag */
VadVars * st, /* i/o : State structure */
Word16 level[COMPLEN], /* i : sub-band levels of the input frame */
Word32 pow_sum /* i : power of the input frame */
)
{
/* Forms the VAD decision for one frame:
 *   1. accumulates a squared level-to-noise measure over all bands,
 *   2. derives the decision threshold vad_thr from the noise level and the
 *      speech level estimate,
 *   3. records the intermediate decision in st->vadreg,
 *   4. updates the background noise estimate,
 *   5. returns whatever hangover_addition() yields for this frame.
 * Side effects visible here: st->speech_level may be raised, st->vadreg is
 * shifted and possibly flagged, and noise_estimate_update() is called on st.
 * NOTE(review): norm_s, shl, shr, div_s, mult, add, sub, L_mac, L_add, L_shl,
 * L_sub, L_mult, extract_h, ilog2 and the test()/move16()/move32()/logic16()
 * calls are defined outside this block; they are presumably saturating
 * fixed-point basic operators and complexity counters - confirm. */
Word16 i;
Word32 L_snr_sum;
Word32 L_temp;
Word16 vad_thr, temp, noise_level;
Word16 low_power_flag;
Word16 hang_len, burst_len;
Word16 ilog2_speech_level, ilog2_noise_level;
Word16 temp2;
/* Calculate squared sum of the input levels (level) divided by the background noise components
 * (bckr_est). */
L_snr_sum = 0; move32();
for (i = 0; i < COMPLEN; i++)
{
Word16 exp;
/* Normalise the noise estimate before dividing, then undo the
 * normalisation (less UNIRSHFT - 1) on the quotient. */
exp = norm_s(st->bckr_est[i]);
temp = shl(st->bckr_est[i], exp);
/* The level is halved before the division.
 * NOTE(review): presumably to keep the numerator below the
 * normalised denominator - confirm against div_s() requirements. */
temp = div_s(shr(level[i], 1), temp);
temp = shl(temp, sub(exp, UNIRSHFT - 1));
/* Accumulate the square of the per-band ratio. */
L_snr_sum = L_mac(L_snr_sum, temp, temp);
}
/* Calculate average level of estimated background noise */
L_temp = 0; move32();
for (i = 1; i < COMPLEN; i++) /* ignore lowest band */
{
L_temp = L_add(L_temp, st->bckr_est[i]);
}
/* Scale the band sum and keep the upper 16 bits as the noise level. */
noise_level = extract_h(L_shl(L_temp, 12));
/* Floor for speech_level derived from the noise level and MIN_SPEECH_SNR.
 * If speech_level is below this floor, raise it to the floor. */
temp = shl(mult(noise_level, MIN_SPEECH_SNR), 3);
test();
if (sub(st->speech_level, temp) < 0)
{
st->speech_level = temp; move16();
}
ilog2_noise_level = ilog2(noise_level);
/* If SNR is very poor, speech_level is probably corrupted by the noise level. This is corrected
 * by subtracting the floor computed above (derived from MIN_SPEECH_SNR and noise_level) from
 * the speech level. */
ilog2_speech_level = ilog2(sub(st->speech_level, temp));
/* Noise-dependent part of the threshold (temp is reused here). */
temp = add(mult(NO_SLOPE, sub(ilog2_noise_level, NO_P1)), THR_HIGH);
/* Speech-level-dependent part, clamped to [SP_CH_MIN, SP_CH_MAX]. */
temp2 = add(SP_CH_MIN, mult(SP_SLOPE, sub(ilog2_speech_level, SP_P1)));
test();
if (sub(temp2, SP_CH_MIN) < 0)
{
temp2 = SP_CH_MIN; move16();
}
test();
if (sub(temp2, SP_CH_MAX) > 0)
{
temp2 = SP_CH_MAX; move16();
}
/* Final threshold is the sum of both parts, never below THR_MIN. */
vad_thr = add(temp, temp2);
test();
if (sub(vad_thr, THR_MIN) < 0)
{
vad_thr = THR_MIN; move16();
}
/* Shift VAD decision register: older decisions move one bit towards the LSB */
st->vadreg = shr(st->vadreg, 1); move16();
/* Make intermediate VAD decision: the newest flag lives in bit 0x4000 and is set when the
 * accumulated measure exceeds the threshold scaled by 512 * COMPLEN */
test();
if (L_sub(L_snr_sum, L_mult(vad_thr, 512 * COMPLEN)) > 0)
{
st->vadreg = (Word16) (st->vadreg | 0x4000); logic16();move16();
}
/* Check if the input power (pow_sum) is lower than the threshold VAD_POW_LOW */
test();
if (L_sub(pow_sum, VAD_POW_LOW) < 0)
{
low_power_flag = 1; move16();
} else
{
low_power_flag = 0; move16();
}
/* Update background noise estimates (runs after vadreg has received this frame's flag) */
noise_estimate_update(st, level);
/* Calculate values for hang_len and burst_len based on vad_thr */
hang_len = add(mult(HANG_SLOPE, sub(vad_thr, HANG_P1)), HANG_HIGH);
/* hang_len is limited from below to HANG_LOW; burst_len is not limited here */
test();
if (sub(hang_len, HANG_LOW) < 0)
{
hang_len = HANG_LOW; move16();
};
burst_len = add(mult(BURST_SLOPE, sub(vad_thr, BURST_P1)), BURST_HIGH);
/* The value returned to the caller is produced by hangover_addition() */
return (hangover_addition(st, low_power_flag, hang_len, burst_len));
}
/******************************************************************************
*
* Estimate_Speech()
* Purpose : Estimate speech level
*
* The maximum signal level is tracked in the variable sp_max.
* The required number of speech frames must occur within a window of
* SP_EST_COUNT frames, so noisy frames with only occasional VAD = "1"
* decisions do not affect the estimated speech_level.
*
*/
static void Estimate_Speech(
    VadVars * st,   /* i/o : State structure */
    Word16 in_level /* i   : level of the input frame */
)
{
    Word16 target; /* halved peak level used as the new speech-level target */
    Word16 rate;   /* adaptation constant chosen for this update */

    /* Restart the estimation window once the frames already spent without
     * activity make it impossible to collect SP_ACTIVITY_COUNT active frames
     * within SP_EST_COUNT frames, i.e. when
     * SP_ACTIVITY_COUNT > SP_EST_COUNT - st->sp_est_cnt + st->sp_max_cnt. */
    test();
    if (sub(sub(st->sp_est_cnt, st->sp_max_cnt), SP_EST_COUNT - SP_ACTIVITY_COUNT) > 0)
    {
        st->sp_max_cnt = 0; move16();
        st->sp_max = 0; move16();
        st->sp_est_cnt = 0; move16();
    }
    st->sp_est_cnt = add(st->sp_est_cnt, 1); move16();

    /* A frame counts as active only if it exceeds MIN_SPEECH_LEVEL1 and either
     * the newest intermediate VAD flag is set or the level is above the
     * current speech level estimate. Anything else leaves the state as is. */
    logic16();test();test();test();
    if (!(((st->vadreg & 0x4000) || (sub(in_level, st->speech_level) > 0))
          && (sub(in_level, MIN_SPEECH_LEVEL1) > 0)))
    {
        return;
    }

    /* Track the peak level seen among the active frames of this window. */
    test();
    if (sub(in_level, st->sp_max) > 0)
    {
        st->sp_max = in_level; move16();
    }
    st->sp_max_cnt = add(st->sp_max_cnt, 1); move16();

    /* Not enough active frames collected yet: keep accumulating. */
    test();
    if (sub(st->sp_max_cnt, SP_ACTIVITY_COUNT) < 0)
    {
        return;
    }

    /* Scale the peak down to get an "average" speech level. */
    target = shr(st->sp_max, 1);

    /* Use a different adaptation constant for rising and falling estimates. */
    test();
    rate = (sub(target, st->speech_level) > 0) ? ALPHA_SP_UP : ALPHA_SP_DOWN; move16();

    /* Only move the estimate when the target is above MIN_SPEECH_LEVEL2. */
    test();
    if (sub(target, MIN_SPEECH_LEVEL2) > 0)
    {
        st->speech_level = add(st->speech_level,
                               mult_r(rate, sub(target, st->speech_level))); move16();
    }

    /* The window is complete: clear every counter used for the estimation. */
    st->sp_max = 0; move16();
    st->sp_max_cnt = 0; move16();
    st->sp_est_cnt = 0; move16();
}
/******************************************************************************
* PUBLIC PROGRAM CODE
******************************************************************************/
/******************************************************************************
*
* Function: wb_vad_init
* Purpose: Allocates state memory and initializes state memory
*
*/
Word16 wb_vad_init( /* return: non-zero with error, zero for ok. */
    VadVars ** state /* i/o : receives the newly allocated state structure */
)
{
    VadVars *fresh;

    /* Without a place to store the result there is nothing to do. */
    if (state == NULL)
    {
        fprintf(stderr, "vad_init: invalid parameter\n");
        return -1;
    }

    /* Make sure the caller sees NULL if the allocation below fails. */
    *state = NULL;

    fresh = (VadVars *) malloc(sizeof *fresh);
    if (fresh == NULL)
    {
        fprintf(stderr, "vad_init: can not malloc state structure\n");
        return -1;
    }

    /* Bring the new structure to its initial state before handing it out. */
    wb_vad_reset(fresh);
    *state = fresh;

    return 0;
}
/******************************************************************************
*
* Function: wb_vad_reset
* Purpose: Initializes state memory
*
*/
Word16 wb_vad_reset( /* return: non-zero with error, zero for ok. */
    VadVars * state /* i/o : State structure */
)
{
    Word16 i, j;

    /* Reject a missing state structure; nothing is modified in that case. */
    if (state == (VadVars *) NULL)
    {
        fprintf(stderr, "vad_reset: invalid parameter\n");
        return -1;
    }

    /* Decision registers and counters. stat_count is tested against zero
     * elsewhere in this file when the noise-update rates are selected, so it
     * must be given a defined value here: the structure comes from malloc()
     * in wb_vad_init() and would otherwise be read uninitialised. */
    state->tone_flag = 0;
    state->vadreg = 0;
    state->hang_count = 0;
    state->burst_count = 0;
    state->stat_count = 0;

    /* initialize memory used by the filter bank */
    for (i = 0; i < F_5TH_CNT; i++)
    {
        for (j = 0; j < 2; j++)
        {
            state->a_data5[i][j] = 0;
        }
    }
    for (i = 0; i < F_3TH_CNT; i++)
    {
        state->a_data3[i] = 0;
    }

    /* Per-band level memories: the noise estimate, previous-frame level and
     * averaged level start at NOISE_INIT, the sub-frame level at zero. */
    for (i = 0; i < COMPLEN; i++)
    {
        state->bckr_est[i] = NOISE_INIT;
        state->old_level[i] = NOISE_INIT;
        state->ave_level[i] = NOISE_INIT;
        state->sub_level[i] = 0;
    }

    /* Speech level estimation state and the previous frame's power. */
    state->sp_est_cnt = 0;
    state->sp_max = 0;
    state->sp_max_cnt = 0;
    state->speech_level = SPEECH_LEVEL_INIT;
    state->prev_pow_sum = 0;

    return 0;
}
/******************************************************************************
*
* Function: wb_vad_exit
* Purpose: The memory used for state memory is freed
*
*/
void wb_vad_exit(
    VadVars ** state /* i/o : State structure; *state is set to NULL on release */
)
{
    /* Only act when there is both a handle and a structure behind it. */
    if (state != NULL && *state != NULL)
    {
        /* Release the state memory and clear the caller's pointer. */
        free(*state);
        *state = NULL;
    }
}
/******************************************************************************
*
* Function : wb_vad_tone_detection
* Purpose : Search maximum pitch gain from a frame. Set tone flag if
* pitch gain is high. This is used to detect
* signaling tones and other signals with high pitch gain.
*
*/
void wb_vad_tone_detection(
    VadVars * st, /* i/o : State struct */
    Word16 p_gain /* i   : pitch gain */
)
{
    Word16 history;

    /* Age the tone flag register by one frame. */
    history = shr(st->tone_flag, 1);
    st->tone_flag = history; move16();

    /* Mark the newest position (bit 0x4000) when the pitch gain exceeds
     * TONE_THR. */
    test();
    if (sub(p_gain, TONE_THR) > 0)
    {
        st->tone_flag = (Word16) (history | 0x4000); logic16();move16();
    }
}
/******************************************************************************
*
* Function : wb_vad
* Purpose : Main program for Voice Activity Detection (VAD) for AMR
*
*/
Word16 wb_vad( /* Return value : VAD Decision, 1 = speech, 0 = noise */
    VadVars * st,   /* i/o : State structure */
    Word16 in_buf[] /* i   : samples of the input frame */
)
{
    Word16 band_level[COMPLEN]; /* per-band levels filled in by filter_bank() */
    Word16 n, band;
    Word16 decision, in_level;
    Word32 frame_pow, two_frame_pow, level_sum;

    /* Power of the current frame: accumulate the squared samples. */
    frame_pow = 0L; move32();
    for (n = 0; n < FRAME_LEN; n++)
    {
        frame_pow = L_mac(frame_pow, in_buf[n], in_buf[n]);
    }

    /* Combine with the previous frame's power, then remember the current
     * frame's power for the next call. */
    two_frame_pow = L_add(frame_pow, st->prev_pow_sum); move32();
    st->prev_pow_sum = frame_pow; move32();

    /* With very low input power, drop the newest tone flags (keep only the
     * bits selected by the 0x1fff mask). */
    test();
    if (L_sub(two_frame_pow, POW_TONE_THR) < 0)
    {
        st->tone_flag = (Word16) (st->tone_flag & 0x1fff); logic16();move16();
    }

    /* Split the frame into bands and measure the level of each one. */
    filter_bank(st, in_buf, band_level);

    /* Derive the VAD decision from the band levels and the combined power. */
    decision = vad_decision(st, band_level, two_frame_pow);

    /* Input level for the speech estimator: sum of all bands except the
     * lowest, scaled and reduced to 16 bits. */
    level_sum = 0; move32();
    for (band = 1; band < COMPLEN; band++)
    {
        level_sum = L_add(level_sum, band_level[band]);
    }
    in_level = extract_h(L_shl(level_sum, 12));

    /* Update the speech level estimate with this frame's level. */
    Estimate_Speech(st, in_level);

    return (decision);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -