/* File: wb_vad.c */
/*-------------------------------------------------------------------*
* WB_VAD.C *
*-------------------------------------------------------------------*
* Voice Activity Detection. *
*-------------------------------------------------------------------*/
/******************************************************************************
* INCLUDE FILES
******************************************************************************/
#include <stdlib.h>
#include <stdio.h>
#include "cnst.h"
#include "wb_vad.h"
#include "typedef.h"
#include "basic_op.h"
#include "count.h"
#include "math_op.h"
#include "wb_vad_c.h"
/******************************************************************************
* PRIVATE PROGRAM CODE
******************************************************************************/
/******************************************************************************
*
*     Function     : ilog2
*     Purpose      : Scaled base-2 logarithm of a 16-bit value.
*
*                    ilog2(mant) ~= -1024 * log2(mant * 2^-31)
*
*                    Non-positive inputs are replaced by 1 before conversion,
*                    so the effective input range is [1, 32767].
*
*                    Nominal mapping quoted by the original header:
*
*                        input      output
*                        32768      16384
*                        1          31744
*
*                    The original header also quotes a maximum error of
*                    0.0380% for inputs in the range [1, 2^16].
*
*/
Word16 ilog2(                              /* return: output value of the log2 */
    Word16 mant                            /* i: value to be converted         */
)
{
    Word16 pass;                           /* squaring pass counter                  */
    Word16 exp_in;                         /* normalisation shift of the input       */
    Word16 exp_pow;                        /* normalisation shift of the power       */
    Word16 frac;                           /* normalised mantissa / its power        */
    Word16 int_part, pow_part, result;
    Word32 pow_l;

    /* only positive values can be converted: clamp everything else to 1 */
    test();
    if (mant <= 0)
    {
        mant = 1;                          move16();
    }

    /* normalise the input and remember how far it had to be shifted */
    exp_in = norm_s(mant);
    frac = shl(mant, exp_in);

    /* square the normalised mantissa four times in total: three 16-bit
     * squarings followed by one squaring into a 32-bit result */
    for (pass = 0; pass < 3; pass++)
    {
        frac = mult(frac, frac);
    }
    pow_l = L_mult(frac, frac);

    /* renormalise the 32-bit power and keep its upper 16 bits */
    exp_pow = norm_l(pow_l);
    frac = extract_h(L_shl(pow_l, exp_pow));

    /* assemble the result: input exponent weighted by 1024, power exponent
     * weighted by 64, and a correction taken from the top bits of the power */
    int_part = shl(add(exp_in, 16), 10);
    pow_part = shl(exp_pow, 6);
    result = add(int_part, pow_part);
    result = add(result, 127);
    result = sub(result, shr(frac, 8));

    return result;
}
/******************************************************************************
*
*     Function     : filter5
*     Purpose      : Fifth-order half-band lowpass/highpass filter pair with
*                    decimation.
*
*                    Each input sample is run through its own first-order
*                    recursive section (coefficients COEFF5_1 and COEFF5_2,
*                    one memory word each). The sum of the two section
*                    outputs is written back to *in0 and their difference
*                    to *in1, both after L_shl by 15 and extract_h.
*
*/
static void filter5(
    Word16 * in0,                          /* i/o : input values; output low-pass part  */
    Word16 * in1,                          /* i/o : input values; output high-pass part */
    Word16 data[]                          /* i/o : filter memory (two words)           */
)
{
    Word16 state;                          /* filter memory of the current section */
    Word16 node;                           /* new value for the filter memory      */
    Word16 branch0, branch1;               /* outputs of the two sections          */

    /* section driven by *in0: coefficient COEFF5_1, memory data[0] */
    state = data[0];
    node = sub(*in0, mult(COEFF5_1, state));
    branch0 = add(state, mult(COEFF5_1, node));
    data[0] = node;                        move16();

    /* section driven by *in1: coefficient COEFF5_2, memory data[1] */
    state = data[1];
    node = sub(*in1, mult(COEFF5_2, state));
    branch1 = add(state, mult(COEFF5_2, node));
    data[1] = node;                        move16();

    /* combine the branches: sum -> *in0, difference -> *in1 */
    *in0 = extract_h(L_shl(L_add(branch0, branch1), 15));     move16();
    *in1 = extract_h(L_shl(L_sub(branch0, branch1), 15));     move16();
}
/******************************************************************************
*
*     Function     : filter3
*     Purpose      : Third-order half-band lowpass/highpass filter pair with
*                    decimation.
*
*                    Only *in1 is run through a first-order recursive section
*                    (coefficient COEFF3, one memory word); *in0 is used
*                    directly. The sum of *in0 and the section output is
*                    written to *in0 and their difference to *in1, both after
*                    L_shl by 15 and extract_h.
*
*/
static void filter3(
    Word16 * in0,                          /* i/o : input values; output low-pass part  */
    Word16 * in1,                          /* i/o : input values; output high-pass part */
    Word16 * data                          /* i/o : filter memory (one word)            */
)
{
    Word16 state;                          /* filter memory on entry          */
    Word16 node;                           /* new value for the filter memory */
    Word16 branch;                         /* output of the recursive section */

    /* recursive section driven by *in1 */
    state = *data;
    node = sub(*in1, mult(COEFF3, state));
    branch = add(state, mult(COEFF3, node));
    *data = node;                          move16();

    /* the difference is stored first: it needs the incoming *in0, which
     * is overwritten by the sum on the following line */
    *in1 = extract_h(L_shl(L_sub(*in0, branch), 15));         move16();
    *in0 = extract_h(L_shl(L_add(*in0, branch), 15));         move16();
}
/******************************************************************************
*
*     Function     : level_calculation
*     Purpose      : Calculate signal level in a sub-band. Level is calculated
*                    by summing absolute values of the input data.
*
*                    The samples are read from data[ind_m * i + ind_a].
*                    The sum over the end of the frame (i = count1 ..
*                    count2 - 1) is scaled and stored to (*sub_level) for the
*                    next call. The returned level covers the whole frame
*                    (i = 0 .. count2 - 1) plus the value that was stored in
*                    (*sub_level) on entry.
*
*/
static Word16 level_calculation(           /* return: signal level */
    Word16 data[],                         /* i   : signal buffer                                   */
    Word16 * sub_level,                    /* i   : level calculated at the end of the previous     */
                                           /*       frame                                           */
                                           /* o   : level of signal calculated from the last        */
                                           /*       (count2 - count1) samples                       */
    Word16 count1,                         /* i   : index of the first sample of the frame end      */
    Word16 count2,                         /* i   : number of samples to be counted                 */
    Word16 ind_m,                          /* i   : step size for the index of the data buffer      */
    Word16 ind_a,                          /* i   : starting index of the data buffer               */
    Word16 scale                           /* i   : scaling for the level calculation               */
)
{
    Word32 tail_acc;                       /* sum over the end of the frame            */
    Word32 frame_acc;                      /* sum over the frame plus carried-in level */
    Word16 n;

    /* end of the frame: samples count1 .. count2 - 1 */
    tail_acc = 0L;                         move32();
    n = count1;
    while (n < count2)
    {
        tail_acc = L_mac(tail_acc, 1, abs_s(data[ind_m * n + ind_a]));
        n++;
    }

    /* start the frame sum from the tail plus the level carried over in
     * *sub_level (brought back to accumulator scale), then replace the
     * carried level by the scaled tail of this frame */
    frame_acc = L_add(tail_acc, L_shl(*sub_level, sub(16, scale)));
    *sub_level = extract_h(L_shl(tail_acc, scale));           move16();

    /* beginning of the frame: samples 0 .. count1 - 1 */
    n = 0;
    while (n < count1)
    {
        frame_acc = L_mac(frame_acc, 1, abs_s(data[ind_m * n + ind_a]));
        n++;
    }

    return extract_h(L_shl(frame_acc, scale));
}
/******************************************************************************
*
*     Function     : filter_bank
*     Purpose      : Divide input signal into bands and calculate level of
*                    the signal in each band.
*
*                    The input frame is first shifted down by one bit into a
*                    local buffer, then split in place by five passes of
*                    filter5()/filter3() over interleaved sample positions.
*                    Finally level_calculation() is called once per band,
*                    from band 11 down to band 0.
*
*/
static void filter_bank(
    VadVars * st,                          /* i/o : State struct                 */
    Word16 in[],                           /* i   : input frame                  */
    Word16 level[]                         /* o   : signal levels at each band   */
)
{
    /* Arguments passed to level_calculation() for each band, indexed by
     * band number. */
    static const struct
    {
        Word16 count1;
        Word16 count2;
        Word16 ind_m;
        Word16 ind_a;
        Word16 scale;
    } band_cfg[12] =
    {
        /* count1,             count2,         ind_m, ind_a, scale */
        { FRAME_LEN / 32 - 6,  FRAME_LEN / 32, 32,     0,    17 },   /*    0 -  200 Hz */
        { FRAME_LEN / 32 - 6,  FRAME_LEN / 32, 32,    16,    17 },   /*  200 -  400 Hz */
        { FRAME_LEN / 32 - 6,  FRAME_LEN / 32, 32,    24,    17 },   /*  400 -  600 Hz */
        { FRAME_LEN / 32 - 6,  FRAME_LEN / 32, 32,     8,    17 },   /*  600 -  800 Hz */
        { FRAME_LEN / 16 - 12, FRAME_LEN / 16, 16,    12,    16 },   /*  800 - 1200 Hz */
        { FRAME_LEN / 16 - 12, FRAME_LEN / 16, 16,     4,    16 },   /* 1200 - 1600 Hz */
        { FRAME_LEN / 16 - 12, FRAME_LEN / 16, 16,     6,    16 },   /* 1600 - 2000 Hz */
        { FRAME_LEN / 16 - 12, FRAME_LEN / 16, 16,    14,    16 },   /* 2000 - 2400 Hz */
        { FRAME_LEN / 8 - 24,  FRAME_LEN / 8,   8,     2,    15 },   /* 2400 - 3200 Hz */
        { FRAME_LEN / 8 - 24,  FRAME_LEN / 8,   8,     3,    15 },   /* 3200 - 4000 Hz */
        { FRAME_LEN / 8 - 24,  FRAME_LEN / 8,   8,     7,    15 },   /* 4000 - 4800 Hz */
        { FRAME_LEN / 4 - 48,  FRAME_LEN / 4,   4,     1,    14 }    /* 4800 - 6400 Hz */
    };

    Word16 n, band;
    Word16 *p;                             /* start of the sample group being filtered */
    Word16 tmp_buf[FRAME_LEN];

    /* shift input 1 bit down for safe scaling */
    for (n = 0; n < FRAME_LEN; n++)
    {
        tmp_buf[n] = shr(in[n], 1);        move16();
    }

    /* run the filter bank */

    /* pass 1: groups of 2 samples */
    for (n = 0; n < FRAME_LEN / 2; n++)
    {
        p = &tmp_buf[2 * n];
        filter5(p, p + 1, st->a_data5[0]);
    }

    /* pass 2: groups of 4 samples */
    for (n = 0; n < FRAME_LEN / 4; n++)
    {
        p = &tmp_buf[4 * n];
        filter5(p, p + 2, st->a_data5[1]);
        filter5(p + 1, p + 3, st->a_data5[2]);
    }

    /* pass 3: groups of 8 samples */
    for (n = 0; n < FRAME_LEN / 8; n++)
    {
        p = &tmp_buf[8 * n];
        filter5(p, p + 4, st->a_data5[3]);
        filter5(p + 2, p + 6, st->a_data5[4]);
        filter3(p + 3, p + 7, &st->a_data3[0]);
    }

    /* pass 4: groups of 16 samples */
    for (n = 0; n < FRAME_LEN / 16; n++)
    {
        p = &tmp_buf[16 * n];
        filter3(p, p + 8, &st->a_data3[1]);
        filter3(p + 4, p + 12, &st->a_data3[2]);
        filter3(p + 6, p + 14, &st->a_data3[3]);
    }

    /* pass 5: groups of 32 samples */
    for (n = 0; n < FRAME_LEN / 32; n++)
    {
        p = &tmp_buf[32 * n];
        filter3(p, p + 16, &st->a_data3[4]);
        filter3(p + 8, p + 24, &st->a_data3[5]);
    }

    /* calculate levels in each frequency band, highest band first */
    for (band = 11; band >= 0; band--)
    {
        level[band] = level_calculation(tmp_buf, &st->sub_level[band],
                                        band_cfg[band].count1,
                                        band_cfg[band].count2,
                                        band_cfg[band].ind_m,
                                        band_cfg[band].ind_a,
                                        band_cfg[band].scale);  move16();
    }
}
/******************************************************************************
*
* Function : update_cntrl
* Purpose : Control update of the background noise estimate.
*
* Maintains st->stat_count and the running average st->ave_level[]:
* - stat_count is reset to STAT_COUNT when (tone_flag & 0x7c00) is fully
* set, when (vadreg & 0x7f80) is zero, or when the accumulated
* level/average ratio exceeds STAT_THR;
* - otherwise it is decremented (not below zero) when bit 0x4000 of
* vadreg is set;
* - ave_level[] is then moved towards level[] with a smoothing factor
* chosen from stat_count and vadreg.
*
* NOTE(review): test(), logic16() and move16() look like complexity-counter
* instrumentation (count.h); their placement is part of the original code
* and has been left untouched - confirm before moving any of them.
*
*/
static void update_cntrl(
VadVars * st, /* i/o : State structure */
Word16 level[] /* i : sub-band levels of the input frame */
)
{
Word16 i, temp, stat_rat, exp;
Word16 num, denom;
Word16 alpha;
/* if a tone has been detected for a while, initialize stat_count */
/* (all of bits 10..14 of tone_flag must be set) */
logic16();test();
if (sub((Word16) (st->tone_flag & 0x7c00), 0x7c00) == 0)
{
st->stat_count = STAT_COUNT; move16();
} else
{
/* if 8 last vad-decisions have been "0", reinitialize stat_count */
/* (bits 7..14 of vadreg are all clear) */
logic16();test();
if ((st->vadreg & 0x7f80) == 0)
{
st->stat_count = STAT_COUNT; move16();
} else
{
/* accumulate, over all COMPLEN bands, the ratio between the larger and */
/* the smaller of the current level and its running average */
stat_rat = 0; move16();
for (i = 0; i < COMPLEN; i++)
{
/* num takes the larger value, denom the smaller one */
test();
if (sub(level[i], st->ave_level[i]) > 0)
{
num = level[i]; move16();
denom = st->ave_level[i]; move16();
} else
{
num = st->ave_level[i];move16();
denom = level[i]; move16();
}
/* Limit minimum value of num and denom to STAT_THR_LEVEL */
test();
if (sub(num, STAT_THR_LEVEL) < 0)
{
num = STAT_THR_LEVEL; move16();
}
test();
if (sub(denom, STAT_THR_LEVEL) < 0)
{
denom = STAT_THR_LEVEL;move16();
}
/* normalise denom before the division; the shift count exp is */
/* compensated in the right shift applied to the quotient below */
exp = norm_s(denom);
denom = shl(denom, exp);
/* stat_rat = num/denom * 64 */
temp = div_s(shr(num, 1), denom);
stat_rat = add(stat_rat, shr(temp, sub(8, exp)));
}
/* compare stat_rat with a threshold and update stat_count */
test();
if (sub(stat_rat, STAT_THR) > 0)
{
st->stat_count = STAT_COUNT; move16();
} else
{
/* ratio is below the threshold: count down towards zero, but only */
/* when bit 0x4000 of vadreg is set */
logic16();test();
if ((st->vadreg & 0x4000) != 0)
{
test();
if (st->stat_count != 0)
{
st->stat_count = sub(st->stat_count, 1); move16();
}
}
}
}
}
/* Update average amplitude estimate for stationarity estimation */
/* alpha selection: */
/* stat_count == STAT_COUNT -> 32767 */
/* else bit 0x4000 of vadreg is clear -> ALPHA5 */
/* otherwise -> ALPHA4 */
alpha = ALPHA4; move16();
test();test();logic16();
if (sub(st->stat_count, STAT_COUNT) == 0)
{
alpha = 32767; move16();
} else if ((st->vadreg & 0x4000) == 0)
{
logic16();test();
alpha = ALPHA5; move16();
}
/* ave_level[i] += alpha * (level[i] - ave_level[i]), using mult_r */
for (i = 0; i < COMPLEN; i++)
{
st->ave_level[i] = add(st->ave_level[i],
mult_r(alpha, sub(level[i], st->ave_level[i]))); move16();
}
}
/******************************************************************************
*
*     Function     : hangover_addition
*     Purpose      : Add hangover after speech bursts.
*
*                    Returns 1 while bit 0x4000 of st->vadreg is set, and
*                    keeps returning 1 for up to hang_len further calls once
*                    at least burst_len consecutive calls have had that bit
*                    set. A non-zero low_power clears both counters and
*                    forces the result to 0.
*
*/
static Word16 hangover_addition(           /* return: VAD_flag indicating final VAD decision */
    VadVars * st,                          /* i/o : State structure                          */
    Word16 low_power,                      /* i   : flag power of the input frame            */
    Word16 hang_len,                       /* i   : hangover length                          */
    Word16 burst_len                       /* i   : minimum burst length for hangover        */
                                           /*       addition                                 */
)
{
    /* low input power: clear both counters and report "0" */
    test();
    if (low_power != 0)
    {
        st->burst_count = 0;               move16();
        st->hang_count = 0;                move16();
        return 0;
    }

    logic16();test();
    if ((st->vadreg & 0x4000) == 0)
    {
        /* bit 0x4000 of vadreg is clear: the burst is over, so fall back
         * on whatever hangover is left */
        st->burst_count = 0;               move16();

        test();
        if (st->hang_count <= 0)
        {
            return 0;
        }
        st->hang_count = sub(st->hang_count, 1);      move16();
        return 1;
    }

    /* bit 0x4000 of vadreg is set: extend the burst and (re)arm the
     * hangover once the burst is long enough */
    st->burst_count = add(st->burst_count, 1);        move16();

    test();
    if (sub(st->burst_count, burst_len) >= 0)
    {
        st->hang_count = hang_len;         move16();
    }
    return 1;
}
/******************************************************************************
*
* Function : noise_estimate_update
* Purpose : Update of background noise estimate
*
*/
static void noise_estimate_update(
VadVars * st, /* i/o : State structure */
Word16 level[] /* i : sub-band levels of the input frame */
)
{
Word16 i, alpha_up, alpha_down, bckr_add;
/* Control update of bckr_est[] */
update_cntrl(st, level);
/* Reason for using bckr_add is to avoid problems caused by fixed-point dynamics when noise level and
* required change is very small. */
bckr_add = 2; move16();
/* Choose update speed */
logic16();test();
if ((0x7800 & st->vadreg) == 0)
{
alpha_up = ALPHA_UP1; move16();
alpha_down = ALPHA_DOWN1; move16();
} else
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -